diff options
Diffstat (limited to 'net')
443 files changed, 32112 insertions, 17254 deletions
diff --git a/net/802/fc.c b/net/802/fc.c index cb3475ea6fda..34cf1ee014b8 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -82,13 +82,13 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev, static int fc_rebuild_header(struct sk_buff *skb) { +#ifdef CONFIG_INET struct fch_hdr *fch=(struct fch_hdr *)skb->data; struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr)); if(fcllc->ethertype != htons(ETH_P_IP)) { printk("fc_rebuild_header: Don't know how to resolve type %04X addresses ?\n", ntohs(fcllc->ethertype)); return 0; } -#ifdef CONFIG_INET return arp_find(fch->daddr, skb); #else return 0; diff --git a/net/802/psnap.c b/net/802/psnap.c index b3cfe5a14fca..70980baeb682 100644 --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -1,7 +1,7 @@ /* * SNAP data link layer. Derived from 802.2 * - * Alan Cox <Alan.Cox@linux.org>, + * Alan Cox <alan@lxorguk.ukuu.org.uk>, * from the 802.2 layer by Greg Page. * Merged in additions from Greg Page's psnap.c. * diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index b661f47bf10a..f0e335aa20df 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -394,6 +394,7 @@ static void vlan_transfer_features(struct net_device *dev, vlandev->features &= ~dev->vlan_features; vlandev->features |= dev->features & dev->vlan_features; + vlandev->gso_max_size = dev->gso_max_size; if (old_features != vlandev->features) netdev_features_change(vlandev); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 916061f681b6..68ced4bf158c 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -3,11 +3,20 @@ #include <linux/if_vlan.h> #include "vlan.h" +struct vlan_hwaccel_cb { + struct net_device *dev; +}; + +static inline struct vlan_hwaccel_cb *vlan_hwaccel_cb(struct sk_buff *skb) +{ + return (struct vlan_hwaccel_cb *)skb->cb; +} + /* VLAN rx hw acceleration helper. This acts like netif_{rx,receive_skb}(). */ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, u16 vlan_tci, int polling) { - struct net_device_stats *stats; + struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); if (skb_bond_should_drop(skb)) { dev_kfree_skb_any(skb); @@ -15,23 +24,35 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, } skb->vlan_tci = vlan_tci; + cb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); + + return (polling ? netif_receive_skb(skb) : netif_rx(skb)); +} +EXPORT_SYMBOL(__vlan_hwaccel_rx); + +int vlan_hwaccel_do_receive(struct sk_buff *skb) +{ + struct vlan_hwaccel_cb *cb = vlan_hwaccel_cb(skb); + struct net_device *dev = cb->dev; + struct net_device_stats *stats; + netif_nit_deliver(skb); - skb->dev = vlan_group_get_device(grp, vlan_tci & VLAN_VID_MASK); - if (skb->dev == NULL) { - dev_kfree_skb_any(skb); - /* Not NET_RX_DROP, this is not being dropped - * due to congestion. */ - return NET_RX_SUCCESS; + if (dev == NULL) { + kfree_skb(skb); + return -1; } - skb->dev->last_rx = jiffies; + + skb->dev = dev; + skb->priority = vlan_get_ingress_priority(dev, skb->vlan_tci); skb->vlan_tci = 0; - stats = &skb->dev->stats; + dev->last_rx = jiffies; + + stats = &dev->stats; stats->rx_packets++; stats->rx_bytes += skb->len; - skb->priority = vlan_get_ingress_priority(skb->dev, vlan_tci); switch (skb->pkt_type) { case PACKET_BROADCAST: break; @@ -43,13 +64,12 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, * This allows the VLAN to have a different MAC than the * underlying device, and still route correctly. */ if (!compare_ether_addr(eth_hdr(skb)->h_dest, - skb->dev->dev_addr)) + dev->dev_addr)) skb->pkt_type = PACKET_HOST; break; }; - return (polling ? netif_receive_skb(skb) : netif_rx(skb)); + return 0; } -EXPORT_SYMBOL(__vlan_hwaccel_rx); struct net_device *vlan_dev_real_dev(const struct net_device *dev) { diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 4bf014e51f8c..8883e9c8a223 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -48,7 +48,7 @@ static int vlan_dev_rebuild_header(struct sk_buff *skb) switch (veth->h_vlan_encapsulated_proto) { #ifdef CONFIG_INET - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): /* TODO: Confirm this will work with VLAN headers... */ return arp_find(veth->h_dest, skb); @@ -607,6 +607,7 @@ static int vlan_dev_init(struct net_device *dev) (1<<__LINK_STATE_PRESENT); dev->features |= real_dev->features & real_dev->vlan_features; + dev->gso_max_size = real_dev->gso_max_size; /* ipv6 shared card related stuff */ dev->dev_id = real_dev->dev_id; diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 0feefa4e1a4b..3628e0a81b40 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -314,7 +314,7 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset) dev_info->ingress_priority_map[6], dev_info->ingress_priority_map[7]); - seq_printf(seq, "EGRESSS priority Mappings: "); + seq_printf(seq, " EGRESS priority mappings: "); for (i = 0; i < 16; i++) { const struct vlan_priority_tci_mapping *mp = dev_info->egress_priority_map[i]; diff --git a/net/9p/Kconfig b/net/9p/Kconfig index ff34c5acc130..0663f99e977a 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -13,16 +13,24 @@ menuconfig NET_9P If unsure, say N. +if NET_9P + config NET_9P_VIRTIO - depends on NET_9P && EXPERIMENTAL && VIRTIO + depends on EXPERIMENTAL && VIRTIO tristate "9P Virtio Transport (Experimental)" help This builds support for a transports between guest partitions and a host partition. +config NET_9P_RDMA + depends on INET && INFINIBAND && EXPERIMENTAL + tristate "9P RDMA Transport (Experimental)" + help + This builds support for an RDMA transport. + config NET_9P_DEBUG bool "Debug information" - depends on NET_9P help Say Y if you want the 9P subsystem to log debug information. +endif diff --git a/net/9p/Makefile b/net/9p/Makefile index 519219480db1..198a640d53a6 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -1,14 +1,17 @@ obj-$(CONFIG_NET_9P) := 9pnet.o obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o +obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o 9pnet-objs := \ mod.o \ client.o \ - conv.o \ error.o \ - fcprint.o \ util.o \ + protocol.o \ trans_fd.o \ 9pnet_virtio-objs := \ trans_virtio.o \ + +9pnet_rdma-objs := \ + trans_rdma.o \ diff --git a/net/9p/client.c b/net/9p/client.c index 10e320307ec0..4b529454616d 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -33,12 +33,9 @@ #include <linux/uaccess.h> #include <net/9p/9p.h> #include <linux/parser.h> -#include <net/9p/transport.h> #include <net/9p/client.h> - -static struct p9_fid *p9_fid_create(struct p9_client *clnt); -static void p9_fid_destroy(struct p9_fid *fid); -static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu); +#include <net/9p/transport.h> +#include "protocol.h" /* * Client Option Parsing (code inspired by NFS code) @@ -52,13 +49,16 @@ enum { Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_msize, "msize=%u"}, {Opt_legacy, "noextend"}, {Opt_trans, "trans=%s"}, {Opt_err, NULL}, }; +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); + /** * v9fs_parse_options - parse mount options into session structure * @options: options string passed from mount @@ -124,31 +124,586 @@ static int parse_opts(char *opts, struct p9_client *clnt) return ret; } +/** + * p9_tag_alloc - lookup/allocate a request by tag + * @c: client session to lookup tag within + * @tag: numeric id for transaction + * + * this is a simple array lookup, but will grow the + * request_slots as necessary to accomodate transaction + * ids which did not previously have a slot. + * + * this code relies on the client spinlock to manage locks, its + * possible we should switch to something else, but I'd rather + * stick with something low-overhead for the common case. + * + */ + +static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) +{ + unsigned long flags; + int row, col; + struct p9_req_t *req; + + /* This looks up the original request by tag so we know which + * buffer to read the data into */ + tag++; + + if (tag >= c->max_tag) { + spin_lock_irqsave(&c->lock, flags); + /* check again since original check was outside of lock */ + while (tag >= c->max_tag) { + row = (tag / P9_ROW_MAXTAG); + c->reqs[row] = kcalloc(P9_ROW_MAXTAG, + sizeof(struct p9_req_t), GFP_ATOMIC); + + if (!c->reqs[row]) { + printk(KERN_ERR "Couldn't grow tag array\n"); + spin_unlock_irqrestore(&c->lock, flags); + return ERR_PTR(-ENOMEM); + } + for (col = 0; col < P9_ROW_MAXTAG; col++) { + c->reqs[row][col].status = REQ_STATUS_IDLE; + c->reqs[row][col].tc = NULL; + } + c->max_tag += P9_ROW_MAXTAG; + } + spin_unlock_irqrestore(&c->lock, flags); + } + row = tag / P9_ROW_MAXTAG; + col = tag % P9_ROW_MAXTAG; + + req = &c->reqs[row][col]; + if (!req->tc) { + req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); + if (!req->wq) { + printk(KERN_ERR "Couldn't grow tag array\n"); + return ERR_PTR(-ENOMEM); + } + init_waitqueue_head(req->wq); + req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, + GFP_KERNEL); + req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, + GFP_KERNEL); + if ((!req->tc) || (!req->rc)) { + printk(KERN_ERR "Couldn't grow tag array\n"); + kfree(req->tc); + kfree(req->rc); + kfree(req->wq); + req->tc = req->rc = NULL; + req->wq = NULL; + return ERR_PTR(-ENOMEM); + } + req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); + req->tc->capacity = c->msize; + req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); + req->rc->capacity = c->msize; + } + + p9pdu_reset(req->tc); + p9pdu_reset(req->rc); + + req->flush_tag = 0; + req->tc->tag = tag-1; + req->status = REQ_STATUS_ALLOC; + + return &c->reqs[row][col]; +} + +/** + * p9_tag_lookup - lookup a request by tag + * @c: client session to lookup tag within + * @tag: numeric id for transaction + * + */ + +struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag) +{ + int row, col; + + /* This looks up the original request by tag so we know which + * buffer to read the data into */ + tag++; + + BUG_ON(tag >= c->max_tag); + + row = tag / P9_ROW_MAXTAG; + col = tag % P9_ROW_MAXTAG; + + return &c->reqs[row][col]; +} +EXPORT_SYMBOL(p9_tag_lookup); + +/** + * p9_tag_init - setup tags structure and contents + * @tags: tags structure from the client struct + * + * This initializes the tags structure for each client instance. + * + */ + +static int p9_tag_init(struct p9_client *c) +{ + int err = 0; + + c->tagpool = p9_idpool_create(); + if (IS_ERR(c->tagpool)) { + err = PTR_ERR(c->tagpool); + c->tagpool = NULL; + goto error; + } + + p9_idpool_get(c->tagpool); /* reserve tag 0 */ + + c->max_tag = 0; +error: + return err; +} /** - * p9_client_rpc - sends 9P request and waits until a response is available. - * The function can be interrupted. - * @c: client data - * @tc: request to be sent - * @rc: pointer where a pointer to the response is stored + * p9_tag_cleanup - cleans up tags structure and reclaims resources + * @tags: tags structure from the client struct + * + * This frees resources associated with the tags structure + * */ +static void p9_tag_cleanup(struct p9_client *c) +{ + int row, col; + + /* check to insure all requests are idle */ + for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) { + for (col = 0; col < P9_ROW_MAXTAG; col++) { + if (c->reqs[row][col].status != REQ_STATUS_IDLE) { + P9_DPRINTK(P9_DEBUG_MUX, + "Attempting to cleanup non-free tag %d,%d\n", + row, col); + /* TODO: delay execution of cleanup */ + return; + } + } + } + + if (c->tagpool) + p9_idpool_destroy(c->tagpool); + + /* free requests associated with tags */ + for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) { + for (col = 0; col < P9_ROW_MAXTAG; col++) { + kfree(c->reqs[row][col].wq); + kfree(c->reqs[row][col].tc); + kfree(c->reqs[row][col].rc); + } + kfree(c->reqs[row]); + } + c->max_tag = 0; +} + +/** + * p9_free_req - free a request and clean-up as necessary + * c: client state + * r: request to release + * + */ + +static void p9_free_req(struct p9_client *c, struct p9_req_t *r) +{ + int tag = r->tc->tag; + P9_DPRINTK(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag); + + r->status = REQ_STATUS_IDLE; + if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool)) + p9_idpool_put(tag, c->tagpool); +} + +/** + * p9_client_cb - call back from transport to client + * c: client state + * req: request received + * + */ +void p9_client_cb(struct p9_client *c, struct p9_req_t *req) +{ + struct p9_req_t *other_req; + unsigned long flags; + + P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); + + if (req->status == REQ_STATUS_ERROR) + wake_up(req->wq); + + if (req->flush_tag) { /* flush receive path */ + P9_DPRINTK(P9_DEBUG_9P, "<<< RFLUSH %d\n", req->tc->tag); + spin_lock_irqsave(&c->lock, flags); + other_req = p9_tag_lookup(c, req->flush_tag); + if (other_req->status != REQ_STATUS_FLSH) /* stale flush */ + spin_unlock_irqrestore(&c->lock, flags); + else { + other_req->status = REQ_STATUS_FLSHD; + spin_unlock_irqrestore(&c->lock, flags); + wake_up(other_req->wq); + } + p9_free_req(c, req); + } else { /* normal receive path */ + P9_DPRINTK(P9_DEBUG_MUX, "normal: tag %d\n", req->tc->tag); + spin_lock_irqsave(&c->lock, flags); + if (req->status != REQ_STATUS_FLSHD) + req->status = REQ_STATUS_RCVD; + spin_unlock_irqrestore(&c->lock, flags); + wake_up(req->wq); + P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); + } +} +EXPORT_SYMBOL(p9_client_cb); + +/** + * p9_parse_header - parse header arguments out of a packet + * @pdu: packet to parse + * @size: size of packet + * @type: type of request + * @tag: tag of packet + * @rewind: set if we need to rewind offset afterwards + */ + int -p9_client_rpc(struct p9_client *c, struct p9_fcall *tc, - struct p9_fcall **rc) +p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag, + int rewind) { - return c->trans->rpc(c->trans, tc, rc); + int8_t r_type; + int16_t r_tag; + int32_t r_size; + int offset = pdu->offset; + int err; + + pdu->offset = 0; + if (pdu->size == 0) + pdu->size = 7; + + err = p9pdu_readf(pdu, 0, "dbw", &r_size, &r_type, &r_tag); + if (err) + goto rewind_and_exit; + + pdu->size = r_size; + pdu->id = r_type; + pdu->tag = r_tag; + + P9_DPRINTK(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", pdu->size, + pdu->id, pdu->tag); + + if (type) + *type = r_type; + if (tag) + *tag = r_tag; + if (size) + *size = r_size; + + +rewind_and_exit: + if (rewind) + pdu->offset = offset; + return err; +} +EXPORT_SYMBOL(p9_parse_header); + +/** + * p9_check_errors - check 9p packet for error return and process it + * @c: current client instance + * @req: request to parse and check for error conditions + * + * returns error code if one is discovered, otherwise returns 0 + * + * this will have to be more complicated if we have multiple + * error packet types + */ + +static int p9_check_errors(struct p9_client *c, struct p9_req_t *req) +{ + int8_t type; + int err; + + err = p9_parse_header(req->rc, NULL, &type, NULL, 0); + if (err) { + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); + return err; + } + + if (type == P9_RERROR) { + int ecode; + char *ename; + + err = p9pdu_readf(req->rc, c->dotu, "s?d", &ename, &ecode); + if (err) { + P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", + err); + return err; + } + + if (c->dotu) + err = -ecode; + + if (!err) { + err = p9_errstr2errno(ename, strlen(ename)); + + /* string match failed */ + if (!err) + err = -ESERVERFAULT; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); + + kfree(ename); + } else + err = 0; + + return err; } +/** + * p9_client_flush - flush (cancel) a request + * c: client state + * req: request to cancel + * + * This sents a flush for a particular requests and links + * the flush request to the original request. The current + * code only supports a single flush request although the protocol + * allows for multiple flush requests to be sent for a single request. + * + */ + +static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) +{ + struct p9_req_t *req; + int16_t oldtag; + int err; + + err = p9_parse_header(oldreq->tc, NULL, NULL, &oldtag, 1); + if (err) + return err; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag); + + req = p9_client_rpc(c, P9_TFLUSH, "w", oldtag); + if (IS_ERR(req)) + return PTR_ERR(req); + + req->flush_tag = oldtag; + + /* we don't free anything here because RPC isn't complete */ + return 0; +} + +/** + * p9_client_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ + +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) +{ + va_list ap; + int tag, err; + struct p9_req_t *req; + unsigned long flags; + int sigpending; + int flushed = 0; + + P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); + + if (c->status != Connected) + return ERR_PTR(-EIO); + + if (signal_pending(current)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + } else + sigpending = 0; + + tag = P9_NOTAG; + if (type != P9_TVERSION) { + tag = p9_idpool_get(c->tagpool); + if (tag < 0) + return ERR_PTR(-ENOMEM); + } + + req = p9_tag_alloc(c, tag); + if (IS_ERR(req)) + return req; + + /* marshall the data */ + p9pdu_prepare(req->tc, tag, type); + va_start(ap, fmt); + err = p9pdu_vwritef(req->tc, c->dotu, fmt, ap); + va_end(ap); + p9pdu_finalize(req->tc); + + err = c->trans_mod->request(c, req); + if (err < 0) { + c->status = Disconnected; + goto reterr; + } + + /* if it was a flush we just transmitted, return our tag */ + if (type == P9_TFLUSH) + return req; +again: + P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); + err = wait_event_interruptible(*req->wq, + req->status >= REQ_STATUS_RCVD); + P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d (flushed=%d)\n", + req->wq, tag, err, flushed); + + if (req->status == REQ_STATUS_ERROR) { + P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); + err = req->t_err; + } else if (err == -ERESTARTSYS && flushed) { + P9_DPRINTK(P9_DEBUG_MUX, "flushed - going again\n"); + goto again; + } else if (req->status == REQ_STATUS_FLSHD) { + P9_DPRINTK(P9_DEBUG_MUX, "flushed - erestartsys\n"); + err = -ERESTARTSYS; + } + + if ((err == -ERESTARTSYS) && (c->status == Connected) && (!flushed)) { + P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); + spin_lock_irqsave(&c->lock, flags); + if (req->status == REQ_STATUS_SENT) + req->status = REQ_STATUS_FLSH; + spin_unlock_irqrestore(&c->lock, flags); + sigpending = 1; + flushed = 1; + clear_thread_flag(TIF_SIGPENDING); + + if (c->trans_mod->cancel(c, req)) { + err = p9_client_flush(c, req); + if (err == 0) + goto again; + } + } + + if (sigpending) { + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); + } + + if (err < 0) + goto reterr; + + err = p9_check_errors(c, req); + if (!err) { + P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); + return req; + } + +reterr: + P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type, + err); + p9_free_req(c, req); + return ERR_PTR(err); +} + +static struct p9_fid *p9_fid_create(struct p9_client *clnt) +{ + int ret; + struct p9_fid *fid; + unsigned long flags; + + P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt); + fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); + if (!fid) + return ERR_PTR(-ENOMEM); + + ret = p9_idpool_get(clnt->fidpool); + if (fid->fid < 0) { + ret = -ENOSPC; + goto error; + } + fid->fid = ret; + + memset(&fid->qid, 0, sizeof(struct p9_qid)); + fid->mode = -1; + fid->rdir_fpos = 0; + fid->uid = current->fsuid; + fid->clnt = clnt; + fid->aux = NULL; + + spin_lock_irqsave(&clnt->lock, flags); + list_add(&fid->flist, &clnt->fidlist); + spin_unlock_irqrestore(&clnt->lock, flags); + + return fid; + +error: + kfree(fid); + return ERR_PTR(ret); +} + +static void p9_fid_destroy(struct p9_fid *fid) +{ + struct p9_client *clnt; + unsigned long flags; + + P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid); + clnt = fid->clnt; + p9_idpool_put(fid->fid, clnt->fidpool); + spin_lock_irqsave(&clnt->lock, flags); + list_del(&fid->flist); + spin_unlock_irqrestore(&clnt->lock, flags); + kfree(fid); +} + +int p9_client_version(struct p9_client *c) +{ + int err = 0; + struct p9_req_t *req; + char *version; + int msize; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d extended %d\n", + c->msize, c->dotu); + req = p9_client_rpc(c, P9_TVERSION, "ds", c->msize, + c->dotu ? "9P2000.u" : "9P2000"); + if (IS_ERR(req)) + return PTR_ERR(req); + + err = p9pdu_readf(req->rc, c->dotu, "ds", &msize, &version); + if (err) { + P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err); + p9pdu_dump(1, req->rc); + goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); + if (!memcmp(version, "9P2000.u", 8)) + c->dotu = 1; + else if (!memcmp(version, "9P2000", 6)) + c->dotu = 0; + else { + err = -EREMOTEIO; + goto error; + } + + if (msize < c->msize) + c->msize = msize; + +error: + kfree(version); + p9_free_req(c, req); + + return err; +} +EXPORT_SYMBOL(p9_client_version); + struct p9_client *p9_client_create(const char *dev_name, char *options) { - int err, n; + int err; struct p9_client *clnt; - struct p9_fcall *tc, *rc; - struct p9_str *version; err = 0; - tc = NULL; - rc = NULL; clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL); if (!clnt) return ERR_PTR(-ENOMEM); @@ -164,6 +719,8 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) goto error; } + p9_tag_init(clnt); + err = parse_opts(options, clnt); if (err < 0) goto error; @@ -175,53 +732,23 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) goto error; } - P9_DPRINTK(P9_DEBUG_9P, "clnt %p trans %p msize %d dotu %d\n", + P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d dotu %d\n", clnt, clnt->trans_mod, clnt->msize, clnt->dotu); - - clnt->trans = clnt->trans_mod->create(dev_name, options, clnt->msize, - clnt->dotu); - if (IS_ERR(clnt->trans)) { - err = PTR_ERR(clnt->trans); - clnt->trans = NULL; + err = clnt->trans_mod->create(clnt, dev_name, options); + if (err) goto error; - } if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize) clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ; - tc = p9_create_tversion(clnt->msize, clnt->dotu?"9P2000.u":"9P2000"); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } - - err = p9_client_rpc(clnt, tc, &rc); + err = p9_client_version(clnt); if (err) goto error; - version = &rc->params.rversion.version; - if (version->len == 8 && !memcmp(version->str, "9P2000.u", 8)) - clnt->dotu = 1; - else if (version->len == 6 && !memcmp(version->str, "9P2000", 6)) - clnt->dotu = 0; - else { - err = -EREMOTEIO; - goto error; - } - - n = rc->params.rversion.msize; - if (n < clnt->msize) - clnt->msize = n; - - kfree(tc); - kfree(rc); return clnt; error: - kfree(tc); - kfree(rc); p9_client_destroy(clnt); return ERR_PTR(err); } @@ -231,13 +758,10 @@ void p9_client_destroy(struct p9_client *clnt) { struct p9_fid *fid, *fidptr; - P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); + P9_DPRINTK(P9_DEBUG_MUX, "clnt %p\n", clnt); - if (clnt->trans) { - clnt->trans->close(clnt->trans); - kfree(clnt->trans); - clnt->trans = NULL; - } + if (clnt->trans_mod) + clnt->trans_mod->close(clnt); v9fs_put_trans(clnt->trans_mod); @@ -247,6 +771,8 @@ void p9_client_destroy(struct p9_client *clnt) if (clnt->fidpool) p9_idpool_destroy(clnt->fidpool); + p9_tag_cleanup(clnt); + kfree(clnt); } EXPORT_SYMBOL(p9_client_destroy); @@ -254,7 +780,7 @@ EXPORT_SYMBOL(p9_client_destroy); void p9_client_disconnect(struct p9_client *clnt) { P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); - clnt->trans->status = Disconnected; + clnt->status = Disconnected; } EXPORT_SYMBOL(p9_client_disconnect); @@ -262,14 +788,13 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, char *uname, u32 n_uname, char *aname) { int err; - struct p9_fcall *tc, *rc; + struct p9_req_t *req; struct p9_fid *fid; + struct p9_qid qid; - P9_DPRINTK(P9_DEBUG_9P, "clnt %p afid %d uname %s aname %s\n", - clnt, afid?afid->fid:-1, uname, aname); + P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", + afid ? afid->fid : -1, uname, aname); err = 0; - tc = NULL; - rc = NULL; fid = p9_fid_create(clnt); if (IS_ERR(fid)) { @@ -278,73 +803,81 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, goto error; } - tc = p9_create_tattach(fid->fid, afid?afid->fid:P9_NOFID, uname, aname, - n_uname, clnt->dotu); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; + req = p9_client_rpc(clnt, P9_TATTACH, "ddss?d", fid->fid, + afid ? afid->fid : P9_NOFID, uname, aname, n_uname); + if (IS_ERR(req)) { + err = PTR_ERR(req); goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) + err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid); + if (err) { + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); goto error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", + qid.type, + (unsigned long long)qid.path, + qid.version); - memmove(&fid->qid, &rc->params.rattach.qid, sizeof(struct p9_qid)); - kfree(tc); - kfree(rc); + memmove(&fid->qid, &qid, sizeof(struct p9_qid)); + + p9_free_req(clnt, req); return fid; error: - kfree(tc); - kfree(rc); if (fid) p9_fid_destroy(fid); return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_attach); -struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, - u32 n_uname, char *aname) +struct p9_fid * +p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname) { int err; - struct p9_fcall *tc, *rc; - struct p9_fid *fid; + struct p9_req_t *req; + struct p9_qid qid; + struct p9_fid *afid; - P9_DPRINTK(P9_DEBUG_9P, "clnt %p uname %s aname %s\n", clnt, uname, - aname); + P9_DPRINTK(P9_DEBUG_9P, ">>> TAUTH uname %s aname %s\n", uname, aname); err = 0; - tc = NULL; - rc = NULL; - fid = p9_fid_create(clnt); - if (IS_ERR(fid)) { - err = PTR_ERR(fid); - fid = NULL; + afid = p9_fid_create(clnt); + if (IS_ERR(afid)) { + err = PTR_ERR(afid); + afid = NULL; goto error; } - tc = p9_create_tauth(fid->fid, uname, aname, n_uname, clnt->dotu); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; + req = p9_client_rpc(clnt, P9_TAUTH, "dss?d", + afid ? afid->fid : P9_NOFID, uname, aname, n_uname); + if (IS_ERR(req)) { + err = PTR_ERR(req); goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) + err = p9pdu_readf(req->rc, clnt->dotu, "Q", &qid); + if (err) { + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); goto error; + } - memmove(&fid->qid, &rc->params.rauth.qid, sizeof(struct p9_qid)); - kfree(tc); - kfree(rc); - return fid; + P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n", + qid.type, + (unsigned long long)qid.path, + qid.version); + + memmove(&afid->qid, &qid, sizeof(struct p9_qid)); + p9_free_req(clnt, req); + return afid; error: - kfree(tc); - kfree(rc); - if (fid) - p9_fid_destroy(fid); + if (afid) + p9_fid_destroy(afid); return ERR_PTR(err); } EXPORT_SYMBOL(p9_client_auth); @@ -353,15 +886,13 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, int clone) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; struct p9_fid *fid; + struct p9_qid *wqids; + struct p9_req_t *req; + int16_t nwqids, count; - P9_DPRINTK(P9_DEBUG_9P, "fid %d nwname %d wname[0] %s\n", - oldfid->fid, nwname, wnames?wnames[0]:NULL); err = 0; - tc = NULL; - rc = NULL; clnt = oldfid->clnt; if (clone) { fid = p9_fid_create(clnt); @@ -375,53 +906,50 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, } else fid = oldfid; - tc = p9_create_twalk(oldfid->fid, fid->fid, nwname, wnames); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %d wname[0] %s\n", + oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); + + req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid, + nwname, wnames); + if (IS_ERR(req)) { + err = PTR_ERR(req); goto error; } - err = p9_client_rpc(clnt, tc, &rc); + err = p9pdu_readf(req->rc, clnt->dotu, "R", &nwqids, &wqids); if (err) { - if (rc && rc->id == P9_RWALK) - goto clunk_fid; - else - goto error; + p9pdu_dump(1, req->rc); + p9_free_req(clnt, req); + goto clunk_fid; } + p9_free_req(clnt, req); - if (rc->params.rwalk.nwqid != nwname) { + P9_DPRINTK(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids); + + if (nwqids != nwname) { err = -ENOENT; goto clunk_fid; } + for (count = 0; count < nwqids; count++) + P9_DPRINTK(P9_DEBUG_9P, "<<< [%d] %x.%llx.%x\n", + count, wqids[count].type, + (unsigned long long)wqids[count].path, + wqids[count].version); + if (nwname) - memmove(&fid->qid, - &rc->params.rwalk.wqids[rc->params.rwalk.nwqid - 1], - sizeof(struct p9_qid)); + memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid)); else fid->qid = oldfid->qid; - kfree(tc); - kfree(rc); return fid; clunk_fid: - kfree(tc); - kfree(rc); - rc = NULL; - tc = p9_create_tclunk(fid->fid); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } - - p9_client_rpc(clnt, tc, &rc); + p9_client_clunk(fid); + fid = NULL; error: - kfree(tc); - kfree(rc); if (fid && (fid != oldfid)) p9_fid_destroy(fid); @@ -432,35 +960,41 @@ EXPORT_SYMBOL(p9_client_walk); int p9_client_open(struct p9_fid *fid, int mode) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; + struct p9_req_t *req; + struct p9_qid qid; + int iounit; - P9_DPRINTK(P9_DEBUG_9P, "fid %d mode %d\n", fid->fid, mode); + P9_DPRINTK(P9_DEBUG_9P, ">>> TOPEN fid %d mode %d\n", fid->fid, mode); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; if (fid->mode != -1) return -EINVAL; - tc = p9_create_topen(fid->fid, mode); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto done; + req = p9_client_rpc(clnt, P9_TOPEN, "db", fid->fid, mode); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto done; + err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< ROPEN qid %x.%llx.%x iounit %x\n", + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; - fid->iounit = rc->params.ropen.iounit; + fid->iounit = iounit; -done: - kfree(tc); - kfree(rc); +free_and_error: + p9_free_req(clnt, req); +error: return err; } EXPORT_SYMBOL(p9_client_open); @@ -469,37 +1003,43 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode, char *extension) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; + struct p9_req_t *req; + struct p9_qid qid; + int iounit; - P9_DPRINTK(P9_DEBUG_9P, "fid %d name %s perm %d mode %d\n", fid->fid, - name, perm, mode); + P9_DPRINTK(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n", + fid->fid, name, perm, mode); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; if (fid->mode != -1) return -EINVAL; - tc = p9_create_tcreate(fid->fid, name, perm, mode, extension, - clnt->dotu); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto done; + req = p9_client_rpc(clnt, P9_TCREATE, "dsdb?s", fid->fid, name, perm, + mode, extension); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto done; + err = p9pdu_readf(req->rc, clnt->dotu, "Qd", &qid, &iounit); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } + + P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", + qid.type, + (unsigned long long)qid.path, + qid.version, iounit); fid->mode = mode; - fid->iounit = rc->params.ropen.iounit; + fid->iounit = iounit; -done: - kfree(tc); - kfree(rc); +free_and_error: + p9_free_req(clnt, req); +error: return err; } EXPORT_SYMBOL(p9_client_fcreate); @@ -507,31 +1047,25 @@ EXPORT_SYMBOL(p9_client_fcreate); int p9_client_clunk(struct p9_fid *fid) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; + struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); + P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; - tc = p9_create_tclunk(fid->fid); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto done; + req = p9_client_rpc(clnt, P9_TCLUNK, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto done; + P9_DPRINTK(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid); + p9_free_req(clnt, req); p9_fid_destroy(fid); -done: - kfree(tc); - kfree(rc); +error: return err; } EXPORT_SYMBOL(p9_client_clunk); @@ -539,157 +1073,41 @@ EXPORT_SYMBOL(p9_client_clunk); int p9_client_remove(struct p9_fid *fid) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; + struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); + P9_DPRINTK(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; - tc = p9_create_tremove(fid->fid); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto done; + req = p9_client_rpc(clnt, P9_TREMOVE, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto done; + P9_DPRINTK(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid); + p9_free_req(clnt, req); p9_fid_destroy(fid); -done: - kfree(tc); - kfree(rc); - return err; -} -EXPORT_SYMBOL(p9_client_remove); - -int p9_client_read(struct p9_fid *fid, char *data, u64 offset, u32 count) -{ - int err, n, rsize, total; - struct p9_fcall *tc, *rc; - struct p9_client *clnt; - - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu %d\n", fid->fid, - (long long unsigned) offset, count); - err = 0; - tc = NULL; - rc = NULL; - clnt = fid->clnt; - total = 0; - - rsize = fid->iounit; - if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) - rsize = clnt->msize - P9_IOHDRSZ; - - do { - if (count < rsize) - rsize = count; - - tc = p9_create_tread(fid->fid, offset, rsize); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } - - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; - - n = rc->params.rread.count; - if (n > count) - n = count; - - memmove(data, rc->params.rread.data, n); - count -= n; - data += n; - offset += n; - total += n; - kfree(tc); - tc = NULL; - kfree(rc); - rc = NULL; - } while (count > 0 && n == rsize); - - return total; - error: - kfree(tc); - kfree(rc); return err; } -EXPORT_SYMBOL(p9_client_read); - -int p9_client_write(struct p9_fid *fid, char *data, u64 offset, u32 count) -{ - int err, n, rsize, total; - struct p9_fcall *tc, *rc; - struct p9_client *clnt; - - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, - (long long unsigned) offset, count); - err = 0; - tc = NULL; - rc = NULL; - clnt = fid->clnt; - total = 0; - - rsize = fid->iounit; - if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) - rsize = clnt->msize - P9_IOHDRSZ; - - do { - if (count < rsize) - rsize = count; - - tc = p9_create_twrite(fid->fid, offset, rsize, data); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } - - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; - - n = rc->params.rread.count; - count -= n; - data += n; - offset += n; - total += n; - kfree(tc); - tc = NULL; - kfree(rc); - rc = NULL; - } while (count > 0); - - return total; - -error: - kfree(tc); - kfree(rc); - return err; -} -EXPORT_SYMBOL(p9_client_write); +EXPORT_SYMBOL(p9_client_remove); int -p9_client_uread(struct p9_fid *fid, char __user *data, u64 offset, u32 count) +p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, + u32 count) { - int err, n, rsize, total; - struct p9_fcall *tc, *rc; + int err, rsize, total; struct p9_client *clnt; + struct p9_req_t *req; + char *dataptr; - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, + P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, (long long unsigned) offset, count); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; total = 0; @@ -697,63 +1115,57 @@ p9_client_uread(struct p9_fid *fid, char __user *data, u64 offset, u32 count) if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) rsize = clnt->msize - P9_IOHDRSZ; - do { - if (count < rsize) - rsize = count; + if (count < rsize) + rsize = count; - tc = p9_create_tread(fid->fid, offset, rsize); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } + req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; + err = p9pdu_readf(req->rc, clnt->dotu, "D", &count, &dataptr); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } - n = rc->params.rread.count; - if (n > count) - n = count; + P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); - err = copy_to_user(data, rc->params.rread.data, n); + if (data) { + memmove(data, dataptr, count); + data += count; + } + + if (udata) { + err = copy_to_user(udata, dataptr, count); if (err) { err = -EFAULT; - goto error; + goto free_and_error; } + } - count -= n; - data += n; - offset += n; - total += n; - kfree(tc); - tc = NULL; - kfree(rc); - rc = NULL; - } while (count > 0 && n == rsize); - - return total; + p9_free_req(clnt, req); + return count; +free_and_error: + p9_free_req(clnt, req); error: - kfree(tc); - kfree(rc); return err; } -EXPORT_SYMBOL(p9_client_uread); +EXPORT_SYMBOL(p9_client_read); int -p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset, - u32 count) +p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, + u64 offset, u32 count) { - int err, n, rsize, total; - struct p9_fcall *tc, *rc; + int err, rsize, total; struct p9_client *clnt; + struct p9_req_t *req; - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, - (long long unsigned) offset, count); + P9_DPRINTK(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n", + fid->fid, (long long unsigned) offset, count); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; total = 0; @@ -761,325 +1173,114 @@ p9_client_uwrite(struct p9_fid *fid, const char __user *data, u64 offset, if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) rsize = clnt->msize - P9_IOHDRSZ; - do { - if (count < rsize) - rsize = count; - - tc = p9_create_twrite_u(fid->fid, offset, rsize, data); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } + if (count < rsize) + rsize = count; + if (data) + req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset, + rsize, data); + else + req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset, + rsize, udata); + if (IS_ERR(req)) { + err = PTR_ERR(req); + goto error; + } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; + err = p9pdu_readf(req->rc, clnt->dotu, "d", &count); + if (err) { + p9pdu_dump(1, req->rc); + goto free_and_error; + } - n = rc->params.rread.count; - count -= n; - data += n; - offset += n; - total += n; - kfree(tc); - tc = NULL; - kfree(rc); - rc = NULL; - } while (count > 0); + P9_DPRINTK(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); - return total; + p9_free_req(clnt, req); + return count; +free_and_error: + p9_free_req(clnt, req); error: - kfree(tc); - kfree(rc); return err; } -EXPORT_SYMBOL(p9_client_uwrite); - -int p9_client_readn(struct p9_fid *fid, char *data, u64 offset, u32 count) -{ - int n, total; - - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu count %d\n", fid->fid, - (long long unsigned) offset, count); - n = 0; - total = 0; - while (count) { - n = p9_client_read(fid, data, offset, count); - if (n <= 0) - break; - - data += n; - offset += n; - count -= n; - total += n; - } - - if (n < 0) - total = n; - - return total; -} -EXPORT_SYMBOL(p9_client_readn); +EXPORT_SYMBOL(p9_client_write); -struct p9_stat *p9_client_stat(struct p9_fid *fid) +struct p9_wstat *p9_client_stat(struct p9_fid *fid) { int err; - struct p9_fcall *tc, *rc; struct p9_client *clnt; - struct p9_stat *ret; + struct p9_wstat *ret = kmalloc(sizeof(struct p9_wstat), GFP_KERNEL); + struct p9_req_t *req; + u16 ignored; + + P9_DPRINTK(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid); + + if (!ret) + return ERR_PTR(-ENOMEM); - P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); err = 0; - tc = NULL; - rc = NULL; - ret = NULL; clnt = fid->clnt; - tc = p9_create_tstat(fid->fid); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; + req = p9_client_rpc(clnt, P9_TSTAT, "d", fid->fid); + if (IS_ERR(req)) { + err = PTR_ERR(req); goto error; } - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; - - ret = p9_clone_stat(&rc->params.rstat.stat, clnt->dotu); - if (IS_ERR(ret)) { - err = PTR_ERR(ret); - ret = NULL; - goto error; + err = p9pdu_readf(req->rc, clnt->dotu, "wS", &ignored, ret); + if (err) { + ret = ERR_PTR(err); + p9pdu_dump(1, req->rc); + goto free_and_error; } - kfree(tc); - kfree(rc); - return ret; - + P9_DPRINTK(P9_DEBUG_9P, + "<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n" + "<<< mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" + "<<< name=%s uid=%s gid=%s muid=%s extension=(%s)\n" + "<<< uid=%d gid=%d n_muid=%d\n", + ret->size, ret->type, ret->dev, ret->qid.type, + (unsigned long long)ret->qid.path, ret->qid.version, ret->mode, + ret->atime, ret->mtime, (unsigned long long)ret->length, + ret->name, ret->uid, ret->gid, ret->muid, ret->extension, + ret->n_uid, ret->n_gid, ret->n_muid); + +free_and_error: + p9_free_req(clnt, req); error: - kfree(tc); - kfree(rc); - kfree(ret); - return ERR_PTR(err); + return ret; } EXPORT_SYMBOL(p9_client_stat); int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) { int err; - struct p9_fcall *tc, *rc; + struct p9_req_t *req; struct p9_client *clnt; - P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); + P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); + P9_DPRINTK(P9_DEBUG_9P, + " sz=%x type=%x dev=%x qid=%x.%llx.%x\n" + " mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n" + " name=%s uid=%s gid=%s muid=%s extension=(%s)\n" + " uid=%d gid=%d n_muid=%d\n", + wst->size, wst->type, wst->dev, wst->qid.type, + (unsigned long long)wst->qid.path, wst->qid.version, wst->mode, + wst->atime, wst->mtime, (unsigned long long)wst->length, + wst->name, wst->uid, wst->gid, wst->muid, wst->extension, + wst->n_uid, wst->n_gid, wst->n_muid); err = 0; - tc = NULL; - rc = NULL; clnt = fid->clnt; - tc = p9_create_twstat(fid->fid, wst, clnt->dotu); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto done; - } - - err = p9_client_rpc(clnt, tc, &rc); - -done: - kfree(tc); - kfree(rc); - return err; -} -EXPORT_SYMBOL(p9_client_wstat); - -struct p9_stat *p9_client_dirread(struct p9_fid *fid, u64 offset) -{ - int err, n, m; - struct p9_fcall *tc, *rc; - struct p9_client *clnt; - struct p9_stat st, *ret; - - P9_DPRINTK(P9_DEBUG_9P, "fid %d offset %llu\n", fid->fid, - (long long unsigned) offset); - err = 0; - tc = NULL; - rc = NULL; - ret = NULL; - clnt = fid->clnt; - - /* if the offset is below or above the current response, free it */ - if (offset < fid->rdir_fpos || (fid->rdir_fcall && - offset >= fid->rdir_fpos+fid->rdir_fcall->params.rread.count)) { - fid->rdir_pos = 0; - if (fid->rdir_fcall) - fid->rdir_fpos += fid->rdir_fcall->params.rread.count; - - kfree(fid->rdir_fcall); - fid->rdir_fcall = NULL; - if (offset < fid->rdir_fpos) - fid->rdir_fpos = 0; - } - - if (!fid->rdir_fcall) { - n = fid->iounit; - if (!n || n > clnt->msize-P9_IOHDRSZ) - n = clnt->msize - P9_IOHDRSZ; - - while (1) { - if (fid->rdir_fcall) { - fid->rdir_fpos += - fid->rdir_fcall->params.rread.count; - kfree(fid->rdir_fcall); - fid->rdir_fcall = NULL; - } - - tc = p9_create_tread(fid->fid, fid->rdir_fpos, n); - if (IS_ERR(tc)) { - err = PTR_ERR(tc); - tc = NULL; - goto error; - } - - err = p9_client_rpc(clnt, tc, &rc); - if (err) - goto error; - - n = rc->params.rread.count; - if (n == 0) - goto done; - - fid->rdir_fcall = rc; - rc = NULL; - if (offset >= fid->rdir_fpos && - offset < fid->rdir_fpos+n) - break; - } - - fid->rdir_pos = 0; - } - - m = offset - fid->rdir_fpos; - if (m < 0) - goto done; - - n = p9_deserialize_stat(fid->rdir_fcall->params.rread.data + m, - fid->rdir_fcall->params.rread.count - m, &st, clnt->dotu); - - if (!n) { - err = -EIO; - goto error; - } - - fid->rdir_pos += n; - st.size = n; - ret = p9_clone_stat(&st, clnt->dotu); - if (IS_ERR(ret)) { - err = PTR_ERR(ret); - ret = NULL; + req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, 0, wst); + if (IS_ERR(req)) { + err = PTR_ERR(req); goto error; } -done: - kfree(tc); - kfree(rc); - return ret; + P9_DPRINTK(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid); + p9_free_req(clnt, req); error: - kfree(tc); - kfree(rc); - kfree(ret); - return ERR_PTR(err); -} -EXPORT_SYMBOL(p9_client_dirread); - -static struct p9_stat *p9_clone_stat(struct p9_stat *st, int dotu) -{ - int n; - char *p; - struct p9_stat *ret; - - n = sizeof(struct p9_stat) + st->name.len + st->uid.len + st->gid.len + - st->muid.len; - - if (dotu) - n += st->extension.len; - - ret = kmalloc(n, GFP_KERNEL); - if (!ret) - return ERR_PTR(-ENOMEM); - - memmove(ret, st, sizeof(struct p9_stat)); - p = ((char *) ret) + sizeof(struct p9_stat); - memmove(p, st->name.str, st->name.len); - ret->name.str = p; - p += st->name.len; - memmove(p, st->uid.str, st->uid.len); - ret->uid.str = p; - p += st->uid.len; - memmove(p, st->gid.str, st->gid.len); - ret->gid.str = p; - p += st->gid.len; - memmove(p, st->muid.str, st->muid.len); - ret->muid.str = p; - p += st->muid.len; - - if (dotu) { - memmove(p, st->extension.str, st->extension.len); - ret->extension.str = p; - p += st->extension.len; - } - - return ret; -} - -static struct p9_fid *p9_fid_create(struct p9_client *clnt) -{ - int err; - struct p9_fid *fid; - - P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); - fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL); - if (!fid) - return ERR_PTR(-ENOMEM); - - fid->fid = p9_idpool_get(clnt->fidpool); - if (fid->fid < 0) { - err = -ENOSPC; - goto error; - } - - memset(&fid->qid, 0, sizeof(struct p9_qid)); - fid->mode = -1; - fid->rdir_fpos = 0; - fid->rdir_pos = 0; - fid->rdir_fcall = NULL; - fid->uid = current->fsuid; - fid->clnt = clnt; - fid->aux = NULL; - - spin_lock(&clnt->lock); - list_add(&fid->flist, &clnt->fidlist); - spin_unlock(&clnt->lock); - - return fid; - -error: - kfree(fid); - return ERR_PTR(err); -} - -static void p9_fid_destroy(struct p9_fid *fid) -{ - struct p9_client *clnt; - - P9_DPRINTK(P9_DEBUG_9P, "fid %d\n", fid->fid); - clnt = fid->clnt; - p9_idpool_put(fid->fid, clnt->fidpool); - spin_lock(&clnt->lock); - list_del(&fid->flist); - spin_unlock(&clnt->lock); - kfree(fid->rdir_fcall); - kfree(fid); + return err; } +EXPORT_SYMBOL(p9_client_wstat); diff --git a/net/9p/conv.c b/net/9p/conv.c deleted file mode 100644 index 5ad3a3bd73b2..000000000000 --- a/net/9p/conv.c +++ /dev/null @@ -1,1054 +0,0 @@ -/* - * net/9p/conv.c - * - * 9P protocol conversion functions - * - * Copyright (C) 2004, 2005 by Latchesar Ionkov <lucho@ionkov.net> - * Copyright (C) 2004 by Eric Van Hensbergen <ericvh@gmail.com> - * Copyright (C) 2002 by Ron Minnich <rminnich@lanl.gov> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ - -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/sched.h> -#include <linux/idr.h> -#include <linux/uaccess.h> -#include <net/9p/9p.h> - -/* - * Buffer to help with string parsing - */ -struct cbuf { - unsigned char *sp; - unsigned char *p; - unsigned char *ep; -}; - -static inline void buf_init(struct cbuf *buf, void *data, int datalen) -{ - buf->sp = buf->p = data; - buf->ep = data + datalen; -} - -static inline int buf_check_overflow(struct cbuf *buf) -{ - return buf->p > buf->ep; -} - -static int buf_check_size(struct cbuf *buf, int len) -{ - if (buf->p + len > buf->ep) { - if (buf->p < buf->ep) { - P9_EPRINTK(KERN_ERR, - "buffer overflow: want %d has %d\n", len, - (int)(buf->ep - buf->p)); - dump_stack(); - buf->p = buf->ep + 1; - } - - return 0; - } - - return 1; -} - -static void *buf_alloc(struct cbuf *buf, int len) -{ - void *ret = NULL; - - if (buf_check_size(buf, len)) { - ret = buf->p; - buf->p += len; - } - - return ret; -} - -static void buf_put_int8(struct cbuf *buf, u8 val) -{ - if (buf_check_size(buf, 1)) { - buf->p[0] = val; - buf->p++; - } -} - -static void buf_put_int16(struct cbuf *buf, u16 val) -{ - if (buf_check_size(buf, 2)) { - *(__le16 *) buf->p = cpu_to_le16(val); - buf->p += 2; - } -} - -static void buf_put_int32(struct cbuf *buf, u32 val) -{ - if (buf_check_size(buf, 4)) { - *(__le32 *)buf->p = cpu_to_le32(val); - buf->p += 4; - } -} - -static void buf_put_int64(struct cbuf *buf, u64 val) -{ - if (buf_check_size(buf, 8)) { - *(__le64 *)buf->p = cpu_to_le64(val); - buf->p += 8; - } -} - -static char *buf_put_stringn(struct cbuf *buf, const char *s, u16 slen) -{ - char *ret; - - ret = NULL; - if (buf_check_size(buf, slen + 2)) { - buf_put_int16(buf, slen); - ret = buf->p; - memcpy(buf->p, s, slen); - buf->p += slen; - } - - return ret; -} - -static u8 buf_get_int8(struct cbuf *buf) -{ - u8 ret = 0; - - if (buf_check_size(buf, 1)) { - ret = buf->p[0]; - buf->p++; - } - - return ret; -} - -static u16 buf_get_int16(struct cbuf *buf) -{ - u16 ret = 0; - - if (buf_check_size(buf, 2)) { - ret = le16_to_cpu(*(__le16 *)buf->p); - buf->p += 2; - } - - return ret; -} - -static u32 buf_get_int32(struct cbuf *buf) -{ - u32 ret = 0; - - if (buf_check_size(buf, 4)) { - ret = le32_to_cpu(*(__le32 *)buf->p); - buf->p += 4; - } - - return ret; -} - -static u64 buf_get_int64(struct cbuf *buf) -{ - u64 ret = 0; - - if (buf_check_size(buf, 8)) { - ret = le64_to_cpu(*(__le64 *)buf->p); - buf->p += 8; - } - - return ret; -} - -static void buf_get_str(struct cbuf *buf, struct p9_str *vstr) -{ - vstr->len = buf_get_int16(buf); - if (!buf_check_overflow(buf) && buf_check_size(buf, vstr->len)) { - vstr->str = buf->p; - buf->p += vstr->len; - } else { - vstr->len = 0; - vstr->str = NULL; - } -} - -static void buf_get_qid(struct cbuf *bufp, struct p9_qid *qid) -{ - qid->type = buf_get_int8(bufp); - qid->version = buf_get_int32(bufp); - qid->path = buf_get_int64(bufp); -} - -/** - * p9_size_wstat - calculate the size of a variable length stat struct - * @wstat: metadata (stat) structure - * @dotu: non-zero if 9P2000.u - * - */ - -static int p9_size_wstat(struct p9_wstat *wstat, int dotu) -{ - int size = 0; - - if (wstat == NULL) { - P9_EPRINTK(KERN_ERR, "p9_size_stat: got a NULL stat pointer\n"); - return 0; - } - - size = /* 2 + *//* size[2] */ - 2 + /* type[2] */ - 4 + /* dev[4] */ - 1 + /* qid.type[1] */ - 4 + /* qid.vers[4] */ - 8 + /* qid.path[8] */ - 4 + /* mode[4] */ - 4 + /* atime[4] */ - 4 + /* mtime[4] */ - 8 + /* length[8] */ - 8; /* minimum sum of string lengths */ - - if (wstat->name) - size += strlen(wstat->name); - if (wstat->uid) - size += strlen(wstat->uid); - if (wstat->gid) - size += strlen(wstat->gid); - if (wstat->muid) - size += strlen(wstat->muid); - - if (dotu) { - size += 4 + /* n_uid[4] */ - 4 + /* n_gid[4] */ - 4 + /* n_muid[4] */ - 2; /* string length of extension[4] */ - if (wstat->extension) - size += strlen(wstat->extension); - } - - return size; -} - -/** - * buf_get_stat - safely decode a recieved metadata (stat) structure - * @bufp: buffer to deserialize - * @stat: metadata (stat) structure - * @dotu: non-zero if 9P2000.u - * - */ - -static void -buf_get_stat(struct cbuf *bufp, struct p9_stat *stat, int dotu) -{ - stat->size = buf_get_int16(bufp); - stat->type = buf_get_int16(bufp); - stat->dev = buf_get_int32(bufp); - stat->qid.type = buf_get_int8(bufp); - stat->qid.version = buf_get_int32(bufp); - stat->qid.path = buf_get_int64(bufp); - stat->mode = buf_get_int32(bufp); - stat->atime = buf_get_int32(bufp); - stat->mtime = buf_get_int32(bufp); - stat->length = buf_get_int64(bufp); - buf_get_str(bufp, &stat->name); - buf_get_str(bufp, &stat->uid); - buf_get_str(bufp, &stat->gid); - buf_get_str(bufp, &stat->muid); - - if (dotu) { - buf_get_str(bufp, &stat->extension); - stat->n_uid = buf_get_int32(bufp); - stat->n_gid = buf_get_int32(bufp); - stat->n_muid = buf_get_int32(bufp); - } -} - -/** - * p9_deserialize_stat - decode a received metadata structure - * @buf: buffer to deserialize - * @buflen: length of received buffer - * @stat: metadata structure to decode into - * @dotu: non-zero if 9P2000.u - * - * Note: stat will point to the buf region. - */ - -int -p9_deserialize_stat(void *buf, u32 buflen, struct p9_stat *stat, - int dotu) -{ - struct cbuf buffer; - struct cbuf *bufp = &buffer; - unsigned char *p; - - buf_init(bufp, buf, buflen); - p = bufp->p; - buf_get_stat(bufp, stat, dotu); - - if (buf_check_overflow(bufp)) - return 0; - else - return bufp->p - p; -} -EXPORT_SYMBOL(p9_deserialize_stat); - -/** - * deserialize_fcall - unmarshal a response - * @buf: recieved buffer - * @buflen: length of received buffer - * @rcall: fcall structure to populate - * @rcalllen: length of fcall structure to populate - * @dotu: non-zero if 9P2000.u - * - */ - -int -p9_deserialize_fcall(void *buf, u32 buflen, struct p9_fcall *rcall, - int dotu) -{ - - struct cbuf buffer; - struct cbuf *bufp = &buffer; - int i = 0; - - buf_init(bufp, buf, buflen); - - rcall->size = buf_get_int32(bufp); - rcall->id = buf_get_int8(bufp); - rcall->tag = buf_get_int16(bufp); - - P9_DPRINTK(P9_DEBUG_CONV, "size %d id %d tag %d\n", rcall->size, - rcall->id, rcall->tag); - - switch (rcall->id) { - default: - P9_EPRINTK(KERN_ERR, "unknown message type: %d\n", rcall->id); - return -EPROTO; - case P9_RVERSION: - rcall->params.rversion.msize = buf_get_int32(bufp); - buf_get_str(bufp, &rcall->params.rversion.version); - break; - case P9_RFLUSH: - break; - case P9_RATTACH: - rcall->params.rattach.qid.type = buf_get_int8(bufp); - rcall->params.rattach.qid.version = buf_get_int32(bufp); - rcall->params.rattach.qid.path = buf_get_int64(bufp); - break; - case P9_RWALK: - rcall->params.rwalk.nwqid = buf_get_int16(bufp); - if (rcall->params.rwalk.nwqid > P9_MAXWELEM) { - P9_EPRINTK(KERN_ERR, - "Rwalk with more than %d qids: %d\n", - P9_MAXWELEM, rcall->params.rwalk.nwqid); - return -EPROTO; - } - - for (i = 0; i < rcall->params.rwalk.nwqid; i++) - buf_get_qid(bufp, &rcall->params.rwalk.wqids[i]); - break; - case P9_ROPEN: - buf_get_qid(bufp, &rcall->params.ropen.qid); - rcall->params.ropen.iounit = buf_get_int32(bufp); - break; - case P9_RCREATE: - buf_get_qid(bufp, &rcall->params.rcreate.qid); - rcall->params.rcreate.iounit = buf_get_int32(bufp); - break; - case P9_RREAD: - rcall->params.rread.count = buf_get_int32(bufp); - rcall->params.rread.data = bufp->p; - buf_check_size(bufp, rcall->params.rread.count); - break; - case P9_RWRITE: - rcall->params.rwrite.count = buf_get_int32(bufp); - break; - case P9_RCLUNK: - break; - case P9_RREMOVE: - break; - case P9_RSTAT: - buf_get_int16(bufp); - buf_get_stat(bufp, &rcall->params.rstat.stat, dotu); - break; - case P9_RWSTAT: - break; - case P9_RERROR: - buf_get_str(bufp, &rcall->params.rerror.error); - if (dotu) - rcall->params.rerror.errno = buf_get_int16(bufp); - break; - } - - if (buf_check_overflow(bufp)) { - P9_DPRINTK(P9_DEBUG_ERROR, "buffer overflow\n"); - return -EIO; - } - - return bufp->p - bufp->sp; -} -EXPORT_SYMBOL(p9_deserialize_fcall); - -static inline void p9_put_int8(struct cbuf *bufp, u8 val, u8 * p) -{ - *p = val; - buf_put_int8(bufp, val); -} - -static inline void p9_put_int16(struct cbuf *bufp, u16 val, u16 * p) -{ - *p = val; - buf_put_int16(bufp, val); -} - -static inline void p9_put_int32(struct cbuf *bufp, u32 val, u32 * p) -{ - *p = val; - buf_put_int32(bufp, val); -} - -static inline void p9_put_int64(struct cbuf *bufp, u64 val, u64 * p) -{ - *p = val; - buf_put_int64(bufp, val); -} - -static void -p9_put_str(struct cbuf *bufp, char *data, struct p9_str *str) -{ - int len; - char *s; - - if (data) - len = strlen(data); - else - len = 0; - - s = buf_put_stringn(bufp, data, len); - if (str) { - str->len = len; - str->str = s; - } -} - -static int -p9_put_data(struct cbuf *bufp, const char *data, int count, - unsigned char **pdata) -{ - *pdata = buf_alloc(bufp, count); - if (*pdata == NULL) - return -ENOMEM; - memmove(*pdata, data, count); - return 0; -} - -static int -p9_put_user_data(struct cbuf *bufp, const char __user *data, int count, - unsigned char **pdata) -{ - *pdata = buf_alloc(bufp, count); - if (*pdata == NULL) - return -ENOMEM; - return copy_from_user(*pdata, data, count); -} - -static void -p9_put_wstat(struct cbuf *bufp, struct p9_wstat *wstat, - struct p9_stat *stat, int statsz, int dotu) -{ - p9_put_int16(bufp, statsz, &stat->size); - p9_put_int16(bufp, wstat->type, &stat->type); - p9_put_int32(bufp, wstat->dev, &stat->dev); - p9_put_int8(bufp, wstat->qid.type, &stat->qid.type); - p9_put_int32(bufp, wstat->qid.version, &stat->qid.version); - p9_put_int64(bufp, wstat->qid.path, &stat->qid.path); - p9_put_int32(bufp, wstat->mode, &stat->mode); - p9_put_int32(bufp, wstat->atime, &stat->atime); - p9_put_int32(bufp, wstat->mtime, &stat->mtime); - p9_put_int64(bufp, wstat->length, &stat->length); - - p9_put_str(bufp, wstat->name, &stat->name); - p9_put_str(bufp, wstat->uid, &stat->uid); - p9_put_str(bufp, wstat->gid, &stat->gid); - p9_put_str(bufp, wstat->muid, &stat->muid); - - if (dotu) { - p9_put_str(bufp, wstat->extension, &stat->extension); - p9_put_int32(bufp, wstat->n_uid, &stat->n_uid); - p9_put_int32(bufp, wstat->n_gid, &stat->n_gid); - p9_put_int32(bufp, wstat->n_muid, &stat->n_muid); - } -} - -static struct p9_fcall * -p9_create_common(struct cbuf *bufp, u32 size, u8 id) -{ - struct p9_fcall *fc; - - size += 4 + 1 + 2; /* size[4] id[1] tag[2] */ - fc = kmalloc(sizeof(struct p9_fcall) + size, GFP_KERNEL); - if (!fc) - return ERR_PTR(-ENOMEM); - - fc->sdata = (char *)fc + sizeof(*fc); - - buf_init(bufp, (char *)fc->sdata, size); - p9_put_int32(bufp, size, &fc->size); - p9_put_int8(bufp, id, &fc->id); - p9_put_int16(bufp, P9_NOTAG, &fc->tag); - - return fc; -} - -/** - * p9_set_tag - set the tag field of an &p9_fcall structure - * @fc: fcall structure to set tag within - * @tag: tag id to set - */ - -void p9_set_tag(struct p9_fcall *fc, u16 tag) -{ - fc->tag = tag; - *(__le16 *) (fc->sdata + 5) = cpu_to_le16(tag); -} -EXPORT_SYMBOL(p9_set_tag); - -/** - * p9_create_tversion - allocates and creates a T_VERSION request - * @msize: requested maximum data size - * @version: version string to negotiate - * - */ -struct p9_fcall *p9_create_tversion(u32 msize, char *version) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 2 + strlen(version); /* msize[4] version[s] */ - fc = p9_create_common(bufp, size, P9_TVERSION); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, msize, &fc->params.tversion.msize); - p9_put_str(bufp, version, &fc->params.tversion.version); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tversion); - -/** - * p9_create_tauth - allocates and creates a T_AUTH request - * @afid: handle to use for authentication protocol - * @uname: user name attempting to authenticate - * @aname: mount specifier for remote server - * @n_uname: numeric id for user attempting to authneticate - * @dotu: 9P2000.u extension flag - * - */ - -struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname, - u32 n_uname, int dotu) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - /* afid[4] uname[s] aname[s] */ - size = 4 + 2 + 2; - if (uname) - size += strlen(uname); - - if (aname) - size += strlen(aname); - - if (dotu) - size += 4; /* n_uname */ - - fc = p9_create_common(bufp, size, P9_TAUTH); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, afid, &fc->params.tauth.afid); - p9_put_str(bufp, uname, &fc->params.tauth.uname); - p9_put_str(bufp, aname, &fc->params.tauth.aname); - if (dotu) - p9_put_int32(bufp, n_uname, &fc->params.tauth.n_uname); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tauth); - -/** - * p9_create_tattach - allocates and creates a T_ATTACH request - * @fid: handle to use for the new mount point - * @afid: handle to use for authentication protocol - * @uname: user name attempting to attach - * @aname: mount specifier for remote server - * @n_uname: numeric id for user attempting to attach - * @n_uname: numeric id for user attempting to attach - * @dotu: 9P2000.u extension flag - * - */ - -struct p9_fcall * -p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname, - u32 n_uname, int dotu) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - /* fid[4] afid[4] uname[s] aname[s] */ - size = 4 + 4 + 2 + 2; - if (uname) - size += strlen(uname); - - if (aname) - size += strlen(aname); - - if (dotu) - size += 4; /* n_uname */ - - fc = p9_create_common(bufp, size, P9_TATTACH); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tattach.fid); - p9_put_int32(bufp, afid, &fc->params.tattach.afid); - p9_put_str(bufp, uname, &fc->params.tattach.uname); - p9_put_str(bufp, aname, &fc->params.tattach.aname); - if (dotu) - p9_put_int32(bufp, n_uname, &fc->params.tattach.n_uname); - -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tattach); - -/** - * p9_create_tflush - allocates and creates a T_FLUSH request - * @oldtag: tag id for the transaction we are attempting to cancel - * - */ - -struct p9_fcall *p9_create_tflush(u16 oldtag) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 2; /* oldtag[2] */ - fc = p9_create_common(bufp, size, P9_TFLUSH); - if (IS_ERR(fc)) - goto error; - - p9_put_int16(bufp, oldtag, &fc->params.tflush.oldtag); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tflush); - -/** - * p9_create_twalk - allocates and creates a T_FLUSH request - * @fid: handle we are traversing from - * @newfid: a new handle for this transaction - * @nwname: number of path elements to traverse - * @wnames: array of path elements - * - */ - -struct p9_fcall *p9_create_twalk(u32 fid, u32 newfid, u16 nwname, - char **wnames) -{ - int i, size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - if (nwname > P9_MAXWELEM) { - P9_DPRINTK(P9_DEBUG_ERROR, "nwname > %d\n", P9_MAXWELEM); - return NULL; - } - - size = 4 + 4 + 2; /* fid[4] newfid[4] nwname[2] ... */ - for (i = 0; i < nwname; i++) { - size += 2 + strlen(wnames[i]); /* wname[s] */ - } - - fc = p9_create_common(bufp, size, P9_TWALK); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.twalk.fid); - p9_put_int32(bufp, newfid, &fc->params.twalk.newfid); - p9_put_int16(bufp, nwname, &fc->params.twalk.nwname); - for (i = 0; i < nwname; i++) { - p9_put_str(bufp, wnames[i], &fc->params.twalk.wnames[i]); - } - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_twalk); - -/** - * p9_create_topen - allocates and creates a T_OPEN request - * @fid: handle we are trying to open - * @mode: what mode we are trying to open the file in - * - */ - -struct p9_fcall *p9_create_topen(u32 fid, u8 mode) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 1; /* fid[4] mode[1] */ - fc = p9_create_common(bufp, size, P9_TOPEN); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.topen.fid); - p9_put_int8(bufp, mode, &fc->params.topen.mode); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_topen); - -/** - * p9_create_tcreate - allocates and creates a T_CREATE request - * @fid: handle of directory we are trying to create in - * @name: name of the file we are trying to create - * @perm: permissions for the file we are trying to create - * @mode: what mode we are trying to open the file in - * @extension: 9p2000.u extension string (for special files) - * @dotu: 9p2000.u enabled flag - * - * Note: Plan 9 create semantics include opening the resulting file - * which is why mode is included. - */ - -struct p9_fcall *p9_create_tcreate(u32 fid, char *name, u32 perm, u8 mode, - char *extension, int dotu) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - /* fid[4] name[s] perm[4] mode[1] */ - size = 4 + 2 + strlen(name) + 4 + 1; - if (dotu) { - size += 2 + /* extension[s] */ - (extension == NULL ? 0 : strlen(extension)); - } - - fc = p9_create_common(bufp, size, P9_TCREATE); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tcreate.fid); - p9_put_str(bufp, name, &fc->params.tcreate.name); - p9_put_int32(bufp, perm, &fc->params.tcreate.perm); - p9_put_int8(bufp, mode, &fc->params.tcreate.mode); - if (dotu) - p9_put_str(bufp, extension, &fc->params.tcreate.extension); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tcreate); - -/** - * p9_create_tread - allocates and creates a T_READ request - * @fid: handle of the file we are trying to read - * @offset: offset to start reading from - * @count: how many bytes to read - */ - -struct p9_fcall *p9_create_tread(u32 fid, u64 offset, u32 count) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4 + 8 + 4; /* fid[4] offset[8] count[4] */ - fc = p9_create_common(bufp, size, P9_TREAD); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tread.fid); - p9_put_int64(bufp, offset, &fc->params.tread.offset); - p9_put_int32(bufp, count, &fc->params.tread.count); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tread); - -/** - * p9_create_twrite - allocates and creates a T_WRITE request from the kernel - * @fid: handle of the file we are trying to write - * @offset: offset to start writing at - * @count: how many bytes to write - * @data: data to write - * - * This function will create a requst with data buffers from the kernel - * such as the page cache. - */ - -struct p9_fcall *p9_create_twrite(u32 fid, u64 offset, u32 count, - const char *data) -{ - int size, err; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - /* fid[4] offset[8] count[4] data[count] */ - size = 4 + 8 + 4 + count; - fc = p9_create_common(bufp, size, P9_TWRITE); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.twrite.fid); - p9_put_int64(bufp, offset, &fc->params.twrite.offset); - p9_put_int32(bufp, count, &fc->params.twrite.count); - err = p9_put_data(bufp, data, count, &fc->params.twrite.data); - if (err) { - kfree(fc); - fc = ERR_PTR(err); - goto error; - } - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_twrite); - -/** - * p9_create_twrite_u - allocates and creates a T_WRITE request from userspace - * @fid: handle of the file we are trying to write - * @offset: offset to start writing at - * @count: how many bytes to write - * @data: data to write - * - * This function will create a request with data buffers from userspace - */ - -struct p9_fcall *p9_create_twrite_u(u32 fid, u64 offset, u32 count, - const char __user *data) -{ - int size, err; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - /* fid[4] offset[8] count[4] data[count] */ - size = 4 + 8 + 4 + count; - fc = p9_create_common(bufp, size, P9_TWRITE); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.twrite.fid); - p9_put_int64(bufp, offset, &fc->params.twrite.offset); - p9_put_int32(bufp, count, &fc->params.twrite.count); - err = p9_put_user_data(bufp, data, count, &fc->params.twrite.data); - if (err) { - kfree(fc); - fc = ERR_PTR(err); - goto error; - } - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_twrite_u); - -/** - * p9_create_tclunk - allocate a request to forget about a file handle - * @fid: handle of the file we closing or forgetting about - * - * clunk is used both to close open files and to discard transient handles - * which may be created during meta-data operations and hierarchy traversal. - */ - -struct p9_fcall *p9_create_tclunk(u32 fid) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4; /* fid[4] */ - fc = p9_create_common(bufp, size, P9_TCLUNK); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tclunk.fid); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tclunk); - -/** - * p9_create_tremove - allocate and create a request to remove a file - * @fid: handle of the file or directory we are removing - * - */ - -struct p9_fcall *p9_create_tremove(u32 fid) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4; /* fid[4] */ - fc = p9_create_common(bufp, size, P9_TREMOVE); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tremove.fid); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tremove); - -/** - * p9_create_tstat - allocate and populate a request for attributes - * @fid: handle of the file or directory we are trying to get the attributes of - * - */ - -struct p9_fcall *p9_create_tstat(u32 fid) -{ - int size; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - size = 4; /* fid[4] */ - fc = p9_create_common(bufp, size, P9_TSTAT); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.tstat.fid); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_tstat); - -/** - * p9_create_tstat - allocate and populate a request to change attributes - * @fid: handle of the file or directory we are trying to change - * @wstat: &p9_stat structure with attributes we wish to set - * @dotu: 9p2000.u enabled flag - * - */ - -struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat, - int dotu) -{ - int size, statsz; - struct p9_fcall *fc; - struct cbuf buffer; - struct cbuf *bufp = &buffer; - - statsz = p9_size_wstat(wstat, dotu); - size = 4 + 2 + 2 + statsz; /* fid[4] stat[n] */ - fc = p9_create_common(bufp, size, P9_TWSTAT); - if (IS_ERR(fc)) - goto error; - - p9_put_int32(bufp, fid, &fc->params.twstat.fid); - buf_put_int16(bufp, statsz + 2); - p9_put_wstat(bufp, wstat, &fc->params.twstat.stat, statsz, dotu); - - if (buf_check_overflow(bufp)) { - kfree(fc); - fc = ERR_PTR(-ENOMEM); - } -error: - return fc; -} -EXPORT_SYMBOL(p9_create_twstat); - diff --git a/net/9p/fcprint.c b/net/9p/fcprint.c deleted file mode 100644 index 53dd8e28dd8a..000000000000 --- a/net/9p/fcprint.c +++ /dev/null @@ -1,366 +0,0 @@ -/* - * net/9p/fcprint.c - * - * Print 9P call. - * - * Copyright (C) 2005 by Latchesar Ionkov <lucho@ionkov.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to: - * Free Software Foundation - * 51 Franklin Street, Fifth Floor - * Boston, MA 02111-1301 USA - * - */ -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/idr.h> -#include <net/9p/9p.h> - -#ifdef CONFIG_NET_9P_DEBUG - -static int -p9_printqid(char *buf, int buflen, struct p9_qid *q) -{ - int n; - char b[10]; - - n = 0; - if (q->type & P9_QTDIR) - b[n++] = 'd'; - if (q->type & P9_QTAPPEND) - b[n++] = 'a'; - if (q->type & P9_QTAUTH) - b[n++] = 'A'; - if (q->type & P9_QTEXCL) - b[n++] = 'l'; - if (q->type & P9_QTTMP) - b[n++] = 't'; - if (q->type & P9_QTSYMLINK) - b[n++] = 'L'; - b[n] = '\0'; - - return scnprintf(buf, buflen, "(%.16llx %x %s)", - (long long int) q->path, q->version, b); -} - -static int -p9_printperm(char *buf, int buflen, int perm) -{ - int n; - char b[15]; - - n = 0; - if (perm & P9_DMDIR) - b[n++] = 'd'; - if (perm & P9_DMAPPEND) - b[n++] = 'a'; - if (perm & P9_DMAUTH) - b[n++] = 'A'; - if (perm & P9_DMEXCL) - b[n++] = 'l'; - if (perm & P9_DMTMP) - b[n++] = 't'; - if (perm & P9_DMDEVICE) - b[n++] = 'D'; - if (perm & P9_DMSOCKET) - b[n++] = 'S'; - if (perm & P9_DMNAMEDPIPE) - b[n++] = 'P'; - if (perm & P9_DMSYMLINK) - b[n++] = 'L'; - b[n] = '\0'; - - return scnprintf(buf, buflen, "%s%03o", b, perm&077); -} - -static int -p9_printstat(char *buf, int buflen, struct p9_stat *st, int extended) -{ - int n; - - n = scnprintf(buf, buflen, "'%.*s' '%.*s'", st->name.len, - st->name.str, st->uid.len, st->uid.str); - if (extended) - n += scnprintf(buf+n, buflen-n, "(%d)", st->n_uid); - - n += scnprintf(buf+n, buflen-n, " '%.*s'", st->gid.len, st->gid.str); - if (extended) - n += scnprintf(buf+n, buflen-n, "(%d)", st->n_gid); - - n += scnprintf(buf+n, buflen-n, " '%.*s'", st->muid.len, st->muid.str); - if (extended) - n += scnprintf(buf+n, buflen-n, "(%d)", st->n_muid); - - n += scnprintf(buf+n, buflen-n, " q "); - n += p9_printqid(buf+n, buflen-n, &st->qid); - n += scnprintf(buf+n, buflen-n, " m "); - n += p9_printperm(buf+n, buflen-n, st->mode); - n += scnprintf(buf+n, buflen-n, " at %d mt %d l %lld", - st->atime, st->mtime, (long long int) st->length); - - if (extended) - n += scnprintf(buf+n, buflen-n, " ext '%.*s'", - st->extension.len, st->extension.str); - - return n; -} - -static int -p9_dumpdata(char *buf, int buflen, u8 *data, int datalen) -{ - int i, n; - - i = n = 0; - while (i < datalen) { - n += scnprintf(buf + n, buflen - n, "%02x", data[i]); - if (i%4 == 3) - n += scnprintf(buf + n, buflen - n, " "); - if (i%32 == 31) - n += scnprintf(buf + n, buflen - n, "\n"); - - i++; - } - n += scnprintf(buf + n, buflen - n, "\n"); - - return n; -} - -static int -p9_printdata(char *buf, int buflen, u8 *data, int datalen) -{ - return p9_dumpdata(buf, buflen, data, datalen < 16?datalen:16); -} - -/** - * p9_printfcall - decode and print a protocol structure into a buffer - * @buf: buffer to deposit decoded structure into - * @buflen: available space in buffer - * @fc: protocol rpc structure of type &p9_fcall - * @extended: whether or not session is operating with extended protocol - */ - -int -p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended) -{ - int i, ret, type, tag; - - if (!fc) - return scnprintf(buf, buflen, "<NULL>"); - - type = fc->id; - tag = fc->tag; - - ret = 0; - switch (type) { - case P9_TVERSION: - ret += scnprintf(buf+ret, buflen-ret, - "Tversion tag %u msize %u version '%.*s'", tag, - fc->params.tversion.msize, - fc->params.tversion.version.len, - fc->params.tversion.version.str); - break; - - case P9_RVERSION: - ret += scnprintf(buf+ret, buflen-ret, - "Rversion tag %u msize %u version '%.*s'", tag, - fc->params.rversion.msize, - fc->params.rversion.version.len, - fc->params.rversion.version.str); - break; - - case P9_TAUTH: - ret += scnprintf(buf+ret, buflen-ret, - "Tauth tag %u afid %d uname '%.*s' aname '%.*s'", tag, - fc->params.tauth.afid, fc->params.tauth.uname.len, - fc->params.tauth.uname.str, fc->params.tauth.aname.len, - fc->params.tauth.aname.str); - break; - - case P9_RAUTH: - ret += scnprintf(buf+ret, buflen-ret, "Rauth tag %u qid ", tag); - p9_printqid(buf+ret, buflen-ret, &fc->params.rauth.qid); - break; - - case P9_TATTACH: - ret += scnprintf(buf+ret, buflen-ret, - "Tattach tag %u fid %d afid %d uname '%.*s' aname '%.*s'", tag, - fc->params.tattach.fid, fc->params.tattach.afid, - fc->params.tattach.uname.len, fc->params.tattach.uname.str, - fc->params.tattach.aname.len, fc->params.tattach.aname.str); - break; - - case P9_RATTACH: - ret += scnprintf(buf+ret, buflen-ret, "Rattach tag %u qid ", - tag); - p9_printqid(buf+ret, buflen-ret, &fc->params.rattach.qid); - break; - - case P9_RERROR: - ret += scnprintf(buf+ret, buflen-ret, - "Rerror tag %u ename '%.*s'", tag, - fc->params.rerror.error.len, - fc->params.rerror.error.str); - if (extended) - ret += scnprintf(buf+ret, buflen-ret, " ecode %d\n", - fc->params.rerror.errno); - break; - - case P9_TFLUSH: - ret += scnprintf(buf+ret, buflen-ret, "Tflush tag %u oldtag %u", - tag, fc->params.tflush.oldtag); - break; - - case P9_RFLUSH: - ret += scnprintf(buf+ret, buflen-ret, "Rflush tag %u", tag); - break; - - case P9_TWALK: - ret += scnprintf(buf+ret, buflen-ret, - "Twalk tag %u fid %d newfid %d nwname %d", tag, - fc->params.twalk.fid, fc->params.twalk.newfid, - fc->params.twalk.nwname); - for (i = 0; i < fc->params.twalk.nwname; i++) - ret += scnprintf(buf+ret, buflen-ret, " '%.*s'", - fc->params.twalk.wnames[i].len, - fc->params.twalk.wnames[i].str); - break; - - case P9_RWALK: - ret += scnprintf(buf+ret, buflen-ret, "Rwalk tag %u nwqid %d", - tag, fc->params.rwalk.nwqid); - for (i = 0; i < fc->params.rwalk.nwqid; i++) - ret += p9_printqid(buf+ret, buflen-ret, - &fc->params.rwalk.wqids[i]); - break; - - case P9_TOPEN: - ret += scnprintf(buf+ret, buflen-ret, - "Topen tag %u fid %d mode %d", tag, - fc->params.topen.fid, fc->params.topen.mode); - break; - - case P9_ROPEN: - ret += scnprintf(buf+ret, buflen-ret, "Ropen tag %u", tag); - ret += p9_printqid(buf+ret, buflen-ret, &fc->params.ropen.qid); - ret += scnprintf(buf+ret, buflen-ret, " iounit %d", - fc->params.ropen.iounit); - break; - - case P9_TCREATE: - ret += scnprintf(buf+ret, buflen-ret, - "Tcreate tag %u fid %d name '%.*s' perm ", tag, - fc->params.tcreate.fid, fc->params.tcreate.name.len, - fc->params.tcreate.name.str); - - ret += p9_printperm(buf+ret, buflen-ret, - fc->params.tcreate.perm); - ret += scnprintf(buf+ret, buflen-ret, " mode %d", - fc->params.tcreate.mode); - break; - - case P9_RCREATE: - ret += scnprintf(buf+ret, buflen-ret, "Rcreate tag %u", tag); - ret += p9_printqid(buf+ret, buflen-ret, - &fc->params.rcreate.qid); - ret += scnprintf(buf+ret, buflen-ret, " iounit %d", - fc->params.rcreate.iounit); - break; - - case P9_TREAD: - ret += scnprintf(buf+ret, buflen-ret, - "Tread tag %u fid %d offset %lld count %u", tag, - fc->params.tread.fid, - (long long int) fc->params.tread.offset, - fc->params.tread.count); - break; - - case P9_RREAD: - ret += scnprintf(buf+ret, buflen-ret, - "Rread tag %u count %u data ", tag, - fc->params.rread.count); - ret += p9_printdata(buf+ret, buflen-ret, fc->params.rread.data, - fc->params.rread.count); - break; - - case P9_TWRITE: - ret += scnprintf(buf+ret, buflen-ret, - "Twrite tag %u fid %d offset %lld count %u data ", - tag, fc->params.twrite.fid, - (long long int) fc->params.twrite.offset, - fc->params.twrite.count); - ret += p9_printdata(buf+ret, buflen-ret, fc->params.twrite.data, - fc->params.twrite.count); - break; - - case P9_RWRITE: - ret += scnprintf(buf+ret, buflen-ret, "Rwrite tag %u count %u", - tag, fc->params.rwrite.count); - break; - - case P9_TCLUNK: - ret += scnprintf(buf+ret, buflen-ret, "Tclunk tag %u fid %d", - tag, fc->params.tclunk.fid); - break; - - case P9_RCLUNK: - ret += scnprintf(buf+ret, buflen-ret, "Rclunk tag %u", tag); - break; - - case P9_TREMOVE: - ret += scnprintf(buf+ret, buflen-ret, "Tremove tag %u fid %d", - tag, fc->params.tremove.fid); - break; - - case P9_RREMOVE: - ret += scnprintf(buf+ret, buflen-ret, "Rremove tag %u", tag); - break; - - case P9_TSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Tstat tag %u fid %d", - tag, fc->params.tstat.fid); - break; - - case P9_RSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Rstat tag %u ", tag); - ret += p9_printstat(buf+ret, buflen-ret, &fc->params.rstat.stat, - extended); - break; - - case P9_TWSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Twstat tag %u fid %d ", - tag, fc->params.twstat.fid); - ret += p9_printstat(buf+ret, buflen-ret, - &fc->params.twstat.stat, extended); - break; - - case P9_RWSTAT: - ret += scnprintf(buf+ret, buflen-ret, "Rwstat tag %u", tag); - break; - - default: - ret += scnprintf(buf+ret, buflen-ret, "unknown type %d", type); - break; - } - - return ret; -} -#else -int -p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended) -{ - return 0; -} -#endif /* CONFIG_NET_9P_DEBUG */ -EXPORT_SYMBOL(p9_printfcall); - diff --git a/net/9p/mod.c b/net/9p/mod.c index 1084feb24cb0..cf8a4128cd5c 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -29,6 +29,7 @@ #include <net/9p/9p.h> #include <linux/fs.h> #include <linux/parser.h> +#include <net/9p/client.h> #include <net/9p/transport.h> #include <linux/list.h> #include <linux/spinlock.h> diff --git a/net/9p/protocol.c b/net/9p/protocol.c new file mode 100644 index 000000000000..dcd7666824ba --- /dev/null +++ b/net/9p/protocol.c @@ -0,0 +1,567 @@ +/* + * net/9p/protocol.c + * + * 9P Protocol Support Code + * + * Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com> + * + * Base on code from Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2008 by IBM, Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/uaccess.h> +#include <linux/sched.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include "protocol.h" + +#ifndef MIN +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif + +#ifndef MAX +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#endif + +#ifndef offset_of +#define offset_of(type, memb) \ + ((unsigned long)(&((type *)0)->memb)) +#endif +#ifndef container_of +#define container_of(obj, type, memb) \ + ((type *)(((char *)obj) - offset_of(type, memb))) +#endif + +static int +p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...); + +#ifdef CONFIG_NET_9P_DEBUG +void +p9pdu_dump(int way, struct p9_fcall *pdu) +{ + int i, n; + u8 *data = pdu->sdata; + int datalen = pdu->size; + char buf[255]; + int buflen = 255; + + i = n = 0; + if (datalen > (buflen-16)) + datalen = buflen-16; + while (i < datalen) { + n += scnprintf(buf + n, buflen - n, "%02x ", data[i]); + if (i%4 == 3) + n += scnprintf(buf + n, buflen - n, " "); + if (i%32 == 31) + n += scnprintf(buf + n, buflen - n, "\n"); + + i++; + } + n += scnprintf(buf + n, buflen - n, "\n"); + + if (way) + P9_DPRINTK(P9_DEBUG_PKT, "[[[(%d) %s\n", datalen, buf); + else + P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf); +} +#else +void +p9pdu_dump(int way, struct p9_fcall *pdu) +{ +} +#endif +EXPORT_SYMBOL(p9pdu_dump); + +void p9stat_free(struct p9_wstat *stbuf) +{ + kfree(stbuf->name); + kfree(stbuf->uid); + kfree(stbuf->gid); + kfree(stbuf->muid); + kfree(stbuf->extension); +} +EXPORT_SYMBOL(p9stat_free); + +static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) +{ + size_t len = MIN(pdu->size - pdu->offset, size); + memcpy(data, &pdu->sdata[pdu->offset], len); + pdu->offset += len; + return size - len; +} + +static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size) +{ + size_t len = MIN(pdu->capacity - pdu->size, size); + memcpy(&pdu->sdata[pdu->size], data, len); + pdu->size += len; + return size - len; +} + +static size_t +pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) +{ + size_t len = MIN(pdu->capacity - pdu->size, size); + int err = copy_from_user(&pdu->sdata[pdu->size], udata, len); + if (err) + printk(KERN_WARNING "pdu_write_u returning: %d\n", err); + + pdu->size += len; + return size - len; +} + +/* + b - int8_t + w - int16_t + d - int32_t + q - int64_t + s - string + S - stat + Q - qid + D - data blob (int32_t size followed by void *, results are not freed) + T - array of strings (int16_t count, followed by strings) + R - array of qids (int16_t count, followed by qids) + ? - if optional = 1, continue parsing +*/ + +static int +p9pdu_vreadf(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) +{ + const char *ptr; + int errcode = 0; + + for (ptr = fmt; *ptr; ptr++) { + switch (*ptr) { + case 'b':{ + int8_t *val = va_arg(ap, int8_t *); + if (pdu_read(pdu, val, sizeof(*val))) { + errcode = -EFAULT; + break; + } + } + break; + case 'w':{ + int16_t *val = va_arg(ap, int16_t *); + if (pdu_read(pdu, val, sizeof(*val))) { + errcode = -EFAULT; + break; + } + *val = cpu_to_le16(*val); + } + break; + case 'd':{ + int32_t *val = va_arg(ap, int32_t *); + if (pdu_read(pdu, val, sizeof(*val))) { + errcode = -EFAULT; + break; + } + *val = cpu_to_le32(*val); + } + break; + case 'q':{ + int64_t *val = va_arg(ap, int64_t *); + if (pdu_read(pdu, val, sizeof(*val))) { + errcode = -EFAULT; + break; + } + *val = cpu_to_le64(*val); + } + break; + case 's':{ + char **sptr = va_arg(ap, char **); + int16_t len; + int size; + + errcode = p9pdu_readf(pdu, optional, "w", &len); + if (errcode) + break; + + size = MAX(len, 0); + + *sptr = kmalloc(size + 1, GFP_KERNEL); + if (*sptr == NULL) { + errcode = -EFAULT; + break; + } + if (pdu_read(pdu, *sptr, size)) { + errcode = -EFAULT; + kfree(*sptr); + *sptr = NULL; + } else + (*sptr)[size] = 0; + } + break; + case 'Q':{ + struct p9_qid *qid = + va_arg(ap, struct p9_qid *); + + errcode = p9pdu_readf(pdu, optional, "bdq", + &qid->type, &qid->version, + &qid->path); + } + break; + case 'S':{ + struct p9_wstat *stbuf = + va_arg(ap, struct p9_wstat *); + + memset(stbuf, 0, sizeof(struct p9_wstat)); + stbuf->n_uid = stbuf->n_gid = stbuf->n_muid = + -1; + errcode = + p9pdu_readf(pdu, optional, + "wwdQdddqssss?sddd", + &stbuf->size, &stbuf->type, + &stbuf->dev, &stbuf->qid, + &stbuf->mode, &stbuf->atime, + &stbuf->mtime, &stbuf->length, + &stbuf->name, &stbuf->uid, + &stbuf->gid, &stbuf->muid, + &stbuf->extension, + &stbuf->n_uid, &stbuf->n_gid, + &stbuf->n_muid); + if (errcode) + p9stat_free(stbuf); + } + break; + case 'D':{ + int32_t *count = va_arg(ap, int32_t *); + void **data = va_arg(ap, void **); + + errcode = + p9pdu_readf(pdu, optional, "d", count); + if (!errcode) { + *count = + MIN(*count, + pdu->size - pdu->offset); + *data = &pdu->sdata[pdu->offset]; + } + } + break; + case 'T':{ + int16_t *nwname = va_arg(ap, int16_t *); + char ***wnames = va_arg(ap, char ***); + + errcode = + p9pdu_readf(pdu, optional, "w", nwname); + if (!errcode) { + *wnames = + kmalloc(sizeof(char *) * *nwname, + GFP_KERNEL); + if (!*wnames) + errcode = -ENOMEM; + } + + if (!errcode) { + int i; + + for (i = 0; i < *nwname; i++) { + errcode = + p9pdu_readf(pdu, optional, + "s", + &(*wnames)[i]); + if (errcode) + break; + } + } + + if (errcode) { + if (*wnames) { + int i; + + for (i = 0; i < *nwname; i++) + kfree((*wnames)[i]); + } + kfree(*wnames); + *wnames = NULL; + } + } + break; + case 'R':{ + int16_t *nwqid = va_arg(ap, int16_t *); + struct p9_qid **wqids = + va_arg(ap, struct p9_qid **); + + *wqids = NULL; + + errcode = + p9pdu_readf(pdu, optional, "w", nwqid); + if (!errcode) { + *wqids = + kmalloc(*nwqid * + sizeof(struct p9_qid), + GFP_KERNEL); + if (*wqids == NULL) + errcode = -ENOMEM; + } + + if (!errcode) { + int i; + + for (i = 0; i < *nwqid; i++) { + errcode = + p9pdu_readf(pdu, optional, + "Q", + &(*wqids)[i]); + if (errcode) + break; + } + } + + if (errcode) { + kfree(*wqids); + *wqids = NULL; + } + } + break; + case '?': + if (!optional) + return 0; + break; + default: + BUG(); + break; + } + + if (errcode) + break; + } + + return errcode; +} + +int +p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap) +{ + const char *ptr; + int errcode = 0; + + for (ptr = fmt; *ptr; ptr++) { + switch (*ptr) { + case 'b':{ + int8_t val = va_arg(ap, int); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 'w':{ + int16_t val = va_arg(ap, int); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 'd':{ + int32_t val = va_arg(ap, int32_t); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 'q':{ + int64_t val = va_arg(ap, int64_t); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } + break; + case 's':{ + const char *sptr = va_arg(ap, const char *); + int16_t len = 0; + if (sptr) + len = MIN(strlen(sptr), USHORT_MAX); + + errcode = p9pdu_writef(pdu, optional, "w", len); + if (!errcode && pdu_write(pdu, sptr, len)) + errcode = -EFAULT; + } + break; + case 'Q':{ + const struct p9_qid *qid = + va_arg(ap, const struct p9_qid *); + errcode = + p9pdu_writef(pdu, optional, "bdq", + qid->type, qid->version, + qid->path); + } break; + case 'S':{ + const struct p9_wstat *stbuf = + va_arg(ap, const struct p9_wstat *); + errcode = + p9pdu_writef(pdu, optional, + "wwdQdddqssss?sddd", + stbuf->size, stbuf->type, + stbuf->dev, &stbuf->qid, + stbuf->mode, stbuf->atime, + stbuf->mtime, stbuf->length, + stbuf->name, stbuf->uid, + stbuf->gid, stbuf->muid, + stbuf->extension, stbuf->n_uid, + stbuf->n_gid, stbuf->n_muid); + } break; + case 'D':{ + int32_t count = va_arg(ap, int32_t); + const void *data = va_arg(ap, const void *); + + errcode = + p9pdu_writef(pdu, optional, "d", count); + if (!errcode && pdu_write(pdu, data, count)) + errcode = -EFAULT; + } + break; + case 'U':{ + int32_t count = va_arg(ap, int32_t); + const char __user *udata = + va_arg(ap, const void __user *); + errcode = + p9pdu_writef(pdu, optional, "d", count); + if (!errcode && pdu_write_u(pdu, udata, count)) + errcode = -EFAULT; + } + break; + case 'T':{ + int16_t nwname = va_arg(ap, int); + const char **wnames = va_arg(ap, const char **); + + errcode = + p9pdu_writef(pdu, optional, "w", nwname); + if (!errcode) { + int i; + + for (i = 0; i < nwname; i++) { + errcode = + p9pdu_writef(pdu, optional, + "s", + wnames[i]); + if (errcode) + break; + } + } + } + break; + case 'R':{ + int16_t nwqid = va_arg(ap, int); + struct p9_qid *wqids = + va_arg(ap, struct p9_qid *); + + errcode = + p9pdu_writef(pdu, optional, "w", nwqid); + if (!errcode) { + int i; + + for (i = 0; i < nwqid; i++) { + errcode = + p9pdu_writef(pdu, optional, + "Q", + &wqids[i]); + if (errcode) + break; + } + } + } + break; + case '?': + if (!optional) + return 0; + break; + default: + BUG(); + break; + } + + if (errcode) + break; + } + + return errcode; +} + +int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = p9pdu_vreadf(pdu, optional, fmt, ap); + va_end(ap); + + return ret; +} + +static int +p9pdu_writef(struct p9_fcall *pdu, int optional, const char *fmt, ...) +{ + va_list ap; + int ret; + + va_start(ap, fmt); + ret = p9pdu_vwritef(pdu, optional, fmt, ap); + va_end(ap); + + return ret; +} + +int p9stat_read(char *buf, int len, struct p9_wstat *st, int dotu) +{ + struct p9_fcall fake_pdu; + int ret; + + fake_pdu.size = len; + fake_pdu.capacity = len; + fake_pdu.sdata = buf; + fake_pdu.offset = 0; + + ret = p9pdu_readf(&fake_pdu, dotu, "S", st); + if (ret) { + P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); + p9pdu_dump(1, &fake_pdu); + } + + return ret; +} +EXPORT_SYMBOL(p9stat_read); + +int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type) +{ + return p9pdu_writef(pdu, 0, "dbw", 0, type, tag); +} + +int p9pdu_finalize(struct p9_fcall *pdu) +{ + int size = pdu->size; + int err; + + pdu->size = 0; + err = p9pdu_writef(pdu, 0, "d", size); + pdu->size = size; + +#ifdef CONFIG_NET_9P_DEBUG + if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT) + p9pdu_dump(0, pdu); +#endif + + P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size, + pdu->id, pdu->tag); + + return err; +} + +void p9pdu_reset(struct p9_fcall *pdu) +{ + pdu->offset = 0; + pdu->size = 0; +} diff --git a/net/9p/protocol.h b/net/9p/protocol.h new file mode 100644 index 000000000000..ccde462e7ac5 --- /dev/null +++ b/net/9p/protocol.h @@ -0,0 +1,34 @@ +/* + * net/9p/protocol.h + * + * 9P Protocol Support Code + * + * Copyright (C) 2008 by Eric Van Hensbergen <ericvh@gmail.com> + * + * Base on code from Anthony Liguori <aliguori@us.ibm.com> + * Copyright (C) 2008 by IBM, Corp. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +int +p9pdu_vwritef(struct p9_fcall *pdu, int optional, const char *fmt, va_list ap); +int p9pdu_readf(struct p9_fcall *pdu, int optional, const char *fmt, ...); +int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); +int p9pdu_finalize(struct p9_fcall *pdu); +void p9pdu_dump(int, struct p9_fcall *); +void p9pdu_reset(struct p9_fcall *pdu); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index d652baf5ff91..1df0356f242b 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -39,12 +39,11 @@ #include <linux/file.h> #include <linux/parser.h> #include <net/9p/9p.h> +#include <net/9p/client.h> #include <net/9p/transport.h> #define P9_PORT 564 #define MAX_SOCK_BUF (64*1024) -#define ERREQFLUSH 1 -#define SCHED_TIMEOUT 10 #define MAXPOLLWADDR 2 /** @@ -61,7 +60,6 @@ struct p9_fd_opts { u16 port; }; - /** * struct p9_trans_fd - transport state * @rd: reference to file to read from @@ -86,7 +84,7 @@ enum { Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, }; -static match_table_t tokens = { +static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, @@ -100,60 +98,22 @@ enum { Wpending = 8, /* can write */ }; -enum { - None, - Flushing, - Flushed, -}; - -struct p9_req; -typedef void (*p9_conn_req_callback)(struct p9_req *req, void *a); - -/** - * struct p9_req - fd mux encoding of an rpc transaction - * @lock: protects req_list - * @tag: numeric tag for rpc transaction - * @tcall: request &p9_fcall structure - * @rcall: response &p9_fcall structure - * @err: error state - * @cb: callback for when response is received - * @cba: argument to pass to callback - * @flush: flag to indicate RPC has been flushed - * @req_list: list link for higher level objects to chain requests - * - */ - -struct p9_req { - spinlock_t lock; - int tag; - struct p9_fcall *tcall; - struct p9_fcall *rcall; - int err; - p9_conn_req_callback cb; - void *cba; - int flush; - struct list_head req_list; -}; - -struct p9_mux_poll_task { - struct task_struct *task; - struct list_head mux_list; - int muxnum; +struct p9_poll_wait { + struct p9_conn *conn; + wait_queue_t wait; + wait_queue_head_t *wait_addr; }; /** * struct p9_conn - fd mux connection state information - * @lock: protects mux_list (?) * @mux_list: list link for mux to manage multiple connections (?) - * @poll_task: task polling on this connection - * @msize: maximum size for connection (dup) - * @extended: 9p2000.u flag (dup) - * @trans: reference to transport instance for this connection - * @tagpool: id accounting for transactions + * @client: reference to client instance for this connection * @err: error state * @req_list: accounting for requests which have been sent * @unsent_req_list: accounting for requests that haven't been sent - * @rcall: current response &p9_fcall structure + * @req: current request being processed (if any) + * @tmp_buf: temporary buffer to read in header + * @rsize: amount to read for current frame * @rpos: read position in current frame * @rbuf: current read buffer * @wpos: write position for current frame @@ -169,409 +129,300 @@ struct p9_mux_poll_task { */ struct p9_conn { - spinlock_t lock; /* protect lock structure */ struct list_head mux_list; - struct p9_mux_poll_task *poll_task; - int msize; - unsigned char extended; - struct p9_trans *trans; - struct p9_idpool *tagpool; + struct p9_client *client; int err; struct list_head req_list; struct list_head unsent_req_list; - struct p9_fcall *rcall; + struct p9_req_t *req; + char tmp_buf[7]; + int rsize; int rpos; char *rbuf; int wpos; int wsize; char *wbuf; - wait_queue_t poll_wait[MAXPOLLWADDR]; - wait_queue_head_t *poll_waddr[MAXPOLLWADDR]; + struct list_head poll_pending_link; + struct p9_poll_wait poll_wait[MAXPOLLWADDR]; poll_table pt; struct work_struct rq; struct work_struct wq; unsigned long wsched; }; -/** - * struct p9_mux_rpc - fd mux rpc accounting structure - * @m: connection this request was issued on - * @err: error state - * @tcall: request &p9_fcall - * @rcall: response &p9_fcall - * @wqueue: wait queue that client is blocked on for this rpc - * - * Bug: isn't this information duplicated elsewhere like &p9_req - */ - -struct p9_mux_rpc { - struct p9_conn *m; - int err; - struct p9_fcall *tcall; - struct p9_fcall *rcall; - wait_queue_head_t wqueue; -}; - -static int p9_poll_proc(void *); -static void p9_read_work(struct work_struct *work); -static void p9_write_work(struct work_struct *work); -static void p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, - poll_table *p); -static int p9_fd_write(struct p9_trans *trans, void *v, int len); -static int p9_fd_read(struct p9_trans *trans, void *v, int len); - -static DEFINE_MUTEX(p9_mux_task_lock); +static DEFINE_SPINLOCK(p9_poll_lock); +static LIST_HEAD(p9_poll_pending_list); static struct workqueue_struct *p9_mux_wq; +static struct task_struct *p9_poll_task; -static int p9_mux_num; -static int p9_mux_poll_task_num; -static struct p9_mux_poll_task p9_mux_poll_tasks[100]; - -static void p9_conn_destroy(struct p9_conn *); -static unsigned int p9_fd_poll(struct p9_trans *trans, - struct poll_table_struct *pt); - -#ifdef P9_NONBLOCK -static int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, - p9_conn_req_callback cb, void *a); -#endif /* P9_NONBLOCK */ - -static void p9_conn_cancel(struct p9_conn *m, int err); - -static u16 p9_mux_get_tag(struct p9_conn *m) +static void p9_mux_poll_stop(struct p9_conn *m) { - int tag; + unsigned long flags; + int i; - tag = p9_idpool_get(m->tagpool); - if (tag < 0) - return P9_NOTAG; - else - return (u16) tag; -} + for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { + struct p9_poll_wait *pwait = &m->poll_wait[i]; -static void p9_mux_put_tag(struct p9_conn *m, u16 tag) -{ - if (tag != P9_NOTAG && p9_idpool_check(tag, m->tagpool)) - p9_idpool_put(tag, m->tagpool); + if (pwait->wait_addr) { + remove_wait_queue(pwait->wait_addr, &pwait->wait); + pwait->wait_addr = NULL; + } + } + + spin_lock_irqsave(&p9_poll_lock, flags); + list_del_init(&m->poll_pending_link); + spin_unlock_irqrestore(&p9_poll_lock, flags); } /** - * p9_mux_calc_poll_procs - calculates the number of polling procs - * @muxnum: number of mounts + * p9_conn_cancel - cancel all pending requests with error + * @m: mux data + * @err: error code * - * Calculation is based on the number of mounted v9fs filesystems. - * The current implementation returns sqrt of the number of mounts. */ -static int p9_mux_calc_poll_procs(int muxnum) +static void p9_conn_cancel(struct p9_conn *m, int err) { - int n; - - if (p9_mux_poll_task_num) - n = muxnum / p9_mux_poll_task_num + - (muxnum % p9_mux_poll_task_num ? 1 : 0); - else - n = 1; - - if (n > ARRAY_SIZE(p9_mux_poll_tasks)) - n = ARRAY_SIZE(p9_mux_poll_tasks); - - return n; -} + struct p9_req_t *req, *rtmp; + unsigned long flags; + LIST_HEAD(cancel_list); -static int p9_mux_poll_start(struct p9_conn *m) -{ - int i, n; - struct p9_mux_poll_task *vpt, *vptlast; - struct task_struct *pproc; - - P9_DPRINTK(P9_DEBUG_MUX, "mux %p muxnum %d procnum %d\n", m, p9_mux_num, - p9_mux_poll_task_num); - mutex_lock(&p9_mux_task_lock); - - n = p9_mux_calc_poll_procs(p9_mux_num + 1); - if (n > p9_mux_poll_task_num) { - for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) { - if (p9_mux_poll_tasks[i].task == NULL) { - vpt = &p9_mux_poll_tasks[i]; - P9_DPRINTK(P9_DEBUG_MUX, "create proc %p\n", - vpt); - pproc = kthread_create(p9_poll_proc, vpt, - "v9fs-poll"); - - if (!IS_ERR(pproc)) { - vpt->task = pproc; - INIT_LIST_HEAD(&vpt->mux_list); - vpt->muxnum = 0; - p9_mux_poll_task_num++; - wake_up_process(vpt->task); - } - break; - } - } + P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); - if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) - P9_DPRINTK(P9_DEBUG_ERROR, - "warning: no free poll slots\n"); - } + spin_lock_irqsave(&m->client->lock, flags); - n = (p9_mux_num + 1) / p9_mux_poll_task_num + - ((p9_mux_num + 1) % p9_mux_poll_task_num ? 1 : 0); - - vptlast = NULL; - for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) { - vpt = &p9_mux_poll_tasks[i]; - if (vpt->task != NULL) { - vptlast = vpt; - if (vpt->muxnum < n) { - P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i); - list_add(&m->mux_list, &vpt->mux_list); - vpt->muxnum++; - m->poll_task = vpt; - memset(&m->poll_waddr, 0, - sizeof(m->poll_waddr)); - init_poll_funcptr(&m->pt, p9_pollwait); - break; - } - } + if (m->err) { + spin_unlock_irqrestore(&m->client->lock, flags); + return; } - if (i >= ARRAY_SIZE(p9_mux_poll_tasks)) { - if (vptlast == NULL) { - mutex_unlock(&p9_mux_task_lock); - return -ENOMEM; - } + m->err = err; - P9_DPRINTK(P9_DEBUG_MUX, "put in proc %d\n", i); - list_add(&m->mux_list, &vptlast->mux_list); - vptlast->muxnum++; - m->poll_task = vptlast; - memset(&m->poll_waddr, 0, sizeof(m->poll_waddr)); - init_poll_funcptr(&m->pt, p9_pollwait); + list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { + req->status = REQ_STATUS_ERROR; + if (!req->t_err) + req->t_err = err; + list_move(&req->req_list, &cancel_list); } - - p9_mux_num++; - mutex_unlock(&p9_mux_task_lock); - - return 0; -} - -static void p9_mux_poll_stop(struct p9_conn *m) -{ - int i; - struct p9_mux_poll_task *vpt; - - mutex_lock(&p9_mux_task_lock); - vpt = m->poll_task; - list_del(&m->mux_list); - for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { - if (m->poll_waddr[i] != NULL) { - remove_wait_queue(m->poll_waddr[i], &m->poll_wait[i]); - m->poll_waddr[i] = NULL; - } + list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { + req->status = REQ_STATUS_ERROR; + if (!req->t_err) + req->t_err = err; + list_move(&req->req_list, &cancel_list); } - vpt->muxnum--; - if (!vpt->muxnum) { - P9_DPRINTK(P9_DEBUG_MUX, "destroy proc %p\n", vpt); - kthread_stop(vpt->task); - vpt->task = NULL; - p9_mux_poll_task_num--; + spin_unlock_irqrestore(&m->client->lock, flags); + + list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { + list_del(&req->req_list); + P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req); + p9_client_cb(m->client, req); } - p9_mux_num--; - mutex_unlock(&p9_mux_task_lock); } -/** - * p9_conn_create - allocate and initialize the per-session mux data - * @trans: transport structure - * - * Note: Creates the polling task if this is the first session. - */ - -static struct p9_conn *p9_conn_create(struct p9_trans *trans) +static unsigned int +p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt) { - int i, n; - struct p9_conn *m; + int ret, n; + struct p9_trans_fd *ts = NULL; - P9_DPRINTK(P9_DEBUG_MUX, "transport %p msize %d\n", trans, - trans->msize); - m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL); - if (!m) - return ERR_PTR(-ENOMEM); + if (client && client->status == Connected) + ts = client->trans; - spin_lock_init(&m->lock); - INIT_LIST_HEAD(&m->mux_list); - m->msize = trans->msize; - m->extended = trans->extended; - m->trans = trans; - m->tagpool = p9_idpool_create(); - if (IS_ERR(m->tagpool)) { - kfree(m); - return ERR_PTR(-ENOMEM); - } + if (!ts) + return -EREMOTEIO; - INIT_LIST_HEAD(&m->req_list); - INIT_LIST_HEAD(&m->unsent_req_list); - INIT_WORK(&m->rq, p9_read_work); - INIT_WORK(&m->wq, p9_write_work); - n = p9_mux_poll_start(m); - if (n) { - kfree(m); - return ERR_PTR(n); - } + if (!ts->rd->f_op || !ts->rd->f_op->poll) + return -EIO; - n = p9_fd_poll(trans, &m->pt); - if (n & POLLIN) { - P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m); - set_bit(Rpending, &m->wsched); - } + if (!ts->wr->f_op || !ts->wr->f_op->poll) + return -EIO; - if (n & POLLOUT) { - P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m); - set_bit(Wpending, &m->wsched); - } + ret = ts->rd->f_op->poll(ts->rd, pt); + if (ret < 0) + return ret; - for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) { - if (IS_ERR(m->poll_waddr[i])) { - p9_mux_poll_stop(m); - kfree(m); - return (void *)m->poll_waddr; /* the error code */ - } + if (ts->rd != ts->wr) { + n = ts->wr->f_op->poll(ts->wr, pt); + if (n < 0) + return n; + ret = (ret & ~POLLOUT) | (n & ~POLLIN); } - return m; + return ret; } /** - * p9_mux_destroy - cancels all pending requests and frees mux resources - * @m: mux to destroy + * p9_fd_read- read from a fd + * @client: client instance + * @v: buffer to receive data into + * @len: size of receive buffer * */ -static void p9_conn_destroy(struct p9_conn *m) +static int p9_fd_read(struct p9_client *client, void *v, int len) { - P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m, - m->mux_list.prev, m->mux_list.next); + int ret; + struct p9_trans_fd *ts = NULL; - p9_mux_poll_stop(m); - cancel_work_sync(&m->rq); - cancel_work_sync(&m->wq); + if (client && client->status != Disconnected) + ts = client->trans; - p9_conn_cancel(m, -ECONNRESET); + if (!ts) + return -EREMOTEIO; - m->trans = NULL; - p9_idpool_destroy(m->tagpool); - kfree(m); + if (!(ts->rd->f_flags & O_NONBLOCK)) + P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n"); + + ret = kernel_read(ts->rd, ts->rd->f_pos, v, len); + if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) + client->status = Disconnected; + return ret; } /** - * p9_pollwait - add poll task to the wait queue - * @filp: file pointer being polled - * @wait_address: wait_q to block on - * @p: poll state + * p9_read_work - called when there is some data to be read from a transport + * @work: container of work to be done * - * called by files poll operation to add v9fs-poll task to files wait queue */ -static void -p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) +static void p9_read_work(struct work_struct *work) { - int i; + int n, err; struct p9_conn *m; - m = container_of(p, struct p9_conn, pt); - for (i = 0; i < ARRAY_SIZE(m->poll_waddr); i++) - if (m->poll_waddr[i] == NULL) - break; + m = container_of(work, struct p9_conn, rq); - if (i >= ARRAY_SIZE(m->poll_waddr)) { - P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n"); + if (m->err < 0) return; - } - m->poll_waddr[i] = wait_address; + P9_DPRINTK(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos); - if (!wait_address) { - P9_DPRINTK(P9_DEBUG_ERROR, "no wait_address\n"); - m->poll_waddr[i] = ERR_PTR(-EIO); + if (!m->rbuf) { + m->rbuf = m->tmp_buf; + m->rpos = 0; + m->rsize = 7; /* start by reading header */ + } + + clear_bit(Rpending, &m->wsched); + P9_DPRINTK(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", m, + m->rpos, m->rsize, m->rsize-m->rpos); + err = p9_fd_read(m->client, m->rbuf + m->rpos, + m->rsize - m->rpos); + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); + if (err == -EAGAIN) { + clear_bit(Rworksched, &m->wsched); return; } - init_waitqueue_entry(&m->poll_wait[i], m->poll_task->task); - add_wait_queue(wait_address, &m->poll_wait[i]); -} + if (err <= 0) + goto error; -/** - * p9_poll_mux - polls a mux and schedules read or write works if necessary - * @m: connection to poll - * - */ + m->rpos += err; -static void p9_poll_mux(struct p9_conn *m) -{ - int n; + if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */ + u16 tag; + P9_DPRINTK(P9_DEBUG_TRANS, "got new header\n"); - if (m->err < 0) - return; + n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */ + if (n >= m->client->msize) { + P9_DPRINTK(P9_DEBUG_ERROR, + "requested packet size too big: %d\n", n); + err = -EIO; + goto error; + } - n = p9_fd_poll(m->trans, NULL); - if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { - P9_DPRINTK(P9_DEBUG_MUX, "error mux %p err %d\n", m, n); - if (n >= 0) - n = -ECONNRESET; - p9_conn_cancel(m, n); - } + tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */ + P9_DPRINTK(P9_DEBUG_TRANS, + "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); - if (n & POLLIN) { - set_bit(Rpending, &m->wsched); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p can read\n", m); - if (!test_and_set_bit(Rworksched, &m->wsched)) { - P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m); - queue_work(p9_mux_wq, &m->rq); + m->req = p9_tag_lookup(m->client, tag); + if (!m->req) { + P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", + tag); + err = -EIO; + goto error; } - } - if (n & POLLOUT) { - set_bit(Wpending, &m->wsched); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p can write\n", m); - if ((m->wsize || !list_empty(&m->unsent_req_list)) - && !test_and_set_bit(Wworksched, &m->wsched)) { - P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m); - queue_work(p9_mux_wq, &m->wq); + if (m->req->rc == NULL) { + m->req->rc = kmalloc(sizeof(struct p9_fcall) + + m->client->msize, GFP_KERNEL); + if (!m->req->rc) { + m->req = NULL; + err = -ENOMEM; + goto error; + } } + m->rbuf = (char *)m->req->rc + sizeof(struct p9_fcall); + memcpy(m->rbuf, m->tmp_buf, m->rsize); + m->rsize = n; } + + /* not an else because some packets (like clunk) have no payload */ + if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */ + P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n"); + spin_lock(&m->client->lock); + list_del(&m->req->req_list); + spin_unlock(&m->client->lock); + p9_client_cb(m->client, m->req); + + m->rbuf = NULL; + m->rpos = 0; + m->rsize = 0; + m->req = NULL; + } + + if (!list_empty(&m->req_list)) { + if (test_and_clear_bit(Rpending, &m->wsched)) + n = POLLIN; + else + n = p9_fd_poll(m->client, NULL); + + if (n & POLLIN) { + P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); + queue_work(p9_mux_wq, &m->rq); + } else + clear_bit(Rworksched, &m->wsched); + } else + clear_bit(Rworksched, &m->wsched); + + return; +error: + p9_conn_cancel(m, err); + clear_bit(Rworksched, &m->wsched); } /** - * p9_poll_proc - poll worker thread - * @a: thread state and arguments - * - * polls all v9fs transports for new events and queues the appropriate - * work to the work queue + * p9_fd_write - write to a socket + * @client: client instance + * @v: buffer to send data from + * @len: size of send buffer * */ -static int p9_poll_proc(void *a) +static int p9_fd_write(struct p9_client *client, void *v, int len) { - struct p9_conn *m, *mtmp; - struct p9_mux_poll_task *vpt; + int ret; + mm_segment_t oldfs; + struct p9_trans_fd *ts = NULL; - vpt = a; - P9_DPRINTK(P9_DEBUG_MUX, "start %p %p\n", current, vpt); - while (!kthread_should_stop()) { - set_current_state(TASK_INTERRUPTIBLE); + if (client && client->status != Disconnected) + ts = client->trans; - list_for_each_entry_safe(m, mtmp, &vpt->mux_list, mux_list) { - p9_poll_mux(m); - } + if (!ts) + return -EREMOTEIO; - P9_DPRINTK(P9_DEBUG_MUX, "sleeping...\n"); - schedule_timeout(SCHED_TIMEOUT * HZ); - } + if (!(ts->wr->f_flags & O_NONBLOCK)) + P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n"); - __set_current_state(TASK_RUNNING); - P9_DPRINTK(P9_DEBUG_MUX, "finish\n"); - return 0; + oldfs = get_fs(); + set_fs(get_ds()); + /* The cast to a user pointer is valid due to the set_fs() */ + ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos); + set_fs(oldfs); + + if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) + client->status = Disconnected; + return ret; } /** @@ -584,7 +435,7 @@ static void p9_write_work(struct work_struct *work) { int n, err; struct p9_conn *m; - struct p9_req *req; + struct p9_req_t *req; m = container_of(work, struct p9_conn, wq); @@ -599,25 +450,23 @@ static void p9_write_work(struct work_struct *work) return; } - spin_lock(&m->lock); -again: - req = list_entry(m->unsent_req_list.next, struct p9_req, + spin_lock(&m->client->lock); + req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); + req->status = REQ_STATUS_SENT; list_move_tail(&req->req_list, &m->req_list); - if (req->err == ERREQFLUSH) - goto again; - m->wbuf = req->tcall->sdata; - m->wsize = req->tcall->size; + m->wbuf = req->tc->sdata; + m->wsize = req->tc->size; m->wpos = 0; - spin_unlock(&m->lock); + spin_unlock(&m->client->lock); } - P9_DPRINTK(P9_DEBUG_MUX, "mux %p pos %d size %d\n", m, m->wpos, + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos, m->wsize); clear_bit(Wpending, &m->wsched); - err = p9_fd_write(m->trans, m->wbuf + m->wpos, m->wsize - m->wpos); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p sent %d bytes\n", m, err); + err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos); + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); if (err == -EAGAIN) { clear_bit(Wworksched, &m->wsched); return; @@ -638,10 +487,10 @@ again: if (test_and_clear_bit(Wpending, &m->wsched)) n = POLLOUT; else - n = p9_fd_poll(m->trans, NULL); + n = p9_fd_poll(m->client, NULL); if (n & POLLOUT) { - P9_DPRINTK(P9_DEBUG_MUX, "schedule write work %p\n", m); + P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); queue_work(p9_mux_wq, &m->wq); } else clear_bit(Wworksched, &m->wsched); @@ -655,504 +504,195 @@ error: clear_bit(Wworksched, &m->wsched); } -static void process_request(struct p9_conn *m, struct p9_req *req) +static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) { - int ecode; - struct p9_str *ename; - - if (!req->err && req->rcall->id == P9_RERROR) { - ecode = req->rcall->params.rerror.errno; - ename = &req->rcall->params.rerror.error; - - P9_DPRINTK(P9_DEBUG_MUX, "Rerror %.*s\n", ename->len, - ename->str); - - if (m->extended) - req->err = -ecode; + struct p9_poll_wait *pwait = + container_of(wait, struct p9_poll_wait, wait); + struct p9_conn *m = pwait->conn; + unsigned long flags; + DECLARE_WAITQUEUE(dummy_wait, p9_poll_task); - if (!req->err) { - req->err = p9_errstr2errno(ename->str, ename->len); + spin_lock_irqsave(&p9_poll_lock, flags); + if (list_empty(&m->poll_pending_link)) + list_add_tail(&m->poll_pending_link, &p9_poll_pending_list); + spin_unlock_irqrestore(&p9_poll_lock, flags); - /* string match failed */ - if (!req->err) { - PRINT_FCALL_ERROR("unknown error", req->rcall); - req->err = -ESERVERFAULT; - } - } - } else if (req->tcall && req->rcall->id != req->tcall->id + 1) { - P9_DPRINTK(P9_DEBUG_ERROR, - "fcall mismatch: expected %d, got %d\n", - req->tcall->id + 1, req->rcall->id); - if (!req->err) - req->err = -EIO; - } + /* perform the default wake up operation */ + return default_wake_function(&dummy_wait, mode, sync, key); } /** - * p9_read_work - called when there is some data to be read from a transport - * @work: container of work to be done + * p9_pollwait - add poll task to the wait queue + * @filp: file pointer being polled + * @wait_address: wait_q to block on + * @p: poll state * + * called by files poll operation to add v9fs-poll task to files wait queue */ -static void p9_read_work(struct work_struct *work) +static void +p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) { - int n, err; - struct p9_conn *m; - struct p9_req *req, *rptr, *rreq; - struct p9_fcall *rcall; - char *rbuf; - - m = container_of(work, struct p9_conn, rq); - - if (m->err < 0) - return; - - rcall = NULL; - P9_DPRINTK(P9_DEBUG_MUX, "start mux %p pos %d\n", m, m->rpos); + struct p9_conn *m = container_of(p, struct p9_conn, pt); + struct p9_poll_wait *pwait = NULL; + int i; - if (!m->rcall) { - m->rcall = - kmalloc(sizeof(struct p9_fcall) + m->msize, GFP_KERNEL); - if (!m->rcall) { - err = -ENOMEM; - goto error; + for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { + if (m->poll_wait[i].wait_addr == NULL) { + pwait = &m->poll_wait[i]; + break; } - - m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall); - m->rpos = 0; } - clear_bit(Rpending, &m->wsched); - err = p9_fd_read(m->trans, m->rbuf + m->rpos, m->msize - m->rpos); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p got %d bytes\n", m, err); - if (err == -EAGAIN) { - clear_bit(Rworksched, &m->wsched); + if (!pwait) { + P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n"); return; } - if (err <= 0) - goto error; - - m->rpos += err; - while (m->rpos > 4) { - n = le32_to_cpu(*(__le32 *) m->rbuf); - if (n >= m->msize) { - P9_DPRINTK(P9_DEBUG_ERROR, - "requested packet size too big: %d\n", n); - err = -EIO; - goto error; - } - - if (m->rpos < n) - break; - - err = - p9_deserialize_fcall(m->rbuf, n, m->rcall, m->extended); - if (err < 0) - goto error; - -#ifdef CONFIG_NET_9P_DEBUG - if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { - char buf[150]; - - p9_printfcall(buf, sizeof(buf), m->rcall, - m->extended); - printk(KERN_NOTICE ">>> %p %s\n", m, buf); - } -#endif - - rcall = m->rcall; - rbuf = m->rbuf; - if (m->rpos > n) { - m->rcall = kmalloc(sizeof(struct p9_fcall) + m->msize, - GFP_KERNEL); - if (!m->rcall) { - err = -ENOMEM; - goto error; - } - - m->rbuf = (char *)m->rcall + sizeof(struct p9_fcall); - memmove(m->rbuf, rbuf + n, m->rpos - n); - m->rpos -= n; - } else { - m->rcall = NULL; - m->rbuf = NULL; - m->rpos = 0; - } - - P9_DPRINTK(P9_DEBUG_MUX, "mux %p fcall id %d tag %d\n", m, - rcall->id, rcall->tag); - - req = NULL; - spin_lock(&m->lock); - list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { - if (rreq->tag == rcall->tag) { - req = rreq; - if (req->flush != Flushing) - list_del(&req->req_list); - break; - } - } - spin_unlock(&m->lock); - - if (req) { - req->rcall = rcall; - process_request(m, req); - - if (req->flush != Flushing) { - if (req->cb) - (*req->cb) (req, req->cba); - else - kfree(req->rcall); - } - } else { - if (err >= 0 && rcall->id != P9_RFLUSH) - P9_DPRINTK(P9_DEBUG_ERROR, - "unexpected response mux %p id %d tag %d\n", - m, rcall->id, rcall->tag); - kfree(rcall); - } - } - - if (!list_empty(&m->req_list)) { - if (test_and_clear_bit(Rpending, &m->wsched)) - n = POLLIN; - else - n = p9_fd_poll(m->trans, NULL); - - if (n & POLLIN) { - P9_DPRINTK(P9_DEBUG_MUX, "schedule read work %p\n", m); - queue_work(p9_mux_wq, &m->rq); - } else - clear_bit(Rworksched, &m->wsched); - } else - clear_bit(Rworksched, &m->wsched); - - return; - -error: - p9_conn_cancel(m, err); - clear_bit(Rworksched, &m->wsched); + pwait->conn = m; + pwait->wait_addr = wait_address; + init_waitqueue_func_entry(&pwait->wait, p9_pollwake); + add_wait_queue(wait_address, &pwait->wait); } /** - * p9_send_request - send 9P request - * The function can sleep until the request is scheduled for sending. - * The function can be interrupted. Return from the function is not - * a guarantee that the request is sent successfully. Can return errors - * that can be retrieved by PTR_ERR macros. - * - * @m: mux data - * @tc: request to be sent - * @cb: callback function to call when response is received - * @cba: parameter to pass to the callback function + * p9_conn_create - allocate and initialize the per-session mux data + * @client: client instance * + * Note: Creates the polling task if this is the first session. */ -static struct p9_req *p9_send_request(struct p9_conn *m, - struct p9_fcall *tc, - p9_conn_req_callback cb, void *cba) +static struct p9_conn *p9_conn_create(struct p9_client *client) { int n; - struct p9_req *req; - - P9_DPRINTK(P9_DEBUG_MUX, "mux %p task %p tcall %p id %d\n", m, current, - tc, tc->id); - if (m->err < 0) - return ERR_PTR(m->err); - - req = kmalloc(sizeof(struct p9_req), GFP_KERNEL); - if (!req) - return ERR_PTR(-ENOMEM); - - if (tc->id == P9_TVERSION) - n = P9_NOTAG; - else - n = p9_mux_get_tag(m); + struct p9_conn *m; - if (n < 0) { - kfree(req); + P9_DPRINTK(P9_DEBUG_TRANS, "client %p msize %d\n", client, + client->msize); + m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL); + if (!m) return ERR_PTR(-ENOMEM); - } - p9_set_tag(tc, n); + INIT_LIST_HEAD(&m->mux_list); + m->client = client; -#ifdef CONFIG_NET_9P_DEBUG - if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { - char buf[150]; + INIT_LIST_HEAD(&m->req_list); + INIT_LIST_HEAD(&m->unsent_req_list); + INIT_WORK(&m->rq, p9_read_work); + INIT_WORK(&m->wq, p9_write_work); + INIT_LIST_HEAD(&m->poll_pending_link); + init_poll_funcptr(&m->pt, p9_pollwait); - p9_printfcall(buf, sizeof(buf), tc, m->extended); - printk(KERN_NOTICE "<<< %p %s\n", m, buf); + n = p9_fd_poll(client, &m->pt); + if (n & POLLIN) { + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); + set_bit(Rpending, &m->wsched); } -#endif - - spin_lock_init(&req->lock); - req->tag = n; - req->tcall = tc; - req->rcall = NULL; - req->err = 0; - req->cb = cb; - req->cba = cba; - req->flush = None; - - spin_lock(&m->lock); - list_add_tail(&req->req_list, &m->unsent_req_list); - spin_unlock(&m->lock); - - if (test_and_clear_bit(Wpending, &m->wsched)) - n = POLLOUT; - else - n = p9_fd_poll(m->trans, NULL); - if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) - queue_work(p9_mux_wq, &m->wq); + if (n & POLLOUT) { + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m); + set_bit(Wpending, &m->wsched); + } - return req; + return m; } -static void p9_mux_free_request(struct p9_conn *m, struct p9_req *req) -{ - p9_mux_put_tag(m, req->tag); - kfree(req); -} +/** + * p9_poll_mux - polls a mux and schedules read or write works if necessary + * @m: connection to poll + * + */ -static void p9_mux_flush_cb(struct p9_req *freq, void *a) +static void p9_poll_mux(struct p9_conn *m) { - int tag; - struct p9_conn *m; - struct p9_req *req, *rreq, *rptr; - - m = a; - P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p rc %p err %d oldtag %d\n", m, - freq->tcall, freq->rcall, freq->err, - freq->tcall->params.tflush.oldtag); - - spin_lock(&m->lock); - tag = freq->tcall->params.tflush.oldtag; - req = NULL; - list_for_each_entry_safe(rreq, rptr, &m->req_list, req_list) { - if (rreq->tag == tag) { - req = rreq; - list_del(&req->req_list); - break; - } - } - spin_unlock(&m->lock); + int n; - if (req) { - spin_lock(&req->lock); - req->flush = Flushed; - spin_unlock(&req->lock); + if (m->err < 0) + return; - if (req->cb) - (*req->cb) (req, req->cba); - else - kfree(req->rcall); + n = p9_fd_poll(m->client, NULL); + if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { + P9_DPRINTK(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); + if (n >= 0) + n = -ECONNRESET; + p9_conn_cancel(m, n); } - kfree(freq->tcall); - kfree(freq->rcall); - p9_mux_free_request(m, freq); -} - -static int -p9_mux_flush_request(struct p9_conn *m, struct p9_req *req) -{ - struct p9_fcall *fc; - struct p9_req *rreq, *rptr; - - P9_DPRINTK(P9_DEBUG_MUX, "mux %p req %p tag %d\n", m, req, req->tag); - - /* if a response was received for a request, do nothing */ - spin_lock(&req->lock); - if (req->rcall || req->err) { - spin_unlock(&req->lock); - P9_DPRINTK(P9_DEBUG_MUX, - "mux %p req %p response already received\n", m, req); - return 0; + if (n & POLLIN) { + set_bit(Rpending, &m->wsched); + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); + if (!test_and_set_bit(Rworksched, &m->wsched)) { + P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); + queue_work(p9_mux_wq, &m->rq); + } } - req->flush = Flushing; - spin_unlock(&req->lock); - - spin_lock(&m->lock); - /* if the request is not sent yet, just remove it from the list */ - list_for_each_entry_safe(rreq, rptr, &m->unsent_req_list, req_list) { - if (rreq->tag == req->tag) { - P9_DPRINTK(P9_DEBUG_MUX, - "mux %p req %p request is not sent yet\n", m, req); - list_del(&rreq->req_list); - req->flush = Flushed; - spin_unlock(&m->lock); - if (req->cb) - (*req->cb) (req, req->cba); - return 0; + if (n & POLLOUT) { + set_bit(Wpending, &m->wsched); + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m); + if ((m->wsize || !list_empty(&m->unsent_req_list)) + && !test_and_set_bit(Wworksched, &m->wsched)) { + P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); + queue_work(p9_mux_wq, &m->wq); } } - spin_unlock(&m->lock); - - clear_thread_flag(TIF_SIGPENDING); - fc = p9_create_tflush(req->tag); - p9_send_request(m, fc, p9_mux_flush_cb, m); - return 1; -} - -static void -p9_conn_rpc_cb(struct p9_req *req, void *a) -{ - struct p9_mux_rpc *r; - - P9_DPRINTK(P9_DEBUG_MUX, "req %p r %p\n", req, a); - r = a; - r->rcall = req->rcall; - r->err = req->err; - - if (req->flush != None && !req->err) - r->err = -ERESTARTSYS; - - wake_up(&r->wqueue); } /** - * p9_fd_rpc- sends 9P request and waits until a response is available. - * The function can be interrupted. - * @t: transport data - * @tc: request to be sent - * @rc: pointer where a pointer to the response is stored + * p9_fd_request - send 9P request + * The function can sleep until the request is scheduled for sending. + * The function can be interrupted. Return from the function is not + * a guarantee that the request is sent successfully. + * + * @client: client instance + * @req: request to be sent * */ -int -p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) +static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) { - struct p9_trans_fd *p = t->priv; - struct p9_conn *m = p->conn; - int err, sigpending; - unsigned long flags; - struct p9_req *req; - struct p9_mux_rpc r; - - r.err = 0; - r.tcall = tc; - r.rcall = NULL; - r.m = m; - init_waitqueue_head(&r.wqueue); - - if (rc) - *rc = NULL; - - sigpending = 0; - if (signal_pending(current)) { - sigpending = 1; - clear_thread_flag(TIF_SIGPENDING); - } - - req = p9_send_request(m, tc, p9_conn_rpc_cb, &r); - if (IS_ERR(req)) { - err = PTR_ERR(req); - P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err); - return err; - } + int n; + struct p9_trans_fd *ts = client->trans; + struct p9_conn *m = ts->conn; - err = wait_event_interruptible(r.wqueue, r.rcall != NULL || r.err < 0); - if (r.err < 0) - err = r.err; - - if (err == -ERESTARTSYS && m->trans->status == Connected - && m->err == 0) { - if (p9_mux_flush_request(m, req)) { - /* wait until we get response of the flush message */ - do { - clear_thread_flag(TIF_SIGPENDING); - err = wait_event_interruptible(r.wqueue, - r.rcall || r.err); - } while (!r.rcall && !r.err && err == -ERESTARTSYS && - m->trans->status == Connected && !m->err); - - err = -ERESTARTSYS; - } - sigpending = 1; - } + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m, + current, req->tc, req->tc->id); + if (m->err < 0) + return m->err; - if (sigpending) { - spin_lock_irqsave(¤t->sighand->siglock, flags); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); - } + spin_lock(&client->lock); + req->status = REQ_STATUS_UNSENT; + list_add_tail(&req->req_list, &m->unsent_req_list); + spin_unlock(&client->lock); - if (rc) - *rc = r.rcall; + if (test_and_clear_bit(Wpending, &m->wsched)) + n = POLLOUT; else - kfree(r.rcall); + n = p9_fd_poll(m->client, NULL); - p9_mux_free_request(m, req); - if (err > 0) - err = -EIO; + if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) + queue_work(p9_mux_wq, &m->wq); - return err; + return 0; } -#ifdef P9_NONBLOCK -/** - * p9_conn_rpcnb - sends 9P request without waiting for response. - * @m: mux data - * @tc: request to be sent - * @cb: callback function to be called when response arrives - * @a: value to pass to the callback function - * - */ - -int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc, - p9_conn_req_callback cb, void *a) +static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) { - int err; - struct p9_req *req; + int ret = 1; - req = p9_send_request(m, tc, cb, a); - if (IS_ERR(req)) { - err = PTR_ERR(req); - P9_DPRINTK(P9_DEBUG_MUX, "error %d\n", err); - return PTR_ERR(req); - } + P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req); - P9_DPRINTK(P9_DEBUG_MUX, "mux %p tc %p tag %d\n", m, tc, req->tag); - return 0; -} -#endif /* P9_NONBLOCK */ + spin_lock(&client->lock); + list_del(&req->req_list); -/** - * p9_conn_cancel - cancel all pending requests with error - * @m: mux data - * @err: error code - * - */ - -void p9_conn_cancel(struct p9_conn *m, int err) -{ - struct p9_req *req, *rtmp; - LIST_HEAD(cancel_list); - - P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); - m->err = err; - spin_lock(&m->lock); - list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { - list_move(&req->req_list, &cancel_list); + if (req->status == REQ_STATUS_UNSENT) { + req->status = REQ_STATUS_FLSHD; + ret = 0; } - list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { - list_move(&req->req_list, &cancel_list); - } - spin_unlock(&m->lock); - list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { - list_del(&req->req_list); - if (!req->err) - req->err = err; + spin_unlock(&client->lock); - if (req->cb) - (*req->cb) (req, req->cba); - else - kfree(req->rcall); - } + return ret; } /** @@ -1216,7 +756,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) return 0; } -static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd) +static int p9_fd_open(struct p9_client *client, int rfd, int wfd) { struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); @@ -1234,13 +774,13 @@ static int p9_fd_open(struct p9_trans *trans, int rfd, int wfd) return -EIO; } - trans->priv = ts; - trans->status = Connected; + client->trans = ts; + client->status = Connected; return 0; } -static int p9_socket_open(struct p9_trans *trans, struct socket *csocket) +static int p9_socket_open(struct p9_client *client, struct socket *csocket) { int fd, ret; @@ -1251,137 +791,65 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket) return fd; } - ret = p9_fd_open(trans, fd, fd); + ret = p9_fd_open(client, fd, fd); if (ret < 0) { P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to open fd\n"); sockfd_put(csocket); return ret; } - ((struct p9_trans_fd *)trans->priv)->rd->f_flags |= O_NONBLOCK; + ((struct p9_trans_fd *)client->trans)->rd->f_flags |= O_NONBLOCK; return 0; } /** - * p9_fd_read- read from a fd - * @trans: transport instance state - * @v: buffer to receive data into - * @len: size of receive buffer - * - */ - -static int p9_fd_read(struct p9_trans *trans, void *v, int len) -{ - int ret; - struct p9_trans_fd *ts = NULL; - - if (trans && trans->status != Disconnected) - ts = trans->priv; - - if (!ts) - return -EREMOTEIO; - - if (!(ts->rd->f_flags & O_NONBLOCK)) - P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n"); - - ret = kernel_read(ts->rd, ts->rd->f_pos, v, len); - if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) - trans->status = Disconnected; - return ret; -} - -/** - * p9_fd_write - write to a socket - * @trans: transport instance state - * @v: buffer to send data from - * @len: size of send buffer + * p9_mux_destroy - cancels all pending requests and frees mux resources + * @m: mux to destroy * */ -static int p9_fd_write(struct p9_trans *trans, void *v, int len) -{ - int ret; - mm_segment_t oldfs; - struct p9_trans_fd *ts = NULL; - - if (trans && trans->status != Disconnected) - ts = trans->priv; - - if (!ts) - return -EREMOTEIO; - - if (!(ts->wr->f_flags & O_NONBLOCK)) - P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n"); - - oldfs = get_fs(); - set_fs(get_ds()); - /* The cast to a user pointer is valid due to the set_fs() */ - ret = vfs_write(ts->wr, (void __user *)v, len, &ts->wr->f_pos); - set_fs(oldfs); - - if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) - trans->status = Disconnected; - return ret; -} - -static unsigned int -p9_fd_poll(struct p9_trans *trans, struct poll_table_struct *pt) +static void p9_conn_destroy(struct p9_conn *m) { - int ret, n; - struct p9_trans_fd *ts = NULL; - - if (trans && trans->status == Connected) - ts = trans->priv; - - if (!ts) - return -EREMOTEIO; - - if (!ts->rd->f_op || !ts->rd->f_op->poll) - return -EIO; - - if (!ts->wr->f_op || !ts->wr->f_op->poll) - return -EIO; + P9_DPRINTK(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m, + m->mux_list.prev, m->mux_list.next); - ret = ts->rd->f_op->poll(ts->rd, pt); - if (ret < 0) - return ret; + p9_mux_poll_stop(m); + cancel_work_sync(&m->rq); + cancel_work_sync(&m->wq); - if (ts->rd != ts->wr) { - n = ts->wr->f_op->poll(ts->wr, pt); - if (n < 0) - return n; - ret = (ret & ~POLLOUT) | (n & ~POLLIN); - } + p9_conn_cancel(m, -ECONNRESET); - return ret; + m->client = NULL; + kfree(m); } /** - * p9_fd_close - shutdown socket - * @trans: private socket structure + * p9_fd_close - shutdown file descriptor transport + * @client: client instance * */ -static void p9_fd_close(struct p9_trans *trans) +static void p9_fd_close(struct p9_client *client) { struct p9_trans_fd *ts; - if (!trans) + if (!client) return; - ts = xchg(&trans->priv, NULL); - + ts = client->trans; if (!ts) return; + client->status = Disconnected; + p9_conn_destroy(ts->conn); - trans->status = Disconnected; if (ts->rd) fput(ts->rd); if (ts->wr) fput(ts->wr); + kfree(ts); } @@ -1402,31 +870,23 @@ static inline int valid_ipaddr4(const char *buf) return 0; } -static struct p9_trans * -p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu) +static int +p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { int err; - struct p9_trans *trans; struct socket *csocket; struct sockaddr_in sin_server; struct p9_fd_opts opts; - struct p9_trans_fd *p; + struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */ err = parse_opts(args, &opts); if (err < 0) - return ERR_PTR(err); + return err; if (valid_ipaddr4(addr) < 0) - return ERR_PTR(-EINVAL); + return -EINVAL; csocket = NULL; - trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); - if (!trans) - return ERR_PTR(-ENOMEM); - trans->msize = msize; - trans->extended = dotu; - trans->rpc = p9_fd_rpc; - trans->close = p9_fd_close; sin_server.sin_family = AF_INET; sin_server.sin_addr.s_addr = in_aton(addr); @@ -1449,45 +909,38 @@ p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu) goto error; } - err = p9_socket_open(trans, csocket); + err = p9_socket_open(client, csocket); if (err < 0) goto error; - p = (struct p9_trans_fd *) trans->priv; - p->conn = p9_conn_create(trans); + p = (struct p9_trans_fd *) client->trans; + p->conn = p9_conn_create(client); if (IS_ERR(p->conn)) { err = PTR_ERR(p->conn); p->conn = NULL; goto error; } - return trans; + return 0; error: if (csocket) sock_release(csocket); - kfree(trans); - return ERR_PTR(err); + kfree(p); + + return err; } -static struct p9_trans * -p9_trans_create_unix(const char *addr, char *args, int msize, - unsigned char dotu) +static int +p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) { int err; struct socket *csocket; struct sockaddr_un sun_server; - struct p9_trans *trans; - struct p9_trans_fd *p; + struct p9_trans_fd *p = NULL; /* this gets allocated in p9_fd_open */ csocket = NULL; - trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); - if (!trans) - return ERR_PTR(-ENOMEM); - - trans->rpc = p9_fd_rpc; - trans->close = p9_fd_close; if (strlen(addr) > UNIX_PATH_MAX) { P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", @@ -1508,79 +961,69 @@ p9_trans_create_unix(const char *addr, char *args, int msize, goto error; } - err = p9_socket_open(trans, csocket); + err = p9_socket_open(client, csocket); if (err < 0) goto error; - trans->msize = msize; - trans->extended = dotu; - p = (struct p9_trans_fd *) trans->priv; - p->conn = p9_conn_create(trans); + p = (struct p9_trans_fd *) client->trans; + p->conn = p9_conn_create(client); if (IS_ERR(p->conn)) { err = PTR_ERR(p->conn); p->conn = NULL; goto error; } - return trans; + return 0; error: if (csocket) sock_release(csocket); - kfree(trans); - return ERR_PTR(err); + kfree(p); + return err; } -static struct p9_trans * -p9_trans_create_fd(const char *name, char *args, int msize, - unsigned char extended) +static int +p9_fd_create(struct p9_client *client, const char *addr, char *args) { int err; - struct p9_trans *trans; struct p9_fd_opts opts; - struct p9_trans_fd *p; + struct p9_trans_fd *p = NULL; /* this get allocated in p9_fd_open */ parse_opts(args, &opts); if (opts.rfd == ~0 || opts.wfd == ~0) { printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); - return ERR_PTR(-ENOPROTOOPT); + return -ENOPROTOOPT; } - trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); - if (!trans) - return ERR_PTR(-ENOMEM); - - trans->rpc = p9_fd_rpc; - trans->close = p9_fd_close; - - err = p9_fd_open(trans, opts.rfd, opts.wfd); + err = p9_fd_open(client, opts.rfd, opts.wfd); if (err < 0) goto error; - trans->msize = msize; - trans->extended = extended; - p = (struct p9_trans_fd *) trans->priv; - p->conn = p9_conn_create(trans); + p = (struct p9_trans_fd *) client->trans; + p->conn = p9_conn_create(client); if (IS_ERR(p->conn)) { err = PTR_ERR(p->conn); p->conn = NULL; goto error; } - return trans; + return 0; error: - kfree(trans); - return ERR_PTR(err); + kfree(p); + return err; } static struct p9_trans_module p9_tcp_trans = { .name = "tcp", .maxsize = MAX_SOCK_BUF, .def = 1, - .create = p9_trans_create_tcp, + .create = p9_fd_create_tcp, + .close = p9_fd_close, + .request = p9_fd_request, + .cancel = p9_fd_cancel, .owner = THIS_MODULE, }; @@ -1588,7 +1031,10 @@ static struct p9_trans_module p9_unix_trans = { .name = "unix", .maxsize = MAX_SOCK_BUF, .def = 0, - .create = p9_trans_create_unix, + .create = p9_fd_create_unix, + .close = p9_fd_close, + .request = p9_fd_request, + .cancel = p9_fd_cancel, .owner = THIS_MODULE, }; @@ -1596,23 +1042,71 @@ static struct p9_trans_module p9_fd_trans = { .name = "fd", .maxsize = MAX_SOCK_BUF, .def = 0, - .create = p9_trans_create_fd, + .create = p9_fd_create, + .close = p9_fd_close, + .request = p9_fd_request, + .cancel = p9_fd_cancel, .owner = THIS_MODULE, }; -int p9_trans_fd_init(void) +/** + * p9_poll_proc - poll worker thread + * @a: thread state and arguments + * + * polls all v9fs transports for new events and queues the appropriate + * work to the work queue + * + */ + +static int p9_poll_proc(void *a) { - int i; + unsigned long flags; + + P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current); + repeat: + spin_lock_irqsave(&p9_poll_lock, flags); + while (!list_empty(&p9_poll_pending_list)) { + struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, + struct p9_conn, + poll_pending_link); + list_del_init(&conn->poll_pending_link); + spin_unlock_irqrestore(&p9_poll_lock, flags); + + p9_poll_mux(conn); + + spin_lock_irqsave(&p9_poll_lock, flags); + } + spin_unlock_irqrestore(&p9_poll_lock, flags); + + set_current_state(TASK_INTERRUPTIBLE); + if (list_empty(&p9_poll_pending_list)) { + P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n"); + schedule(); + } + __set_current_state(TASK_RUNNING); + + if (!kthread_should_stop()) + goto repeat; - for (i = 0; i < ARRAY_SIZE(p9_mux_poll_tasks); i++) - p9_mux_poll_tasks[i].task = NULL; + P9_DPRINTK(P9_DEBUG_TRANS, "finish\n"); + return 0; +} +int p9_trans_fd_init(void) +{ p9_mux_wq = create_workqueue("v9fs"); if (!p9_mux_wq) { printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); return -ENOMEM; } + p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll"); + if (IS_ERR(p9_poll_task)) { + destroy_workqueue(p9_mux_wq); + printk(KERN_WARNING "v9fs: mux: creating poll task failed\n"); + return PTR_ERR(p9_poll_task); + } + v9fs_register_trans(&p9_tcp_trans); v9fs_register_trans(&p9_unix_trans); v9fs_register_trans(&p9_fd_trans); @@ -1622,6 +1116,7 @@ int p9_trans_fd_init(void) void p9_trans_fd_exit(void) { + kthread_stop(p9_poll_task); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c new file mode 100644 index 000000000000..2f1fe5fc1228 --- /dev/null +++ b/net/9p/trans_rdma.c @@ -0,0 +1,713 @@ +/* + * linux/fs/9p/trans_rdma.c + * + * RDMA transport layer based on the trans_fd.c implementation. + * + * Copyright (C) 2008 by Tom Tucker <tom@opengridcomputing.com> + * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> + * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> + * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> + * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to: + * Free Software Foundation + * 51 Franklin Street, Fifth Floor + * Boston, MA 02111-1301 USA + * + */ + +#include <linux/in.h> +#include <linux/module.h> +#include <linux/net.h> +#include <linux/ipv6.h> +#include <linux/kthread.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/un.h> +#include <linux/uaccess.h> +#include <linux/inet.h> +#include <linux/idr.h> +#include <linux/file.h> +#include <linux/parser.h> +#include <linux/semaphore.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include <net/9p/transport.h> +#include <rdma/ib_verbs.h> +#include <rdma/rdma_cm.h> + +#define P9_PORT 5640 +#define P9_RDMA_SQ_DEPTH 32 +#define P9_RDMA_RQ_DEPTH 32 +#define P9_RDMA_SEND_SGE 4 +#define P9_RDMA_RECV_SGE 4 +#define P9_RDMA_IRD 0 +#define P9_RDMA_ORD 0 +#define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ +#define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can + * safely advertise a maxsize + * of 64k */ + +#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT) +/** + * struct p9_trans_rdma - RDMA transport instance + * + * @state: tracks the transport state machine for connection setup and tear down + * @cm_id: The RDMA CM ID + * @pd: Protection Domain pointer + * @qp: Queue Pair pointer + * @cq: Completion Queue pointer + * @lkey: The local access only memory region key + * @timeout: Number of uSecs to wait for connection management events + * @sq_depth: The depth of the Send Queue + * @sq_sem: Semaphore for the SQ + * @rq_depth: The depth of the Receive Queue. + * @addr: The remote peer's address + * @req_lock: Protects the active request list + * @send_wait: Wait list when the SQ fills up + * @cm_done: Completion event for connection management tracking + */ +struct p9_trans_rdma { + enum { + P9_RDMA_INIT, + P9_RDMA_ADDR_RESOLVED, + P9_RDMA_ROUTE_RESOLVED, + P9_RDMA_CONNECTED, + P9_RDMA_FLUSHING, + P9_RDMA_CLOSING, + P9_RDMA_CLOSED, + } state; + struct rdma_cm_id *cm_id; + struct ib_pd *pd; + struct ib_qp *qp; + struct ib_cq *cq; + struct ib_mr *dma_mr; + u32 lkey; + long timeout; + int sq_depth; + struct semaphore sq_sem; + int rq_depth; + atomic_t rq_count; + struct sockaddr_in addr; + spinlock_t req_lock; + + struct completion cm_done; +}; + +/** + * p9_rdma_context - Keeps track of in-process WR + * + * @wc_op: The original WR op for when the CQE completes in error. + * @busa: Bus address to unmap when the WR completes + * @req: Keeps track of requests (send) + * @rc: Keepts track of replies (receive) + */ +struct p9_rdma_req; +struct p9_rdma_context { + enum ib_wc_opcode wc_op; + dma_addr_t busa; + union { + struct p9_req_t *req; + struct p9_fcall *rc; + }; +}; + +/** + * p9_rdma_opts - Collection of mount options + * @port: port of connection + * @sq_depth: The requested depth of the SQ. This really doesn't need + * to be any deeper than the number of threads used in the client + * @rq_depth: The depth of the RQ. Should be greater than or equal to SQ depth + * @timeout: Time to wait in msecs for CM events + */ +struct p9_rdma_opts { + short port; + int sq_depth; + int rq_depth; + long timeout; +}; + +/* + * Option Parsing (code inspired by NFS code) + */ +enum { + /* Options that take integer arguments */ + Opt_port, Opt_rq_depth, Opt_sq_depth, Opt_timeout, Opt_err, +}; + +static match_table_t tokens = { + {Opt_port, "port=%u"}, + {Opt_sq_depth, "sq=%u"}, + {Opt_rq_depth, "rq=%u"}, + {Opt_timeout, "timeout=%u"}, + {Opt_err, NULL}, +}; + +/** + * parse_options - parse mount options into session structure + * @options: options string passed from mount + * @opts: transport-specific structure to parse options into + * + * Returns 0 upon success, -ERRNO upon failure + */ +static int parse_opts(char *params, struct p9_rdma_opts *opts) +{ + char *p; + substring_t args[MAX_OPT_ARGS]; + int option; + char *options; + int ret; + + opts->port = P9_PORT; + opts->sq_depth = P9_RDMA_SQ_DEPTH; + opts->rq_depth = P9_RDMA_RQ_DEPTH; + opts->timeout = P9_RDMA_TIMEOUT; + + if (!params) + return 0; + + options = kstrdup(params, GFP_KERNEL); + if (!options) { + P9_DPRINTK(P9_DEBUG_ERROR, + "failed to allocate copy of option string\n"); + return -ENOMEM; + } + + while ((p = strsep(&options, ",")) != NULL) { + int token; + int r; + if (!*p) + continue; + token = match_token(p, tokens, args); + r = match_int(&args[0], &option); + if (r < 0) { + P9_DPRINTK(P9_DEBUG_ERROR, + "integer field, but no integer?\n"); + ret = r; + continue; + } + switch (token) { + case Opt_port: + opts->port = option; + break; + case Opt_sq_depth: + opts->sq_depth = option; + break; + case Opt_rq_depth: + opts->rq_depth = option; + break; + case Opt_timeout: + opts->timeout = option; + break; + default: + continue; + } + } + /* RQ must be at least as large as the SQ */ + opts->rq_depth = max(opts->rq_depth, opts->sq_depth); + kfree(options); + return 0; +} + +static int +p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) +{ + struct p9_client *c = id->context; + struct p9_trans_rdma *rdma = c->trans; + switch (event->event) { + case RDMA_CM_EVENT_ADDR_RESOLVED: + BUG_ON(rdma->state != P9_RDMA_INIT); + rdma->state = P9_RDMA_ADDR_RESOLVED; + break; + + case RDMA_CM_EVENT_ROUTE_RESOLVED: + BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED); + rdma->state = P9_RDMA_ROUTE_RESOLVED; + break; + + case RDMA_CM_EVENT_ESTABLISHED: + BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED); + rdma->state = P9_RDMA_CONNECTED; + break; + + case RDMA_CM_EVENT_DISCONNECTED: + if (rdma) + rdma->state = P9_RDMA_CLOSED; + if (c) + c->status = Disconnected; + break; + + case RDMA_CM_EVENT_TIMEWAIT_EXIT: + break; + + case RDMA_CM_EVENT_ADDR_CHANGE: + case RDMA_CM_EVENT_ROUTE_ERROR: + case RDMA_CM_EVENT_DEVICE_REMOVAL: + case RDMA_CM_EVENT_MULTICAST_JOIN: + case RDMA_CM_EVENT_MULTICAST_ERROR: + case RDMA_CM_EVENT_REJECTED: + case RDMA_CM_EVENT_CONNECT_REQUEST: + case RDMA_CM_EVENT_CONNECT_RESPONSE: + case RDMA_CM_EVENT_CONNECT_ERROR: + case RDMA_CM_EVENT_ADDR_ERROR: + case RDMA_CM_EVENT_UNREACHABLE: + c->status = Disconnected; + rdma_disconnect(rdma->cm_id); + break; + default: + BUG(); + } + complete(&rdma->cm_done); + return 0; +} + +static void +handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, + struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) +{ + struct p9_req_t *req; + int err = 0; + int16_t tag; + + req = NULL; + ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, + DMA_FROM_DEVICE); + + if (status != IB_WC_SUCCESS) + goto err_out; + + err = p9_parse_header(c->rc, NULL, NULL, &tag, 1); + if (err) + goto err_out; + + req = p9_tag_lookup(client, tag); + if (!req) + goto err_out; + + req->rc = c->rc; + p9_client_cb(client, req); + + return; + + err_out: + P9_DPRINTK(P9_DEBUG_ERROR, "req %p err %d status %d\n", + req, err, status); + rdma->state = P9_RDMA_FLUSHING; + client->status = Disconnected; + return; +} + +static void +handle_send(struct p9_client *client, struct p9_trans_rdma *rdma, + struct p9_rdma_context *c, enum ib_wc_status status, u32 byte_len) +{ + ib_dma_unmap_single(rdma->cm_id->device, + c->busa, c->req->tc->size, + DMA_TO_DEVICE); +} + +static void qp_event_handler(struct ib_event *event, void *context) +{ + P9_DPRINTK(P9_DEBUG_ERROR, "QP event %d context %p\n", event->event, + context); +} + +static void cq_comp_handler(struct ib_cq *cq, void *cq_context) +{ + struct p9_client *client = cq_context; + struct p9_trans_rdma *rdma = client->trans; + int ret; + struct ib_wc wc; + + ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); + while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { + struct p9_rdma_context *c = (void *) (unsigned long) wc.wr_id; + + switch (c->wc_op) { + case IB_WC_RECV: + atomic_dec(&rdma->rq_count); + handle_recv(client, rdma, c, wc.status, wc.byte_len); + break; + + case IB_WC_SEND: + handle_send(client, rdma, c, wc.status, wc.byte_len); + up(&rdma->sq_sem); + break; + + default: + printk(KERN_ERR "9prdma: unexpected completion type, " + "c->wc_op=%d, wc.opcode=%d, status=%d\n", + c->wc_op, wc.opcode, wc.status); + break; + } + kfree(c); + } +} + +static void cq_event_handler(struct ib_event *e, void *v) +{ + P9_DPRINTK(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v); +} + +static void rdma_destroy_trans(struct p9_trans_rdma *rdma) +{ + if (!rdma) + return; + + if (rdma->dma_mr && !IS_ERR(rdma->dma_mr)) + ib_dereg_mr(rdma->dma_mr); + + if (rdma->qp && !IS_ERR(rdma->qp)) + ib_destroy_qp(rdma->qp); + + if (rdma->pd && !IS_ERR(rdma->pd)) + ib_dealloc_pd(rdma->pd); + + if (rdma->cq && !IS_ERR(rdma->cq)) + ib_destroy_cq(rdma->cq); + + if (rdma->cm_id && !IS_ERR(rdma->cm_id)) + rdma_destroy_id(rdma->cm_id); + + kfree(rdma); +} + +static int +post_recv(struct p9_client *client, struct p9_rdma_context *c) +{ + struct p9_trans_rdma *rdma = client->trans; + struct ib_recv_wr wr, *bad_wr; + struct ib_sge sge; + + c->busa = ib_dma_map_single(rdma->cm_id->device, + c->rc->sdata, client->msize, + DMA_FROM_DEVICE); + if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) + goto error; + + sge.addr = c->busa; + sge.length = client->msize; + sge.lkey = rdma->lkey; + + wr.next = NULL; + c->wc_op = IB_WC_RECV; + wr.wr_id = (unsigned long) c; + wr.sg_list = &sge; + wr.num_sge = 1; + return ib_post_recv(rdma->qp, &wr, &bad_wr); + + error: + P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); + return -EIO; +} + +static int rdma_request(struct p9_client *client, struct p9_req_t *req) +{ + struct p9_trans_rdma *rdma = client->trans; + struct ib_send_wr wr, *bad_wr; + struct ib_sge sge; + int err = 0; + unsigned long flags; + struct p9_rdma_context *c = NULL; + struct p9_rdma_context *rpl_context = NULL; + + /* Allocate an fcall for the reply */ + rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL); + if (!rpl_context) + goto err_close; + + /* + * If the request has a buffer, steal it, otherwise + * allocate a new one. Typically, requests should already + * have receive buffers allocated and just swap them around + */ + if (!req->rc) { + req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize, + GFP_KERNEL); + if (req->rc) { + req->rc->sdata = (char *) req->rc + + sizeof(struct p9_fcall); + req->rc->capacity = client->msize; + } + } + rpl_context->rc = req->rc; + if (!rpl_context->rc) { + kfree(rpl_context); + goto err_close; + } + + /* + * Post a receive buffer for this request. We need to ensure + * there is a reply buffer available for every outstanding + * request. A flushed request can result in no reply for an + * outstanding request, so we must keep a count to avoid + * overflowing the RQ. + */ + if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) { + err = post_recv(client, rpl_context); + if (err) { + kfree(rpl_context->rc); + kfree(rpl_context); + goto err_close; + } + } else + atomic_dec(&rdma->rq_count); + + /* remove posted receive buffer from request structure */ + req->rc = NULL; + + /* Post the request */ + c = kmalloc(sizeof *c, GFP_KERNEL); + if (!c) + goto err_close; + c->req = req; + + c->busa = ib_dma_map_single(rdma->cm_id->device, + c->req->tc->sdata, c->req->tc->size, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) + goto error; + + sge.addr = c->busa; + sge.length = c->req->tc->size; + sge.lkey = rdma->lkey; + + wr.next = NULL; + c->wc_op = IB_WC_SEND; + wr.wr_id = (unsigned long) c; + wr.opcode = IB_WR_SEND; + wr.send_flags = IB_SEND_SIGNALED; + wr.sg_list = &sge; + wr.num_sge = 1; + + if (down_interruptible(&rdma->sq_sem)) + goto error; + + return ib_post_send(rdma->qp, &wr, &bad_wr); + + error: + P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); + return -EIO; + + err_close: + spin_lock_irqsave(&rdma->req_lock, flags); + if (rdma->state < P9_RDMA_CLOSING) { + rdma->state = P9_RDMA_CLOSING; + spin_unlock_irqrestore(&rdma->req_lock, flags); + rdma_disconnect(rdma->cm_id); + } else + spin_unlock_irqrestore(&rdma->req_lock, flags); + return err; +} + +static void rdma_close(struct p9_client *client) +{ + struct p9_trans_rdma *rdma; + + if (!client) + return; + + rdma = client->trans; + if (!rdma) + return; + + client->status = Disconnected; + rdma_disconnect(rdma->cm_id); + rdma_destroy_trans(rdma); +} + +/** + * alloc_rdma - Allocate and initialize the rdma transport structure + * @msize: MTU + * @dotu: Extension attribute + * @opts: Mount options structure + */ +static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) +{ + struct p9_trans_rdma *rdma; + + rdma = kzalloc(sizeof(struct p9_trans_rdma), GFP_KERNEL); + if (!rdma) + return NULL; + + rdma->sq_depth = opts->sq_depth; + rdma->rq_depth = opts->rq_depth; + rdma->timeout = opts->timeout; + spin_lock_init(&rdma->req_lock); + init_completion(&rdma->cm_done); + sema_init(&rdma->sq_sem, rdma->sq_depth); + atomic_set(&rdma->rq_count, 0); + + return rdma; +} + +/* its not clear to me we can do anything after send has been posted */ +static int rdma_cancel(struct p9_client *client, struct p9_req_t *req) +{ + return 1; +} + +/** + * trans_create_rdma - Transport method for creating atransport instance + * @client: client instance + * @addr: IP address string + * @args: Mount options string + */ +static int +rdma_create_trans(struct p9_client *client, const char *addr, char *args) +{ + int err; + struct p9_rdma_opts opts; + struct p9_trans_rdma *rdma; + struct rdma_conn_param conn_param; + struct ib_qp_init_attr qp_attr; + struct ib_device_attr devattr; + + /* Parse the transport specific mount options */ + err = parse_opts(args, &opts); + if (err < 0) + return err; + + /* Create and initialize the RDMA transport structure */ + rdma = alloc_rdma(&opts); + if (!rdma) + return -ENOMEM; + + /* Create the RDMA CM ID */ + rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP); + if (IS_ERR(rdma->cm_id)) + goto error; + + /* Associate the client with the transport */ + client->trans = rdma; + + /* Resolve the server's address */ + rdma->addr.sin_family = AF_INET; + rdma->addr.sin_addr.s_addr = in_aton(addr); + rdma->addr.sin_port = htons(opts.port); + err = rdma_resolve_addr(rdma->cm_id, NULL, + (struct sockaddr *)&rdma->addr, + rdma->timeout); + if (err) + goto error; + err = wait_for_completion_interruptible(&rdma->cm_done); + if (err || (rdma->state != P9_RDMA_ADDR_RESOLVED)) + goto error; + + /* Resolve the route to the server */ + err = rdma_resolve_route(rdma->cm_id, rdma->timeout); + if (err) + goto error; + err = wait_for_completion_interruptible(&rdma->cm_done); + if (err || (rdma->state != P9_RDMA_ROUTE_RESOLVED)) + goto error; + + /* Query the device attributes */ + err = ib_query_device(rdma->cm_id->device, &devattr); + if (err) + goto error; + + /* Create the Completion Queue */ + rdma->cq = ib_create_cq(rdma->cm_id->device, cq_comp_handler, + cq_event_handler, client, + opts.sq_depth + opts.rq_depth + 1, 0); + if (IS_ERR(rdma->cq)) + goto error; + ib_req_notify_cq(rdma->cq, IB_CQ_NEXT_COMP); + + /* Create the Protection Domain */ + rdma->pd = ib_alloc_pd(rdma->cm_id->device); + if (IS_ERR(rdma->pd)) + goto error; + + /* Cache the DMA lkey in the transport */ + rdma->dma_mr = NULL; + if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) + rdma->lkey = rdma->cm_id->device->local_dma_lkey; + else { + rdma->dma_mr = ib_get_dma_mr(rdma->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(rdma->dma_mr)) + goto error; + rdma->lkey = rdma->dma_mr->lkey; + } + + /* Create the Queue Pair */ + memset(&qp_attr, 0, sizeof qp_attr); + qp_attr.event_handler = qp_event_handler; + qp_attr.qp_context = client; + qp_attr.cap.max_send_wr = opts.sq_depth; + qp_attr.cap.max_recv_wr = opts.rq_depth; + qp_attr.cap.max_send_sge = P9_RDMA_SEND_SGE; + qp_attr.cap.max_recv_sge = P9_RDMA_RECV_SGE; + qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; + qp_attr.qp_type = IB_QPT_RC; + qp_attr.send_cq = rdma->cq; + qp_attr.recv_cq = rdma->cq; + err = rdma_create_qp(rdma->cm_id, rdma->pd, &qp_attr); + if (err) + goto error; + rdma->qp = rdma->cm_id->qp; + + /* Request a connection */ + memset(&conn_param, 0, sizeof(conn_param)); + conn_param.private_data = NULL; + conn_param.private_data_len = 0; + conn_param.responder_resources = P9_RDMA_IRD; + conn_param.initiator_depth = P9_RDMA_ORD; + err = rdma_connect(rdma->cm_id, &conn_param); + if (err) + goto error; + err = wait_for_completion_interruptible(&rdma->cm_done); + if (err || (rdma->state != P9_RDMA_CONNECTED)) + goto error; + + client->status = Connected; + + return 0; + +error: + rdma_destroy_trans(rdma); + return -ENOTCONN; +} + +static struct p9_trans_module p9_rdma_trans = { + .name = "rdma", + .maxsize = P9_RDMA_MAXSIZE, + .def = 0, + .owner = THIS_MODULE, + .create = rdma_create_trans, + .close = rdma_close, + .request = rdma_request, + .cancel = rdma_cancel, +}; + +/** + * p9_trans_rdma_init - Register the 9P RDMA transport driver + */ +static int __init p9_trans_rdma_init(void) +{ + v9fs_register_trans(&p9_rdma_trans); + return 0; +} + +static void __exit p9_trans_rdma_exit(void) +{ + v9fs_unregister_trans(&p9_rdma_trans); +} + +module_init(p9_trans_rdma_init); +module_exit(p9_trans_rdma_exit); + +MODULE_AUTHOR("Tom Tucker <tom@opengridcomputing.com>"); +MODULE_DESCRIPTION("RDMA Transport for 9P"); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 94912e077a55..2d7781ec663b 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -1,12 +1,10 @@ /* - * The Guest 9p transport driver + * The Virtio 9p transport driver * * This is a block based transport driver based on the lguest block driver * code. * - */ -/* - * Copyright (C) 2007 Eric Van Hensbergen, IBM Corporation + * Copyright (C) 2007, 2008 Eric Van Hensbergen, IBM Corporation * * Based on virtio console driver * Copyright (C) 2006, 2007 Rusty Russell, IBM Corporation @@ -41,6 +39,7 @@ #include <linux/file.h> #include <net/9p/9p.h> #include <linux/parser.h> +#include <net/9p/client.h> #include <net/9p/transport.h> #include <linux/scatterlist.h> #include <linux/virtio.h> @@ -53,50 +52,6 @@ static DEFINE_MUTEX(virtio_9p_lock); /* global which tracks highest initialized channel */ static int chan_index; -#define P9_INIT_MAXTAG 16 - - -/** - * enum p9_req_status_t - virtio request status - * @REQ_STATUS_IDLE: request slot unused - * @REQ_STATUS_SENT: request sent to server - * @REQ_STATUS_RCVD: response received from server - * @REQ_STATUS_FLSH: request has been flushed - * - * The @REQ_STATUS_IDLE state is used to mark a request slot as unused - * but use is actually tracked by the idpool structure which handles tag - * id allocation. - * - */ - -enum p9_req_status_t { - REQ_STATUS_IDLE, - REQ_STATUS_SENT, - REQ_STATUS_RCVD, - REQ_STATUS_FLSH, -}; - -/** - * struct p9_req_t - virtio request slots - * @status: status of this request slot - * @wq: wait_queue for the client to block on for this request - * - * The virtio transport uses an array to track outstanding requests - * instead of a list. While this may incurr overhead during initial - * allocation or expansion, it makes request lookup much easier as the - * tag id is a index into an array. (We use tag+1 so that we can accomodate - * the -1 tag for the T_VERSION request). - * This also has the nice effect of only having to allocate wait_queues - * once, instead of constantly allocating and freeing them. Its possible - * other resources could benefit from this scheme as well. - * - */ - -struct p9_req_t { - int status; - wait_queue_head_t *wq; -}; - /** * struct virtio_chan - per-instance transport information * @initialized: whether the channel is initialized @@ -121,67 +76,14 @@ static struct virtio_chan { spinlock_t lock; + struct p9_client *client; struct virtio_device *vdev; struct virtqueue *vq; - struct p9_idpool *tagpool; - struct p9_req_t *reqs; - int max_tag; - /* Scatterlist: can be too big for stack. */ struct scatterlist sg[VIRTQUEUE_NUM]; } channels[MAX_9P_CHAN]; -/** - * p9_lookup_tag - Lookup requests by tag - * @c: virtio channel to lookup tag within - * @tag: numeric id for transaction - * - * this is a simple array lookup, but will grow the - * request_slots as necessary to accomodate transaction - * ids which did not previously have a slot. - * - * Bugs: there is currently no upper limit on request slots set - * here, but that should be constrained by the id accounting. - */ - -static struct p9_req_t *p9_lookup_tag(struct virtio_chan *c, u16 tag) -{ - /* This looks up the original request by tag so we know which - * buffer to read the data into */ - tag++; - - while (tag >= c->max_tag) { - int old_max = c->max_tag; - int count; - - if (c->max_tag) - c->max_tag *= 2; - else - c->max_tag = P9_INIT_MAXTAG; - - c->reqs = krealloc(c->reqs, sizeof(struct p9_req_t)*c->max_tag, - GFP_ATOMIC); - if (!c->reqs) { - printk(KERN_ERR "Couldn't grow tag array\n"); - BUG(); - } - for (count = old_max; count < c->max_tag; count++) { - c->reqs[count].status = REQ_STATUS_IDLE; - c->reqs[count].wq = kmalloc(sizeof(wait_queue_head_t), - GFP_ATOMIC); - if (!c->reqs[count].wq) { - printk(KERN_ERR "Couldn't grow tag array\n"); - BUG(); - } - init_waitqueue_head(c->reqs[count].wq); - } - } - - return &c->reqs[tag]; -} - - /* How many bytes left in this page. */ static unsigned int rest_of_page(void *data) { @@ -197,25 +99,13 @@ static unsigned int rest_of_page(void *data) * */ -static void p9_virtio_close(struct p9_trans *trans) +static void p9_virtio_close(struct p9_client *client) { - struct virtio_chan *chan = trans->priv; - int count; - unsigned long flags; - - spin_lock_irqsave(&chan->lock, flags); - p9_idpool_destroy(chan->tagpool); - for (count = 0; count < chan->max_tag; count++) - kfree(chan->reqs[count].wq); - kfree(chan->reqs); - chan->max_tag = 0; - spin_unlock_irqrestore(&chan->lock, flags); + struct virtio_chan *chan = client->trans; mutex_lock(&virtio_9p_lock); chan->inuse = false; mutex_unlock(&virtio_9p_lock); - - kfree(trans); } /** @@ -236,17 +126,16 @@ static void req_done(struct virtqueue *vq) struct virtio_chan *chan = vq->vdev->priv; struct p9_fcall *rc; unsigned int len; - unsigned long flags; struct p9_req_t *req; - spin_lock_irqsave(&chan->lock, flags); + P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); + while ((rc = chan->vq->vq_ops->get_buf(chan->vq, &len)) != NULL) { - req = p9_lookup_tag(chan, rc->tag); - req->status = REQ_STATUS_RCVD; - wake_up(req->wq); + P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); + P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); + req = p9_tag_lookup(chan->client, rc->tag); + p9_client_cb(chan->client, req); } - /* In case queue is stopped waiting for more buffers. */ - spin_unlock_irqrestore(&chan->lock, flags); } /** @@ -283,8 +172,14 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, return index-start; } +/* We don't currently allow canceling of virtio requests */ +static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) +{ + return 1; +} + /** - * p9_virtio_rpc - issue a request and wait for a response + * p9_virtio_request - issue a request * @t: transport state * @tc: &p9_fcall request to transmit * @rc: &p9_fcall to put reponse into @@ -292,44 +187,22 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, */ static int -p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) +p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { int in, out; - int n, err, size; - struct virtio_chan *chan = t->priv; - char *rdata; - struct p9_req_t *req; - unsigned long flags; - - if (*rc == NULL) { - *rc = kmalloc(sizeof(struct p9_fcall) + t->msize, GFP_KERNEL); - if (!*rc) - return -ENOMEM; - } - - rdata = (char *)*rc+sizeof(struct p9_fcall); - - n = P9_NOTAG; - if (tc->id != P9_TVERSION) { - n = p9_idpool_get(chan->tagpool); - if (n < 0) - return -ENOMEM; - } - - spin_lock_irqsave(&chan->lock, flags); - req = p9_lookup_tag(chan, n); - spin_unlock_irqrestore(&chan->lock, flags); + struct virtio_chan *chan = client->trans; + char *rdata = (char *)req->rc+sizeof(struct p9_fcall); - p9_set_tag(tc, n); + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); - P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio rpc tag %d\n", n); - - out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, tc->sdata, tc->size); - in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, t->msize); + out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, + req->tc->size); + in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, + client->msize); req->status = REQ_STATUS_SENT; - if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, tc)) { + if (chan->vq->vq_ops->add_buf(chan->vq, chan->sg, out, in, req->tc)) { P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio rpc add_buf returned failure"); return -EIO; @@ -337,31 +210,7 @@ p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc) chan->vq->vq_ops->kick(chan->vq); - wait_event(*req->wq, req->status == REQ_STATUS_RCVD); - - size = le32_to_cpu(*(__le32 *) rdata); - - err = p9_deserialize_fcall(rdata, size, *rc, t->extended); - if (err < 0) { - P9_DPRINTK(P9_DEBUG_TRANS, - "9p debug: virtio rpc deserialize returned %d\n", err); - return err; - } - -#ifdef CONFIG_NET_9P_DEBUG - if ((p9_debug_level&P9_DEBUG_FCALL) == P9_DEBUG_FCALL) { - char buf[150]; - - p9_printfcall(buf, sizeof(buf), *rc, t->extended); - printk(KERN_NOTICE ">>> %p %s\n", t, buf); - } -#endif - - if (n != P9_NOTAG && p9_idpool_check(n, chan->tagpool)) - p9_idpool_put(n, chan->tagpool); - - req->status = REQ_STATUS_IDLE; - + P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); return 0; } @@ -422,10 +271,9 @@ fail: /** * p9_virtio_create - allocate a new virtio channel + * @client: client instance invoking this transport * @devname: string identifying the channel to connect to (unused) * @args: args passed from sys_mount() for per-transport options (unused) - * @msize: requested maximum packet size - * @extended: 9p2000.u enabled flag * * This sets up a transport channel for 9p communication. Right now * we only match the first available channel, but eventually we couldlook up @@ -441,11 +289,9 @@ fail: * */ -static struct p9_trans * -p9_virtio_create(const char *devname, char *args, int msize, - unsigned char extended) +static int +p9_virtio_create(struct p9_client *client, const char *devname, char *args) { - struct p9_trans *trans; struct virtio_chan *chan = channels; int index = 0; @@ -463,30 +309,13 @@ p9_virtio_create(const char *devname, char *args, int msize, if (index >= MAX_9P_CHAN) { printk(KERN_ERR "9p: no channels available\n"); - return ERR_PTR(-ENODEV); + return -ENODEV; } - chan->tagpool = p9_idpool_create(); - if (IS_ERR(chan->tagpool)) { - printk(KERN_ERR "9p: couldn't allocate tagpool\n"); - return ERR_PTR(-ENOMEM); - } - p9_idpool_get(chan->tagpool); /* reserve tag 0 */ - chan->max_tag = 0; - chan->reqs = NULL; - - trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL); - if (!trans) { - printk(KERN_ERR "9p: couldn't allocate transport\n"); - return ERR_PTR(-ENOMEM); - } - trans->extended = extended; - trans->msize = msize; - trans->close = p9_virtio_close; - trans->rpc = p9_virtio_rpc; - trans->priv = chan; + client->trans = (void *)chan; + chan->client = client; - return trans; + return 0; } /** @@ -526,6 +355,9 @@ static struct virtio_driver p9_virtio_drv = { static struct p9_trans_module p9_virtio_trans = { .name = "virtio", .create = p9_virtio_create, + .close = p9_virtio_close, + .request = p9_virtio_request, + .cancel = p9_virtio_cancel, .maxsize = PAGE_SIZE*16, .def = 0, .owner = THIS_MODULE, diff --git a/net/9p/util.c b/net/9p/util.c index 958fc58cd1ff..dc4ec05ad93d 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -105,6 +105,7 @@ retry: else if (error) return -1; + P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", i, p); return i; } EXPORT_SYMBOL(p9_idpool_get); @@ -121,6 +122,9 @@ EXPORT_SYMBOL(p9_idpool_get); void p9_idpool_put(int id, struct p9_idpool *p) { unsigned long flags; + + P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", id, p); + spin_lock_irqsave(&p->lock, flags); idr_remove(&p->pool, id); spin_unlock_irqrestore(&p->lock, flags); diff --git a/net/Kconfig b/net/Kconfig index 7612cc8c337c..d789d79551ae 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -180,6 +180,7 @@ source "net/tipc/Kconfig" source "net/atm/Kconfig" source "net/802/Kconfig" source "net/bridge/Kconfig" +source "net/dsa/Kconfig" source "net/8021q/Kconfig" source "net/decnet/Kconfig" source "net/llc/Kconfig" @@ -232,18 +233,23 @@ source "net/can/Kconfig" source "net/irda/Kconfig" source "net/bluetooth/Kconfig" source "net/rxrpc/Kconfig" +source "net/phonet/Kconfig" config FIB_RULES bool -menu "Wireless" +menuconfig WIRELESS + bool "Wireless" depends on !S390 + default y + +if WIRELESS source "net/wireless/Kconfig" source "net/mac80211/Kconfig" source "net/ieee80211/Kconfig" -endmenu +endif # WIRELESS source "net/rfkill/Kconfig" source "net/9p/Kconfig" diff --git a/net/Makefile b/net/Makefile index 4f43e7f874f3..27d1f10dc0e0 100644 --- a/net/Makefile +++ b/net/Makefile @@ -26,6 +26,7 @@ obj-$(CONFIG_PACKET) += packet/ obj-$(CONFIG_NET_KEY) += key/ obj-$(CONFIG_NET_SCHED) += sched/ obj-$(CONFIG_BRIDGE) += bridge/ +obj-$(CONFIG_NET_DSA) += dsa/ obj-$(CONFIG_IPX) += ipx/ obj-$(CONFIG_ATALK) += appletalk/ obj-$(CONFIG_WAN_ROUTER) += wanrouter/ @@ -42,6 +43,7 @@ obj-$(CONFIG_AF_RXRPC) += rxrpc/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_DECNET) += decnet/ obj-$(CONFIG_ECONET) += econet/ +obj-$(CONFIG_PHONET) += phonet/ ifneq ($(CONFIG_VLAN_8021Q),) obj-y += 8021q/ endif diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index 0c850427a85b..d3134e7e6ee8 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -2,7 +2,7 @@ * DDP: An implementation of the AppleTalk DDP protocol for * Ethernet 'ELAP'. * - * Alan Cox <Alan.Cox@linux.org> + * Alan Cox <alan@lxorguk.ukuu.org.uk> * * With more than a little assistance from * @@ -1934,6 +1934,6 @@ static void __exit atalk_exit(void) module_exit(atalk_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Alan Cox <Alan.Cox@linux.org>"); +MODULE_AUTHOR("Alan Cox <alan@lxorguk.ukuu.org.uk>"); MODULE_DESCRIPTION("AppleTalk 0.20\n"); MODULE_ALIAS_NETPROTO(PF_APPLETALK); diff --git a/net/atm/br2684.c b/net/atm/br2684.c index 8d9a6f158880..280de481edc7 100644 --- a/net/atm/br2684.c +++ b/net/atm/br2684.c @@ -375,11 +375,11 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) if (memcmp (skb->data + 6, ethertype_ipv6, sizeof(ethertype_ipv6)) == 0) - skb->protocol = __constant_htons(ETH_P_IPV6); + skb->protocol = htons(ETH_P_IPV6); else if (memcmp (skb->data + 6, ethertype_ipv4, sizeof(ethertype_ipv4)) == 0) - skb->protocol = __constant_htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IP); else goto error; skb_pull(skb, sizeof(llc_oui_ipv4)); @@ -404,9 +404,9 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb) skb_reset_network_header(skb); iph = ip_hdr(skb); if (iph->version == 4) - skb->protocol = __constant_htons(ETH_P_IP); + skb->protocol = htons(ETH_P_IP); else if (iph->version == 6) - skb->protocol = __constant_htons(ETH_P_IPV6); + skb->protocol = htons(ETH_P_IPV6); else goto error; skb->pkt_type = PACKET_HOST; diff --git a/net/atm/lec.c b/net/atm/lec.c index 5799fb52365a..8f701cde5945 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1931,7 +1931,6 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, switch (priv->lane_version) { case 1: return priv->mcast_vcc; - break; case 2: /* LANE2 wants arp for multicast addresses */ if (!compare_ether_addr(mac_to_find, bus_mac)) return priv->mcast_vcc; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 01c83e2a4c19..28c71574a781 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -317,6 +317,9 @@ void ax25_destroy_socket(ax25_cb *ax25) /* Queue the unaccepted socket for death */ sock_orphan(skb->sk); + /* 9A4GL: hack to release unaccepted sockets */ + skb->sk->sk_state = TCP_LISTEN; + ax25_start_heartbeat(sax25); sax25->state = AX25_STATE_0; } diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c index cdc7e751ef36..96e4b9273250 100644 --- a/net/ax25/ax25_std_timer.c +++ b/net/ax25/ax25_std_timer.c @@ -39,9 +39,11 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) switch (ax25->state) { case AX25_STATE_0: - if (!sk || - sock_flag(sk, SOCK_DESTROY) || - sock_flag(sk, SOCK_DEAD)) { + /* Magic here: If we listen() and a new link dies before it + is accepted() it isn't 'dead' so doesn't get removed. */ + if (!sk || sock_flag(sk, SOCK_DESTROY) || + (sk->sk_state == TCP_LISTEN && + sock_flag(sk, SOCK_DEAD))) { if (sk) { sock_hold(sk); ax25_destroy_socket(ax25); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index f6348e078aa4..8f9431a12c6f 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -37,10 +37,7 @@ #include <linux/poll.h> #include <net/sock.h> #include <asm/ioctls.h> - -#if defined(CONFIG_KMOD) #include <linux/kmod.h> -#endif #include <net/bluetooth/bluetooth.h> @@ -145,11 +142,8 @@ static int bt_sock_create(struct net *net, struct socket *sock, int proto) if (proto < 0 || proto >= BT_MAX_PROTO) return -EINVAL; -#if defined(CONFIG_KMOD) - if (!bt_proto[proto]) { + if (!bt_proto[proto]) request_module("bt-proto-%d", proto); - } -#endif err = -EPROTONOSUPPORT; diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 96434d774c84..acdeab3d9807 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -578,7 +578,7 @@ static int hidp_session(void *arg) if (session->hid) { if (session->hid->claimed & HID_CLAIMED_INPUT) hidinput_disconnect(session->hid); - hid_free_device(session->hid); + hid_destroy_device(session->hid); } /* Wakeup user-space polling for socket errors */ @@ -623,9 +623,15 @@ static struct device *hidp_get_device(struct hidp_session *session) static int hidp_setup_input(struct hidp_session *session, struct hidp_connadd_req *req) { - struct input_dev *input = session->input; + struct input_dev *input; int i; + input = input_allocate_device(); + if (!input) + return -ENOMEM; + + session->input = input; + input_set_drvdata(input, session); input->name = "Bluetooth HID Boot Protocol Device"; @@ -677,67 +683,114 @@ static void hidp_close(struct hid_device *hid) { } -static const struct { - __u16 idVendor; - __u16 idProduct; - unsigned quirks; -} hidp_blacklist[] = { - /* Apple wireless Mighty Mouse */ - { 0x05ac, 0x030c, HID_QUIRK_MIGHTYMOUSE | HID_QUIRK_INVERT_HWHEEL }, +static int hidp_parse(struct hid_device *hid) +{ + struct hidp_session *session = hid->driver_data; + struct hidp_connadd_req *req = session->req; + unsigned char *buf; + int ret; - { } /* Terminating entry */ -}; + buf = kmalloc(req->rd_size, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, req->rd_data, req->rd_size)) { + kfree(buf); + return -EFAULT; + } + + ret = hid_parse_report(session->hid, buf, req->rd_size); + + kfree(buf); + + if (ret) + return ret; + + session->req = NULL; + + return 0; +} + +static int hidp_start(struct hid_device *hid) +{ + struct hidp_session *session = hid->driver_data; + struct hid_report *report; -static void hidp_setup_quirks(struct hid_device *hid) + list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT]. + report_list, list) + hidp_send_report(session, report); + + list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT]. + report_list, list) + hidp_send_report(session, report); + + return 0; +} + +static void hidp_stop(struct hid_device *hid) { - unsigned int n; + struct hidp_session *session = hid->driver_data; + + skb_queue_purge(&session->ctrl_transmit); + skb_queue_purge(&session->intr_transmit); - for (n = 0; hidp_blacklist[n].idVendor; n++) - if (hidp_blacklist[n].idVendor == le16_to_cpu(hid->vendor) && - hidp_blacklist[n].idProduct == le16_to_cpu(hid->product)) - hid->quirks = hidp_blacklist[n].quirks; + if (hid->claimed & HID_CLAIMED_INPUT) + hidinput_disconnect(hid); + hid->claimed = 0; } -static void hidp_setup_hid(struct hidp_session *session, +static struct hid_ll_driver hidp_hid_driver = { + .parse = hidp_parse, + .start = hidp_start, + .stop = hidp_stop, + .open = hidp_open, + .close = hidp_close, + .hidinput_input_event = hidp_hidinput_event, +}; + +static int hidp_setup_hid(struct hidp_session *session, struct hidp_connadd_req *req) { - struct hid_device *hid = session->hid; - struct hid_report *report; + struct hid_device *hid; bdaddr_t src, dst; + int ret; - baswap(&src, &bt_sk(session->ctrl_sock->sk)->src); - baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst); + hid = hid_allocate_device(); + if (IS_ERR(hid)) { + ret = PTR_ERR(session->hid); + goto err; + } + session->hid = hid; + session->req = req; hid->driver_data = session; - hid->country = req->country; + baswap(&src, &bt_sk(session->ctrl_sock->sk)->src); + baswap(&dst, &bt_sk(session->ctrl_sock->sk)->dst); hid->bus = BUS_BLUETOOTH; hid->vendor = req->vendor; hid->product = req->product; hid->version = req->version; + hid->country = req->country; strncpy(hid->name, req->name, 128); strncpy(hid->phys, batostr(&src), 64); strncpy(hid->uniq, batostr(&dst), 64); - hid->dev = hidp_get_device(session); - - hid->hid_open = hidp_open; - hid->hid_close = hidp_close; - - hid->hidinput_input_event = hidp_hidinput_event; + hid->dev.parent = hidp_get_device(session); + hid->ll_driver = &hidp_hid_driver; - hidp_setup_quirks(hid); + ret = hid_add_device(hid); + if (ret) + goto err_hid; - list_for_each_entry(report, &hid->report_enum[HID_INPUT_REPORT].report_list, list) - hidp_send_report(session, report); - - list_for_each_entry(report, &hid->report_enum[HID_FEATURE_REPORT].report_list, list) - hidp_send_report(session, report); - - if (hidinput_connect(hid) == 0) - hid->claimed |= HID_CLAIMED_INPUT; + return 0; +err_hid: + hid_destroy_device(hid); + session->hid = NULL; +err: + return ret; } int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock) @@ -757,38 +810,6 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, BT_DBG("rd_data %p rd_size %d", req->rd_data, req->rd_size); - if (req->rd_size > 0) { - unsigned char *buf = kmalloc(req->rd_size, GFP_KERNEL); - - if (!buf) { - kfree(session); - return -ENOMEM; - } - - if (copy_from_user(buf, req->rd_data, req->rd_size)) { - kfree(buf); - kfree(session); - return -EFAULT; - } - - session->hid = hid_parse_report(buf, req->rd_size); - - kfree(buf); - - if (!session->hid) { - kfree(session); - return -EINVAL; - } - } - - if (!session->hid) { - session->input = input_allocate_device(); - if (!session->input) { - kfree(session); - return -ENOMEM; - } - } - down_write(&hidp_session_sem); s = __hidp_get_session(&bt_sk(ctrl_sock->sk)->dst); @@ -816,15 +837,18 @@ int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, session->flags = req->flags & (1 << HIDP_BLUETOOTH_VENDOR_ID); session->idle_to = req->idle_to; - if (session->input) { + if (req->rd_size > 0) { + err = hidp_setup_hid(session, req); + if (err && err != -ENODEV) + goto err_skb; + } + + if (!session->hid) { err = hidp_setup_input(session, req); if (err < 0) - goto failed; + goto err_skb; } - if (session->hid) - hidp_setup_hid(session, req); - __hidp_link_session(session); hidp_set_timer(session); @@ -850,17 +874,16 @@ unlink: __hidp_unlink_session(session); - if (session->input) { + if (session->input) input_unregister_device(session->input); - session->input = NULL; /* don't try to free it here */ - } - + if (session->hid) + hid_destroy_device(session->hid); +err_skb: + skb_queue_purge(&session->ctrl_transmit); + skb_queue_purge(&session->intr_transmit); failed: up_write(&hidp_session_sem); - if (session->hid) - hid_free_device(session->hid); - input_free_device(session->input); kfree(session); return err; @@ -950,18 +973,43 @@ int hidp_get_conninfo(struct hidp_conninfo *ci) return err; } +static const struct hid_device_id hidp_table[] = { + { HID_BLUETOOTH_DEVICE(HID_ANY_ID, HID_ANY_ID) }, + { } +}; + +static struct hid_driver hidp_driver = { + .name = "generic-bluetooth", + .id_table = hidp_table, +}; + static int __init hidp_init(void) { + int ret; + l2cap_load(); BT_INFO("HIDP (Human Interface Emulation) ver %s", VERSION); - return hidp_init_sockets(); + ret = hid_register_driver(&hidp_driver); + if (ret) + goto err; + + ret = hidp_init_sockets(); + if (ret) + goto err_drv; + + return 0; +err_drv: + hid_unregister_driver(&hidp_driver); +err: + return ret; } static void __exit hidp_exit(void) { hidp_cleanup_sockets(); + hid_unregister_driver(&hidp_driver); } module_init(hidp_init); diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h index 343fb0566b3e..e503c89057ad 100644 --- a/net/bluetooth/hidp/hidp.h +++ b/net/bluetooth/hidp/hidp.h @@ -151,6 +151,8 @@ struct hidp_session { struct sk_buff_head ctrl_transmit; struct sk_buff_head intr_transmit; + + struct hidp_connadd_req *req; }; static inline void hidp_schedule(struct hidp_session *session) diff --git a/net/bridge/br.c b/net/bridge/br.c index 573acdf6f9ff..4d2c1f1cb524 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -28,6 +28,10 @@ static const struct stp_proto br_stp_proto = { .rcv = br_stp_rcv, }; +static struct pernet_operations br_net_ops = { + .exit = br_net_exit, +}; + static int __init br_init(void) { int err; @@ -42,18 +46,22 @@ static int __init br_init(void) if (err) goto err_out; - err = br_netfilter_init(); + err = register_pernet_subsys(&br_net_ops); if (err) goto err_out1; - err = register_netdevice_notifier(&br_device_notifier); + err = br_netfilter_init(); if (err) goto err_out2; - err = br_netlink_init(); + err = register_netdevice_notifier(&br_device_notifier); if (err) goto err_out3; + err = br_netlink_init(); + if (err) + goto err_out4; + brioctl_set(br_ioctl_deviceless_stub); br_handle_frame_hook = br_handle_frame; @@ -61,10 +69,12 @@ static int __init br_init(void) br_fdb_put_hook = br_fdb_put; return 0; -err_out3: +err_out4: unregister_netdevice_notifier(&br_device_notifier); -err_out2: +err_out3: br_netfilter_fini(); +err_out2: + unregister_pernet_subsys(&br_net_ops); err_out1: br_fdb_fini(); err_out: @@ -80,7 +90,7 @@ static void __exit br_deinit(void) unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); - br_cleanup_bridges(); + unregister_pernet_subsys(&br_net_ops); synchronize_net(); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 4f52c3d50ebe..6c023f0f8252 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -178,5 +178,6 @@ void br_dev_setup(struct net_device *dev) dev->priv_flags = IFF_EBRIDGE; dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA | - NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX; + NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX | + NETIF_F_NETNS_LOCAL | NETIF_F_GSO; } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 63c18aacde8c..0a09ccf68c1c 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -168,7 +168,7 @@ static void del_br(struct net_bridge *br) unregister_netdevice(br->dev); } -static struct net_device *new_bridge_dev(const char *name) +static struct net_device *new_bridge_dev(struct net *net, const char *name) { struct net_bridge *br; struct net_device *dev; @@ -178,6 +178,7 @@ static struct net_device *new_bridge_dev(const char *name) if (!dev) return NULL; + dev_net_set(dev, net); br = netdev_priv(dev); br->dev = dev; @@ -262,12 +263,12 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, return p; } -int br_add_bridge(const char *name) +int br_add_bridge(struct net *net, const char *name) { struct net_device *dev; int ret; - dev = new_bridge_dev(name); + dev = new_bridge_dev(net, name); if (!dev) return -ENOMEM; @@ -294,13 +295,13 @@ out_free: goto out; } -int br_del_bridge(const char *name) +int br_del_bridge(struct net *net, const char *name) { struct net_device *dev; int ret = 0; rtnl_lock(); - dev = __dev_get_by_name(&init_net, name); + dev = __dev_get_by_name(net, name); if (dev == NULL) ret = -ENXIO; /* Could not find device */ @@ -346,15 +347,21 @@ int br_min_mtu(const struct net_bridge *br) void br_features_recompute(struct net_bridge *br) { struct net_bridge_port *p; - unsigned long features; + unsigned long features, mask; - features = br->feature_mask; + features = mask = br->feature_mask; + if (list_empty(&br->port_list)) + goto done; + + features &= ~NETIF_F_ONE_FOR_ALL; list_for_each_entry(p, &br->port_list, list) { - features = netdev_compute_features(features, p->dev->features); + features = netdev_increment_features(features, + p->dev->features, mask); } - br->dev->features = features; +done: + br->dev->features = netdev_fix_features(features, NULL); } /* called with RTNL */ @@ -445,13 +452,13 @@ int br_del_if(struct net_bridge *br, struct net_device *dev) return 0; } -void __exit br_cleanup_bridges(void) +void br_net_exit(struct net *net) { struct net_device *dev; rtnl_lock(); restart: - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { if (dev->priv_flags & IFF_EBRIDGE) { del_br(dev->priv); goto restart; diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 5bbf07362172..6a6433daaf27 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -21,12 +21,12 @@ #include "br_private.h" /* called with RTNL */ -static int get_bridge_ifindices(int *indices, int num) +static int get_bridge_ifindices(struct net *net, int *indices, int num) { struct net_device *dev; int i = 0; - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { if (i >= num) break; if (dev->priv_flags & IFF_EBRIDGE) @@ -89,7 +89,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) if (!capable(CAP_NET_ADMIN)) return -EPERM; - dev = dev_get_by_index(&init_net, ifindex); + dev = dev_get_by_index(dev_net(br->dev), ifindex); if (dev == NULL) return -EINVAL; @@ -315,7 +315,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) return -EOPNOTSUPP; } -static int old_deviceless(void __user *uarg) +static int old_deviceless(struct net *net, void __user *uarg) { unsigned long args[3]; @@ -337,7 +337,7 @@ static int old_deviceless(void __user *uarg) if (indices == NULL) return -ENOMEM; - args[2] = get_bridge_ifindices(indices, args[2]); + args[2] = get_bridge_ifindices(net, indices, args[2]); ret = copy_to_user((void __user *)args[1], indices, args[2]*sizeof(int)) ? -EFAULT : args[2]; @@ -360,9 +360,9 @@ static int old_deviceless(void __user *uarg) buf[IFNAMSIZ-1] = 0; if (args[0] == BRCTL_ADD_BRIDGE) - return br_add_bridge(buf); + return br_add_bridge(net, buf); - return br_del_bridge(buf); + return br_del_bridge(net, buf); } } @@ -374,7 +374,7 @@ int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uar switch (cmd) { case SIOCGIFBR: case SIOCSIFBR: - return old_deviceless(uarg); + return old_deviceless(net, uarg); case SIOCBRADDBR: case SIOCBRDELBR: @@ -389,9 +389,9 @@ int br_ioctl_deviceless_stub(struct net *net, unsigned int cmd, void __user *uar buf[IFNAMSIZ-1] = 0; if (cmd == SIOCBRADDBR) - return br_add_bridge(buf); + return br_add_bridge(net, buf); - return br_del_bridge(buf); + return br_del_bridge(net, buf); } } return -EOPNOTSUPP; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 6a9a6cd74b1e..fa5cda4e552a 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -657,7 +657,7 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb, { struct nf_bridge_info *nf_bridge; struct net_device *parent; - int pf; + u_int8_t pf; if (!skb->nf_bridge) return NF_ACCEPT; @@ -719,7 +719,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb, return NF_ACCEPT; } *d = (struct net_device *)in; - NF_HOOK(NF_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in, + NF_HOOK(NFPROTO_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in, (struct net_device *)out, br_nf_forward_finish); return NF_STOLEN; @@ -791,7 +791,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb, { struct nf_bridge_info *nf_bridge = skb->nf_bridge; struct net_device *realoutdev = bridge_parent(skb->dev); - int pf; + u_int8_t pf; #ifdef CONFIG_NETFILTER_DEBUG /* Be very paranoid. This probably won't happen anymore, but let's diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index f155e6ce8a21..ba7be195803c 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -82,6 +82,7 @@ nla_put_failure: */ void br_ifinfo_notify(int event, struct net_bridge_port *port) { + struct net *net = dev_net(port->dev); struct sk_buff *skb; int err = -ENOBUFS; @@ -97,10 +98,10 @@ void br_ifinfo_notify(int event, struct net_bridge_port *port) kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, &init_net,0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); errout: if (err < 0) - rtnl_set_sk_err(&init_net, RTNLGRP_LINK, err); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); } /* @@ -112,11 +113,8 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) struct net_device *dev; int idx; - if (net != &init_net) - return 0; - idx = 0; - for_each_netdev(&init_net, dev) { + for_each_netdev(net, dev) { /* not a bridge port */ if (dev->br_port == NULL || idx < cb->args[0]) goto skip; @@ -147,9 +145,6 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct net_bridge_port *p; u8 new_state; - if (net != &init_net) - return -EINVAL; - if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -165,7 +160,7 @@ static int br_rtm_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (new_state > BR_STATE_BLOCKING) return -EINVAL; - dev = __dev_get_by_index(&init_net, ifm->ifi_index); + dev = __dev_get_by_index(net, ifm->ifi_index); if (!dev) return -ENODEV; diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index 76340bdd052e..763a3ec292e5 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -35,9 +35,6 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v struct net_bridge_port *p = dev->br_port; struct net_bridge *br; - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; - /* not a port of a bridge */ if (p == NULL) return NOTIFY_DONE; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c3dc18ddc043..b6c3b71974dc 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -178,9 +178,9 @@ extern void br_flood_forward(struct net_bridge *br, struct sk_buff *skb); /* br_if.c */ extern void br_port_carrier_check(struct net_bridge_port *p); -extern int br_add_bridge(const char *name); -extern int br_del_bridge(const char *name); -extern void br_cleanup_bridges(void); +extern int br_add_bridge(struct net *net, const char *name); +extern int br_del_bridge(struct net *net, const char *name); +extern void br_net_exit(struct net *net); extern int br_add_if(struct net_bridge *br, struct net_device *dev); extern int br_del_if(struct net_bridge *br, diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index 8b200f96f722..81ae40b3f655 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -140,9 +140,6 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, struct net_bridge *br; const unsigned char *buf; - if (!net_eq(dev_net(dev), &init_net)) - goto err; - if (!p) goto err; diff --git a/net/bridge/netfilter/Kconfig b/net/bridge/netfilter/Kconfig index 909479794999..ba6f73eb06c6 100644 --- a/net/bridge/netfilter/Kconfig +++ b/net/bridge/netfilter/Kconfig @@ -2,21 +2,22 @@ # Bridge netfilter configuration # -menu "Bridge: Netfilter Configuration" - depends on BRIDGE && BRIDGE_NETFILTER - -config BRIDGE_NF_EBTABLES +menuconfig BRIDGE_NF_EBTABLES tristate "Ethernet Bridge tables (ebtables) support" + depends on BRIDGE && BRIDGE_NETFILTER + select NETFILTER_XTABLES help ebtables is a general, extensible frame/packet identification framework. Say 'Y' or 'M' here if you want to do Ethernet filtering/NAT/brouting on the Ethernet bridge. + +if BRIDGE_NF_EBTABLES + # # tables # config BRIDGE_EBT_BROUTE tristate "ebt: broute table support" - depends on BRIDGE_NF_EBTABLES help The ebtables broute table is used to define rules that decide between bridging and routing frames, giving Linux the functionality of a @@ -27,7 +28,6 @@ config BRIDGE_EBT_BROUTE config BRIDGE_EBT_T_FILTER tristate "ebt: filter table support" - depends on BRIDGE_NF_EBTABLES help The ebtables filter table is used to define frame filtering rules at local input, forwarding and local output. See the man page for @@ -37,7 +37,6 @@ config BRIDGE_EBT_T_FILTER config BRIDGE_EBT_T_NAT tristate "ebt: nat table support" - depends on BRIDGE_NF_EBTABLES help The ebtables nat table is used to define rules that alter the MAC source address (MAC SNAT) or the MAC destination address (MAC DNAT). @@ -49,7 +48,6 @@ config BRIDGE_EBT_T_NAT # config BRIDGE_EBT_802_3 tristate "ebt: 802.3 filter support" - depends on BRIDGE_NF_EBTABLES help This option adds matching support for 802.3 Ethernet frames. @@ -57,7 +55,6 @@ config BRIDGE_EBT_802_3 config BRIDGE_EBT_AMONG tristate "ebt: among filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the among match, which allows matching the MAC source and/or destination address on a list of addresses. Optionally, @@ -67,7 +64,6 @@ config BRIDGE_EBT_AMONG config BRIDGE_EBT_ARP tristate "ebt: ARP filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the ARP match, which allows ARP and RARP header field filtering. @@ -76,7 +72,6 @@ config BRIDGE_EBT_ARP config BRIDGE_EBT_IP tristate "ebt: IP filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the IP match, which allows basic IP header field filtering. @@ -94,7 +89,6 @@ config BRIDGE_EBT_IP6 config BRIDGE_EBT_LIMIT tristate "ebt: limit match support" - depends on BRIDGE_NF_EBTABLES help This option adds the limit match, which allows you to control the rate at which a rule can be matched. This match is the @@ -105,7 +99,6 @@ config BRIDGE_EBT_LIMIT config BRIDGE_EBT_MARK tristate "ebt: mark filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the mark match, which allows matching frames based on the 'nfmark' value in the frame. This can be set by the mark target. @@ -116,7 +109,6 @@ config BRIDGE_EBT_MARK config BRIDGE_EBT_PKTTYPE tristate "ebt: packet type filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the packet type match, which allows matching on the type of packet based on its Ethernet "class" (as determined by @@ -127,7 +119,6 @@ config BRIDGE_EBT_PKTTYPE config BRIDGE_EBT_STP tristate "ebt: STP filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the Spanning Tree Protocol match, which allows STP header field filtering. @@ -136,7 +127,6 @@ config BRIDGE_EBT_STP config BRIDGE_EBT_VLAN tristate "ebt: 802.1Q VLAN filter support" - depends on BRIDGE_NF_EBTABLES help This option adds the 802.1Q vlan match, which allows the filtering of 802.1Q vlan fields. @@ -156,7 +146,6 @@ config BRIDGE_EBT_ARPREPLY config BRIDGE_EBT_DNAT tristate "ebt: dnat target support" - depends on BRIDGE_NF_EBTABLES help This option adds the MAC DNAT target, which allows altering the MAC destination address of frames. @@ -165,7 +154,6 @@ config BRIDGE_EBT_DNAT config BRIDGE_EBT_MARK_T tristate "ebt: mark target support" - depends on BRIDGE_NF_EBTABLES help This option adds the mark target, which allows marking frames by setting the 'nfmark' value in the frame. @@ -176,7 +164,6 @@ config BRIDGE_EBT_MARK_T config BRIDGE_EBT_REDIRECT tristate "ebt: redirect target support" - depends on BRIDGE_NF_EBTABLES help This option adds the MAC redirect target, which allows altering the MAC destination address of a frame to that of the device it arrived on. @@ -185,7 +172,6 @@ config BRIDGE_EBT_REDIRECT config BRIDGE_EBT_SNAT tristate "ebt: snat target support" - depends on BRIDGE_NF_EBTABLES help This option adds the MAC SNAT target, which allows altering the MAC source address of frames. @@ -196,7 +182,6 @@ config BRIDGE_EBT_SNAT # config BRIDGE_EBT_LOG tristate "ebt: log support" - depends on BRIDGE_NF_EBTABLES help This option adds the log watcher, that you can use in any rule in any ebtables table. It records info about the frame header @@ -206,7 +191,6 @@ config BRIDGE_EBT_LOG config BRIDGE_EBT_ULOG tristate "ebt: ulog support (OBSOLETE)" - depends on BRIDGE_NF_EBTABLES help This option enables the old bridge-specific "ebt_ulog" implementation which has been obsoleted by the new "nfnetlink_log" code (see @@ -223,7 +207,6 @@ config BRIDGE_EBT_ULOG config BRIDGE_EBT_NFLOG tristate "ebt: nflog support" - depends on BRIDGE_NF_EBTABLES help This option enables the nflog watcher, which allows to LOG messages through the netfilter logging API, which can use @@ -235,4 +218,4 @@ config BRIDGE_EBT_NFLOG To compile it as a module, choose M here. If unsure, say N. -endmenu +endif # BRIDGE_NF_EBTABLES diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c index 98534025360f..bd91dc58d49b 100644 --- a/net/bridge/netfilter/ebt_802_3.c +++ b/net/bridge/netfilter/ebt_802_3.c @@ -7,64 +7,63 @@ * May 2003 * */ - +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_802_3.h> -#include <linux/module.h> -static int ebt_filter_802_3(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, unsigned int datalen) +static bool +ebt_802_3_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_802_3_info *info = data; + const struct ebt_802_3_info *info = par->matchinfo; const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb); __be16 type = hdr->llc.ui.ctrl & IS_UI ? hdr->llc.ui.type : hdr->llc.ni.type; if (info->bitmask & EBT_802_3_SAP) { if (FWINV(info->sap != hdr->llc.ui.ssap, EBT_802_3_SAP)) - return EBT_NOMATCH; + return false; if (FWINV(info->sap != hdr->llc.ui.dsap, EBT_802_3_SAP)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_802_3_TYPE) { if (!(hdr->llc.ui.dsap == CHECK_TYPE && hdr->llc.ui.ssap == CHECK_TYPE)) - return EBT_NOMATCH; + return false; if (FWINV(info->type != type, EBT_802_3_TYPE)) - return EBT_NOMATCH; + return false; } - return EBT_MATCH; + return true; } -static struct ebt_match filter_802_3; -static int ebt_802_3_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_802_3_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_802_3_info *info = data; + const struct ebt_802_3_info *info = par->matchinfo; - if (datalen < sizeof(struct ebt_802_3_info)) - return -EINVAL; if (info->bitmask & ~EBT_802_3_MASK || info->invflags & ~EBT_802_3_MASK) - return -EINVAL; + return false; - return 0; + return true; } -static struct ebt_match filter_802_3 __read_mostly = { - .name = EBT_802_3_MATCH, - .match = ebt_filter_802_3, - .check = ebt_802_3_check, +static struct xt_match ebt_802_3_mt_reg __read_mostly = { + .name = "802_3", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_802_3_mt, + .checkentry = ebt_802_3_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_802_3_info)), .me = THIS_MODULE, }; static int __init ebt_802_3_init(void) { - return ebt_register_match(&filter_802_3); + return xt_register_match(&ebt_802_3_mt_reg); } static void __exit ebt_802_3_fini(void) { - ebt_unregister_match(&filter_802_3); + xt_unregister_match(&ebt_802_3_mt_reg); } module_init(ebt_802_3_init); diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 70b6dca5ea75..b595f091f35b 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -7,15 +7,15 @@ * August, 2003 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_among.h> #include <linux/ip.h> #include <linux/if_arp.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_among.h> -static int ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, - const char *mac, __be32 ip) +static bool ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, + const char *mac, __be32 ip) { /* You may be puzzled as to how this code works. * Some tricks were used, refer to @@ -33,23 +33,19 @@ static int ebt_mac_wormhash_contains(const struct ebt_mac_wormhash *wh, if (ip) { for (i = start; i < limit; i++) { p = &wh->pool[i]; - if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) { - if (p->ip == 0 || p->ip == ip) { - return 1; - } - } + if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) + if (p->ip == 0 || p->ip == ip) + return true; } } else { for (i = start; i < limit; i++) { p = &wh->pool[i]; - if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) { - if (p->ip == 0) { - return 1; - } - } + if (cmp[1] == p->cmp[1] && cmp[0] == p->cmp[0]) + if (p->ip == 0) + return true; } } - return 0; + return false; } static int ebt_mac_wormhash_check_integrity(const struct ebt_mac_wormhash @@ -131,12 +127,10 @@ static int get_ip_src(const struct sk_buff *skb, __be32 *addr) return 0; } -static int ebt_filter_among(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, const void *data, - unsigned int datalen) +static bool +ebt_among_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_among_info *info = data; + const struct ebt_among_info *info = par->matchinfo; const char *dmac, *smac; const struct ebt_mac_wormhash *wh_dst, *wh_src; __be32 dip = 0, sip = 0; @@ -147,41 +141,41 @@ static int ebt_filter_among(const struct sk_buff *skb, if (wh_src) { smac = eth_hdr(skb)->h_source; if (get_ip_src(skb, &sip)) - return EBT_NOMATCH; + return false; if (!(info->bitmask & EBT_AMONG_SRC_NEG)) { /* we match only if it contains */ if (!ebt_mac_wormhash_contains(wh_src, smac, sip)) - return EBT_NOMATCH; + return false; } else { /* we match only if it DOES NOT contain */ if (ebt_mac_wormhash_contains(wh_src, smac, sip)) - return EBT_NOMATCH; + return false; } } if (wh_dst) { dmac = eth_hdr(skb)->h_dest; if (get_ip_dst(skb, &dip)) - return EBT_NOMATCH; + return false; if (!(info->bitmask & EBT_AMONG_DST_NEG)) { /* we match only if it contains */ if (!ebt_mac_wormhash_contains(wh_dst, dmac, dip)) - return EBT_NOMATCH; + return false; } else { /* we match only if it DOES NOT contain */ if (ebt_mac_wormhash_contains(wh_dst, dmac, dip)) - return EBT_NOMATCH; + return false; } } - return EBT_MATCH; + return true; } -static int ebt_among_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, - unsigned int datalen) +static bool ebt_among_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_among_info *info = data; + const struct ebt_among_info *info = par->matchinfo; + const struct ebt_entry_match *em = + container_of(par->matchinfo, const struct ebt_entry_match, data); int expected_length = sizeof(struct ebt_among_info); const struct ebt_mac_wormhash *wh_dst, *wh_src; int err; @@ -191,42 +185,45 @@ static int ebt_among_check(const char *tablename, unsigned int hookmask, expected_length += ebt_mac_wormhash_size(wh_dst); expected_length += ebt_mac_wormhash_size(wh_src); - if (datalen != EBT_ALIGN(expected_length)) { + if (em->match_size != EBT_ALIGN(expected_length)) { printk(KERN_WARNING "ebtables: among: wrong size: %d " "against expected %d, rounded to %Zd\n", - datalen, expected_length, + em->match_size, expected_length, EBT_ALIGN(expected_length)); - return -EINVAL; + return false; } if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) { printk(KERN_WARNING "ebtables: among: dst integrity fail: %x\n", -err); - return -EINVAL; + return false; } if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) { printk(KERN_WARNING "ebtables: among: src integrity fail: %x\n", -err); - return -EINVAL; + return false; } - return 0; + return true; } -static struct ebt_match filter_among __read_mostly = { - .name = EBT_AMONG_MATCH, - .match = ebt_filter_among, - .check = ebt_among_check, +static struct xt_match ebt_among_mt_reg __read_mostly = { + .name = "among", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_among_mt, + .checkentry = ebt_among_mt_check, + .matchsize = -1, /* special case */ .me = THIS_MODULE, }; static int __init ebt_among_init(void) { - return ebt_register_match(&filter_among); + return xt_register_match(&ebt_among_mt_reg); } static void __exit ebt_among_fini(void) { - ebt_unregister_match(&filter_among); + xt_unregister_match(&ebt_among_mt_reg); } module_init(ebt_among_init); diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c index 7c535be75665..b7ad60419f9a 100644 --- a/net/bridge/netfilter/ebt_arp.c +++ b/net/bridge/netfilter/ebt_arp.c @@ -8,58 +8,58 @@ * April, 2002 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_arp.h> #include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_arp.h> -static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, unsigned int datalen) +static bool +ebt_arp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_arp_info *info = data; + const struct ebt_arp_info *info = par->matchinfo; const struct arphdr *ah; struct arphdr _arph; ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph); if (ah == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_OPCODE && FWINV(info->opcode != ah->ar_op, EBT_ARP_OPCODE)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_HTYPE && FWINV(info->htype != ah->ar_hrd, EBT_ARP_HTYPE)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_PTYPE && FWINV(info->ptype != ah->ar_pro, EBT_ARP_PTYPE)) - return EBT_NOMATCH; + return false; if (info->bitmask & (EBT_ARP_SRC_IP | EBT_ARP_DST_IP | EBT_ARP_GRAT)) { const __be32 *sap, *dap; __be32 saddr, daddr; if (ah->ar_pln != sizeof(__be32) || ah->ar_pro != htons(ETH_P_IP)) - return EBT_NOMATCH; + return false; sap = skb_header_pointer(skb, sizeof(struct arphdr) + ah->ar_hln, sizeof(saddr), &saddr); if (sap == NULL) - return EBT_NOMATCH; + return false; dap = skb_header_pointer(skb, sizeof(struct arphdr) + 2*ah->ar_hln+sizeof(saddr), sizeof(daddr), &daddr); if (dap == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_SRC_IP && FWINV(info->saddr != (*sap & info->smsk), EBT_ARP_SRC_IP)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_DST_IP && FWINV(info->daddr != (*dap & info->dmsk), EBT_ARP_DST_IP)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_GRAT && FWINV(*dap != *sap, EBT_ARP_GRAT)) - return EBT_NOMATCH; + return false; } if (info->bitmask & (EBT_ARP_SRC_MAC | EBT_ARP_DST_MAC)) { @@ -68,18 +68,18 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in uint8_t verdict, i; if (ah->ar_hln != ETH_ALEN || ah->ar_hrd != htons(ARPHRD_ETHER)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_ARP_SRC_MAC) { mp = skb_header_pointer(skb, sizeof(struct arphdr), sizeof(_mac), &_mac); if (mp == NULL) - return EBT_NOMATCH; + return false; verdict = 0; for (i = 0; i < 6; i++) verdict |= (mp[i] ^ info->smaddr[i]) & info->smmsk[i]; if (FWINV(verdict != 0, EBT_ARP_SRC_MAC)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_ARP_DST_MAC) { @@ -87,50 +87,51 @@ static int ebt_filter_arp(const struct sk_buff *skb, const struct net_device *in ah->ar_hln + ah->ar_pln, sizeof(_mac), &_mac); if (mp == NULL) - return EBT_NOMATCH; + return false; verdict = 0; for (i = 0; i < 6; i++) verdict |= (mp[i] ^ info->dmaddr[i]) & info->dmmsk[i]; if (FWINV(verdict != 0, EBT_ARP_DST_MAC)) - return EBT_NOMATCH; + return false; } } - return EBT_MATCH; + return true; } -static int ebt_arp_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_arp_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_arp_info *info = data; + const struct ebt_arp_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_arp_info))) - return -EINVAL; if ((e->ethproto != htons(ETH_P_ARP) && e->ethproto != htons(ETH_P_RARP)) || e->invflags & EBT_IPROTO) - return -EINVAL; + return false; if (info->bitmask & ~EBT_ARP_MASK || info->invflags & ~EBT_ARP_MASK) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_match filter_arp __read_mostly = { - .name = EBT_ARP_MATCH, - .match = ebt_filter_arp, - .check = ebt_arp_check, +static struct xt_match ebt_arp_mt_reg __read_mostly = { + .name = "arp", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_arp_mt, + .checkentry = ebt_arp_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_arp_info)), .me = THIS_MODULE, }; static int __init ebt_arp_init(void) { - return ebt_register_match(&filter_arp); + return xt_register_match(&ebt_arp_mt_reg); } static void __exit ebt_arp_fini(void) { - ebt_unregister_match(&filter_arp); + xt_unregister_match(&ebt_arp_mt_reg); } module_init(ebt_arp_init); diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index 0c4279590fc7..76584cd72e57 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -8,18 +8,17 @@ * August, 2003 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_arpreply.h> #include <linux/if_arp.h> #include <net/arp.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_arpreply.h> -static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_arpreply_tg(struct sk_buff *skb, const struct xt_target_param *par) { - struct ebt_arpreply_info *info = (void *)data; + const struct ebt_arpreply_info *info = par->targinfo; const __be32 *siptr, *diptr; __be32 _sip, _dip; const struct arphdr *ap; @@ -52,45 +51,45 @@ static int ebt_target_reply(struct sk_buff *skb, unsigned int hooknr, if (diptr == NULL) return EBT_DROP; - arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)in, + arp_send(ARPOP_REPLY, ETH_P_ARP, *siptr, (struct net_device *)par->in, *diptr, shp, info->mac, shp); return info->target; } -static int ebt_target_reply_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_arpreply_tg_check(const struct xt_tgchk_param *par) { - const struct ebt_arpreply_info *info = data; + const struct ebt_arpreply_info *info = par->targinfo; + const struct ebt_entry *e = par->entryinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_arpreply_info))) - return -EINVAL; if (BASE_CHAIN && info->target == EBT_RETURN) - return -EINVAL; + return false; if (e->ethproto != htons(ETH_P_ARP) || e->invflags & EBT_IPROTO) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING)) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_target reply_target __read_mostly = { - .name = EBT_ARPREPLY_TARGET, - .target = ebt_target_reply, - .check = ebt_target_reply_check, +static struct xt_target ebt_arpreply_tg_reg __read_mostly = { + .name = "arpreply", + .revision = 0, + .family = NFPROTO_BRIDGE, + .table = "nat", + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING), + .target = ebt_arpreply_tg, + .checkentry = ebt_arpreply_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_arpreply_info)), .me = THIS_MODULE, }; static int __init ebt_arpreply_init(void) { - return ebt_register_target(&reply_target); + return xt_register_target(&ebt_arpreply_tg_reg); } static void __exit ebt_arpreply_fini(void) { - ebt_unregister_target(&reply_target); + xt_unregister_target(&ebt_arpreply_tg_reg); } module_init(ebt_arpreply_init); diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c index ca64c1cc1b47..6b49ea9e31fb 100644 --- a/net/bridge/netfilter/ebt_dnat.c +++ b/net/bridge/netfilter/ebt_dnat.c @@ -7,18 +7,17 @@ * June, 2002 * */ - +#include <linux/module.h> +#include <net/sock.h> #include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_nat.h> -#include <linux/module.h> -#include <net/sock.h> -static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_dnat_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_nat_info *info = data; + const struct ebt_nat_info *info = par->targinfo; if (!skb_make_writable(skb, 0)) return EBT_DROP; @@ -27,40 +26,46 @@ static int ebt_target_dnat(struct sk_buff *skb, unsigned int hooknr, return info->target; } -static int ebt_target_dnat_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_dnat_tg_check(const struct xt_tgchk_param *par) { - const struct ebt_nat_info *info = data; + const struct ebt_nat_info *info = par->targinfo; + unsigned int hook_mask; if (BASE_CHAIN && info->target == EBT_RETURN) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if ( (strcmp(tablename, "nat") || - (hookmask & ~((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT)))) && - (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) ) - return -EINVAL; - if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info))) - return -EINVAL; + return false; + + hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); + if ((strcmp(par->table, "nat") != 0 || + (hook_mask & ~((1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_OUT)))) && + (strcmp(par->table, "broute") != 0 || + hook_mask & ~(1 << NF_BR_BROUTING))) + return false; if (INVALID_TARGET) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_target dnat __read_mostly = { - .name = EBT_DNAT_TARGET, - .target = ebt_target_dnat, - .check = ebt_target_dnat_check, +static struct xt_target ebt_dnat_tg_reg __read_mostly = { + .name = "dnat", + .revision = 0, + .family = NFPROTO_BRIDGE, + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_LOCAL_OUT) | (1 << NF_BR_BROUTING), + .target = ebt_dnat_tg, + .checkentry = ebt_dnat_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_nat_info)), .me = THIS_MODULE, }; static int __init ebt_dnat_init(void) { - return ebt_register_target(&dnat); + return xt_register_target(&ebt_dnat_tg_reg); } static void __exit ebt_dnat_fini(void) { - ebt_unregister_target(&dnat); + xt_unregister_target(&ebt_dnat_tg_reg); } module_init(ebt_dnat_init); diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index 65caa00dcf2a..d771bbfbcbe6 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -11,24 +11,23 @@ * Innominate Security Technologies AG <mhopf@innominate.com> * September, 2002 */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_ip.h> #include <linux/ip.h> #include <net/ip.h> #include <linux/in.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_ip.h> struct tcpudphdr { __be16 src; __be16 dst; }; -static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, - unsigned int datalen) +static bool +ebt_ip_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_ip_info *info = data; + const struct ebt_ip_info *info = par->matchinfo; const struct iphdr *ih; struct iphdr _iph; const struct tcpudphdr *pptr; @@ -36,92 +35,93 @@ static int ebt_filter_ip(const struct sk_buff *skb, const struct net_device *in, ih = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); if (ih == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_TOS && FWINV(info->tos != ih->tos, EBT_IP_TOS)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_SOURCE && FWINV((ih->saddr & info->smsk) != info->saddr, EBT_IP_SOURCE)) - return EBT_NOMATCH; + return false; if ((info->bitmask & EBT_IP_DEST) && FWINV((ih->daddr & info->dmsk) != info->daddr, EBT_IP_DEST)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_PROTO) { if (FWINV(info->protocol != ih->protocol, EBT_IP_PROTO)) - return EBT_NOMATCH; + return false; if (!(info->bitmask & EBT_IP_DPORT) && !(info->bitmask & EBT_IP_SPORT)) - return EBT_MATCH; + return true; if (ntohs(ih->frag_off) & IP_OFFSET) - return EBT_NOMATCH; + return false; pptr = skb_header_pointer(skb, ih->ihl*4, sizeof(_ports), &_ports); if (pptr == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP_DPORT) { u32 dst = ntohs(pptr->dst); if (FWINV(dst < info->dport[0] || dst > info->dport[1], EBT_IP_DPORT)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_IP_SPORT) { u32 src = ntohs(pptr->src); if (FWINV(src < info->sport[0] || src > info->sport[1], EBT_IP_SPORT)) - return EBT_NOMATCH; + return false; } } - return EBT_MATCH; + return true; } -static int ebt_ip_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_ip_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_ip_info *info = data; + const struct ebt_ip_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_ip_info))) - return -EINVAL; if (e->ethproto != htons(ETH_P_IP) || e->invflags & EBT_IPROTO) - return -EINVAL; + return false; if (info->bitmask & ~EBT_IP_MASK || info->invflags & ~EBT_IP_MASK) - return -EINVAL; + return false; if (info->bitmask & (EBT_IP_DPORT | EBT_IP_SPORT)) { if (info->invflags & EBT_IP_PROTO) - return -EINVAL; + return false; if (info->protocol != IPPROTO_TCP && info->protocol != IPPROTO_UDP && info->protocol != IPPROTO_UDPLITE && info->protocol != IPPROTO_SCTP && info->protocol != IPPROTO_DCCP) - return -EINVAL; + return false; } if (info->bitmask & EBT_IP_DPORT && info->dport[0] > info->dport[1]) - return -EINVAL; + return false; if (info->bitmask & EBT_IP_SPORT && info->sport[0] > info->sport[1]) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_match filter_ip __read_mostly = { - .name = EBT_IP_MATCH, - .match = ebt_filter_ip, - .check = ebt_ip_check, +static struct xt_match ebt_ip_mt_reg __read_mostly = { + .name = "ip", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_ip_mt, + .checkentry = ebt_ip_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_ip_info)), .me = THIS_MODULE, }; static int __init ebt_ip_init(void) { - return ebt_register_match(&filter_ip); + return xt_register_match(&ebt_ip_mt_reg); } static void __exit ebt_ip_fini(void) { - ebt_unregister_match(&filter_ip); + xt_unregister_match(&ebt_ip_mt_reg); } module_init(ebt_ip_init); diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 36efb3a75249..784a6573876c 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -13,26 +13,24 @@ * * Jan, 2008 */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_ip6.h> #include <linux/ipv6.h> #include <net/ipv6.h> #include <linux/in.h> #include <linux/module.h> #include <net/dsfield.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_ip6.h> struct tcpudphdr { __be16 src; __be16 dst; }; -static int ebt_filter_ip6(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, const void *data, - unsigned int datalen) +static bool +ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_ip6_info *info = (struct ebt_ip6_info *)data; + const struct ebt_ip6_info *info = par->matchinfo; const struct ipv6hdr *ih6; struct ipv6hdr _ip6h; const struct tcpudphdr *pptr; @@ -42,100 +40,100 @@ static int ebt_filter_ip6(const struct sk_buff *skb, ih6 = skb_header_pointer(skb, 0, sizeof(_ip6h), &_ip6h); if (ih6 == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP6_TCLASS && FWINV(info->tclass != ipv6_get_dsfield(ih6), EBT_IP6_TCLASS)) - return EBT_NOMATCH; + return false; for (i = 0; i < 4; i++) tmp_addr.in6_u.u6_addr32[i] = ih6->saddr.in6_u.u6_addr32[i] & info->smsk.in6_u.u6_addr32[i]; if (info->bitmask & EBT_IP6_SOURCE && FWINV((ipv6_addr_cmp(&tmp_addr, &info->saddr) != 0), EBT_IP6_SOURCE)) - return EBT_NOMATCH; + return false; for (i = 0; i < 4; i++) tmp_addr.in6_u.u6_addr32[i] = ih6->daddr.in6_u.u6_addr32[i] & info->dmsk.in6_u.u6_addr32[i]; if (info->bitmask & EBT_IP6_DEST && FWINV((ipv6_addr_cmp(&tmp_addr, &info->daddr) != 0), EBT_IP6_DEST)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP6_PROTO) { uint8_t nexthdr = ih6->nexthdr; int offset_ph; offset_ph = ipv6_skip_exthdr(skb, sizeof(_ip6h), &nexthdr); if (offset_ph == -1) - return EBT_NOMATCH; + return false; if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO)) - return EBT_NOMATCH; + return false; if (!(info->bitmask & EBT_IP6_DPORT) && !(info->bitmask & EBT_IP6_SPORT)) - return EBT_MATCH; + return true; pptr = skb_header_pointer(skb, offset_ph, sizeof(_ports), &_ports); if (pptr == NULL) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_IP6_DPORT) { u32 dst = ntohs(pptr->dst); if (FWINV(dst < info->dport[0] || dst > info->dport[1], EBT_IP6_DPORT)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_IP6_SPORT) { u32 src = ntohs(pptr->src); if (FWINV(src < info->sport[0] || src > info->sport[1], EBT_IP6_SPORT)) - return EBT_NOMATCH; + return false; } - return EBT_MATCH; + return true; } - return EBT_MATCH; + return true; } -static int ebt_ip6_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_ip6_mt_check(const struct xt_mtchk_param *par) { - struct ebt_ip6_info *info = (struct ebt_ip6_info *)data; + const struct ebt_entry *e = par->entryinfo; + struct ebt_ip6_info *info = par->matchinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_ip6_info))) - return -EINVAL; if (e->ethproto != htons(ETH_P_IPV6) || e->invflags & EBT_IPROTO) - return -EINVAL; + return false; if (info->bitmask & ~EBT_IP6_MASK || info->invflags & ~EBT_IP6_MASK) - return -EINVAL; + return false; if (info->bitmask & (EBT_IP6_DPORT | EBT_IP6_SPORT)) { if (info->invflags & EBT_IP6_PROTO) - return -EINVAL; + return false; if (info->protocol != IPPROTO_TCP && info->protocol != IPPROTO_UDP && info->protocol != IPPROTO_UDPLITE && info->protocol != IPPROTO_SCTP && info->protocol != IPPROTO_DCCP) - return -EINVAL; + return false; } if (info->bitmask & EBT_IP6_DPORT && info->dport[0] > info->dport[1]) - return -EINVAL; + return false; if (info->bitmask & EBT_IP6_SPORT && info->sport[0] > info->sport[1]) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_match filter_ip6 = -{ - .name = EBT_IP6_MATCH, - .match = ebt_filter_ip6, - .check = ebt_ip6_check, +static struct xt_match ebt_ip6_mt_reg __read_mostly = { + .name = "ip6", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_ip6_mt, + .checkentry = ebt_ip6_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_ip6_info)), .me = THIS_MODULE, }; static int __init ebt_ip6_init(void) { - return ebt_register_match(&filter_ip6); + return xt_register_match(&ebt_ip6_mt_reg); } static void __exit ebt_ip6_fini(void) { - ebt_unregister_match(&filter_ip6); + xt_unregister_match(&ebt_ip6_mt_reg); } module_init(ebt_ip6_init); diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index 8cbdc01c253e..f7bd9192ff0c 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -10,13 +10,12 @@ * September, 2003 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_limit.h> #include <linux/module.h> - #include <linux/netdevice.h> #include <linux/spinlock.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_limit.h> static DEFINE_SPINLOCK(limit_lock); @@ -31,11 +30,10 @@ static DEFINE_SPINLOCK(limit_lock); #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) -static int ebt_limit_match(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static bool +ebt_limit_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - struct ebt_limit_info *info = (struct ebt_limit_info *)data; + struct ebt_limit_info *info = (void *)par->matchinfo; unsigned long now = jiffies; spin_lock_bh(&limit_lock); @@ -47,11 +45,11 @@ static int ebt_limit_match(const struct sk_buff *skb, /* We're not limited. */ info->credit -= info->cost; spin_unlock_bh(&limit_lock); - return EBT_MATCH; + return true; } spin_unlock_bh(&limit_lock); - return EBT_NOMATCH; + return false; } /* Precision saver. */ @@ -66,20 +64,16 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE; } -static int ebt_limit_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_limit_mt_check(const struct xt_mtchk_param *par) { - struct ebt_limit_info *info = data; - - if (datalen != EBT_ALIGN(sizeof(struct ebt_limit_info))) - return -EINVAL; + struct ebt_limit_info *info = par->matchinfo; /* Check for overflow. */ if (info->burst == 0 || user2credits(info->avg * info->burst) < user2credits(info->avg)) { printk("Overflow in ebt_limit, try lower: %u/%u\n", info->avg, info->burst); - return -EINVAL; + return false; } /* User avg in seconds * EBT_LIMIT_SCALE: convert to jiffies * 128. */ @@ -87,24 +81,27 @@ static int ebt_limit_check(const char *tablename, unsigned int hookmask, info->credit = user2credits(info->avg * info->burst); info->credit_cap = user2credits(info->avg * info->burst); info->cost = user2credits(info->avg); - return 0; + return true; } -static struct ebt_match ebt_limit_reg __read_mostly = { - .name = EBT_LIMIT_MATCH, - .match = ebt_limit_match, - .check = ebt_limit_check, +static struct xt_match ebt_limit_mt_reg __read_mostly = { + .name = "limit", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_limit_mt, + .checkentry = ebt_limit_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_limit_info)), .me = THIS_MODULE, }; static int __init ebt_limit_init(void) { - return ebt_register_match(&ebt_limit_reg); + return xt_register_match(&ebt_limit_mt_reg); } static void __exit ebt_limit_fini(void) { - ebt_unregister_match(&ebt_limit_reg); + xt_unregister_match(&ebt_limit_mt_reg); } module_init(ebt_limit_init); diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 2f430d4ae911..3d33c608906a 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -8,10 +8,6 @@ * April, 2002 * */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_log.h> -#include <linux/netfilter.h> #include <linux/module.h> #include <linux/ip.h> #include <linux/in.h> @@ -21,22 +17,23 @@ #include <linux/ipv6.h> #include <net/ipv6.h> #include <linux/in6.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_log.h> +#include <linux/netfilter.h> static DEFINE_SPINLOCK(ebt_log_lock); -static int ebt_log_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_log_tg_check(const struct xt_tgchk_param *par) { - struct ebt_log_info *info = data; + struct ebt_log_info *info = par->targinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_log_info))) - return -EINVAL; if (info->bitmask & ~EBT_LOG_MASK) - return -EINVAL; + return false; if (info->loglevel >= 8) - return -EINVAL; + return false; info->prefix[EBT_LOG_PREFIX_SIZE - 1] = '\0'; - return 0; + return true; } struct tcpudphdr @@ -84,7 +81,7 @@ print_ports(const struct sk_buff *skb, uint8_t protocol, int offset) #define myNIPQUAD(a) a[0], a[1], a[2], a[3] static void -ebt_log_packet(unsigned int pf, unsigned int hooknum, +ebt_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *loginfo, const char *prefix) @@ -194,11 +191,10 @@ out: } -static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_log_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_log_info *info = data; + const struct ebt_log_info *info = par->targinfo; struct nf_loginfo li; li.type = NF_LOG_TYPE_LOG; @@ -206,18 +202,21 @@ static void ebt_log(const struct sk_buff *skb, unsigned int hooknr, li.u.log.logflags = info->bitmask; if (info->bitmask & EBT_LOG_NFLOG) - nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, - "%s", info->prefix); + nf_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, + par->out, &li, "%s", info->prefix); else - ebt_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, - info->prefix); + ebt_log_packet(NFPROTO_BRIDGE, par->hooknum, skb, par->in, + par->out, &li, info->prefix); + return EBT_CONTINUE; } -static struct ebt_watcher log = -{ - .name = EBT_LOG_WATCHER, - .watcher = ebt_log, - .check = ebt_log_check, +static struct xt_target ebt_log_tg_reg __read_mostly = { + .name = "log", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_log_tg, + .checkentry = ebt_log_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_log_info)), .me = THIS_MODULE, }; @@ -231,17 +230,17 @@ static int __init ebt_log_init(void) { int ret; - ret = ebt_register_watcher(&log); + ret = xt_register_target(&ebt_log_tg_reg); if (ret < 0) return ret; - nf_log_register(PF_BRIDGE, &ebt_log_logger); + nf_log_register(NFPROTO_BRIDGE, &ebt_log_logger); return 0; } static void __exit ebt_log_fini(void) { nf_log_unregister(&ebt_log_logger); - ebt_unregister_watcher(&log); + xt_unregister_target(&ebt_log_tg_reg); } module_init(ebt_log_init); diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c index 36723f47db0a..2fee7e8e2e93 100644 --- a/net/bridge/netfilter/ebt_mark.c +++ b/net/bridge/netfilter/ebt_mark.c @@ -13,15 +13,15 @@ * Marking a frame doesn't really change anything in the frame anyway. */ +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_mark_t.h> -#include <linux/module.h> -static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_mark_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_mark_t_info *info = data; + const struct ebt_mark_t_info *info = par->targinfo; int action = info->target & -16; if (action == MARK_SET_VALUE) @@ -36,42 +36,41 @@ static int ebt_target_mark(struct sk_buff *skb, unsigned int hooknr, return info->target | ~EBT_VERDICT_BITS; } -static int ebt_target_mark_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_mark_tg_check(const struct xt_tgchk_param *par) { - const struct ebt_mark_t_info *info = data; + const struct ebt_mark_t_info *info = par->targinfo; int tmp; - if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info))) - return -EINVAL; tmp = info->target | ~EBT_VERDICT_BITS; if (BASE_CHAIN && tmp == EBT_RETURN) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; + return false; if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) - return -EINVAL; + return false; tmp = info->target & ~EBT_VERDICT_BITS; if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE && tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_target mark_target __read_mostly = { - .name = EBT_MARK_TARGET, - .target = ebt_target_mark, - .check = ebt_target_mark_check, +static struct xt_target ebt_mark_tg_reg __read_mostly = { + .name = "mark", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_mark_tg, + .checkentry = ebt_mark_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_mark_t_info)), .me = THIS_MODULE, }; static int __init ebt_mark_init(void) { - return ebt_register_target(&mark_target); + return xt_register_target(&ebt_mark_tg_reg); } static void __exit ebt_mark_fini(void) { - ebt_unregister_target(&mark_target); + xt_unregister_target(&ebt_mark_tg_reg); } module_init(ebt_mark_init); diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c index 9b0a4543861f..ea570f214b1d 100644 --- a/net/bridge/netfilter/ebt_mark_m.c +++ b/net/bridge/netfilter/ebt_mark_m.c @@ -7,53 +7,52 @@ * July, 2002 * */ - +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_mark_m.h> -#include <linux/module.h> -static int ebt_filter_mark(const struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, const void *data, - unsigned int datalen) +static bool +ebt_mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_mark_m_info *info = data; + const struct ebt_mark_m_info *info = par->matchinfo; if (info->bitmask & EBT_MARK_OR) - return !(!!(skb->mark & info->mask) ^ info->invert); - return !(((skb->mark & info->mask) == info->mark) ^ info->invert); + return !!(skb->mark & info->mask) ^ info->invert; + return ((skb->mark & info->mask) == info->mark) ^ info->invert; } -static int ebt_mark_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_mark_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_mark_m_info *info = data; + const struct ebt_mark_m_info *info = par->matchinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_m_info))) - return -EINVAL; if (info->bitmask & ~EBT_MARK_MASK) - return -EINVAL; + return false; if ((info->bitmask & EBT_MARK_OR) && (info->bitmask & EBT_MARK_AND)) - return -EINVAL; + return false; if (!info->bitmask) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_match filter_mark __read_mostly = { - .name = EBT_MARK_MATCH, - .match = ebt_filter_mark, - .check = ebt_mark_check, +static struct xt_match ebt_mark_mt_reg __read_mostly = { + .name = "mark_m", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_mark_mt, + .checkentry = ebt_mark_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_mark_m_info)), .me = THIS_MODULE, }; static int __init ebt_mark_m_init(void) { - return ebt_register_match(&filter_mark); + return xt_register_match(&ebt_mark_mt_reg); } static void __exit ebt_mark_m_fini(void) { - ebt_unregister_match(&filter_mark); + xt_unregister_match(&ebt_mark_mt_reg); } module_init(ebt_mark_m_init); diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c index 8e799aa9e560..2a63d996dd4e 100644 --- a/net/bridge/netfilter/ebt_nflog.c +++ b/net/bridge/netfilter/ebt_nflog.c @@ -14,17 +14,15 @@ #include <linux/module.h> #include <linux/spinlock.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_nflog.h> #include <net/netfilter/nf_log.h> -static void ebt_nflog(const struct sk_buff *skb, - unsigned int hooknr, - const struct net_device *in, - const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_nflog_tg(struct sk_buff *skb, const struct xt_target_param *par) { - struct ebt_nflog_info *info = (struct ebt_nflog_info *)data; + const struct ebt_nflog_info *info = par->targinfo; struct nf_loginfo li; li.type = NF_LOG_TYPE_ULOG; @@ -32,39 +30,39 @@ static void ebt_nflog(const struct sk_buff *skb, li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(PF_BRIDGE, hooknr, skb, in, out, &li, "%s", info->prefix); + nf_log_packet(PF_BRIDGE, par->hooknum, skb, par->in, par->out, + &li, "%s", info->prefix); + return EBT_CONTINUE; } -static int ebt_nflog_check(const char *tablename, - unsigned int hookmask, - const struct ebt_entry *e, - void *data, unsigned int datalen) +static bool ebt_nflog_tg_check(const struct xt_tgchk_param *par) { - struct ebt_nflog_info *info = (struct ebt_nflog_info *)data; + struct ebt_nflog_info *info = par->targinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_nflog_info))) - return -EINVAL; if (info->flags & ~EBT_NFLOG_MASK) - return -EINVAL; + return false; info->prefix[EBT_NFLOG_PREFIX_SIZE - 1] = '\0'; - return 0; + return true; } -static struct ebt_watcher nflog __read_mostly = { - .name = EBT_NFLOG_WATCHER, - .watcher = ebt_nflog, - .check = ebt_nflog_check, - .me = THIS_MODULE, +static struct xt_target ebt_nflog_tg_reg __read_mostly = { + .name = "nflog", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_nflog_tg, + .checkentry = ebt_nflog_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_nflog_info)), + .me = THIS_MODULE, }; static int __init ebt_nflog_init(void) { - return ebt_register_watcher(&nflog); + return xt_register_target(&ebt_nflog_tg_reg); } static void __exit ebt_nflog_fini(void) { - ebt_unregister_watcher(&nflog); + xt_unregister_target(&ebt_nflog_tg_reg); } module_init(ebt_nflog_init); diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c index 676db32df3d1..883e96e2a542 100644 --- a/net/bridge/netfilter/ebt_pkttype.c +++ b/net/bridge/netfilter/ebt_pkttype.c @@ -7,50 +7,47 @@ * April, 2003 * */ - +#include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_pkttype.h> -#include <linux/module.h> -static int ebt_filter_pkttype(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *data, - unsigned int datalen) +static bool +ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_pkttype_info *info = data; + const struct ebt_pkttype_info *info = par->matchinfo; - return (skb->pkt_type != info->pkt_type) ^ info->invert; + return (skb->pkt_type == info->pkt_type) ^ info->invert; } -static int ebt_pkttype_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_pkttype_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_pkttype_info *info = data; + const struct ebt_pkttype_info *info = par->matchinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_pkttype_info))) - return -EINVAL; if (info->invert != 0 && info->invert != 1) - return -EINVAL; + return false; /* Allow any pkt_type value */ - return 0; + return true; } -static struct ebt_match filter_pkttype __read_mostly = { - .name = EBT_PKTTYPE_MATCH, - .match = ebt_filter_pkttype, - .check = ebt_pkttype_check, +static struct xt_match ebt_pkttype_mt_reg __read_mostly = { + .name = "pkttype", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_pkttype_mt, + .checkentry = ebt_pkttype_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_pkttype_info)), .me = THIS_MODULE, }; static int __init ebt_pkttype_init(void) { - return ebt_register_match(&filter_pkttype); + return xt_register_match(&ebt_pkttype_mt_reg); } static void __exit ebt_pkttype_fini(void) { - ebt_unregister_match(&filter_pkttype); + xt_unregister_match(&ebt_pkttype_mt_reg); } module_init(ebt_pkttype_init); diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index b8afe850cf1e..c8a49f7a57ba 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -7,65 +7,70 @@ * April, 2002 * */ - -#include <linux/netfilter.h> -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_redirect.h> #include <linux/module.h> #include <net/sock.h> #include "../br_private.h" +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_redirect.h> -static int ebt_target_redirect(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_redirect_info *info = data; + const struct ebt_redirect_info *info = par->targinfo; if (!skb_make_writable(skb, 0)) return EBT_DROP; - if (hooknr != NF_BR_BROUTING) + if (par->hooknum != NF_BR_BROUTING) memcpy(eth_hdr(skb)->h_dest, - in->br_port->br->dev->dev_addr, ETH_ALEN); + par->in->br_port->br->dev->dev_addr, ETH_ALEN); else - memcpy(eth_hdr(skb)->h_dest, in->dev_addr, ETH_ALEN); + memcpy(eth_hdr(skb)->h_dest, par->in->dev_addr, ETH_ALEN); skb->pkt_type = PACKET_HOST; return info->target; } -static int ebt_target_redirect_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_redirect_tg_check(const struct xt_tgchk_param *par) { - const struct ebt_redirect_info *info = data; + const struct ebt_redirect_info *info = par->targinfo; + unsigned int hook_mask; - if (datalen != EBT_ALIGN(sizeof(struct ebt_redirect_info))) - return -EINVAL; if (BASE_CHAIN && info->target == EBT_RETURN) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if ( (strcmp(tablename, "nat") || hookmask & ~(1 << NF_BR_PRE_ROUTING)) && - (strcmp(tablename, "broute") || hookmask & ~(1 << NF_BR_BROUTING)) ) - return -EINVAL; + return false; + + hook_mask = par->hook_mask & ~(1 << NF_BR_NUMHOOKS); + if ((strcmp(par->table, "nat") != 0 || + hook_mask & ~(1 << NF_BR_PRE_ROUTING)) && + (strcmp(par->table, "broute") != 0 || + hook_mask & ~(1 << NF_BR_BROUTING))) + return false; if (INVALID_TARGET) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_target redirect_target __read_mostly = { - .name = EBT_REDIRECT_TARGET, - .target = ebt_target_redirect, - .check = ebt_target_redirect_check, +static struct xt_target ebt_redirect_tg_reg __read_mostly = { + .name = "redirect", + .revision = 0, + .family = NFPROTO_BRIDGE, + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_PRE_ROUTING) | + (1 << NF_BR_BROUTING), + .target = ebt_redirect_tg, + .checkentry = ebt_redirect_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_redirect_info)), .me = THIS_MODULE, }; static int __init ebt_redirect_init(void) { - return ebt_register_target(&redirect_target); + return xt_register_target(&ebt_redirect_tg_reg); } static void __exit ebt_redirect_fini(void) { - ebt_unregister_target(&redirect_target); + xt_unregister_target(&ebt_redirect_tg_reg); } module_init(ebt_redirect_init); diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c index 5425333dda03..8d04d4c302bd 100644 --- a/net/bridge/netfilter/ebt_snat.c +++ b/net/bridge/netfilter/ebt_snat.c @@ -7,20 +7,19 @@ * June, 2002 * */ - -#include <linux/netfilter.h> -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_nat.h> #include <linux/module.h> #include <net/sock.h> #include <linux/if_arp.h> #include <net/arp.h> +#include <linux/netfilter.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_nat.h> -static int ebt_target_snat(struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_snat_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_nat_info *info = data; + const struct ebt_nat_info *info = par->targinfo; if (!skb_make_writable(skb, 0)) return EBT_DROP; @@ -43,46 +42,43 @@ out: return info->target | ~EBT_VERDICT_BITS; } -static int ebt_target_snat_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_snat_tg_check(const struct xt_tgchk_param *par) { - const struct ebt_nat_info *info = data; + const struct ebt_nat_info *info = par->targinfo; int tmp; - if (datalen != EBT_ALIGN(sizeof(struct ebt_nat_info))) - return -EINVAL; tmp = info->target | ~EBT_VERDICT_BITS; if (BASE_CHAIN && tmp == EBT_RETURN) - return -EINVAL; - CLEAR_BASE_CHAIN_BIT; - if (strcmp(tablename, "nat")) - return -EINVAL; - if (hookmask & ~(1 << NF_BR_POST_ROUTING)) - return -EINVAL; + return false; if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0) - return -EINVAL; + return false; tmp = info->target | EBT_VERDICT_BITS; if ((tmp & ~NAT_ARP_BIT) != ~NAT_ARP_BIT) - return -EINVAL; - return 0; + return false; + return true; } -static struct ebt_target snat __read_mostly = { - .name = EBT_SNAT_TARGET, - .target = ebt_target_snat, - .check = ebt_target_snat_check, +static struct xt_target ebt_snat_tg_reg __read_mostly = { + .name = "snat", + .revision = 0, + .family = NFPROTO_BRIDGE, + .table = "nat", + .hooks = (1 << NF_BR_NUMHOOKS) | (1 << NF_BR_POST_ROUTING), + .target = ebt_snat_tg, + .checkentry = ebt_snat_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_nat_info)), .me = THIS_MODULE, }; static int __init ebt_snat_init(void) { - return ebt_register_target(&snat); + return xt_register_target(&ebt_snat_tg_reg); } static void __exit ebt_snat_fini(void) { - ebt_unregister_target(&snat); + xt_unregister_target(&ebt_snat_tg_reg); } module_init(ebt_snat_init); diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 40f36d37607d..48527e621626 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -7,11 +7,11 @@ * * July, 2003 */ - -#include <linux/netfilter_bridge/ebtables.h> -#include <linux/netfilter_bridge/ebt_stp.h> #include <linux/etherdevice.h> #include <linux/module.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_bridge/ebtables.h> +#include <linux/netfilter_bridge/ebt_stp.h> #define BPDU_TYPE_CONFIG 0 #define BPDU_TYPE_TCN 0x80 @@ -40,7 +40,7 @@ struct stp_config_pdu { #define NR16(p) (p[0] << 8 | p[1]) #define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]) -static int ebt_filter_config(const struct ebt_stp_info *info, +static bool ebt_filter_config(const struct ebt_stp_info *info, const struct stp_config_pdu *stpc) { const struct ebt_stp_config_info *c; @@ -51,12 +51,12 @@ static int ebt_filter_config(const struct ebt_stp_info *info, c = &info->config; if ((info->bitmask & EBT_STP_FLAGS) && FWINV(c->flags != stpc->flags, EBT_STP_FLAGS)) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_STP_ROOTPRIO) { v16 = NR16(stpc->root); if (FWINV(v16 < c->root_priol || v16 > c->root_priou, EBT_STP_ROOTPRIO)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_ROOTADDR) { verdict = 0; @@ -64,19 +64,19 @@ static int ebt_filter_config(const struct ebt_stp_info *info, verdict |= (stpc->root[2+i] ^ c->root_addr[i]) & c->root_addrmsk[i]; if (FWINV(verdict != 0, EBT_STP_ROOTADDR)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_ROOTCOST) { v32 = NR32(stpc->root_cost); if (FWINV(v32 < c->root_costl || v32 > c->root_costu, EBT_STP_ROOTCOST)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_SENDERPRIO) { v16 = NR16(stpc->sender); if (FWINV(v16 < c->sender_priol || v16 > c->sender_priou, EBT_STP_SENDERPRIO)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_SENDERADDR) { verdict = 0; @@ -84,60 +84,60 @@ static int ebt_filter_config(const struct ebt_stp_info *info, verdict |= (stpc->sender[2+i] ^ c->sender_addr[i]) & c->sender_addrmsk[i]; if (FWINV(verdict != 0, EBT_STP_SENDERADDR)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_PORT) { v16 = NR16(stpc->port); if (FWINV(v16 < c->portl || v16 > c->portu, EBT_STP_PORT)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_MSGAGE) { v16 = NR16(stpc->msg_age); if (FWINV(v16 < c->msg_agel || v16 > c->msg_ageu, EBT_STP_MSGAGE)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_MAXAGE) { v16 = NR16(stpc->max_age); if (FWINV(v16 < c->max_agel || v16 > c->max_ageu, EBT_STP_MAXAGE)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_HELLOTIME) { v16 = NR16(stpc->hello_time); if (FWINV(v16 < c->hello_timel || v16 > c->hello_timeu, EBT_STP_HELLOTIME)) - return EBT_NOMATCH; + return false; } if (info->bitmask & EBT_STP_FWDD) { v16 = NR16(stpc->forward_delay); if (FWINV(v16 < c->forward_delayl || v16 > c->forward_delayu, EBT_STP_FWDD)) - return EBT_NOMATCH; + return false; } - return EBT_MATCH; + return true; } -static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *data, unsigned int datalen) +static bool +ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_stp_info *info = data; + const struct ebt_stp_info *info = par->matchinfo; const struct stp_header *sp; struct stp_header _stph; const uint8_t header[6] = {0x42, 0x42, 0x03, 0x00, 0x00, 0x00}; sp = skb_header_pointer(skb, 0, sizeof(_stph), &_stph); if (sp == NULL) - return EBT_NOMATCH; + return false; /* The stp code only considers these */ if (memcmp(sp, header, sizeof(header))) - return EBT_NOMATCH; + return false; if (info->bitmask & EBT_STP_TYPE && FWINV(info->type != sp->type, EBT_STP_TYPE)) - return EBT_NOMATCH; + return false; if (sp->type == BPDU_TYPE_CONFIG && info->bitmask & EBT_STP_CONFIG_MASK) { @@ -147,48 +147,48 @@ static int ebt_filter_stp(const struct sk_buff *skb, const struct net_device *in st = skb_header_pointer(skb, sizeof(_stph), sizeof(_stpc), &_stpc); if (st == NULL) - return EBT_NOMATCH; + return false; return ebt_filter_config(info, st); } - return EBT_MATCH; + return true; } -static int ebt_stp_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_stp_mt_check(const struct xt_mtchk_param *par) { - const struct ebt_stp_info *info = data; - const unsigned int len = EBT_ALIGN(sizeof(struct ebt_stp_info)); + const struct ebt_stp_info *info = par->matchinfo; const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00}; const uint8_t msk[6] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; + const struct ebt_entry *e = par->entryinfo; if (info->bitmask & ~EBT_STP_MASK || info->invflags & ~EBT_STP_MASK || !(info->bitmask & EBT_STP_MASK)) - return -EINVAL; - if (datalen != len) - return -EINVAL; + return false; /* Make sure the match only receives stp frames */ if (compare_ether_addr(e->destmac, bridge_ula) || compare_ether_addr(e->destmsk, msk) || !(e->bitmask & EBT_DESTMAC)) - return -EINVAL; + return false; - return 0; + return true; } -static struct ebt_match filter_stp __read_mostly = { - .name = EBT_STP_MATCH, - .match = ebt_filter_stp, - .check = ebt_stp_check, +static struct xt_match ebt_stp_mt_reg __read_mostly = { + .name = "stp", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_stp_mt, + .checkentry = ebt_stp_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_stp_info)), .me = THIS_MODULE, }; static int __init ebt_stp_init(void) { - return ebt_register_match(&filter_stp); + return xt_register_match(&ebt_stp_mt_reg); } static void __exit ebt_stp_fini(void) { - ebt_unregister_match(&filter_stp); + xt_unregister_match(&ebt_stp_mt_reg); } module_init(ebt_stp_init); diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 2d4c9ef909fc..2c6d6823e703 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -36,6 +36,7 @@ #include <linux/timer.h> #include <linux/netlink.h> #include <linux/netdevice.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_ulog.h> #include <net/netfilter/nf_log.h> @@ -223,7 +224,7 @@ alloc_failure: } /* this function is registered with the netfilter core */ -static void ebt_log_packet(unsigned int pf, unsigned int hooknum, +static void ebt_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, const struct nf_loginfo *li, const char *prefix) @@ -245,24 +246,20 @@ static void ebt_log_packet(unsigned int pf, unsigned int hooknum, ebt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); } -static void ebt_ulog(const struct sk_buff *skb, unsigned int hooknr, - const struct net_device *in, const struct net_device *out, - const void *data, unsigned int datalen) +static unsigned int +ebt_ulog_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ebt_ulog_info *uloginfo = data; - - ebt_ulog_packet(hooknr, skb, in, out, uloginfo, NULL); + ebt_ulog_packet(par->hooknum, skb, par->in, par->out, + par->targinfo, NULL); + return EBT_CONTINUE; } - -static int ebt_ulog_check(const char *tablename, unsigned int hookmask, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_ulog_tg_check(const struct xt_tgchk_param *par) { - struct ebt_ulog_info *uloginfo = data; + struct ebt_ulog_info *uloginfo = par->targinfo; - if (datalen != EBT_ALIGN(sizeof(struct ebt_ulog_info)) || - uloginfo->nlgroup > 31) - return -EINVAL; + if (uloginfo->nlgroup > 31) + return false; uloginfo->prefix[EBT_ULOG_PREFIX_LEN - 1] = '\0'; @@ -272,27 +269,31 @@ static int ebt_ulog_check(const char *tablename, unsigned int hookmask, return 0; } -static struct ebt_watcher ulog __read_mostly = { - .name = EBT_ULOG_WATCHER, - .watcher = ebt_ulog, - .check = ebt_ulog_check, +static struct xt_target ebt_ulog_tg_reg __read_mostly = { + .name = "ulog", + .revision = 0, + .family = NFPROTO_BRIDGE, + .target = ebt_ulog_tg, + .checkentry = ebt_ulog_tg_check, + .targetsize = XT_ALIGN(sizeof(struct ebt_ulog_info)), .me = THIS_MODULE, }; static const struct nf_logger ebt_ulog_logger = { - .name = EBT_ULOG_WATCHER, + .name = "ulog", .logfn = &ebt_log_packet, .me = THIS_MODULE, }; static int __init ebt_ulog_init(void) { - int i, ret = 0; + bool ret = true; + int i; if (nlbufsiz >= 128*1024) { printk(KERN_NOTICE "ebt_ulog: Netlink buffer has to be <= 128kB," " please try a smaller nlbufsiz parameter.\n"); - return -EINVAL; + return false; } /* initialize ulog_buffers */ @@ -304,13 +305,16 @@ static int __init ebt_ulog_init(void) ebtulognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, EBT_ULOG_MAXNLGROUPS, NULL, NULL, THIS_MODULE); - if (!ebtulognl) - ret = -ENOMEM; - else if ((ret = ebt_register_watcher(&ulog))) + if (!ebtulognl) { + printk(KERN_WARNING KBUILD_MODNAME ": out of memory trying to " + "call netlink_kernel_create\n"); + ret = false; + } else if (xt_register_target(&ebt_ulog_tg_reg) != 0) { netlink_kernel_release(ebtulognl); + } - if (ret == 0) - nf_log_register(PF_BRIDGE, &ebt_ulog_logger); + if (ret) + nf_log_register(NFPROTO_BRIDGE, &ebt_ulog_logger); return ret; } @@ -321,7 +325,7 @@ static void __exit ebt_ulog_fini(void) int i; nf_log_unregister(&ebt_ulog_logger); - ebt_unregister_watcher(&ulog); + xt_unregister_target(&ebt_ulog_tg_reg); for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { ub = &ulog_buffers[i]; if (timer_pending(&ub->timer)) diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index ab60b0dade80..3dddd489328e 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -22,6 +22,7 @@ #include <linux/if_vlan.h> #include <linux/module.h> #include <linux/moduleparam.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/netfilter_bridge/ebt_vlan.h> @@ -37,15 +38,12 @@ MODULE_LICENSE("GPL"); #define DEBUG_MSG(args...) if (debug) printk (KERN_DEBUG "ebt_vlan: " args) #define GET_BITMASK(_BIT_MASK_) info->bitmask & _BIT_MASK_ -#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return EBT_NOMATCH;} +#define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; } -static int -ebt_filter_vlan(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *data, unsigned int datalen) +static bool +ebt_vlan_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ebt_vlan_info *info = data; + const struct ebt_vlan_info *info = par->matchinfo; const struct vlan_hdr *fp; struct vlan_hdr _frame; @@ -57,7 +55,7 @@ ebt_filter_vlan(const struct sk_buff *skb, fp = skb_header_pointer(skb, 0, sizeof(_frame), &_frame); if (fp == NULL) - return EBT_NOMATCH; + return false; /* Tag Control Information (TCI) consists of the following elements: * - User_priority. The user_priority field is three bits in length, @@ -83,30 +81,20 @@ ebt_filter_vlan(const struct sk_buff *skb, if (GET_BITMASK(EBT_VLAN_ENCAP)) EXIT_ON_MISMATCH(encap, EBT_VLAN_ENCAP); - return EBT_MATCH; + return true; } -static int -ebt_check_vlan(const char *tablename, - unsigned int hooknr, - const struct ebt_entry *e, void *data, unsigned int datalen) +static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par) { - struct ebt_vlan_info *info = data; - - /* Parameters buffer overflow check */ - if (datalen != EBT_ALIGN(sizeof(struct ebt_vlan_info))) { - DEBUG_MSG - ("passed size %d is not eq to ebt_vlan_info (%Zd)\n", - datalen, sizeof(struct ebt_vlan_info)); - return -EINVAL; - } + struct ebt_vlan_info *info = par->matchinfo; + const struct ebt_entry *e = par->entryinfo; /* Is it 802.1Q frame checked? */ if (e->ethproto != htons(ETH_P_8021Q)) { DEBUG_MSG ("passed entry proto %2.4X is not 802.1Q (8100)\n", (unsigned short) ntohs(e->ethproto)); - return -EINVAL; + return false; } /* Check for bitmask range @@ -114,14 +102,14 @@ ebt_check_vlan(const char *tablename, if (info->bitmask & ~EBT_VLAN_MASK) { DEBUG_MSG("bitmask %2X is out of mask (%2X)\n", info->bitmask, EBT_VLAN_MASK); - return -EINVAL; + return false; } /* Check for inversion flags range */ if (info->invflags & ~EBT_VLAN_MASK) { DEBUG_MSG("inversion flags %2X is out of mask (%2X)\n", info->invflags, EBT_VLAN_MASK); - return -EINVAL; + return false; } /* Reserved VLAN ID (VID) values @@ -136,7 +124,7 @@ ebt_check_vlan(const char *tablename, DEBUG_MSG ("id %d is out of range (1-4096)\n", info->id); - return -EINVAL; + return false; } /* Note: This is valid VLAN-tagged frame point. * Any value of user_priority are acceptable, @@ -151,7 +139,7 @@ ebt_check_vlan(const char *tablename, if ((unsigned char) info->prio > 7) { DEBUG_MSG("prio %d is out of range (0-7)\n", info->prio); - return -EINVAL; + return false; } } /* Check for encapsulated proto range - it is possible to be @@ -162,17 +150,20 @@ ebt_check_vlan(const char *tablename, DEBUG_MSG ("encap frame length %d is less than minimal\n", ntohs(info->encap)); - return -EINVAL; + return false; } } - return 0; + return true; } -static struct ebt_match filter_vlan __read_mostly = { - .name = EBT_VLAN_MATCH, - .match = ebt_filter_vlan, - .check = ebt_check_vlan, +static struct xt_match ebt_vlan_mt_reg __read_mostly = { + .name = "vlan", + .revision = 0, + .family = NFPROTO_BRIDGE, + .match = ebt_vlan_mt, + .checkentry = ebt_vlan_mt_check, + .matchsize = XT_ALIGN(sizeof(struct ebt_vlan_info)), .me = THIS_MODULE, }; @@ -181,12 +172,12 @@ static int __init ebt_vlan_init(void) DEBUG_MSG("ebtables 802.1Q extension module v" MODULE_VERS "\n"); DEBUG_MSG("module debug=%d\n", !!debug); - return ebt_register_match(&filter_vlan); + return xt_register_match(&ebt_vlan_mt_reg); } static void __exit ebt_vlan_fini(void) { - ebt_unregister_match(&filter_vlan); + xt_unregister_match(&ebt_vlan_mt_reg); } module_init(ebt_vlan_init); diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 32afff859e4a..0fa208e86405 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -19,6 +19,7 @@ #include <linux/kmod.h> #include <linux/module.h> #include <linux/vmalloc.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_bridge/ebtables.h> #include <linux/spinlock.h> #include <linux/mutex.h> @@ -55,29 +56,31 @@ static DEFINE_MUTEX(ebt_mutex); static LIST_HEAD(ebt_tables); -static LIST_HEAD(ebt_targets); -static LIST_HEAD(ebt_matches); -static LIST_HEAD(ebt_watchers); -static struct ebt_target ebt_standard_target = -{ {NULL, NULL}, EBT_STANDARD_TARGET, NULL, NULL, NULL, NULL}; +static struct xt_target ebt_standard_target = { + .name = "standard", + .revision = 0, + .family = NFPROTO_BRIDGE, + .targetsize = sizeof(int), +}; -static inline int ebt_do_watcher (struct ebt_entry_watcher *w, - const struct sk_buff *skb, unsigned int hooknr, const struct net_device *in, - const struct net_device *out) +static inline int +ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb, + struct xt_target_param *par) { - w->u.watcher->watcher(skb, hooknr, in, out, w->data, - w->watcher_size); + par->target = w->u.watcher; + par->targinfo = w->data; + w->u.watcher->target(skb, par); /* watchers don't give a verdict */ return 0; } static inline int ebt_do_match (struct ebt_entry_match *m, - const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out) + const struct sk_buff *skb, struct xt_match_param *par) { - return m->u.match->match(skb, in, out, m->data, - m->match_size); + par->match = m->u.match; + par->matchinfo = m->data; + return m->u.match->match(skb, par); } static inline int ebt_dev_check(char *entry, const struct net_device *device) @@ -153,6 +156,15 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, struct ebt_entries *chaininfo; char *base; struct ebt_table_info *private; + bool hotdrop = false; + struct xt_match_param mtpar; + struct xt_target_param tgpar; + + mtpar.family = tgpar.family = NFPROTO_BRIDGE; + mtpar.in = tgpar.in = in; + mtpar.out = tgpar.out = out; + mtpar.hotdrop = &hotdrop; + tgpar.hooknum = hook; read_lock_bh(&table->lock); private = table->private; @@ -173,8 +185,12 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, if (ebt_basic_match(point, eth_hdr(skb), in, out)) goto letscontinue; - if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, in, out) != 0) + if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &mtpar) != 0) goto letscontinue; + if (hotdrop) { + read_unlock_bh(&table->lock); + return NF_DROP; + } /* increase counter */ (*(counter_base + i)).pcnt++; @@ -182,17 +198,18 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb, /* these should only watch: not modify, nor tell us what to do with the packet */ - EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, hook, in, - out); + EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &tgpar); t = (struct ebt_entry_target *) (((char *)point) + point->target_offset); /* standard target */ if (!t->u.target->target) verdict = ((struct ebt_standard_target *)t)->verdict; - else - verdict = t->u.target->target(skb, hook, - in, out, t->data, t->target_size); + else { + tgpar.target = t->u.target; + tgpar.targinfo = t->data; + verdict = t->u.target->target(skb, &tgpar); + } if (verdict == EBT_ACCEPT) { read_unlock_bh(&table->lock); return NF_ACCEPT; @@ -288,23 +305,14 @@ find_inlist_lock_noload(struct list_head *head, const char *name, int *error, return NULL; } -#ifndef CONFIG_KMOD -#define find_inlist_lock(h,n,p,e,m) find_inlist_lock_noload((h),(n),(e),(m)) -#else static void * find_inlist_lock(struct list_head *head, const char *name, const char *prefix, int *error, struct mutex *mutex) { - void *ret; - - ret = find_inlist_lock_noload(head, name, error, mutex); - if (!ret) { - request_module("%s%s", prefix, name); - ret = find_inlist_lock_noload(head, name, error, mutex); - } - return ret; + return try_then_request_module( + find_inlist_lock_noload(head, name, error, mutex), + "%s%s", prefix, name); } -#endif static inline struct ebt_table * find_table_lock(const char *name, int *error, struct mutex *mutex) @@ -312,80 +320,71 @@ find_table_lock(const char *name, int *error, struct mutex *mutex) return find_inlist_lock(&ebt_tables, name, "ebtable_", error, mutex); } -static inline struct ebt_match * -find_match_lock(const char *name, int *error, struct mutex *mutex) -{ - return find_inlist_lock(&ebt_matches, name, "ebt_", error, mutex); -} - -static inline struct ebt_watcher * -find_watcher_lock(const char *name, int *error, struct mutex *mutex) -{ - return find_inlist_lock(&ebt_watchers, name, "ebt_", error, mutex); -} - -static inline struct ebt_target * -find_target_lock(const char *name, int *error, struct mutex *mutex) -{ - return find_inlist_lock(&ebt_targets, name, "ebt_", error, mutex); -} - static inline int -ebt_check_match(struct ebt_entry_match *m, struct ebt_entry *e, - const char *name, unsigned int hookmask, unsigned int *cnt) +ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par, + unsigned int *cnt) { - struct ebt_match *match; + const struct ebt_entry *e = par->entryinfo; + struct xt_match *match; size_t left = ((char *)e + e->watchers_offset) - (char *)m; int ret; if (left < sizeof(struct ebt_entry_match) || left - sizeof(struct ebt_entry_match) < m->match_size) return -EINVAL; - match = find_match_lock(m->u.name, &ret, &ebt_mutex); - if (!match) - return ret; - m->u.match = match; - if (!try_module_get(match->me)) { - mutex_unlock(&ebt_mutex); + + match = try_then_request_module(xt_find_match(NFPROTO_BRIDGE, + m->u.name, 0), "ebt_%s", m->u.name); + if (IS_ERR(match)) + return PTR_ERR(match); + if (match == NULL) return -ENOENT; - } - mutex_unlock(&ebt_mutex); - if (match->check && - match->check(name, hookmask, e, m->data, m->match_size) != 0) { - BUGPRINT("match->check failed\n"); + m->u.match = match; + + par->match = match; + par->matchinfo = m->data; + ret = xt_check_match(par, m->match_size, + e->ethproto, e->invflags & EBT_IPROTO); + if (ret < 0) { module_put(match->me); - return -EINVAL; + return ret; } + (*cnt)++; return 0; } static inline int -ebt_check_watcher(struct ebt_entry_watcher *w, struct ebt_entry *e, - const char *name, unsigned int hookmask, unsigned int *cnt) +ebt_check_watcher(struct ebt_entry_watcher *w, struct xt_tgchk_param *par, + unsigned int *cnt) { - struct ebt_watcher *watcher; + const struct ebt_entry *e = par->entryinfo; + struct xt_target *watcher; size_t left = ((char *)e + e->target_offset) - (char *)w; int ret; if (left < sizeof(struct ebt_entry_watcher) || left - sizeof(struct ebt_entry_watcher) < w->watcher_size) return -EINVAL; - watcher = find_watcher_lock(w->u.name, &ret, &ebt_mutex); - if (!watcher) - return ret; - w->u.watcher = watcher; - if (!try_module_get(watcher->me)) { - mutex_unlock(&ebt_mutex); + + watcher = try_then_request_module( + xt_find_target(NFPROTO_BRIDGE, w->u.name, 0), + "ebt_%s", w->u.name); + if (IS_ERR(watcher)) + return PTR_ERR(watcher); + if (watcher == NULL) return -ENOENT; - } - mutex_unlock(&ebt_mutex); - if (watcher->check && - watcher->check(name, hookmask, e, w->data, w->watcher_size) != 0) { - BUGPRINT("watcher->check failed\n"); + w->u.watcher = watcher; + + par->target = watcher; + par->targinfo = w->data; + ret = xt_check_target(par, w->watcher_size, + e->ethproto, e->invflags & EBT_IPROTO); + if (ret < 0) { module_put(watcher->me); - return -EINVAL; + return ret; } + (*cnt)++; return 0; } @@ -558,30 +557,41 @@ ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo, static inline int ebt_cleanup_match(struct ebt_entry_match *m, unsigned int *i) { + struct xt_mtdtor_param par; + if (i && (*i)-- == 0) return 1; - if (m->u.match->destroy) - m->u.match->destroy(m->data, m->match_size); - module_put(m->u.match->me); + par.match = m->u.match; + par.matchinfo = m->data; + par.family = NFPROTO_BRIDGE; + if (par.match->destroy != NULL) + par.match->destroy(&par); + module_put(par.match->me); return 0; } static inline int ebt_cleanup_watcher(struct ebt_entry_watcher *w, unsigned int *i) { + struct xt_tgdtor_param par; + if (i && (*i)-- == 0) return 1; - if (w->u.watcher->destroy) - w->u.watcher->destroy(w->data, w->watcher_size); - module_put(w->u.watcher->me); + par.target = w->u.watcher; + par.targinfo = w->data; + par.family = NFPROTO_BRIDGE; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } static inline int ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) { + struct xt_tgdtor_param par; struct ebt_entry_target *t; if (e->bitmask == 0) @@ -592,10 +602,13 @@ ebt_cleanup_entry(struct ebt_entry *e, unsigned int *cnt) EBT_WATCHER_ITERATE(e, ebt_cleanup_watcher, NULL); EBT_MATCH_ITERATE(e, ebt_cleanup_match, NULL); t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); - if (t->u.target->destroy) - t->u.target->destroy(t->data, t->target_size); - module_put(t->u.target->me); + par.target = t->u.target; + par.targinfo = t->data; + par.family = NFPROTO_BRIDGE; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } @@ -605,10 +618,12 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, struct ebt_cl_stack *cl_s, unsigned int udc_cnt) { struct ebt_entry_target *t; - struct ebt_target *target; + struct xt_target *target; unsigned int i, j, hook = 0, hookmask = 0; size_t gap; int ret; + struct xt_mtchk_param mtpar; + struct xt_tgchk_param tgpar; /* don't mess with the struct ebt_entries */ if (e->bitmask == 0) @@ -649,24 +664,31 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, hookmask = cl_s[i - 1].hookmask; } i = 0; - ret = EBT_MATCH_ITERATE(e, ebt_check_match, e, name, hookmask, &i); + + mtpar.table = tgpar.table = name; + mtpar.entryinfo = tgpar.entryinfo = e; + mtpar.hook_mask = tgpar.hook_mask = hookmask; + mtpar.family = tgpar.family = NFPROTO_BRIDGE; + ret = EBT_MATCH_ITERATE(e, ebt_check_match, &mtpar, &i); if (ret != 0) goto cleanup_matches; j = 0; - ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, e, name, hookmask, &j); + ret = EBT_WATCHER_ITERATE(e, ebt_check_watcher, &tgpar, &j); if (ret != 0) goto cleanup_watchers; t = (struct ebt_entry_target *)(((char *)e) + e->target_offset); gap = e->next_offset - e->target_offset; - target = find_target_lock(t->u.name, &ret, &ebt_mutex); - if (!target) + + target = try_then_request_module( + xt_find_target(NFPROTO_BRIDGE, t->u.name, 0), + "ebt_%s", t->u.name); + if (IS_ERR(target)) { + ret = PTR_ERR(target); goto cleanup_watchers; - if (!try_module_get(target->me)) { - mutex_unlock(&ebt_mutex); + } else if (target == NULL) { ret = -ENOENT; goto cleanup_watchers; } - mutex_unlock(&ebt_mutex); t->u.target = target; if (t->u.target == &ebt_standard_target) { @@ -681,13 +703,20 @@ ebt_check_entry(struct ebt_entry *e, struct ebt_table_info *newinfo, ret = -EFAULT; goto cleanup_watchers; } - } else if (t->target_size > gap - sizeof(struct ebt_entry_target) || - (t->u.target->check && - t->u.target->check(name, hookmask, e, t->data, t->target_size) != 0)){ + } else if (t->target_size > gap - sizeof(struct ebt_entry_target)) { module_put(t->u.target->me); ret = -EFAULT; goto cleanup_watchers; } + + tgpar.target = target; + tgpar.targinfo = t->data; + ret = xt_check_target(&tgpar, t->target_size, + e->ethproto, e->invflags & EBT_IPROTO); + if (ret < 0) { + module_put(target->me); + goto cleanup_watchers; + } (*cnt)++; return 0; cleanup_watchers: @@ -1068,87 +1097,6 @@ free_newinfo: return ret; } -int ebt_register_target(struct ebt_target *target) -{ - struct ebt_target *t; - int ret; - - ret = mutex_lock_interruptible(&ebt_mutex); - if (ret != 0) - return ret; - list_for_each_entry(t, &ebt_targets, list) { - if (strcmp(t->name, target->name) == 0) { - mutex_unlock(&ebt_mutex); - return -EEXIST; - } - } - list_add(&target->list, &ebt_targets); - mutex_unlock(&ebt_mutex); - - return 0; -} - -void ebt_unregister_target(struct ebt_target *target) -{ - mutex_lock(&ebt_mutex); - list_del(&target->list); - mutex_unlock(&ebt_mutex); -} - -int ebt_register_match(struct ebt_match *match) -{ - struct ebt_match *m; - int ret; - - ret = mutex_lock_interruptible(&ebt_mutex); - if (ret != 0) - return ret; - list_for_each_entry(m, &ebt_matches, list) { - if (strcmp(m->name, match->name) == 0) { - mutex_unlock(&ebt_mutex); - return -EEXIST; - } - } - list_add(&match->list, &ebt_matches); - mutex_unlock(&ebt_mutex); - - return 0; -} - -void ebt_unregister_match(struct ebt_match *match) -{ - mutex_lock(&ebt_mutex); - list_del(&match->list); - mutex_unlock(&ebt_mutex); -} - -int ebt_register_watcher(struct ebt_watcher *watcher) -{ - struct ebt_watcher *w; - int ret; - - ret = mutex_lock_interruptible(&ebt_mutex); - if (ret != 0) - return ret; - list_for_each_entry(w, &ebt_watchers, list) { - if (strcmp(w->name, watcher->name) == 0) { - mutex_unlock(&ebt_mutex); - return -EEXIST; - } - } - list_add(&watcher->list, &ebt_watchers); - mutex_unlock(&ebt_mutex); - - return 0; -} - -void ebt_unregister_watcher(struct ebt_watcher *watcher) -{ - mutex_lock(&ebt_mutex); - list_del(&watcher->list); - mutex_unlock(&ebt_mutex); -} - int ebt_register_table(struct ebt_table *table) { struct ebt_table_info *newinfo; @@ -1518,11 +1466,14 @@ static int __init ebtables_init(void) { int ret; - mutex_lock(&ebt_mutex); - list_add(&ebt_standard_target.list, &ebt_targets); - mutex_unlock(&ebt_mutex); - if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0) + ret = xt_register_target(&ebt_standard_target); + if (ret < 0) + return ret; + ret = nf_register_sockopt(&ebt_sockopts); + if (ret < 0) { + xt_unregister_target(&ebt_standard_target); return ret; + } printk(KERN_INFO "Ebtables v2.0 registered\n"); return 0; @@ -1531,17 +1482,12 @@ static int __init ebtables_init(void) static void __exit ebtables_fini(void) { nf_unregister_sockopt(&ebt_sockopts); + xt_unregister_target(&ebt_standard_target); printk(KERN_INFO "Ebtables v2.0 unregistered\n"); } EXPORT_SYMBOL(ebt_register_table); EXPORT_SYMBOL(ebt_unregister_table); -EXPORT_SYMBOL(ebt_register_match); -EXPORT_SYMBOL(ebt_unregister_match); -EXPORT_SYMBOL(ebt_register_watcher); -EXPORT_SYMBOL(ebt_unregister_watcher); -EXPORT_SYMBOL(ebt_register_target); -EXPORT_SYMBOL(ebt_unregister_target); EXPORT_SYMBOL(ebt_do_table); module_init(ebtables_init); module_exit(ebtables_fini); diff --git a/net/can/af_can.c b/net/can/af_can.c index 8035fbf526ae..7d4d2b3c137e 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -128,8 +128,8 @@ static int can_create(struct net *net, struct socket *sock, int protocol) if (net != &init_net) return -EAFNOSUPPORT; -#ifdef CONFIG_KMOD - /* try to load protocol module, when CONFIG_KMOD is defined */ +#ifdef CONFIG_MODULES + /* try to load protocol module kernel is modular */ if (!proto_tab[protocol]) { err = request_module("can-proto-%d", protocol); diff --git a/net/compat.c b/net/compat.c index 67fb6a3834a3..a3a2ba0fac08 100644 --- a/net/compat.c +++ b/net/compat.c @@ -226,14 +226,14 @@ int put_cmsg_compat(struct msghdr *kmsg, int level, int type, int len, void *dat return 0; /* XXX: return error? check spec. */ } - if (level == SOL_SOCKET && type == SO_TIMESTAMP) { + if (level == SOL_SOCKET && type == SCM_TIMESTAMP) { struct timeval *tv = (struct timeval *)data; ctv.tv_sec = tv->tv_sec; ctv.tv_usec = tv->tv_usec; data = &ctv; len = sizeof(ctv); } - if (level == SOL_SOCKET && type == SO_TIMESTAMPNS) { + if (level == SOL_SOCKET && type == SCM_TIMESTAMPNS) { struct timespec *ts = (struct timespec *)data; cts.tv_sec = ts->tv_sec; cts.tv_nsec = ts->tv_nsec; @@ -725,7 +725,7 @@ EXPORT_SYMBOL(compat_mc_getsockopt); static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(6)}; + AL(4)}; #undef AL asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags) @@ -738,52 +738,13 @@ asmlinkage long compat_sys_recvmsg(int fd, struct compat_msghdr __user *msg, uns return sys_recvmsg(fd, (struct msghdr __user *)msg, flags | MSG_CMSG_COMPAT); } -asmlinkage long compat_sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, - const compat_sigset_t __user *sigmask, - compat_size_t sigsetsize, int flags) -{ - compat_sigset_t ss32; - sigset_t ksigmask, sigsaved; - int ret; - - if (sigmask) { - if (sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; - if (copy_from_user(&ss32, sigmask, sizeof(ss32))) - return -EFAULT; - sigset_from_compat(&ksigmask, &ss32); - - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } - - ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); - - if (ret == -ERESTARTNOHAND) { - /* - * Don't restore the signal mask yet. Let do_signal() deliver - * the signal on the way back to userspace, before the signal - * mask is restored. - */ - if (sigmask) { - memcpy(¤t->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } - } else if (sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - - return ret; -} - asmlinkage long compat_sys_socketcall(int call, u32 __user *args) { int ret; u32 a[6]; u32 a0, a1; - if (call < SYS_SOCKET || call > SYS_PACCEPT) + if (call < SYS_SOCKET || call > SYS_ACCEPT4) return -EINVAL; if (copy_from_user(a, args, nas[call])) return -EFAULT; @@ -804,7 +765,7 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) ret = sys_listen(a0, a1); break; case SYS_ACCEPT: - ret = do_accept(a0, compat_ptr(a1), compat_ptr(a[2]), 0); + ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), 0); break; case SYS_GETSOCKNAME: ret = sys_getsockname(a0, compat_ptr(a1), compat_ptr(a[2])); @@ -844,9 +805,8 @@ asmlinkage long compat_sys_socketcall(int call, u32 __user *args) case SYS_RECVMSG: ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]); break; - case SYS_PACCEPT: - ret = compat_sys_paccept(a0, compat_ptr(a1), compat_ptr(a[2]), - compat_ptr(a[3]), a[4], a[5]); + case SYS_ACCEPT4: + ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]); break; default: ret = -EINVAL; diff --git a/net/core/Makefile b/net/core/Makefile index b1332f6d0042..26a37cb31923 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -6,6 +6,7 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ gen_stats.o gen_estimator.o net_namespace.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o +obj-$(CONFIG_HAS_DMA) += skb_dma_map.o obj-y += dev.o ethtool.o dev_mcast.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o diff --git a/net/core/datagram.c b/net/core/datagram.c index 52f577a0f544..ee631843c2f5 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -9,7 +9,7 @@ * identical recvmsg() code. So we share it here. The poll was * shared before but buried in udp.c so I moved it. * - * Authors: Alan Cox <alan@redhat.com>. (datagram_poll() from old + * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old * udp.c code) * * Fixes: diff --git a/net/core/dev.c b/net/core/dev.c index e8eb2b478344..9174c77d3112 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -891,7 +891,7 @@ int dev_alloc_name(struct net_device *dev, const char *name) * Change name of a device, can pass format strings "eth%d". * for wildcarding. */ -int dev_change_name(struct net_device *dev, char *newname) +int dev_change_name(struct net_device *dev, const char *newname) { char oldname[IFNAMSIZ]; int err = 0; @@ -917,7 +917,6 @@ int dev_change_name(struct net_device *dev, char *newname) err = dev_alloc_name(dev, newname); if (err < 0) return err; - strcpy(newname, dev->name); } else if (__dev_get_by_name(net, newname)) return -EEXIST; @@ -925,10 +924,10 @@ int dev_change_name(struct net_device *dev, char *newname) strlcpy(dev->name, newname, IFNAMSIZ); rollback: - err = device_rename(&dev->dev, dev->name); - if (err) { + ret = device_rename(&dev->dev, dev->name); + if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); - return err; + return ret; } write_lock_bh(&dev_base_lock); @@ -955,6 +954,38 @@ rollback: } /** + * dev_set_alias - change ifalias of a device + * @dev: device + * @alias: name up to IFALIASZ + * @len: limit of bytes to copy from info + * + * Set ifalias for a device, + */ +int dev_set_alias(struct net_device *dev, const char *alias, size_t len) +{ + ASSERT_RTNL(); + + if (len >= IFALIASZ) + return -EINVAL; + + if (!len) { + if (dev->ifalias) { + kfree(dev->ifalias); + dev->ifalias = NULL; + } + return 0; + } + + dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL); + if (!dev->ifalias) + return -ENOMEM; + + strlcpy(dev->ifalias, alias, len+1); + return len; +} + + +/** * netdev_features_change - device changes features * @dev: device to cause notification * @@ -1676,14 +1707,14 @@ static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb) } switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) ip_proto = ip_hdr(skb)->protocol; addr1 = ip_hdr(skb)->saddr; addr2 = ip_hdr(skb)->daddr; ihl = ip_hdr(skb)->ihl; break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): ip_proto = ipv6_hdr(skb)->nexthdr; addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; @@ -2187,6 +2218,9 @@ int netif_receive_skb(struct sk_buff *skb) int ret = NET_RX_DROP; __be16 type; + if (skb->vlan_tci && vlan_hwaccel_do_receive(skb)) + return NET_RX_SUCCESS; + /* if we've gotten here through NAPI, check netpoll */ if (netpoll_receive_skb(skb)) return NET_RX_DROP; @@ -2918,6 +2952,12 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) return 0; } +static void dev_change_rx_flags(struct net_device *dev, int flags) +{ + if (dev->flags & IFF_UP && dev->change_rx_flags) + dev->change_rx_flags(dev, flags); +} + static int __dev_set_promiscuity(struct net_device *dev, int inc) { unsigned short old_flags = dev->flags; @@ -2955,8 +2995,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc) current->uid, current->gid, audit_get_sessionid(current)); - if (dev->change_rx_flags) - dev->change_rx_flags(dev, IFF_PROMISC); + dev_change_rx_flags(dev, IFF_PROMISC); } return 0; } @@ -3022,8 +3061,7 @@ int dev_set_allmulti(struct net_device *dev, int inc) } } if (dev->flags ^ old_flags) { - if (dev->change_rx_flags) - dev->change_rx_flags(dev, IFF_ALLMULTI); + dev_change_rx_flags(dev, IFF_ALLMULTI); dev_set_rx_mode(dev); } return 0; @@ -3302,6 +3340,12 @@ static void dev_addr_discard(struct net_device *dev) netif_addr_unlock_bh(dev); } +/** + * dev_get_flags - get flags reported to userspace + * @dev: device + * + * Get the combination of flag bits exported through APIs to userspace. + */ unsigned dev_get_flags(const struct net_device *dev) { unsigned flags; @@ -3326,6 +3370,14 @@ unsigned dev_get_flags(const struct net_device *dev) return flags; } +/** + * dev_change_flags - change device settings + * @dev: device + * @flags: device state flags + * + * Change settings on device based state flags. The flags are + * in the userspace exported format. + */ int dev_change_flags(struct net_device *dev, unsigned flags) { int ret, changes; @@ -3347,8 +3399,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags) * Load in the correct multicast list now the flags have changed. */ - if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST) - dev->change_rx_flags(dev, IFF_MULTICAST); + if ((old_flags ^ flags) & IFF_MULTICAST) + dev_change_rx_flags(dev, IFF_MULTICAST); dev_set_rx_mode(dev); @@ -3395,6 +3447,13 @@ int dev_change_flags(struct net_device *dev, unsigned flags) return ret; } +/** + * dev_set_mtu - Change maximum transfer unit + * @dev: device + * @new_mtu: new transfer unit + * + * Change the maximum transfer size of the network device. + */ int dev_set_mtu(struct net_device *dev, int new_mtu) { int err; @@ -3419,6 +3478,13 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) return err; } +/** + * dev_set_mac_address - Change Media Access Control Address + * @dev: device + * @sa: new address + * + * Change the hardware (MAC) address of the device + */ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) { int err; @@ -3808,14 +3874,11 @@ static int dev_new_index(struct net *net) } /* Delayed registration/unregisteration */ -static DEFINE_SPINLOCK(net_todo_list_lock); static LIST_HEAD(net_todo_list); static void net_set_todo(struct net_device *dev) { - spin_lock(&net_todo_list_lock); list_add_tail(&dev->todo_list, &net_todo_list); - spin_unlock(&net_todo_list_lock); } static void rollback_registered(struct net_device *dev) @@ -3887,6 +3950,46 @@ static void netdev_init_queue_locks(struct net_device *dev) __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL); } +unsigned long netdev_fix_features(unsigned long features, const char *name) +{ + /* Fix illegal SG+CSUM combinations. */ + if ((features & NETIF_F_SG) && + !(features & NETIF_F_ALL_CSUM)) { + if (name) + printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no " + "checksum feature.\n", name); + features &= ~NETIF_F_SG; + } + + /* TSO requires that SG is present as well. */ + if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) { + if (name) + printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no " + "SG feature.\n", name); + features &= ~NETIF_F_TSO; + } + + if (features & NETIF_F_UFO) { + if (!(features & NETIF_F_GEN_CSUM)) { + if (name) + printk(KERN_ERR "%s: Dropping NETIF_F_UFO " + "since no NETIF_F_HW_CSUM feature.\n", + name); + features &= ~NETIF_F_UFO; + } + + if (!(features & NETIF_F_SG)) { + if (name) + printk(KERN_ERR "%s: Dropping NETIF_F_UFO " + "since no NETIF_F_SG feature.\n", name); + features &= ~NETIF_F_UFO; + } + } + + return features; +} +EXPORT_SYMBOL(netdev_fix_features); + /** * register_netdevice - register a network device * @dev: device to register @@ -3972,36 +4075,7 @@ int register_netdevice(struct net_device *dev) dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM); } - - /* Fix illegal SG+CSUM combinations. */ - if ((dev->features & NETIF_F_SG) && - !(dev->features & NETIF_F_ALL_CSUM)) { - printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n", - dev->name); - dev->features &= ~NETIF_F_SG; - } - - /* TSO requires that SG is present as well. */ - if ((dev->features & NETIF_F_TSO) && - !(dev->features & NETIF_F_SG)) { - printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n", - dev->name); - dev->features &= ~NETIF_F_TSO; - } - if (dev->features & NETIF_F_UFO) { - if (!(dev->features & NETIF_F_HW_CSUM)) { - printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " - "NETIF_F_HW_CSUM feature.\n", - dev->name); - dev->features &= ~NETIF_F_UFO; - } - if (!(dev->features & NETIF_F_SG)) { - printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " - "NETIF_F_SG feature.\n", - dev->name); - dev->features &= ~NETIF_F_UFO; - } - } + dev->features = netdev_fix_features(dev->features, dev->name); /* Enable software GSO if SG is supported. */ if (dev->features & NETIF_F_SG) @@ -4142,33 +4216,24 @@ static void netdev_wait_allrefs(struct net_device *dev) * free_netdev(y1); * free_netdev(y2); * - * We are invoked by rtnl_unlock() after it drops the semaphore. + * We are invoked by rtnl_unlock(). * This allows us to deal with problems: * 1) We can delete sysfs objects which invoke hotplug * without deadlocking with linkwatch via keventd. * 2) Since we run with the RTNL semaphore not held, we can sleep * safely in order to wait for the netdev refcnt to drop to zero. + * + * We must not return until all unregister events added during + * the interval the lock was held have been completed. */ -static DEFINE_MUTEX(net_todo_run_mutex); void netdev_run_todo(void) { struct list_head list; - /* Need to guard against multiple cpu's getting out of order. */ - mutex_lock(&net_todo_run_mutex); - - /* Not safe to do outside the semaphore. We must not return - * until all unregister events invoked by the local processor - * have been completed (either by this todo run, or one on - * another cpu). - */ - if (list_empty(&net_todo_list)) - goto out; - /* Snapshot list, allow later requests */ - spin_lock(&net_todo_list_lock); list_replace_init(&net_todo_list, &list); - spin_unlock(&net_todo_list_lock); + + __rtnl_unlock(); while (!list_empty(&list)) { struct net_device *dev @@ -4200,9 +4265,6 @@ void netdev_run_todo(void) /* Free network device */ kobject_put(&dev->dev.kobj); } - -out: - mutex_unlock(&net_todo_run_mutex); } static struct net_device_stats *internal_stats(struct net_device *dev) @@ -4322,7 +4384,12 @@ void free_netdev(struct net_device *dev) put_device(&dev->dev); } -/* Synchronize with packet receive processing. */ +/** + * synchronize_net - Synchronize with packet receive processing + * + * Wait for packets currently being received to be done. + * Does not block later packets from starting. + */ void synchronize_net(void) { might_sleep(); @@ -4624,7 +4691,7 @@ netdev_dma_event(struct dma_client *client, struct dma_chan *chan, } /** - * netdev_dma_regiser - register the networking subsystem as a DMA client + * netdev_dma_register - register the networking subsystem as a DMA client */ static int __init netdev_dma_register(void) { @@ -4647,43 +4714,45 @@ static int __init netdev_dma_register(void) { return -ENODEV; } #endif /* CONFIG_NET_DMA */ /** - * netdev_compute_feature - compute conjunction of two feature sets - * @all: first feature set - * @one: second feature set + * netdev_increment_features - increment feature set by one + * @all: current feature set + * @one: new feature set + * @mask: mask feature set * * Computes a new feature set after adding a device with feature set - * @one to the master device with current feature set @all. Returns - * the new feature set. + * @one to the master device with current feature set @all. Will not + * enable anything that is off in @mask. Returns the new feature set. */ -int netdev_compute_features(unsigned long all, unsigned long one) -{ - /* if device needs checksumming, downgrade to hw checksumming */ - if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) - all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; - - /* if device can't do all checksum, downgrade to ipv4/ipv6 */ - if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM)) - all ^= NETIF_F_HW_CSUM - | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; - - if (one & NETIF_F_GSO) - one |= NETIF_F_GSO_SOFTWARE; - one |= NETIF_F_GSO; +unsigned long netdev_increment_features(unsigned long all, unsigned long one, + unsigned long mask) +{ + /* If device needs checksumming, downgrade to it. */ + if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM)) + all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM); + else if (mask & NETIF_F_ALL_CSUM) { + /* If one device supports v4/v6 checksumming, set for all. */ + if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) && + !(all & NETIF_F_GEN_CSUM)) { + all &= ~NETIF_F_ALL_CSUM; + all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + } - /* If even one device supports robust GSO, enable it for all. */ - if (one & NETIF_F_GSO_ROBUST) - all |= NETIF_F_GSO_ROBUST; + /* If one device supports hw checksumming, set for all. */ + if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) { + all &= ~NETIF_F_ALL_CSUM; + all |= NETIF_F_HW_CSUM; + } + } - all &= one | NETIF_F_LLTX; + one |= NETIF_F_ALL_CSUM; - if (!(all & NETIF_F_ALL_CSUM)) - all &= ~NETIF_F_SG; - if (!(all & NETIF_F_SG)) - all &= ~NETIF_F_GSO_MASK; + one |= all & NETIF_F_ONE_FOR_ALL; + all &= one | NETIF_F_LLTX | NETIF_F_GSO; + all |= one & mask & NETIF_F_ONE_FOR_ALL; return all; } -EXPORT_SYMBOL(netdev_compute_features); +EXPORT_SYMBOL(netdev_increment_features); static struct hlist_head *netdev_create_hash(void) { @@ -4719,10 +4788,18 @@ err_name: return -ENOMEM; } -char *netdev_drivername(struct net_device *dev, char *buffer, int len) +/** + * netdev_drivername - network driver for the device + * @dev: network device + * @buffer: buffer for resulting name + * @len: size of buffer + * + * Determine network driver for device. + */ +char *netdev_drivername(const struct net_device *dev, char *buffer, int len) { - struct device_driver *driver; - struct device *parent; + const struct device_driver *driver; + const struct device *parent; if (len <= 0 || !buffer) return buffer; @@ -4889,8 +4966,6 @@ EXPORT_SYMBOL(br_fdb_get_hook); EXPORT_SYMBOL(br_fdb_put_hook); #endif -#ifdef CONFIG_KMOD EXPORT_SYMBOL(dev_load); -#endif EXPORT_PER_CPU_SYMBOL(softnet_data); diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index 5402b3b38e0d..9e2fa39f22a3 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -6,7 +6,7 @@ * Richard Underwood <richard@wuzz.demon.co.uk> * * Stir fried together from the IP multicast and CAP patches above - * Alan Cox <Alan.Cox@linux.org> + * Alan Cox <alan@lxorguk.ukuu.org.uk> * * Fixes: * Alan Cox : Update the device on a real delete diff --git a/net/core/dst.c b/net/core/dst.c index fe03266130b6..09c1530f4681 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -203,6 +203,7 @@ void __dst_free(struct dst_entry * dst) if (dst_garbage.timer_inc > DST_GC_INC) { dst_garbage.timer_inc = DST_GC_INC; dst_garbage.timer_expires = DST_GC_MIN; + cancel_delayed_work(&dst_gc_work); schedule_delayed_work(&dst_gc_work, dst_garbage.timer_expires); } spin_unlock_bh(&dst_garbage.lock); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 9d92e41826e7..1dc728b38589 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -927,8 +927,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) if (skb_queue_len(&neigh->arp_queue) >= neigh->parms->queue_len) { struct sk_buff *buff; - buff = neigh->arp_queue.next; - __skb_unlink(buff, &neigh->arp_queue); + buff = __skb_dequeue(&neigh->arp_queue); kfree_skb(buff); NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards); } @@ -1259,24 +1258,20 @@ static void neigh_proxy_process(unsigned long arg) struct neigh_table *tbl = (struct neigh_table *)arg; long sched_next = 0; unsigned long now = jiffies; - struct sk_buff *skb; + struct sk_buff *skb, *n; spin_lock(&tbl->proxy_queue.lock); - skb = tbl->proxy_queue.next; - - while (skb != (struct sk_buff *)&tbl->proxy_queue) { - struct sk_buff *back = skb; - long tdif = NEIGH_CB(back)->sched_next - now; + skb_queue_walk_safe(&tbl->proxy_queue, skb, n) { + long tdif = NEIGH_CB(skb)->sched_next - now; - skb = skb->next; if (tdif <= 0) { - struct net_device *dev = back->dev; - __skb_unlink(back, &tbl->proxy_queue); + struct net_device *dev = skb->dev; + __skb_unlink(skb, &tbl->proxy_queue); if (tbl->proxy_redo && netif_running(dev)) - tbl->proxy_redo(back); + tbl->proxy_redo(skb); else - kfree_skb(back); + kfree_skb(skb); dev_put(dev); } else if (!sched_next || tdif < sched_next) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index c1f4e0d428c0..92d6b9467314 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -209,9 +209,44 @@ static ssize_t store_tx_queue_len(struct device *dev, return netdev_store(dev, attr, buf, len, change_tx_queue_len); } +static ssize_t store_ifalias(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct net_device *netdev = to_net_dev(dev); + size_t count = len; + ssize_t ret; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + /* ignore trailing newline */ + if (len > 0 && buf[len - 1] == '\n') + --count; + + rtnl_lock(); + ret = dev_set_alias(netdev, buf, count); + rtnl_unlock(); + + return ret < 0 ? ret : len; +} + +static ssize_t show_ifalias(struct device *dev, + struct device_attribute *attr, char *buf) +{ + const struct net_device *netdev = to_net_dev(dev); + ssize_t ret = 0; + + rtnl_lock(); + if (netdev->ifalias) + ret = sprintf(buf, "%s\n", netdev->ifalias); + rtnl_unlock(); + return ret; +} + static struct device_attribute net_class_attributes[] = { __ATTR(addr_len, S_IRUGO, show_addr_len, NULL), __ATTR(dev_id, S_IRUGO, show_dev_id, NULL), + __ATTR(ifalias, S_IRUGO | S_IWUSR, show_ifalias, store_ifalias), __ATTR(iflink, S_IRUGO, show_iflink, NULL), __ATTR(ifindex, S_IRUGO, show_ifindex, NULL), __ATTR(features, S_IRUGO, show_features, NULL), @@ -418,6 +453,7 @@ static void netdev_release(struct device *d) BUG_ON(dev->reg_state != NETREG_RELEASED); + kfree(dev->ifalias); kfree((char *)dev - dev->padded); } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 7c52fe277b62..1895a4ca9c4f 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -18,6 +18,7 @@ static struct list_head *first_device = &pernet_list; static DEFINE_MUTEX(net_mutex); LIST_HEAD(net_namespace_list); +EXPORT_SYMBOL_GPL(net_namespace_list); struct net init_net; EXPORT_SYMBOL(init_net); @@ -95,7 +96,7 @@ static void net_free(struct net *net) return; } #endif - + kfree(net->gen); kmem_cache_free(net_cachep, net); } @@ -324,6 +325,38 @@ void unregister_pernet_subsys(struct pernet_operations *module) } EXPORT_SYMBOL_GPL(unregister_pernet_subsys); +int register_pernet_gen_subsys(int *id, struct pernet_operations *ops) +{ + int rv; + + mutex_lock(&net_mutex); +again: + rv = ida_get_new_above(&net_generic_ids, 1, id); + if (rv < 0) { + if (rv == -EAGAIN) { + ida_pre_get(&net_generic_ids, GFP_KERNEL); + goto again; + } + goto out; + } + rv = register_pernet_operations(first_device, ops); + if (rv < 0) + ida_remove(&net_generic_ids, *id); + mutex_unlock(&net_mutex); +out: + return rv; +} +EXPORT_SYMBOL_GPL(register_pernet_gen_subsys); + +void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops) +{ + mutex_lock(&net_mutex); + unregister_pernet_operations(ops); + ida_remove(&net_generic_ids, id); + mutex_unlock(&net_mutex); +} +EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys); + /** * register_pernet_device - register a network namespace device * @ops: pernet operations structure for the subsystem diff --git a/net/core/pktgen.c b/net/core/pktgen.c index a756847e3814..8997e912aaaf 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1973,28 +1973,21 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) /* make sure that we don't pick a non-existing transmit queue */ ntxq = pkt_dev->odev->real_num_tx_queues; - if (ntxq <= num_online_cpus() && (pkt_dev->flags & F_QUEUE_MAP_CPU)) { - printk(KERN_WARNING "pktgen: WARNING: QUEUE_MAP_CPU " - "disabled because CPU count (%d) exceeds number ", - num_online_cpus()); - printk(KERN_WARNING "pktgen: WARNING: of tx queues " - "(%d) on %s \n", ntxq, pkt_dev->odev->name); - pkt_dev->flags &= ~F_QUEUE_MAP_CPU; - } + if (ntxq <= pkt_dev->queue_map_min) { printk(KERN_WARNING "pktgen: WARNING: Requested " - "queue_map_min (%d) exceeds number of tx\n", - pkt_dev->queue_map_min); - printk(KERN_WARNING "pktgen: WARNING: queues (%d) on " - "%s, resetting\n", ntxq, pkt_dev->odev->name); + "queue_map_min (zero-based) (%d) exceeds valid range " + "[0 - %d] for (%d) queues on %s, resetting\n", + pkt_dev->queue_map_min, (ntxq ?: 1)- 1, ntxq, + pkt_dev->odev->name); pkt_dev->queue_map_min = ntxq - 1; } - if (ntxq <= pkt_dev->queue_map_max) { + if (pkt_dev->queue_map_max >= ntxq) { printk(KERN_WARNING "pktgen: WARNING: Requested " - "queue_map_max (%d) exceeds number of tx\n", - pkt_dev->queue_map_max); - printk(KERN_WARNING "pktgen: WARNING: queues (%d) on " - "%s, resetting\n", ntxq, pkt_dev->odev->name); + "queue_map_max (zero-based) (%d) exceeds valid range " + "[0 - %d] for (%d) queues on %s, resetting\n", + pkt_dev->queue_map_max, (ntxq ?: 1)- 1, ntxq, + pkt_dev->odev->name); pkt_dev->queue_map_max = ntxq - 1; } @@ -2203,6 +2196,7 @@ static void set_cur_queue_map(struct pktgen_dev *pkt_dev) } pkt_dev->cur_queue_map = t; } + pkt_dev->cur_queue_map = pkt_dev->cur_queue_map % pkt_dev->odev->real_num_tx_queues; } /* Increment/randomize headers according to flags and current values @@ -2474,7 +2468,7 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev, if (ret < 0) { printk(KERN_ERR "Error expanding " "ipsec packet %d\n",ret); - return 0; + goto err; } } @@ -2484,8 +2478,7 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev, if (ret) { printk(KERN_ERR "Error creating ipsec " "packet %d\n",ret); - kfree_skb(skb); - return 0; + goto err; } /* restore ll */ eth = (__u8 *) skb_push(skb, ETH_HLEN); @@ -2494,6 +2487,9 @@ static inline int process_ipsec(struct pktgen_dev *pkt_dev, } } return 1; +err: + kfree_skb(skb); + return 0; } #endif diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 71edb8b36341..4dfb6b4d4559 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -73,7 +73,7 @@ void __rtnl_unlock(void) void rtnl_unlock(void) { - mutex_unlock(&rtnl_mutex); + /* This fellow will unlock it for us. */ netdev_run_todo(); } @@ -586,6 +586,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(IFALIASZ) /* IFLA_IFALIAS */ + nla_total_size(IFNAMSIZ) /* IFLA_QDISC */ + nla_total_size(sizeof(struct rtnl_link_ifmap)) + nla_total_size(sizeof(struct rtnl_link_stats)) @@ -640,6 +641,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (txq->qdisc_sleeping) NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id); + if (dev->ifalias) + NLA_PUT_STRING(skb, IFLA_IFALIAS, dev->ifalias); + if (1) { struct rtnl_link_ifmap map = { .mem_start = dev->mem_start, @@ -713,6 +717,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKMODE] = { .type = NLA_U8 }, [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, + [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -853,6 +858,14 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, modified = 1; } + if (tb[IFLA_IFALIAS]) { + err = dev_set_alias(dev, nla_data(tb[IFLA_IFALIAS]), + nla_len(tb[IFLA_IFALIAS])); + if (err < 0) + goto errout; + modified = 1; + } + if (tb[IFLA_BROADCAST]) { nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); send_addr_notify = 1; @@ -865,7 +878,9 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm, if (ifm->ifi_change) flags = (flags & ifm->ifi_change) | (dev->flags & ~ifm->ifi_change); - dev_change_flags(dev, flags); + err = dev_change_flags(dev, flags); + if (err < 0) + goto errout; } if (tb[IFLA_TXQLEN]) @@ -1027,7 +1042,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct nlattr *linkinfo[IFLA_INFO_MAX+1]; int err; -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES replay: #endif err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); @@ -1116,7 +1131,7 @@ replay: return -EOPNOTSUPP; if (!ops) { -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES if (kind[0]) { __rtnl_unlock(); request_module("rtnl-link-%s", kind); diff --git a/net/core/scm.c b/net/core/scm.c index 10f5c65f6a47..b12303dd39d9 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -106,9 +106,25 @@ void __scm_destroy(struct scm_cookie *scm) if (fpl) { scm->fp = NULL; - for (i=fpl->count-1; i>=0; i--) - fput(fpl->fp[i]); - kfree(fpl); + if (current->scm_work_list) { + list_add_tail(&fpl->list, current->scm_work_list); + } else { + LIST_HEAD(work_list); + + current->scm_work_list = &work_list; + + list_add(&fpl->list, &work_list); + while (!list_empty(&work_list)) { + fpl = list_first_entry(&work_list, struct scm_fp_list, list); + + list_del(&fpl->list); + for (i=fpl->count-1; i>=0; i--) + fput(fpl->fp[i]); + kfree(fpl); + } + + current->scm_work_list = NULL; + } } } diff --git a/net/core/skb_dma_map.c b/net/core/skb_dma_map.c new file mode 100644 index 000000000000..86234923a3b7 --- /dev/null +++ b/net/core/skb_dma_map.c @@ -0,0 +1,66 @@ +/* skb_dma_map.c: DMA mapping helpers for socket buffers. + * + * Copyright (C) David S. Miller <davem@davemloft.net> + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/dma-mapping.h> +#include <linux/skbuff.h> + +int skb_dma_map(struct device *dev, struct sk_buff *skb, + enum dma_data_direction dir) +{ + struct skb_shared_info *sp = skb_shinfo(skb); + dma_addr_t map; + int i; + + map = dma_map_single(dev, skb->data, + skb_headlen(skb), dir); + if (dma_mapping_error(dev, map)) + goto out_err; + + sp->dma_maps[0] = map; + for (i = 0; i < sp->nr_frags; i++) { + skb_frag_t *fp = &sp->frags[i]; + + map = dma_map_page(dev, fp->page, fp->page_offset, + fp->size, dir); + if (dma_mapping_error(dev, map)) + goto unwind; + sp->dma_maps[i + 1] = map; + } + sp->num_dma_maps = i + 1; + + return 0; + +unwind: + while (--i >= 0) { + skb_frag_t *fp = &sp->frags[i]; + + dma_unmap_page(dev, sp->dma_maps[i + 1], + fp->size, dir); + } + dma_unmap_single(dev, sp->dma_maps[0], + skb_headlen(skb), dir); +out_err: + return -ENOMEM; +} +EXPORT_SYMBOL(skb_dma_map); + +void skb_dma_unmap(struct device *dev, struct sk_buff *skb, + enum dma_data_direction dir) +{ + struct skb_shared_info *sp = skb_shinfo(skb); + int i; + + dma_unmap_single(dev, sp->dma_maps[0], + skb_headlen(skb), dir); + for (i = 0; i < sp->nr_frags; i++) { + skb_frag_t *fp = &sp->frags[i]; + + dma_unmap_page(dev, sp->dma_maps[i + 1], + fp->size, dir); + } +} +EXPORT_SYMBOL(skb_dma_unmap); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index ca1ccdf1ef76..d49ef8301b5b 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1,7 +1,7 @@ /* * Routines having to do with the 'struct sk_buff' memory handlers. * - * Authors: Alan Cox <iiitac@pyr.swan.ac.uk> + * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> * Florian La Roche <rzsfl@rz.uni-sb.de> * * Fixes: @@ -263,6 +263,26 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, return skb; } +struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask) +{ + int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1; + struct page *page; + + page = alloc_pages_node(node, gfp_mask, 0); + return page; +} +EXPORT_SYMBOL(__netdev_alloc_page); + +void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, + int size) +{ + skb_fill_page_desc(skb, i, page, off, size); + skb->len += size; + skb->data_len += size; + skb->truesize += size; +} +EXPORT_SYMBOL(skb_add_rx_frag); + /** * dev_alloc_skb - allocate an skbuff for receiving * @length: length to allocate @@ -363,8 +383,7 @@ static void kfree_skbmem(struct sk_buff *skb) } } -/* Free everything but the sk_buff shell. */ -static void skb_release_all(struct sk_buff *skb) +static void skb_release_head_state(struct sk_buff *skb) { dst_release(skb->dst); #ifdef CONFIG_XFRM @@ -388,6 +407,12 @@ static void skb_release_all(struct sk_buff *skb) skb->tc_verd = 0; #endif #endif +} + +/* Free everything but the sk_buff shell. */ +static void skb_release_all(struct sk_buff *skb) +{ + skb_release_head_state(skb); skb_release_data(skb); } @@ -424,6 +449,50 @@ void kfree_skb(struct sk_buff *skb) __kfree_skb(skb); } +/** + * skb_recycle_check - check if skb can be reused for receive + * @skb: buffer + * @skb_size: minimum receive buffer size + * + * Checks that the skb passed in is not shared or cloned, and + * that it is linear and its head portion at least as large as + * skb_size so that it can be recycled as a receive buffer. + * If these conditions are met, this function does any necessary + * reference count dropping and cleans up the skbuff as if it + * just came from __alloc_skb(). + */ +int skb_recycle_check(struct sk_buff *skb, int skb_size) +{ + struct skb_shared_info *shinfo; + + if (skb_is_nonlinear(skb) || skb->fclone != SKB_FCLONE_UNAVAILABLE) + return 0; + + skb_size = SKB_DATA_ALIGN(skb_size + NET_SKB_PAD); + if (skb_end_pointer(skb) - skb->head < skb_size) + return 0; + + if (skb_shared(skb) || skb_cloned(skb)) + return 0; + + skb_release_head_state(skb); + shinfo = skb_shinfo(skb); + atomic_set(&shinfo->dataref, 1); + shinfo->nr_frags = 0; + shinfo->gso_size = 0; + shinfo->gso_segs = 0; + shinfo->gso_type = 0; + shinfo->ip6_frag_id = 0; + shinfo->frag_list = NULL; + + memset(skb, 0, offsetof(struct sk_buff, tail)); + skb->data = skb->head + NET_SKB_PAD; + skb_reset_tail_pointer(skb); + + return 1; +} +EXPORT_SYMBOL(skb_recycle_check); + static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) { new->tstamp = old->tstamp; @@ -701,6 +770,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, #endif long off; + BUG_ON(nhead < 0); + if (skb_shared(skb)) BUG(); diff --git a/net/core/sock.c b/net/core/sock.c index 91f8bbc93526..341e39456952 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -136,7 +136,6 @@ static struct lock_class_key af_family_keys[AF_MAX]; static struct lock_class_key af_family_slock_keys[AF_MAX]; -#ifdef CONFIG_DEBUG_LOCK_ALLOC /* * Make lock validator output more readable. (we pre-construct these * strings build-time, so that runtime initialization of socket @@ -154,7 +153,8 @@ static const char *af_family_key_strings[AF_MAX+1] = { "sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" , "sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" , "sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" , - "sk_lock-AF_RXRPC" , "sk_lock-AF_MAX" + "sk_lock-AF_RXRPC" , "sk_lock-AF_ISDN" , "sk_lock-AF_PHONET" , + "sk_lock-AF_MAX" }; static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_UNSPEC", "slock-AF_UNIX" , "slock-AF_INET" , @@ -168,7 +168,8 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = { "slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" , "slock-27" , "slock-28" , "slock-AF_CAN" , "slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" , - "slock-AF_RXRPC" , "slock-AF_MAX" + "slock-AF_RXRPC" , "slock-AF_ISDN" , "slock-AF_PHONET" , + "slock-AF_MAX" }; static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_UNSPEC", "clock-AF_UNIX" , "clock-AF_INET" , @@ -182,9 +183,9 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = { "clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" , "clock-27" , "clock-28" , "clock-AF_CAN" , "clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" , - "clock-AF_RXRPC" , "clock-AF_MAX" + "clock-AF_RXRPC" , "clock-AF_ISDN" , "clock-AF_PHONET" , + "clock-AF_MAX" }; -#endif /* * sk_callback_lock locking rules are per-address-family, @@ -324,7 +325,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested) */ mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_); - rc = sk->sk_backlog_rcv(sk, skb); + rc = sk_backlog_rcv(sk, skb); mutex_release(&sk->sk_lock.dep_map, 1, _RET_IP_); } else @@ -1371,7 +1372,7 @@ static void __release_sock(struct sock *sk) struct sk_buff *next = skb->next; skb->next = NULL; - sk->sk_backlog_rcv(sk, skb); + sk_backlog_rcv(sk, skb); /* * We are in process context here with softirqs diff --git a/net/core/stream.c b/net/core/stream.c index a6b3437ff082..8727cead64ad 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -9,7 +9,7 @@ * * Authors: Arnaldo Carvalho de Melo <acme@conectiva.com.br> * (from old tcp.c code) - * Alan Cox <alan@redhat.com> (Borrowed comments 8-)) + * Alan Cox <alan@lxorguk.ukuu.org.uk> (Borrowed comments 8-)) */ #include <linux/module.h> diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c index 4809753d12ae..8fe931a3d7a1 100644 --- a/net/dccp/ccid.c +++ b/net/dccp/ccid.c @@ -154,7 +154,7 @@ struct ccid *ccid_new(unsigned char id, struct sock *sk, int rx, gfp_t gfp) struct ccid *ccid = NULL; ccids_read_lock(); -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES if (ccids[id] == NULL) { /* We only try to load if in process context */ ccids_read_unlock(); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 8e9580874216..9a430734530c 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -783,7 +783,7 @@ static struct ccid_operations ccid2 = { }; #ifdef CONFIG_IP_DCCP_CCID2_DEBUG -module_param(ccid2_debug, bool, 0444); +module_param(ccid2_debug, bool, 0644); MODULE_PARM_DESC(ccid2_debug, "Enable debug messages"); #endif diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index f6756e0c9e69..3b8bd7ca6761 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -963,7 +963,7 @@ static struct ccid_operations ccid3 = { }; #ifdef CONFIG_IP_DCCP_CCID3_DEBUG -module_param(ccid3_debug, bool, 0444); +module_param(ccid3_debug, bool, 0644); MODULE_PARM_DESC(ccid3_debug, "Enable debug messages"); #endif diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index bcd6ac415bb9..5b3ce0688c5c 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -67,7 +67,10 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) u32 i_i, i_tot0 = 0, i_tot1 = 0, w_tot = 0; int i, k = tfrc_lh_length(lh) - 1; /* k is as in rfc3448bis, 5.4 */ - for (i=0; i <= k; i++) { + if (k <= 0) + return; + + for (i = 0; i <= k; i++) { i_i = tfrc_lh_get_interval(lh, i); if (i < k) { @@ -78,7 +81,6 @@ static void tfrc_lh_calc_i_mean(struct tfrc_loss_hist *lh) i_tot1 += i_i * tfrc_lh_weights[i-1]; } - BUG_ON(w_tot == 0); lh->i_mean = max(i_tot0, i_tot1) / w_tot; } diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 97ecec0a8e76..185916218e07 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -10,7 +10,7 @@ #ifdef CONFIG_IP_DCCP_TFRC_DEBUG int tfrc_debug; -module_param(tfrc_debug, bool, 0444); +module_param(tfrc_debug, bool, 0644); MODULE_PARM_DESC(tfrc_debug, "Enable debug messages"); #endif diff --git a/net/dccp/input.c b/net/dccp/input.c index 803933ab396d..779d0ed9ae94 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -370,7 +370,7 @@ int dccp_rcv_established(struct sock *sk, struct sk_buff *skb, goto discard; if (dccp_parse_options(sk, NULL, skb)) - goto discard; + return 1; if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); @@ -610,7 +610,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * Step 8: Process options and mark acknowledgeable */ if (dccp_parse_options(sk, NULL, skb)) - goto discard; + return 1; if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) dccp_event_ack_recv(sk, skb); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 882c5c4de69e..e3dfddab21cc 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -811,9 +811,8 @@ static int dccp_v4_rcv(struct sk_buff *skb) /* Step 2: * Look up flow ID in table and get corresponding socket */ - sk = __inet_lookup(dev_net(skb->dst->dev), &dccp_hashinfo, - iph->saddr, dh->dccph_sport, - iph->daddr, dh->dccph_dport, inet_iif(skb)); + sk = __inet_lookup_skb(&dccp_hashinfo, skb, + dh->dccph_sport, dh->dccph_dport); /* * Step 2: * If no socket ... diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 5e1ee0da2c40..d4ce1224e008 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -98,7 +98,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (skb->len < offset + sizeof(*dh) || skb->len < offset + __dccp_basic_hdr_len(dh)) { - ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -107,7 +108,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, &hdr->saddr, dh->dccph_sport, inet6_iif(skb)); if (sk == NULL) { - ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -257,7 +259,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req) fl.fl6_flowlabel = 0; fl.oif = ireq6->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_sk(sk)->sport; + fl.fl_ip_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); opt = np->opt; @@ -556,7 +558,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, ipv6_addr_copy(&fl.fl6_src, &ireq6->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_sk(sk)->sport; + fl.fl_ip_sport = inet_rsk(req)->loc_port; security_sk_classify_flow(sk, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) @@ -805,10 +807,8 @@ static int dccp_v6_rcv(struct sk_buff *skb) /* Step 2: * Look up flow ID in table and get corresponding socket */ - sk = __inet6_lookup(dev_net(skb->dst->dev), &dccp_hashinfo, - &ipv6_hdr(skb)->saddr, dh->dccph_sport, - &ipv6_hdr(skb)->daddr, ntohs(dh->dccph_dport), - inet6_iif(skb)); + sk = __inet6_lookup_skb(&dccp_hashinfo, skb, + dh->dccph_sport, dh->dccph_dport); /* * Step 2: * If no socket ... diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index b2804e2d1b8c..e6bf99e3e41a 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -309,6 +309,7 @@ void dccp_reqsk_init(struct request_sock *req, struct sk_buff *skb) struct dccp_request_sock *dreq = dccp_rsk(req); inet_rsk(req)->rmt_port = dccp_hdr(skb)->dccph_sport; + inet_rsk(req)->loc_port = dccp_hdr(skb)->dccph_dport; inet_rsk(req)->acked = 0; req->rcv_wnd = sysctl_dccp_feat_sequence_window; dreq->dreq_timestamp_echo = 0; diff --git a/net/dccp/options.c b/net/dccp/options.c index dc7c158a2f4b..0809b63cb055 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -81,11 +81,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, /* Check if this isn't a single byte option */ if (opt > DCCPO_MAX_RESERVED) { if (opt_ptr == opt_end) - goto out_invalid_option; + goto out_nonsensical_length; len = *opt_ptr++; - if (len < 3) - goto out_invalid_option; + if (len < 2) + goto out_nonsensical_length; /* * Remove the type and len fields, leaving * just the value size @@ -95,7 +95,7 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, opt_ptr += len; if (opt_ptr > opt_end) - goto out_invalid_option; + goto out_nonsensical_length; } /* @@ -283,12 +283,17 @@ ignore_option: if (mandatory) goto out_invalid_option; +out_nonsensical_length: + /* RFC 4340, 5.8: ignore option and all remaining option space */ return 0; out_invalid_option: DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT); DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR; DCCP_WARN("DCCP(%p): invalid option %d, len=%d", sk, opt, len); + DCCP_SKB_CB(skb)->dccpd_reset_data[0] = opt; + DCCP_SKB_CB(skb)->dccpd_reset_data[1] = len > 0 ? value[0] : 0; + DCCP_SKB_CB(skb)->dccpd_reset_data[2] = len > 1 ? value[1] : 0; return -1; } diff --git a/net/dccp/output.c b/net/dccp/output.c index d06945c7d3df..809d803d5006 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -347,7 +347,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst, /* Build and checksum header */ dh = dccp_zeroed_hdr(skb, dccp_header_size); - dh->dccph_sport = inet_sk(sk)->sport; + dh->dccph_sport = inet_rsk(req)->loc_port; dh->dccph_dport = inet_rsk(req)->rmt_port; dh->dccph_doff = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4; diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 1ca3b26eed0f..d0bd34819761 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -309,7 +309,9 @@ int dccp_disconnect(struct sock *sk, int flags) sk->sk_err = ECONNRESET; dccp_clear_xmit_timers(sk); + __skb_queue_purge(&sk->sk_receive_queue); + __skb_queue_purge(&sk->sk_write_queue); if (sk->sk_send_head != NULL) { __kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; @@ -1028,7 +1030,7 @@ MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); #ifdef CONFIG_IP_DCCP_DEBUG int dccp_debug; -module_param(dccp_debug, bool, 0444); +module_param(dccp_debug, bool, 0644); MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); EXPORT_SYMBOL_GPL(dccp_debug); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 2f0ac3c3eb71..28e26bd08e24 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -152,7 +152,7 @@ static struct dn_dev_parms dn_dev_list[] = { #define DN_DEV_LIST_SIZE ARRAY_SIZE(dn_dev_list) -#define DN_DEV_PARMS_OFFSET(x) ((int) ((char *) &((struct dn_dev_parms *)0)->x)) +#define DN_DEV_PARMS_OFFSET(x) offsetof(struct dn_dev_parms, x) #ifdef CONFIG_SYSCTL @@ -166,7 +166,7 @@ static int max_priority[] = { 127 }; /* From DECnet spec */ static int dn_forwarding_proc(ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); -static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, +static int dn_forwarding_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen); @@ -318,7 +318,7 @@ static int dn_forwarding_proc(ctl_table *table, int write, #endif } -static int dn_forwarding_sysctl(ctl_table *table, int __user *name, int nlen, +static int dn_forwarding_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -490,9 +490,7 @@ int dn_dev_ioctl(unsigned int cmd, void __user *arg) return -EFAULT; ifr->ifr_name[IFNAMSIZ-1] = 0; -#ifdef CONFIG_KMOD dev_load(&init_net, ifr->ifr_name); -#endif switch(cmd) { case SIOCGIFADDR: diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 228067c571ba..36400b266896 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -132,7 +132,7 @@ static int parse_addr(__le16 *addr, char *str) } -static int dn_node_address_strategy(ctl_table *table, int __user *name, int nlen, +static int dn_node_address_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -217,7 +217,7 @@ static int dn_node_address_handler(ctl_table *table, int write, } -static int dn_def_dev_strategy(ctl_table *table, int __user *name, int nlen, +static int dn_def_dev_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig new file mode 100644 index 000000000000..49211b35725b --- /dev/null +++ b/net/dsa/Kconfig @@ -0,0 +1,60 @@ +menuconfig NET_DSA + bool "Distributed Switch Architecture support" + default n + depends on EXPERIMENTAL && !S390 + select PHYLIB + ---help--- + This allows you to use hardware switch chips that use + the Distributed Switch Architecture. + + +if NET_DSA + +# tagging formats +config NET_DSA_TAG_DSA + bool + default n + +config NET_DSA_TAG_EDSA + bool + default n + +config NET_DSA_TAG_TRAILER + bool + default n + + +# switch drivers +config NET_DSA_MV88E6XXX + bool + default n + +config NET_DSA_MV88E6060 + bool "Marvell 88E6060 ethernet switch chip support" + select NET_DSA_TAG_TRAILER + ---help--- + This enables support for the Marvell 88E6060 ethernet switch + chip. + +config NET_DSA_MV88E6XXX_NEED_PPU + bool + default n + +config NET_DSA_MV88E6131 + bool "Marvell 88E6131 ethernet switch chip support" + select NET_DSA_MV88E6XXX + select NET_DSA_MV88E6XXX_NEED_PPU + select NET_DSA_TAG_DSA + ---help--- + This enables support for the Marvell 88E6131 ethernet switch + chip. + +config NET_DSA_MV88E6123_61_65 + bool "Marvell 88E6123/6161/6165 ethernet switch chip support" + select NET_DSA_MV88E6XXX + select NET_DSA_TAG_EDSA + ---help--- + This enables support for the Marvell 88E6123/6161/6165 + ethernet switch chips. + +endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile new file mode 100644 index 000000000000..2374faff4dea --- /dev/null +++ b/net/dsa/Makefile @@ -0,0 +1,13 @@ +# tagging formats +obj-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o +obj-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o +obj-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o + +# switch drivers +obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o +obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o +obj-$(CONFIG_NET_DSA_MV88E6123_61_65) += mv88e6123_61_65.o +obj-$(CONFIG_NET_DSA_MV88E6131) += mv88e6131.o + +# the core +obj-$(CONFIG_NET_DSA) += dsa.o slave.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c new file mode 100644 index 000000000000..33e99462023a --- /dev/null +++ b/net/dsa/dsa.c @@ -0,0 +1,392 @@ +/* + * net/dsa/dsa.c - Hardware switch handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/platform_device.h> +#include <net/dsa.h> +#include "dsa_priv.h" + +char dsa_driver_version[] = "0.1"; + + +/* switch driver registration ***********************************************/ +static DEFINE_MUTEX(dsa_switch_drivers_mutex); +static LIST_HEAD(dsa_switch_drivers); + +void register_switch_driver(struct dsa_switch_driver *drv) +{ + mutex_lock(&dsa_switch_drivers_mutex); + list_add_tail(&drv->list, &dsa_switch_drivers); + mutex_unlock(&dsa_switch_drivers_mutex); +} + +void unregister_switch_driver(struct dsa_switch_driver *drv) +{ + mutex_lock(&dsa_switch_drivers_mutex); + list_del_init(&drv->list); + mutex_unlock(&dsa_switch_drivers_mutex); +} + +static struct dsa_switch_driver * +dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name) +{ + struct dsa_switch_driver *ret; + struct list_head *list; + char *name; + + ret = NULL; + name = NULL; + + mutex_lock(&dsa_switch_drivers_mutex); + list_for_each(list, &dsa_switch_drivers) { + struct dsa_switch_driver *drv; + + drv = list_entry(list, struct dsa_switch_driver, list); + + name = drv->probe(bus, sw_addr); + if (name != NULL) { + ret = drv; + break; + } + } + mutex_unlock(&dsa_switch_drivers_mutex); + + *_name = name; + + return ret; +} + + +/* basic switch operations **************************************************/ +static struct dsa_switch * +dsa_switch_setup(struct device *parent, struct dsa_platform_data *pd, + struct mii_bus *bus, struct net_device *dev) +{ + struct dsa_switch *ds; + int ret; + struct dsa_switch_driver *drv; + char *name; + int i; + + /* + * Probe for switch model. + */ + drv = dsa_switch_probe(bus, pd->sw_addr, &name); + if (drv == NULL) { + printk(KERN_ERR "%s: could not detect attached switch\n", + dev->name); + return ERR_PTR(-EINVAL); + } + printk(KERN_INFO "%s: detected a %s switch\n", dev->name, name); + + + /* + * Allocate and initialise switch state. + */ + ds = kzalloc(sizeof(*ds) + drv->priv_size, GFP_KERNEL); + if (ds == NULL) + return ERR_PTR(-ENOMEM); + + ds->pd = pd; + ds->master_netdev = dev; + ds->master_mii_bus = bus; + + ds->drv = drv; + ds->tag_protocol = drv->tag_protocol; + + + /* + * Validate supplied switch configuration. + */ + ds->cpu_port = -1; + for (i = 0; i < DSA_MAX_PORTS; i++) { + char *name; + + name = pd->port_names[i]; + if (name == NULL) + continue; + + if (!strcmp(name, "cpu")) { + if (ds->cpu_port != -1) { + printk(KERN_ERR "multiple cpu ports?!\n"); + ret = -EINVAL; + goto out; + } + ds->cpu_port = i; + } else { + ds->valid_port_mask |= 1 << i; + } + } + + if (ds->cpu_port == -1) { + printk(KERN_ERR "no cpu port?!\n"); + ret = -EINVAL; + goto out; + } + + + /* + * If we use a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point on get + * sent to the tag format's receive function. (Which will + * discard received packets until we set ds->ports[] below.) + */ + wmb(); + dev->dsa_ptr = (void *)ds; + + + /* + * Do basic register setup. + */ + ret = drv->setup(ds); + if (ret < 0) + goto out; + + ret = drv->set_addr(ds, dev->dev_addr); + if (ret < 0) + goto out; + + ds->slave_mii_bus = mdiobus_alloc(); + if (ds->slave_mii_bus == NULL) { + ret = -ENOMEM; + goto out; + } + dsa_slave_mii_bus_init(ds); + + ret = mdiobus_register(ds->slave_mii_bus); + if (ret < 0) + goto out_free; + + + /* + * Create network devices for physical switch ports. + */ + wmb(); + for (i = 0; i < DSA_MAX_PORTS; i++) { + struct net_device *slave_dev; + + if (!(ds->valid_port_mask & (1 << i))) + continue; + + slave_dev = dsa_slave_create(ds, parent, i, pd->port_names[i]); + if (slave_dev == NULL) { + printk(KERN_ERR "%s: can't create dsa slave " + "device for port %d(%s)\n", + dev->name, i, pd->port_names[i]); + continue; + } + + ds->ports[i] = slave_dev; + } + + return ds; + +out_free: + mdiobus_free(ds->slave_mii_bus); +out: + dev->dsa_ptr = NULL; + kfree(ds); + return ERR_PTR(ret); +} + +static void dsa_switch_destroy(struct dsa_switch *ds) +{ +} + + +/* hooks for ethertype-less tagging formats *********************************/ +/* + * The original DSA tag format and some other tag formats have no + * ethertype, which means that we need to add a little hack to the + * networking receive path to make sure that received frames get + * the right ->protocol assigned to them when one of those tag + * formats is in use. + */ +bool dsa_uses_dsa_tags(void *dsa_ptr) +{ + struct dsa_switch *ds = dsa_ptr; + + return !!(ds->tag_protocol == htons(ETH_P_DSA)); +} + +bool dsa_uses_trailer_tags(void *dsa_ptr) +{ + struct dsa_switch *ds = dsa_ptr; + + return !!(ds->tag_protocol == htons(ETH_P_TRAILER)); +} + + +/* link polling *************************************************************/ +static void dsa_link_poll_work(struct work_struct *ugly) +{ + struct dsa_switch *ds; + + ds = container_of(ugly, struct dsa_switch, link_poll_work); + + ds->drv->poll_link(ds); + mod_timer(&ds->link_poll_timer, round_jiffies(jiffies + HZ)); +} + +static void dsa_link_poll_timer(unsigned long _ds) +{ + struct dsa_switch *ds = (void *)_ds; + + schedule_work(&ds->link_poll_work); +} + + +/* platform driver init and cleanup *****************************************/ +static int dev_is_class(struct device *dev, void *class) +{ + if (dev->class != NULL && !strcmp(dev->class->name, class)) + return 1; + + return 0; +} + +static struct device *dev_find_class(struct device *parent, char *class) +{ + if (dev_is_class(parent, class)) { + get_device(parent); + return parent; + } + + return device_find_child(parent, class, dev_is_class); +} + +static struct mii_bus *dev_to_mii_bus(struct device *dev) +{ + struct device *d; + + d = dev_find_class(dev, "mdio_bus"); + if (d != NULL) { + struct mii_bus *bus; + + bus = to_mii_bus(d); + put_device(d); + + return bus; + } + + return NULL; +} + +static struct net_device *dev_to_net_device(struct device *dev) +{ + struct device *d; + + d = dev_find_class(dev, "net"); + if (d != NULL) { + struct net_device *nd; + + nd = to_net_dev(d); + dev_hold(nd); + put_device(d); + + return nd; + } + + return NULL; +} + +static int dsa_probe(struct platform_device *pdev) +{ + static int dsa_version_printed; + struct dsa_platform_data *pd = pdev->dev.platform_data; + struct net_device *dev; + struct mii_bus *bus; + struct dsa_switch *ds; + + if (!dsa_version_printed++) + printk(KERN_NOTICE "Distributed Switch Architecture " + "driver version %s\n", dsa_driver_version); + + if (pd == NULL || pd->mii_bus == NULL || pd->netdev == NULL) + return -EINVAL; + + bus = dev_to_mii_bus(pd->mii_bus); + if (bus == NULL) + return -EINVAL; + + dev = dev_to_net_device(pd->netdev); + if (dev == NULL) + return -EINVAL; + + if (dev->dsa_ptr != NULL) { + dev_put(dev); + return -EEXIST; + } + + ds = dsa_switch_setup(&pdev->dev, pd, bus, dev); + if (IS_ERR(ds)) { + dev_put(dev); + return PTR_ERR(ds); + } + + if (ds->drv->poll_link != NULL) { + INIT_WORK(&ds->link_poll_work, dsa_link_poll_work); + init_timer(&ds->link_poll_timer); + ds->link_poll_timer.data = (unsigned long)ds; + ds->link_poll_timer.function = dsa_link_poll_timer; + ds->link_poll_timer.expires = round_jiffies(jiffies + HZ); + add_timer(&ds->link_poll_timer); + } + + platform_set_drvdata(pdev, ds); + + return 0; +} + +static int dsa_remove(struct platform_device *pdev) +{ + struct dsa_switch *ds = platform_get_drvdata(pdev); + + if (ds->drv->poll_link != NULL) + del_timer_sync(&ds->link_poll_timer); + + flush_scheduled_work(); + + dsa_switch_destroy(ds); + + return 0; +} + +static void dsa_shutdown(struct platform_device *pdev) +{ +} + +static struct platform_driver dsa_driver = { + .probe = dsa_probe, + .remove = dsa_remove, + .shutdown = dsa_shutdown, + .driver = { + .name = "dsa", + .owner = THIS_MODULE, + }, +}; + +static int __init dsa_init_module(void) +{ + return platform_driver_register(&dsa_driver); +} +module_init(dsa_init_module); + +static void __exit dsa_cleanup_module(void) +{ + platform_driver_unregister(&dsa_driver); +} +module_exit(dsa_cleanup_module); + +MODULE_AUTHOR("Lennert Buytenhek <buytenh@wantstofly.org>") +MODULE_DESCRIPTION("Driver for Distributed Switch Architecture switch chips"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:dsa"); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h new file mode 100644 index 000000000000..7063378a1ebf --- /dev/null +++ b/net/dsa/dsa_priv.h @@ -0,0 +1,116 @@ +/* + * net/dsa/dsa_priv.h - Hardware switch handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __DSA_PRIV_H +#define __DSA_PRIV_H + +#include <linux/list.h> +#include <linux/phy.h> +#include <linux/timer.h> +#include <linux/workqueue.h> +#include <net/dsa.h> + +struct dsa_switch { + /* + * Configuration data for the platform device that owns + * this dsa switch instance. + */ + struct dsa_platform_data *pd; + + /* + * References to network device and mii bus to use. + */ + struct net_device *master_netdev; + struct mii_bus *master_mii_bus; + + /* + * The used switch driver and frame tagging type. + */ + struct dsa_switch_driver *drv; + __be16 tag_protocol; + + /* + * Slave mii_bus and devices for the individual ports. + */ + int cpu_port; + u32 valid_port_mask; + struct mii_bus *slave_mii_bus; + struct net_device *ports[DSA_MAX_PORTS]; + + /* + * Link state polling. + */ + struct work_struct link_poll_work; + struct timer_list link_poll_timer; +}; + +struct dsa_slave_priv { + struct net_device *dev; + struct dsa_switch *parent; + int port; + struct phy_device *phy; +}; + +struct dsa_switch_driver { + struct list_head list; + + __be16 tag_protocol; + int priv_size; + + /* + * Probing and setup. + */ + char *(*probe)(struct mii_bus *bus, int sw_addr); + int (*setup)(struct dsa_switch *ds); + int (*set_addr)(struct dsa_switch *ds, u8 *addr); + + /* + * Access to the switch's PHY registers. + */ + int (*phy_read)(struct dsa_switch *ds, int port, int regnum); + int (*phy_write)(struct dsa_switch *ds, int port, + int regnum, u16 val); + + /* + * Link state polling and IRQ handling. + */ + void (*poll_link)(struct dsa_switch *ds); + + /* + * ethtool hardware statistics. + */ + void (*get_strings)(struct dsa_switch *ds, int port, uint8_t *data); + void (*get_ethtool_stats)(struct dsa_switch *ds, + int port, uint64_t *data); + int (*get_sset_count)(struct dsa_switch *ds); +}; + +/* dsa.c */ +extern char dsa_driver_version[]; +void register_switch_driver(struct dsa_switch_driver *type); +void unregister_switch_driver(struct dsa_switch_driver *type); + +/* slave.c */ +void dsa_slave_mii_bus_init(struct dsa_switch *ds); +struct net_device *dsa_slave_create(struct dsa_switch *ds, + struct device *parent, + int port, char *name); + +/* tag_dsa.c */ +int dsa_xmit(struct sk_buff *skb, struct net_device *dev); + +/* tag_edsa.c */ +int edsa_xmit(struct sk_buff *skb, struct net_device *dev); + +/* tag_trailer.c */ +int trailer_xmit(struct sk_buff *skb, struct net_device *dev); + + +#endif diff --git a/net/dsa/mv88e6060.c b/net/dsa/mv88e6060.c new file mode 100644 index 000000000000..54068ef251e8 --- /dev/null +++ b/net/dsa/mv88e6060.c @@ -0,0 +1,287 @@ +/* + * net/dsa/mv88e6060.c - Driver for Marvell 88e6060 switch chips + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/phy.h> +#include "dsa_priv.h" + +#define REG_PORT(p) (8 + (p)) +#define REG_GLOBAL 0x0f + +static int reg_read(struct dsa_switch *ds, int addr, int reg) +{ + return mdiobus_read(ds->master_mii_bus, addr, reg); +} + +#define REG_READ(addr, reg) \ + ({ \ + int __ret; \ + \ + __ret = reg_read(ds, addr, reg); \ + if (__ret < 0) \ + return __ret; \ + __ret; \ + }) + + +static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) +{ + return mdiobus_write(ds->master_mii_bus, addr, reg, val); +} + +#define REG_WRITE(addr, reg, val) \ + ({ \ + int __ret; \ + \ + __ret = reg_write(ds, addr, reg, val); \ + if (__ret < 0) \ + return __ret; \ + }) + +static char *mv88e6060_probe(struct mii_bus *bus, int sw_addr) +{ + int ret; + + ret = mdiobus_read(bus, REG_PORT(0), 0x03); + if (ret >= 0) { + ret &= 0xfff0; + if (ret == 0x0600) + return "Marvell 88E6060"; + } + + return NULL; +} + +static int mv88e6060_switch_reset(struct dsa_switch *ds) +{ + int i; + int ret; + + /* + * Set all ports to the disabled state. + */ + for (i = 0; i < 6; i++) { + ret = REG_READ(REG_PORT(i), 0x04); + REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); + } + + /* + * Wait for transmit queues to drain. + */ + msleep(2); + + /* + * Reset the switch. + */ + REG_WRITE(REG_GLOBAL, 0x0A, 0xa130); + + /* + * Wait up to one second for reset to complete. + */ + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + if ((ret & 0x8000) == 0x0000) + break; + + msleep(1); + } + if (i == 1000) + return -ETIMEDOUT; + + return 0; +} + +static int mv88e6060_setup_global(struct dsa_switch *ds) +{ + /* + * Disable discarding of frames with excessive collisions, + * set the maximum frame size to 1536 bytes, and mask all + * interrupt sources. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0x0800); + + /* + * Enable automatic address learning, set the address + * database size to 1024 entries, and set the default aging + * time to 5 minutes. + */ + REG_WRITE(REG_GLOBAL, 0x0a, 0x2130); + + return 0; +} + +static int mv88e6060_setup_port(struct dsa_switch *ds, int p) +{ + int addr = REG_PORT(p); + + /* + * Do not force flow control, disable Ingress and Egress + * Header tagging, disable VLAN tunneling, and set the port + * state to Forwarding. Additionally, if this is the CPU + * port, enable Ingress and Egress Trailer tagging mode. + */ + REG_WRITE(addr, 0x04, (p == ds->cpu_port) ? 0x4103 : 0x0003); + + /* + * Port based VLAN map: give each port its own address + * database, allow the CPU port to talk to each of the 'real' + * ports, and allow each of the 'real' ports to only talk to + * the CPU port. + */ + REG_WRITE(addr, 0x06, + ((p & 0xf) << 12) | + ((p == ds->cpu_port) ? + ds->valid_port_mask : + (1 << ds->cpu_port))); + + /* + * Port Association Vector: when learning source addresses + * of packets, add the address to the address database using + * a port bitmap that has only the bit for this port set and + * the other bits clear. + */ + REG_WRITE(addr, 0x0b, 1 << p); + + return 0; +} + +static int mv88e6060_setup(struct dsa_switch *ds) +{ + int i; + int ret; + + ret = mv88e6060_switch_reset(ds); + if (ret < 0) + return ret; + + /* @@@ initialise atu */ + + ret = mv88e6060_setup_global(ds); + if (ret < 0) + return ret; + + for (i = 0; i < 6; i++) { + ret = mv88e6060_setup_port(ds, i); + if (ret < 0) + return ret; + } + + return 0; +} + +static int mv88e6060_set_addr(struct dsa_switch *ds, u8 *addr) +{ + REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]); + REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]); + REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]); + + return 0; +} + +static int mv88e6060_port_to_phy_addr(int port) +{ + if (port >= 0 && port <= 5) + return port; + return -1; +} + +static int mv88e6060_phy_read(struct dsa_switch *ds, int port, int regnum) +{ + int addr; + + addr = mv88e6060_port_to_phy_addr(port); + if (addr == -1) + return 0xffff; + + return reg_read(ds, addr, regnum); +} + +static int +mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) +{ + int addr; + + addr = mv88e6060_port_to_phy_addr(port); + if (addr == -1) + return 0xffff; + + return reg_write(ds, addr, regnum, val); +} + +static void mv88e6060_poll_link(struct dsa_switch *ds) +{ + int i; + + for (i = 0; i < DSA_MAX_PORTS; i++) { + struct net_device *dev; + int port_status; + int link; + int speed; + int duplex; + int fc; + + dev = ds->ports[i]; + if (dev == NULL) + continue; + + link = 0; + if (dev->flags & IFF_UP) { + port_status = reg_read(ds, REG_PORT(i), 0x00); + if (port_status < 0) + continue; + + link = !!(port_status & 0x1000); + } + + if (!link) { + if (netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link down\n", dev->name); + netif_carrier_off(dev); + } + continue; + } + + speed = (port_status & 0x0100) ? 100 : 10; + duplex = (port_status & 0x0200) ? 1 : 0; + fc = ((port_status & 0xc000) == 0xc000) ? 1 : 0; + + if (!netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, " + "flow control %sabled\n", dev->name, + speed, duplex ? "full" : "half", + fc ? "en" : "dis"); + netif_carrier_on(dev); + } + } +} + +static struct dsa_switch_driver mv88e6060_switch_driver = { + .tag_protocol = htons(ETH_P_TRAILER), + .probe = mv88e6060_probe, + .setup = mv88e6060_setup, + .set_addr = mv88e6060_set_addr, + .phy_read = mv88e6060_phy_read, + .phy_write = mv88e6060_phy_write, + .poll_link = mv88e6060_poll_link, +}; + +int __init mv88e6060_init(void) +{ + register_switch_driver(&mv88e6060_switch_driver); + return 0; +} +module_init(mv88e6060_init); + +void __exit mv88e6060_cleanup(void) +{ + unregister_switch_driver(&mv88e6060_switch_driver); +} +module_exit(mv88e6060_cleanup); diff --git a/net/dsa/mv88e6123_61_65.c b/net/dsa/mv88e6123_61_65.c new file mode 100644 index 000000000000..555b164082fc --- /dev/null +++ b/net/dsa/mv88e6123_61_65.c @@ -0,0 +1,421 @@ +/* + * net/dsa/mv88e6123_61_65.c - Marvell 88e6123/6161/6165 switch chip support + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/phy.h> +#include "dsa_priv.h" +#include "mv88e6xxx.h" + +static char *mv88e6123_61_65_probe(struct mii_bus *bus, int sw_addr) +{ + int ret; + + ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); + if (ret >= 0) { + ret &= 0xfff0; + if (ret == 0x1210) + return "Marvell 88E6123"; + if (ret == 0x1610) + return "Marvell 88E6161"; + if (ret == 0x1650) + return "Marvell 88E6165"; + } + + return NULL; +} + +static int mv88e6123_61_65_switch_reset(struct dsa_switch *ds) +{ + int i; + int ret; + + /* + * Set all ports to the disabled state. + */ + for (i = 0; i < 8; i++) { + ret = REG_READ(REG_PORT(i), 0x04); + REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); + } + + /* + * Wait for transmit queues to drain. + */ + msleep(2); + + /* + * Reset the switch. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0xc400); + + /* + * Wait up to one second for reset to complete. + */ + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + if ((ret & 0xc800) == 0xc800) + break; + + msleep(1); + } + if (i == 1000) + return -ETIMEDOUT; + + return 0; +} + +static int mv88e6123_61_65_setup_global(struct dsa_switch *ds) +{ + int ret; + int i; + + /* + * Disable the PHY polling unit (since there won't be any + * external PHYs to poll), don't discard packets with + * excessive collisions, and mask all interrupt sources. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0x0000); + + /* + * Set the default address aging time to 5 minutes, and + * enable address learn messages to be sent to all message + * ports. + */ + REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); + + /* + * Configure the priority mapping registers. + */ + ret = mv88e6xxx_config_prio(ds); + if (ret < 0) + return ret; + + /* + * Configure the cpu port, and configure the cpu port as the + * port to which ingress and egress monitor frames are to be + * sent. + */ + REG_WRITE(REG_GLOBAL, 0x1a, (ds->cpu_port * 0x1110)); + + /* + * Disable remote management for now, and set the switch's + * DSA device number to zero. + */ + REG_WRITE(REG_GLOBAL, 0x1c, 0x0000); + + /* + * Send all frames with destination addresses matching + * 01:80:c2:00:00:2x to the CPU port. + */ + REG_WRITE(REG_GLOBAL2, 0x02, 0xffff); + + /* + * Send all frames with destination addresses matching + * 01:80:c2:00:00:0x to the CPU port. + */ + REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); + + /* + * Disable the loopback filter, disable flow control + * messages, disable flood broadcast override, disable + * removing of provider tags, disable ATU age violation + * interrupts, disable tag flow control, force flow + * control priority to the highest, and send all special + * multicast frames to the CPU at the highest priority. + */ + REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); + + /* + * Map all DSA device IDs to the CPU port. + */ + for (i = 0; i < 32; i++) + REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | ds->cpu_port); + + /* + * Clear all trunk masks. + */ + for (i = 0; i < 8; i++) + REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff); + + /* + * Clear all trunk mappings. + */ + for (i = 0; i < 16; i++) + REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); + + /* + * Disable ingress rate limiting by resetting all ingress + * rate limit registers to their initial state. + */ + for (i = 0; i < 6; i++) + REG_WRITE(REG_GLOBAL2, 0x09, 0x9000 | (i << 8)); + + /* + * Initialise cross-chip port VLAN table to reset defaults. + */ + REG_WRITE(REG_GLOBAL2, 0x0b, 0x9000); + + /* + * Clear the priority override table. + */ + for (i = 0; i < 16; i++) + REG_WRITE(REG_GLOBAL2, 0x0f, 0x8000 | (i << 8)); + + /* @@@ initialise AVB (22/23) watchdog (27) sdet (29) registers */ + + return 0; +} + +static int mv88e6123_61_65_setup_port(struct dsa_switch *ds, int p) +{ + int addr = REG_PORT(p); + + /* + * MAC Forcing register: don't force link, speed, duplex + * or flow control state to any particular values. + */ + REG_WRITE(addr, 0x01, 0x0003); + + /* + * Do not limit the period of time that this port can be + * paused for by the remote end or the period of time that + * this port can pause the remote end. + */ + REG_WRITE(addr, 0x02, 0x0000); + + /* + * Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, + * configure the requested (DSA/EDSA) tagging mode if this is + * the CPU port, disable Header mode, enable IGMP/MLD snooping, + * disable VLAN tunneling, determine priority by looking at + * 802.1p and IP priority fields (IP prio has precedence), and + * set STP state to Forwarding. Finally, if this is the CPU + * port, additionally enable forwarding of unknown unicast and + * multicast addresses. + */ + REG_WRITE(addr, 0x04, + (p == ds->cpu_port) ? + (ds->tag_protocol == htons(ETH_P_DSA)) ? + 0x053f : 0x373f : + 0x0433); + + /* + * Port Control 1: disable trunking. Also, if this is the + * CPU port, enable learn messages to be sent to this port. + */ + REG_WRITE(addr, 0x05, (p == ds->cpu_port) ? 0x8000 : 0x0000); + + /* + * Port based VLAN map: give each port its own address + * database, allow the CPU port to talk to each of the 'real' + * ports, and allow each of the 'real' ports to only talk to + * the CPU port. + */ + REG_WRITE(addr, 0x06, + ((p & 0xf) << 12) | + ((p == ds->cpu_port) ? + ds->valid_port_mask : + (1 << ds->cpu_port))); + + /* + * Default VLAN ID and priority: don't set a default VLAN + * ID, and set the default packet priority to zero. + */ + REG_WRITE(addr, 0x07, 0x0000); + + /* + * Port Control 2: don't force a good FCS, set the maximum + * frame size to 10240 bytes, don't let the switch add or + * strip 802.1q tags, don't discard tagged or untagged frames + * on this port, do a destination address lookup on all + * received packets as usual, disable ARP mirroring and don't + * send a copy of all transmitted/received frames on this port + * to the CPU. + */ + REG_WRITE(addr, 0x08, 0x2080); + + /* + * Egress rate control: disable egress rate control. + */ + REG_WRITE(addr, 0x09, 0x0001); + + /* + * Egress rate control 2: disable egress rate control. + */ + REG_WRITE(addr, 0x0a, 0x0000); + + /* + * Port Association Vector: when learning source addresses + * of packets, add the address to the address database using + * a port bitmap that has only the bit for this port set and + * the other bits clear. + */ + REG_WRITE(addr, 0x0b, 1 << p); + + /* + * Port ATU control: disable limiting the number of address + * database entries that this port is allowed to use. + */ + REG_WRITE(addr, 0x0c, 0x0000); + + /* + * Priorit Override: disable DA, SA and VTU priority override. + */ + REG_WRITE(addr, 0x0d, 0x0000); + + /* + * Port Ethertype: use the Ethertype DSA Ethertype value. + */ + REG_WRITE(addr, 0x0f, ETH_P_EDSA); + + /* + * Tag Remap: use an identity 802.1p prio -> switch prio + * mapping. + */ + REG_WRITE(addr, 0x18, 0x3210); + + /* + * Tag Remap 2: use an identity 802.1p prio -> switch prio + * mapping. + */ + REG_WRITE(addr, 0x19, 0x7654); + + return 0; +} + +static int mv88e6123_61_65_setup(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int i; + int ret; + + mutex_init(&ps->smi_mutex); + mutex_init(&ps->stats_mutex); + + ret = mv88e6123_61_65_switch_reset(ds); + if (ret < 0) + return ret; + + /* @@@ initialise vtu and atu */ + + ret = mv88e6123_61_65_setup_global(ds); + if (ret < 0) + return ret; + + for (i = 0; i < 6; i++) { + ret = mv88e6123_61_65_setup_port(ds, i); + if (ret < 0) + return ret; + } + + return 0; +} + +static int mv88e6123_61_65_port_to_phy_addr(int port) +{ + if (port >= 0 && port <= 4) + return port; + return -1; +} + +static int +mv88e6123_61_65_phy_read(struct dsa_switch *ds, int port, int regnum) +{ + int addr = mv88e6123_61_65_port_to_phy_addr(port); + return mv88e6xxx_phy_read(ds, addr, regnum); +} + +static int +mv88e6123_61_65_phy_write(struct dsa_switch *ds, + int port, int regnum, u16 val) +{ + int addr = mv88e6123_61_65_port_to_phy_addr(port); + return mv88e6xxx_phy_write(ds, addr, regnum, val); +} + +static struct mv88e6xxx_hw_stat mv88e6123_61_65_hw_stats[] = { + { "in_good_octets", 8, 0x00, }, + { "in_bad_octets", 4, 0x02, }, + { "in_unicast", 4, 0x04, }, + { "in_broadcasts", 4, 0x06, }, + { "in_multicasts", 4, 0x07, }, + { "in_pause", 4, 0x16, }, + { "in_undersize", 4, 0x18, }, + { "in_fragments", 4, 0x19, }, + { "in_oversize", 4, 0x1a, }, + { "in_jabber", 4, 0x1b, }, + { "in_rx_error", 4, 0x1c, }, + { "in_fcs_error", 4, 0x1d, }, + { "out_octets", 8, 0x0e, }, + { "out_unicast", 4, 0x10, }, + { "out_broadcasts", 4, 0x13, }, + { "out_multicasts", 4, 0x12, }, + { "out_pause", 4, 0x15, }, + { "excessive", 4, 0x11, }, + { "collisions", 4, 0x1e, }, + { "deferred", 4, 0x05, }, + { "single", 4, 0x14, }, + { "multiple", 4, 0x17, }, + { "out_fcs_error", 4, 0x03, }, + { "late", 4, 0x1f, }, + { "hist_64bytes", 4, 0x08, }, + { "hist_65_127bytes", 4, 0x09, }, + { "hist_128_255bytes", 4, 0x0a, }, + { "hist_256_511bytes", 4, 0x0b, }, + { "hist_512_1023bytes", 4, 0x0c, }, + { "hist_1024_max_bytes", 4, 0x0d, }, +}; + +static void +mv88e6123_61_65_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +{ + mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats), + mv88e6123_61_65_hw_stats, port, data); +} + +static void +mv88e6123_61_65_get_ethtool_stats(struct dsa_switch *ds, + int port, uint64_t *data) +{ + mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6123_61_65_hw_stats), + mv88e6123_61_65_hw_stats, port, data); +} + +static int mv88e6123_61_65_get_sset_count(struct dsa_switch *ds) +{ + return ARRAY_SIZE(mv88e6123_61_65_hw_stats); +} + +static struct dsa_switch_driver mv88e6123_61_65_switch_driver = { + .tag_protocol = __constant_htons(ETH_P_EDSA), + .priv_size = sizeof(struct mv88e6xxx_priv_state), + .probe = mv88e6123_61_65_probe, + .setup = mv88e6123_61_65_setup, + .set_addr = mv88e6xxx_set_addr_indirect, + .phy_read = mv88e6123_61_65_phy_read, + .phy_write = mv88e6123_61_65_phy_write, + .poll_link = mv88e6xxx_poll_link, + .get_strings = mv88e6123_61_65_get_strings, + .get_ethtool_stats = mv88e6123_61_65_get_ethtool_stats, + .get_sset_count = mv88e6123_61_65_get_sset_count, +}; + +int __init mv88e6123_61_65_init(void) +{ + register_switch_driver(&mv88e6123_61_65_switch_driver); + return 0; +} +module_init(mv88e6123_61_65_init); + +void __exit mv88e6123_61_65_cleanup(void) +{ + unregister_switch_driver(&mv88e6123_61_65_switch_driver); +} +module_exit(mv88e6123_61_65_cleanup); diff --git a/net/dsa/mv88e6131.c b/net/dsa/mv88e6131.c new file mode 100644 index 000000000000..36e01eb863a0 --- /dev/null +++ b/net/dsa/mv88e6131.c @@ -0,0 +1,380 @@ +/* + * net/dsa/mv88e6131.c - Marvell 88e6131 switch chip support + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/phy.h> +#include "dsa_priv.h" +#include "mv88e6xxx.h" + +static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr) +{ + int ret; + + ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); + if (ret >= 0) { + ret &= 0xfff0; + if (ret == 0x1060) + return "Marvell 88E6131"; + } + + return NULL; +} + +static int mv88e6131_switch_reset(struct dsa_switch *ds) +{ + int i; + int ret; + + /* + * Set all ports to the disabled state. + */ + for (i = 0; i < 8; i++) { + ret = REG_READ(REG_PORT(i), 0x04); + REG_WRITE(REG_PORT(i), 0x04, ret & 0xfffc); + } + + /* + * Wait for transmit queues to drain. + */ + msleep(2); + + /* + * Reset the switch. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0xc400); + + /* + * Wait up to one second for reset to complete. + */ + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + if ((ret & 0xc800) == 0xc800) + break; + + msleep(1); + } + if (i == 1000) + return -ETIMEDOUT; + + return 0; +} + +static int mv88e6131_setup_global(struct dsa_switch *ds) +{ + int ret; + int i; + + /* + * Enable the PHY polling unit, don't discard packets with + * excessive collisions, use a weighted fair queueing scheme + * to arbitrate between packet queues, set the maximum frame + * size to 1632, and mask all interrupt sources. + */ + REG_WRITE(REG_GLOBAL, 0x04, 0x4400); + + /* + * Set the default address aging time to 5 minutes, and + * enable address learn messages to be sent to all message + * ports. + */ + REG_WRITE(REG_GLOBAL, 0x0a, 0x0148); + + /* + * Configure the priority mapping registers. + */ + ret = mv88e6xxx_config_prio(ds); + if (ret < 0) + return ret; + + /* + * Set the VLAN ethertype to 0x8100. + */ + REG_WRITE(REG_GLOBAL, 0x19, 0x8100); + + /* + * Disable ARP mirroring, and configure the cpu port as the + * port to which ingress and egress monitor frames are to be + * sent. + */ + REG_WRITE(REG_GLOBAL, 0x1a, (ds->cpu_port * 0x1100) | 0x00f0); + + /* + * Disable cascade port functionality, and set the switch's + * DSA device number to zero. + */ + REG_WRITE(REG_GLOBAL, 0x1c, 0xe000); + + /* + * Send all frames with destination addresses matching + * 01:80:c2:00:00:0x to the CPU port. + */ + REG_WRITE(REG_GLOBAL2, 0x03, 0xffff); + + /* + * Ignore removed tag data on doubly tagged packets, disable + * flow control messages, force flow control priority to the + * highest, and send all special multicast frames to the CPU + * port at the higest priority. + */ + REG_WRITE(REG_GLOBAL2, 0x05, 0x00ff); + + /* + * Map all DSA device IDs to the CPU port. + */ + for (i = 0; i < 32; i++) + REG_WRITE(REG_GLOBAL2, 0x06, 0x8000 | (i << 8) | ds->cpu_port); + + /* + * Clear all trunk masks. + */ + for (i = 0; i < 8; i++) + REG_WRITE(REG_GLOBAL2, 0x07, 0x8000 | (i << 12) | 0xff); + + /* + * Clear all trunk mappings. + */ + for (i = 0; i < 16; i++) + REG_WRITE(REG_GLOBAL2, 0x08, 0x8000 | (i << 11)); + + /* + * Force the priority of IGMP/MLD snoop frames and ARP frames + * to the highest setting. + */ + REG_WRITE(REG_GLOBAL2, 0x0f, 0x00ff); + + return 0; +} + +static int mv88e6131_setup_port(struct dsa_switch *ds, int p) +{ + int addr = REG_PORT(p); + + /* + * MAC Forcing register: don't force link, speed, duplex + * or flow control state to any particular values. + */ + REG_WRITE(addr, 0x01, 0x0003); + + /* + * Port Control: disable Core Tag, disable Drop-on-Lock, + * transmit frames unmodified, disable Header mode, + * enable IGMP/MLD snoop, disable DoubleTag, disable VLAN + * tunneling, determine priority by looking at 802.1p and + * IP priority fields (IP prio has precedence), and set STP + * state to Forwarding. Finally, if this is the CPU port, + * additionally enable DSA tagging and forwarding of unknown + * unicast addresses. + */ + REG_WRITE(addr, 0x04, (p == ds->cpu_port) ? 0x0537 : 0x0433); + + /* + * Port Control 1: disable trunking. Also, if this is the + * CPU port, enable learn messages to be sent to this port. + */ + REG_WRITE(addr, 0x05, (p == ds->cpu_port) ? 0x8000 : 0x0000); + + /* + * Port based VLAN map: give each port its own address + * database, allow the CPU port to talk to each of the 'real' + * ports, and allow each of the 'real' ports to only talk to + * the CPU port. + */ + REG_WRITE(addr, 0x06, + ((p & 0xf) << 12) | + ((p == ds->cpu_port) ? + ds->valid_port_mask : + (1 << ds->cpu_port))); + + /* + * Default VLAN ID and priority: don't set a default VLAN + * ID, and set the default packet priority to zero. + */ + REG_WRITE(addr, 0x07, 0x0000); + + /* + * Port Control 2: don't force a good FCS, don't use + * VLAN-based, source address-based or destination + * address-based priority overrides, don't let the switch + * add or strip 802.1q tags, don't discard tagged or + * untagged frames on this port, do a destination address + * lookup on received packets as usual, don't send a copy + * of all transmitted/received frames on this port to the + * CPU, and configure the CPU port number. Also, if this + * is the CPU port, enable forwarding of unknown multicast + * addresses. + */ + REG_WRITE(addr, 0x08, + ((p == ds->cpu_port) ? 0x00c0 : 0x0080) | + ds->cpu_port); + + /* + * Rate Control: disable ingress rate limiting. + */ + REG_WRITE(addr, 0x09, 0x0000); + + /* + * Rate Control 2: disable egress rate limiting. + */ + REG_WRITE(addr, 0x0a, 0x0000); + + /* + * Port Association Vector: when learning source addresses + * of packets, add the address to the address database using + * a port bitmap that has only the bit for this port set and + * the other bits clear. + */ + REG_WRITE(addr, 0x0b, 1 << p); + + /* + * Tag Remap: use an identity 802.1p prio -> switch prio + * mapping. + */ + REG_WRITE(addr, 0x18, 0x3210); + + /* + * Tag Remap 2: use an identity 802.1p prio -> switch prio + * mapping. + */ + REG_WRITE(addr, 0x19, 0x7654); + + return 0; +} + +static int mv88e6131_setup(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int i; + int ret; + + mutex_init(&ps->smi_mutex); + mv88e6xxx_ppu_state_init(ds); + mutex_init(&ps->stats_mutex); + + ret = mv88e6131_switch_reset(ds); + if (ret < 0) + return ret; + + /* @@@ initialise vtu and atu */ + + ret = mv88e6131_setup_global(ds); + if (ret < 0) + return ret; + + for (i = 0; i < 6; i++) { + ret = mv88e6131_setup_port(ds, i); + if (ret < 0) + return ret; + } + + return 0; +} + +static int mv88e6131_port_to_phy_addr(int port) +{ + if (port >= 0 && port != 3 && port <= 7) + return port; + return -1; +} + +static int +mv88e6131_phy_read(struct dsa_switch *ds, int port, int regnum) +{ + int addr = mv88e6131_port_to_phy_addr(port); + return mv88e6xxx_phy_read_ppu(ds, addr, regnum); +} + +static int +mv88e6131_phy_write(struct dsa_switch *ds, + int port, int regnum, u16 val) +{ + int addr = mv88e6131_port_to_phy_addr(port); + return mv88e6xxx_phy_write_ppu(ds, addr, regnum, val); +} + +static struct mv88e6xxx_hw_stat mv88e6131_hw_stats[] = { + { "in_good_octets", 8, 0x00, }, + { "in_bad_octets", 4, 0x02, }, + { "in_unicast", 4, 0x04, }, + { "in_broadcasts", 4, 0x06, }, + { "in_multicasts", 4, 0x07, }, + { "in_pause", 4, 0x16, }, + { "in_undersize", 4, 0x18, }, + { "in_fragments", 4, 0x19, }, + { "in_oversize", 4, 0x1a, }, + { "in_jabber", 4, 0x1b, }, + { "in_rx_error", 4, 0x1c, }, + { "in_fcs_error", 4, 0x1d, }, + { "out_octets", 8, 0x0e, }, + { "out_unicast", 4, 0x10, }, + { "out_broadcasts", 4, 0x13, }, + { "out_multicasts", 4, 0x12, }, + { "out_pause", 4, 0x15, }, + { "excessive", 4, 0x11, }, + { "collisions", 4, 0x1e, }, + { "deferred", 4, 0x05, }, + { "single", 4, 0x14, }, + { "multiple", 4, 0x17, }, + { "out_fcs_error", 4, 0x03, }, + { "late", 4, 0x1f, }, + { "hist_64bytes", 4, 0x08, }, + { "hist_65_127bytes", 4, 0x09, }, + { "hist_128_255bytes", 4, 0x0a, }, + { "hist_256_511bytes", 4, 0x0b, }, + { "hist_512_1023bytes", 4, 0x0c, }, + { "hist_1024_max_bytes", 4, 0x0d, }, +}; + +static void +mv88e6131_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +{ + mv88e6xxx_get_strings(ds, ARRAY_SIZE(mv88e6131_hw_stats), + mv88e6131_hw_stats, port, data); +} + +static void +mv88e6131_get_ethtool_stats(struct dsa_switch *ds, + int port, uint64_t *data) +{ + mv88e6xxx_get_ethtool_stats(ds, ARRAY_SIZE(mv88e6131_hw_stats), + mv88e6131_hw_stats, port, data); +} + +static int mv88e6131_get_sset_count(struct dsa_switch *ds) +{ + return ARRAY_SIZE(mv88e6131_hw_stats); +} + +static struct dsa_switch_driver mv88e6131_switch_driver = { + .tag_protocol = __constant_htons(ETH_P_DSA), + .priv_size = sizeof(struct mv88e6xxx_priv_state), + .probe = mv88e6131_probe, + .setup = mv88e6131_setup, + .set_addr = mv88e6xxx_set_addr_direct, + .phy_read = mv88e6131_phy_read, + .phy_write = mv88e6131_phy_write, + .poll_link = mv88e6xxx_poll_link, + .get_strings = mv88e6131_get_strings, + .get_ethtool_stats = mv88e6131_get_ethtool_stats, + .get_sset_count = mv88e6131_get_sset_count, +}; + +int __init mv88e6131_init(void) +{ + register_switch_driver(&mv88e6131_switch_driver); + return 0; +} +module_init(mv88e6131_init); + +void __exit mv88e6131_cleanup(void) +{ + unregister_switch_driver(&mv88e6131_switch_driver); +} +module_exit(mv88e6131_cleanup); diff --git a/net/dsa/mv88e6xxx.c b/net/dsa/mv88e6xxx.c new file mode 100644 index 000000000000..aa6c609c59f2 --- /dev/null +++ b/net/dsa/mv88e6xxx.c @@ -0,0 +1,522 @@ +/* + * net/dsa/mv88e6xxx.c - Marvell 88e6xxx switch chip support + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/phy.h> +#include "dsa_priv.h" +#include "mv88e6xxx.h" + +/* + * If the switch's ADDR[4:0] strap pins are strapped to zero, it will + * use all 32 SMI bus addresses on its SMI bus, and all switch registers + * will be directly accessible on some {device address,register address} + * pair. If the ADDR[4:0] pins are not strapped to zero, the switch + * will only respond to SMI transactions to that specific address, and + * an indirect addressing mechanism needs to be used to access its + * registers. + */ +static int mv88e6xxx_reg_wait_ready(struct mii_bus *bus, int sw_addr) +{ + int ret; + int i; + + for (i = 0; i < 16; i++) { + ret = mdiobus_read(bus, sw_addr, 0); + if (ret < 0) + return ret; + + if ((ret & 0x8000) == 0) + return 0; + } + + return -ETIMEDOUT; +} + +int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg) +{ + int ret; + + if (sw_addr == 0) + return mdiobus_read(bus, addr, reg); + + /* + * Wait for the bus to become free. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + /* + * Transmit the read command. + */ + ret = mdiobus_write(bus, sw_addr, 0, 0x9800 | (addr << 5) | reg); + if (ret < 0) + return ret; + + /* + * Wait for the read command to complete. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + /* + * Read the data. + */ + ret = mdiobus_read(bus, sw_addr, 1); + if (ret < 0) + return ret; + + return ret & 0xffff; +} + +int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int ret; + + mutex_lock(&ps->smi_mutex); + ret = __mv88e6xxx_reg_read(ds->master_mii_bus, + ds->pd->sw_addr, addr, reg); + mutex_unlock(&ps->smi_mutex); + + return ret; +} + +int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, + int reg, u16 val) +{ + int ret; + + if (sw_addr == 0) + return mdiobus_write(bus, addr, reg, val); + + /* + * Wait for the bus to become free. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + /* + * Transmit the data to write. + */ + ret = mdiobus_write(bus, sw_addr, 1, val); + if (ret < 0) + return ret; + + /* + * Transmit the write command. + */ + ret = mdiobus_write(bus, sw_addr, 0, 0x9400 | (addr << 5) | reg); + if (ret < 0) + return ret; + + /* + * Wait for the write command to complete. + */ + ret = mv88e6xxx_reg_wait_ready(bus, sw_addr); + if (ret < 0) + return ret; + + return 0; +} + +int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int ret; + + mutex_lock(&ps->smi_mutex); + ret = __mv88e6xxx_reg_write(ds->master_mii_bus, + ds->pd->sw_addr, addr, reg, val); + mutex_unlock(&ps->smi_mutex); + + return ret; +} + +int mv88e6xxx_config_prio(struct dsa_switch *ds) +{ + /* + * Configure the IP ToS mapping registers. + */ + REG_WRITE(REG_GLOBAL, 0x10, 0x0000); + REG_WRITE(REG_GLOBAL, 0x11, 0x0000); + REG_WRITE(REG_GLOBAL, 0x12, 0x5555); + REG_WRITE(REG_GLOBAL, 0x13, 0x5555); + REG_WRITE(REG_GLOBAL, 0x14, 0xaaaa); + REG_WRITE(REG_GLOBAL, 0x15, 0xaaaa); + REG_WRITE(REG_GLOBAL, 0x16, 0xffff); + REG_WRITE(REG_GLOBAL, 0x17, 0xffff); + + /* + * Configure the IEEE 802.1p priority mapping register. + */ + REG_WRITE(REG_GLOBAL, 0x18, 0xfa41); + + return 0; +} + +int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr) +{ + REG_WRITE(REG_GLOBAL, 0x01, (addr[0] << 8) | addr[1]); + REG_WRITE(REG_GLOBAL, 0x02, (addr[2] << 8) | addr[3]); + REG_WRITE(REG_GLOBAL, 0x03, (addr[4] << 8) | addr[5]); + + return 0; +} + +int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr) +{ + int i; + int ret; + + for (i = 0; i < 6; i++) { + int j; + + /* + * Write the MAC address byte. + */ + REG_WRITE(REG_GLOBAL2, 0x0d, 0x8000 | (i << 8) | addr[i]); + + /* + * Wait for the write to complete. + */ + for (j = 0; j < 16; j++) { + ret = REG_READ(REG_GLOBAL2, 0x0d); + if ((ret & 0x8000) == 0) + break; + } + if (j == 16) + return -ETIMEDOUT; + } + + return 0; +} + +int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum) +{ + if (addr >= 0) + return mv88e6xxx_reg_read(ds, addr, regnum); + return 0xffff; +} + +int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val) +{ + if (addr >= 0) + return mv88e6xxx_reg_write(ds, addr, regnum, val); + return 0; +} + +#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU +static int mv88e6xxx_ppu_disable(struct dsa_switch *ds) +{ + int ret; + int i; + + ret = REG_READ(REG_GLOBAL, 0x04); + REG_WRITE(REG_GLOBAL, 0x04, ret & ~0x4000); + + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + msleep(1); + if ((ret & 0xc000) != 0xc000) + return 0; + } + + return -ETIMEDOUT; +} + +static int mv88e6xxx_ppu_enable(struct dsa_switch *ds) +{ + int ret; + int i; + + ret = REG_READ(REG_GLOBAL, 0x04); + REG_WRITE(REG_GLOBAL, 0x04, ret | 0x4000); + + for (i = 0; i < 1000; i++) { + ret = REG_READ(REG_GLOBAL, 0x00); + msleep(1); + if ((ret & 0xc000) == 0xc000) + return 0; + } + + return -ETIMEDOUT; +} + +static void mv88e6xxx_ppu_reenable_work(struct work_struct *ugly) +{ + struct mv88e6xxx_priv_state *ps; + + ps = container_of(ugly, struct mv88e6xxx_priv_state, ppu_work); + if (mutex_trylock(&ps->ppu_mutex)) { + struct dsa_switch *ds = ((struct dsa_switch *)ps) - 1; + + if (mv88e6xxx_ppu_enable(ds) == 0) + ps->ppu_disabled = 0; + mutex_unlock(&ps->ppu_mutex); + } +} + +static void mv88e6xxx_ppu_reenable_timer(unsigned long _ps) +{ + struct mv88e6xxx_priv_state *ps = (void *)_ps; + + schedule_work(&ps->ppu_work); +} + +static int mv88e6xxx_ppu_access_get(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int ret; + + mutex_lock(&ps->ppu_mutex); + + /* + * If the PHY polling unit is enabled, disable it so that + * we can access the PHY registers. If it was already + * disabled, cancel the timer that is going to re-enable + * it. + */ + if (!ps->ppu_disabled) { + ret = mv88e6xxx_ppu_disable(ds); + if (ret < 0) { + mutex_unlock(&ps->ppu_mutex); + return ret; + } + ps->ppu_disabled = 1; + } else { + del_timer(&ps->ppu_timer); + ret = 0; + } + + return ret; +} + +static void mv88e6xxx_ppu_access_put(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + + /* + * Schedule a timer to re-enable the PHY polling unit. + */ + mod_timer(&ps->ppu_timer, jiffies + msecs_to_jiffies(10)); + mutex_unlock(&ps->ppu_mutex); +} + +void mv88e6xxx_ppu_state_init(struct dsa_switch *ds) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + + mutex_init(&ps->ppu_mutex); + INIT_WORK(&ps->ppu_work, mv88e6xxx_ppu_reenable_work); + init_timer(&ps->ppu_timer); + ps->ppu_timer.data = (unsigned long)ps; + ps->ppu_timer.function = mv88e6xxx_ppu_reenable_timer; +} + +int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum) +{ + int ret; + + ret = mv88e6xxx_ppu_access_get(ds); + if (ret >= 0) { + ret = mv88e6xxx_reg_read(ds, addr, regnum); + mv88e6xxx_ppu_access_put(ds); + } + + return ret; +} + +int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr, + int regnum, u16 val) +{ + int ret; + + ret = mv88e6xxx_ppu_access_get(ds); + if (ret >= 0) { + ret = mv88e6xxx_reg_write(ds, addr, regnum, val); + mv88e6xxx_ppu_access_put(ds); + } + + return ret; +} +#endif + +void mv88e6xxx_poll_link(struct dsa_switch *ds) +{ + int i; + + for (i = 0; i < DSA_MAX_PORTS; i++) { + struct net_device *dev; + int port_status; + int link; + int speed; + int duplex; + int fc; + + dev = ds->ports[i]; + if (dev == NULL) + continue; + + link = 0; + if (dev->flags & IFF_UP) { + port_status = mv88e6xxx_reg_read(ds, REG_PORT(i), 0x00); + if (port_status < 0) + continue; + + link = !!(port_status & 0x0800); + } + + if (!link) { + if (netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link down\n", dev->name); + netif_carrier_off(dev); + } + continue; + } + + switch (port_status & 0x0300) { + case 0x0000: + speed = 10; + break; + case 0x0100: + speed = 100; + break; + case 0x0200: + speed = 1000; + break; + default: + speed = -1; + break; + } + duplex = (port_status & 0x0400) ? 1 : 0; + fc = (port_status & 0x8000) ? 1 : 0; + + if (!netif_carrier_ok(dev)) { + printk(KERN_INFO "%s: link up, %d Mb/s, %s duplex, " + "flow control %sabled\n", dev->name, + speed, duplex ? "full" : "half", + fc ? "en" : "dis"); + netif_carrier_on(dev); + } + } +} + +static int mv88e6xxx_stats_wait(struct dsa_switch *ds) +{ + int ret; + int i; + + for (i = 0; i < 10; i++) { + ret = REG_READ(REG_GLOBAL2, 0x1d); + if ((ret & 0x8000) == 0) + return 0; + } + + return -ETIMEDOUT; +} + +static int mv88e6xxx_stats_snapshot(struct dsa_switch *ds, int port) +{ + int ret; + + /* + * Snapshot the hardware statistics counters for this port. + */ + REG_WRITE(REG_GLOBAL, 0x1d, 0xdc00 | port); + + /* + * Wait for the snapshotting to complete. + */ + ret = mv88e6xxx_stats_wait(ds); + if (ret < 0) + return ret; + + return 0; +} + +static void mv88e6xxx_stats_read(struct dsa_switch *ds, int stat, u32 *val) +{ + u32 _val; + int ret; + + *val = 0; + + ret = mv88e6xxx_reg_write(ds, REG_GLOBAL, 0x1d, 0xcc00 | stat); + if (ret < 0) + return; + + ret = mv88e6xxx_stats_wait(ds); + if (ret < 0) + return; + + ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1e); + if (ret < 0) + return; + + _val = ret << 16; + + ret = mv88e6xxx_reg_read(ds, REG_GLOBAL, 0x1f); + if (ret < 0) + return; + + *val = _val | ret; +} + +void mv88e6xxx_get_strings(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint8_t *data) +{ + int i; + + for (i = 0; i < nr_stats; i++) { + memcpy(data + i * ETH_GSTRING_LEN, + stats[i].string, ETH_GSTRING_LEN); + } +} + +void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint64_t *data) +{ + struct mv88e6xxx_priv_state *ps = (void *)(ds + 1); + int ret; + int i; + + mutex_lock(&ps->stats_mutex); + + ret = mv88e6xxx_stats_snapshot(ds, port); + if (ret < 0) { + mutex_unlock(&ps->stats_mutex); + return; + } + + /* + * Read each of the counters. + */ + for (i = 0; i < nr_stats; i++) { + struct mv88e6xxx_hw_stat *s = stats + i; + u32 low; + u32 high; + + mv88e6xxx_stats_read(ds, s->reg, &low); + if (s->sizeof_stat == 8) + mv88e6xxx_stats_read(ds, s->reg + 1, &high); + else + high = 0; + + data[i] = (((u64)high) << 32) | low; + } + + mutex_unlock(&ps->stats_mutex); +} diff --git a/net/dsa/mv88e6xxx.h b/net/dsa/mv88e6xxx.h new file mode 100644 index 000000000000..eb0e0aaa9f1b --- /dev/null +++ b/net/dsa/mv88e6xxx.h @@ -0,0 +1,93 @@ +/* + * net/dsa/mv88e6xxx.h - Marvell 88e6xxx switch chip support + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __MV88E6XXX_H +#define __MV88E6XXX_H + +#define REG_PORT(p) (0x10 + (p)) +#define REG_GLOBAL 0x1b +#define REG_GLOBAL2 0x1c + +struct mv88e6xxx_priv_state { + /* + * When using multi-chip addressing, this mutex protects + * access to the indirect access registers. (In single-chip + * mode, this mutex is effectively useless.) + */ + struct mutex smi_mutex; + +#ifdef CONFIG_NET_DSA_MV88E6XXX_NEED_PPU + /* + * Handles automatic disabling and re-enabling of the PHY + * polling unit. + */ + struct mutex ppu_mutex; + int ppu_disabled; + struct work_struct ppu_work; + struct timer_list ppu_timer; +#endif + + /* + * This mutex serialises access to the statistics unit. + * Hold this mutex over snapshot + dump sequences. + */ + struct mutex stats_mutex; +}; + +struct mv88e6xxx_hw_stat { + char string[ETH_GSTRING_LEN]; + int sizeof_stat; + int reg; +}; + +int __mv88e6xxx_reg_read(struct mii_bus *bus, int sw_addr, int addr, int reg); +int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg); +int __mv88e6xxx_reg_write(struct mii_bus *bus, int sw_addr, int addr, + int reg, u16 val); +int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val); +int mv88e6xxx_config_prio(struct dsa_switch *ds); +int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr); +int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr); +int mv88e6xxx_phy_read(struct dsa_switch *ds, int addr, int regnum); +int mv88e6xxx_phy_write(struct dsa_switch *ds, int addr, int regnum, u16 val); +void mv88e6xxx_ppu_state_init(struct dsa_switch *ds); +int mv88e6xxx_phy_read_ppu(struct dsa_switch *ds, int addr, int regnum); +int mv88e6xxx_phy_write_ppu(struct dsa_switch *ds, int addr, + int regnum, u16 val); +void mv88e6xxx_poll_link(struct dsa_switch *ds); +void mv88e6xxx_get_strings(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint8_t *data); +void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, + int nr_stats, struct mv88e6xxx_hw_stat *stats, + int port, uint64_t *data); + +#define REG_READ(addr, reg) \ + ({ \ + int __ret; \ + \ + __ret = mv88e6xxx_reg_read(ds, addr, reg); \ + if (__ret < 0) \ + return __ret; \ + __ret; \ + }) + +#define REG_WRITE(addr, reg, val) \ + ({ \ + int __ret; \ + \ + __ret = mv88e6xxx_reg_write(ds, addr, reg, val); \ + if (__ret < 0) \ + return __ret; \ + }) + + + +#endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c new file mode 100644 index 000000000000..1af5a79309e9 --- /dev/null +++ b/net/dsa/slave.c @@ -0,0 +1,366 @@ +/* + * net/dsa/slave.c - Slave device handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/list.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/phy.h> +#include "dsa_priv.h" + +/* slave mii_bus handling ***************************************************/ +static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) +{ + struct dsa_switch *ds = bus->priv; + + if (ds->valid_port_mask & (1 << addr)) + return ds->drv->phy_read(ds, addr, reg); + + return 0xffff; +} + +static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) +{ + struct dsa_switch *ds = bus->priv; + + if (ds->valid_port_mask & (1 << addr)) + return ds->drv->phy_write(ds, addr, reg, val); + + return 0; +} + +void dsa_slave_mii_bus_init(struct dsa_switch *ds) +{ + ds->slave_mii_bus->priv = (void *)ds; + ds->slave_mii_bus->name = "dsa slave smi"; + ds->slave_mii_bus->read = dsa_slave_phy_read; + ds->slave_mii_bus->write = dsa_slave_phy_write; + snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "%s:%.2x", + ds->master_mii_bus->id, ds->pd->sw_addr); + ds->slave_mii_bus->parent = &(ds->master_mii_bus->dev); +} + + +/* slave device handling ****************************************************/ +static int dsa_slave_open(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + int err; + + if (!(master->flags & IFF_UP)) + return -ENETDOWN; + + if (compare_ether_addr(dev->dev_addr, master->dev_addr)) { + err = dev_unicast_add(master, dev->dev_addr, ETH_ALEN); + if (err < 0) + goto out; + } + + if (dev->flags & IFF_ALLMULTI) { + err = dev_set_allmulti(master, 1); + if (err < 0) + goto del_unicast; + } + if (dev->flags & IFF_PROMISC) { + err = dev_set_promiscuity(master, 1); + if (err < 0) + goto clear_allmulti; + } + + return 0; + +clear_allmulti: + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(master, -1); +del_unicast: + if (compare_ether_addr(dev->dev_addr, master->dev_addr)) + dev_unicast_delete(master, dev->dev_addr, ETH_ALEN); +out: + return err; +} + +static int dsa_slave_close(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + + dev_mc_unsync(master, dev); + dev_unicast_unsync(master, dev); + if (dev->flags & IFF_ALLMULTI) + dev_set_allmulti(master, -1); + if (dev->flags & IFF_PROMISC) + dev_set_promiscuity(master, -1); + + if (compare_ether_addr(dev->dev_addr, master->dev_addr)) + dev_unicast_delete(master, dev->dev_addr, ETH_ALEN); + + return 0; +} + +static void dsa_slave_change_rx_flags(struct net_device *dev, int change) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + + if (change & IFF_ALLMULTI) + dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); + if (change & IFF_PROMISC) + dev_set_promiscuity(master, dev->flags & IFF_PROMISC ? 1 : -1); +} + +static void dsa_slave_set_rx_mode(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + + dev_mc_sync(master, dev); + dev_unicast_sync(master, dev); +} + +static int dsa_slave_set_mac_address(struct net_device *dev, void *a) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct net_device *master = p->parent->master_netdev; + struct sockaddr *addr = a; + int err; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + if (!(dev->flags & IFF_UP)) + goto out; + + if (compare_ether_addr(addr->sa_data, master->dev_addr)) { + err = dev_unicast_add(master, addr->sa_data, ETH_ALEN); + if (err < 0) + return err; + } + + if (compare_ether_addr(dev->dev_addr, master->dev_addr)) + dev_unicast_delete(master, dev->dev_addr, ETH_ALEN); + +out: + memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN); + + return 0; +} + +static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct mii_ioctl_data *mii_data = if_mii(ifr); + + if (p->phy != NULL) + return phy_mii_ioctl(p->phy, mii_data, cmd); + + return -EOPNOTSUPP; +} + + +/* ethtool operations *******************************************************/ +static int +dsa_slave_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + int err; + + err = -EOPNOTSUPP; + if (p->phy != NULL) { + err = phy_read_status(p->phy); + if (err == 0) + err = phy_ethtool_gset(p->phy, cmd); + } + + return err; +} + +static int +dsa_slave_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + if (p->phy != NULL) + return phy_ethtool_sset(p->phy, cmd); + + return -EOPNOTSUPP; +} + +static void dsa_slave_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *drvinfo) +{ + strncpy(drvinfo->driver, "dsa", 32); + strncpy(drvinfo->version, dsa_driver_version, 32); + strncpy(drvinfo->fw_version, "N/A", 32); + strncpy(drvinfo->bus_info, "platform", 32); +} + +static int dsa_slave_nway_reset(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + if (p->phy != NULL) + return genphy_restart_aneg(p->phy); + + return -EOPNOTSUPP; +} + +static u32 dsa_slave_get_link(struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + if (p->phy != NULL) { + genphy_update_link(p->phy); + return p->phy->link; + } + + return -EOPNOTSUPP; +} + +static void dsa_slave_get_strings(struct net_device *dev, + uint32_t stringset, uint8_t *data) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (stringset == ETH_SS_STATS) { + int len = ETH_GSTRING_LEN; + + strncpy(data, "tx_packets", len); + strncpy(data + len, "tx_bytes", len); + strncpy(data + 2 * len, "rx_packets", len); + strncpy(data + 3 * len, "rx_bytes", len); + if (ds->drv->get_strings != NULL) + ds->drv->get_strings(ds, p->port, data + 4 * len); + } +} + +static void dsa_slave_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + data[0] = p->dev->stats.tx_packets; + data[1] = p->dev->stats.tx_bytes; + data[2] = p->dev->stats.rx_packets; + data[3] = p->dev->stats.rx_bytes; + if (ds->drv->get_ethtool_stats != NULL) + ds->drv->get_ethtool_stats(ds, p->port, data + 4); +} + +static int dsa_slave_get_sset_count(struct net_device *dev, int sset) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (sset == ETH_SS_STATS) { + int count; + + count = 4; + if (ds->drv->get_sset_count != NULL) + count += ds->drv->get_sset_count(ds); + + return count; + } + + return -EOPNOTSUPP; +} + +static const struct ethtool_ops dsa_slave_ethtool_ops = { + .get_settings = dsa_slave_get_settings, + .set_settings = dsa_slave_set_settings, + .get_drvinfo = dsa_slave_get_drvinfo, + .nway_reset = dsa_slave_nway_reset, + .get_link = dsa_slave_get_link, + .set_sg = ethtool_op_set_sg, + .get_strings = dsa_slave_get_strings, + .get_ethtool_stats = dsa_slave_get_ethtool_stats, + .get_sset_count = dsa_slave_get_sset_count, +}; + + +/* slave device setup *******************************************************/ +struct net_device * +dsa_slave_create(struct dsa_switch *ds, struct device *parent, + int port, char *name) +{ + struct net_device *master = ds->master_netdev; + struct net_device *slave_dev; + struct dsa_slave_priv *p; + int ret; + + slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), + name, ether_setup); + if (slave_dev == NULL) + return slave_dev; + + slave_dev->features = master->vlan_features; + SET_ETHTOOL_OPS(slave_dev, &dsa_slave_ethtool_ops); + memcpy(slave_dev->dev_addr, master->dev_addr, ETH_ALEN); + slave_dev->tx_queue_len = 0; + switch (ds->tag_protocol) { +#ifdef CONFIG_NET_DSA_TAG_DSA + case htons(ETH_P_DSA): + slave_dev->hard_start_xmit = dsa_xmit; + break; +#endif +#ifdef CONFIG_NET_DSA_TAG_EDSA + case htons(ETH_P_EDSA): + slave_dev->hard_start_xmit = edsa_xmit; + break; +#endif +#ifdef CONFIG_NET_DSA_TAG_TRAILER + case htons(ETH_P_TRAILER): + slave_dev->hard_start_xmit = trailer_xmit; + break; +#endif + default: + BUG(); + } + slave_dev->open = dsa_slave_open; + slave_dev->stop = dsa_slave_close; + slave_dev->change_rx_flags = dsa_slave_change_rx_flags; + slave_dev->set_rx_mode = dsa_slave_set_rx_mode; + slave_dev->set_multicast_list = dsa_slave_set_rx_mode; + slave_dev->set_mac_address = dsa_slave_set_mac_address; + slave_dev->do_ioctl = dsa_slave_ioctl; + SET_NETDEV_DEV(slave_dev, parent); + slave_dev->vlan_features = master->vlan_features; + + p = netdev_priv(slave_dev); + p->dev = slave_dev; + p->parent = ds; + p->port = port; + p->phy = ds->slave_mii_bus->phy_map[port]; + + ret = register_netdev(slave_dev); + if (ret) { + printk(KERN_ERR "%s: error %d registering interface %s\n", + master->name, ret, slave_dev->name); + free_netdev(slave_dev); + return NULL; + } + + netif_carrier_off(slave_dev); + + if (p->phy != NULL) { + phy_attach(slave_dev, p->phy->dev.bus_id, + 0, PHY_INTERFACE_MODE_GMII); + + p->phy->autoneg = AUTONEG_ENABLE; + p->phy->speed = 0; + p->phy->duplex = 0; + p->phy->advertising = p->phy->supported | ADVERTISED_Autoneg; + phy_start_aneg(p->phy); + } + + return slave_dev; +} diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c new file mode 100644 index 000000000000..31866543332e --- /dev/null +++ b/net/dsa/tag_dsa.c @@ -0,0 +1,195 @@ +/* + * net/dsa/tag_dsa.c - (Non-ethertype) DSA tagging + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/etherdevice.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include "dsa_priv.h" + +#define DSA_HLEN 4 + +int dsa_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u8 *dsa_header; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + /* + * Convert the outermost 802.1q tag to a DSA tag for tagged + * packets, or insert a DSA tag between the addresses and + * the ethertype field for untagged packets. + */ + if (skb->protocol == htons(ETH_P_8021Q)) { + if (skb_cow_head(skb, 0) < 0) + goto out_free; + + /* + * Construct tagged FROM_CPU DSA tag from 802.1q tag. + */ + dsa_header = skb->data + 2 * ETH_ALEN; + dsa_header[0] = 0x60; + dsa_header[1] = p->port << 3; + + /* + * Move CFI field from byte 2 to byte 1. + */ + if (dsa_header[2] & 0x10) { + dsa_header[1] |= 0x01; + dsa_header[2] &= ~0x10; + } + } else { + if (skb_cow_head(skb, DSA_HLEN) < 0) + goto out_free; + skb_push(skb, DSA_HLEN); + + memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); + + /* + * Construct untagged FROM_CPU DSA tag. + */ + dsa_header = skb->data + 2 * ETH_ALEN; + dsa_header[0] = 0x40; + dsa_header[1] = p->port << 3; + dsa_header[2] = 0x00; + dsa_header[3] = 0x00; + } + + skb->protocol = htons(ETH_P_DSA); + + skb->dev = p->parent->master_netdev; + dev_queue_xmit(skb); + + return NETDEV_TX_OK; + +out_free: + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static int dsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch *ds = dev->dsa_ptr; + u8 *dsa_header; + int source_port; + + if (unlikely(ds == NULL)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (skb == NULL) + goto out; + + if (unlikely(!pskb_may_pull(skb, DSA_HLEN))) + goto out_drop; + + /* + * The ethertype field is part of the DSA header. + */ + dsa_header = skb->data - 2; + + /* + * Check that frame type is either TO_CPU or FORWARD, and + * that the source device is zero. + */ + if ((dsa_header[0] & 0xdf) != 0x00 && (dsa_header[0] & 0xdf) != 0xc0) + goto out_drop; + + /* + * Check that the source port is a registered DSA port. + */ + source_port = (dsa_header[1] >> 3) & 0x1f; + if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) + goto out_drop; + + /* + * Convert the DSA header to an 802.1q header if the 'tagged' + * bit in the DSA header is set. If the 'tagged' bit is clear, + * delete the DSA header entirely. + */ + if (dsa_header[0] & 0x20) { + u8 new_header[4]; + + /* + * Insert 802.1q ethertype and copy the VLAN-related + * fields, but clear the bit that will hold CFI (since + * DSA uses that bit location for another purpose). + */ + new_header[0] = (ETH_P_8021Q >> 8) & 0xff; + new_header[1] = ETH_P_8021Q & 0xff; + new_header[2] = dsa_header[2] & ~0x10; + new_header[3] = dsa_header[3]; + + /* + * Move CFI bit from its place in the DSA header to + * its 802.1q-designated place. + */ + if (dsa_header[1] & 0x01) + new_header[2] |= 0x10; + + /* + * Update packet checksum if skb is CHECKSUM_COMPLETE. + */ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __wsum c = skb->csum; + c = csum_add(c, csum_partial(new_header + 2, 2, 0)); + c = csum_sub(c, csum_partial(dsa_header + 2, 2, 0)); + skb->csum = c; + } + + memcpy(dsa_header, new_header, DSA_HLEN); + } else { + /* + * Remove DSA tag and update checksum. + */ + skb_pull_rcsum(skb, DSA_HLEN); + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - DSA_HLEN, + 2 * ETH_ALEN); + } + + skb->dev = ds->ports[source_port]; + skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->last_rx = jiffies; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +static struct packet_type dsa_packet_type = { + .type = __constant_htons(ETH_P_DSA), + .func = dsa_rcv, +}; + +static int __init dsa_init_module(void) +{ + dev_add_pack(&dsa_packet_type); + return 0; +} +module_init(dsa_init_module); + +static void __exit dsa_cleanup_module(void) +{ + dev_remove_pack(&dsa_packet_type); +} +module_exit(dsa_cleanup_module); diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c new file mode 100644 index 000000000000..9f4ce55eae59 --- /dev/null +++ b/net/dsa/tag_edsa.c @@ -0,0 +1,214 @@ +/* + * net/dsa/tag_edsa.c - Ethertype DSA tagging + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/etherdevice.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include "dsa_priv.h" + +#define DSA_HLEN 4 +#define EDSA_HLEN 8 + +int edsa_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u8 *edsa_header; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + /* + * Convert the outermost 802.1q tag to a DSA tag and prepend + * a DSA ethertype field is the packet is tagged, or insert + * a DSA ethertype plus DSA tag between the addresses and the + * current ethertype field if the packet is untagged. + */ + if (skb->protocol == htons(ETH_P_8021Q)) { + if (skb_cow_head(skb, DSA_HLEN) < 0) + goto out_free; + skb_push(skb, DSA_HLEN); + + memmove(skb->data, skb->data + DSA_HLEN, 2 * ETH_ALEN); + + /* + * Construct tagged FROM_CPU DSA tag from 802.1q tag. + */ + edsa_header = skb->data + 2 * ETH_ALEN; + edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff; + edsa_header[1] = ETH_P_EDSA & 0xff; + edsa_header[2] = 0x00; + edsa_header[3] = 0x00; + edsa_header[4] = 0x60; + edsa_header[5] = p->port << 3; + + /* + * Move CFI field from byte 6 to byte 5. + */ + if (edsa_header[6] & 0x10) { + edsa_header[5] |= 0x01; + edsa_header[6] &= ~0x10; + } + } else { + if (skb_cow_head(skb, EDSA_HLEN) < 0) + goto out_free; + skb_push(skb, EDSA_HLEN); + + memmove(skb->data, skb->data + EDSA_HLEN, 2 * ETH_ALEN); + + /* + * Construct untagged FROM_CPU DSA tag. + */ + edsa_header = skb->data + 2 * ETH_ALEN; + edsa_header[0] = (ETH_P_EDSA >> 8) & 0xff; + edsa_header[1] = ETH_P_EDSA & 0xff; + edsa_header[2] = 0x00; + edsa_header[3] = 0x00; + edsa_header[4] = 0x40; + edsa_header[5] = p->port << 3; + edsa_header[6] = 0x00; + edsa_header[7] = 0x00; + } + + skb->protocol = htons(ETH_P_EDSA); + + skb->dev = p->parent->master_netdev; + dev_queue_xmit(skb); + + return NETDEV_TX_OK; + +out_free: + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static int edsa_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch *ds = dev->dsa_ptr; + u8 *edsa_header; + int source_port; + + if (unlikely(ds == NULL)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (skb == NULL) + goto out; + + if (unlikely(!pskb_may_pull(skb, EDSA_HLEN))) + goto out_drop; + + /* + * Skip the two null bytes after the ethertype. + */ + edsa_header = skb->data + 2; + + /* + * Check that frame type is either TO_CPU or FORWARD, and + * that the source device is zero. + */ + if ((edsa_header[0] & 0xdf) != 0x00 && (edsa_header[0] & 0xdf) != 0xc0) + goto out_drop; + + /* + * Check that the source port is a registered DSA port. + */ + source_port = (edsa_header[1] >> 3) & 0x1f; + if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) + goto out_drop; + + /* + * If the 'tagged' bit is set, convert the DSA tag to a 802.1q + * tag and delete the ethertype part. If the 'tagged' bit is + * clear, delete the ethertype and the DSA tag parts. + */ + if (edsa_header[0] & 0x20) { + u8 new_header[4]; + + /* + * Insert 802.1q ethertype and copy the VLAN-related + * fields, but clear the bit that will hold CFI (since + * DSA uses that bit location for another purpose). + */ + new_header[0] = (ETH_P_8021Q >> 8) & 0xff; + new_header[1] = ETH_P_8021Q & 0xff; + new_header[2] = edsa_header[2] & ~0x10; + new_header[3] = edsa_header[3]; + + /* + * Move CFI bit from its place in the DSA header to + * its 802.1q-designated place. + */ + if (edsa_header[1] & 0x01) + new_header[2] |= 0x10; + + skb_pull_rcsum(skb, DSA_HLEN); + + /* + * Update packet checksum if skb is CHECKSUM_COMPLETE. + */ + if (skb->ip_summed == CHECKSUM_COMPLETE) { + __wsum c = skb->csum; + c = csum_add(c, csum_partial(new_header + 2, 2, 0)); + c = csum_sub(c, csum_partial(edsa_header + 2, 2, 0)); + skb->csum = c; + } + + memcpy(edsa_header, new_header, DSA_HLEN); + + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - DSA_HLEN, + 2 * ETH_ALEN); + } else { + /* + * Remove DSA tag and update checksum. + */ + skb_pull_rcsum(skb, EDSA_HLEN); + memmove(skb->data - ETH_HLEN, + skb->data - ETH_HLEN - EDSA_HLEN, + 2 * ETH_ALEN); + } + + skb->dev = ds->ports[source_port]; + skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->last_rx = jiffies; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +static struct packet_type edsa_packet_type = { + .type = __constant_htons(ETH_P_EDSA), + .func = edsa_rcv, +}; + +static int __init edsa_init_module(void) +{ + dev_add_pack(&edsa_packet_type); + return 0; +} +module_init(edsa_init_module); + +static void __exit edsa_cleanup_module(void) +{ + dev_remove_pack(&edsa_packet_type); +} +module_exit(edsa_cleanup_module); diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c new file mode 100644 index 000000000000..efd26697e716 --- /dev/null +++ b/net/dsa/tag_trailer.c @@ -0,0 +1,131 @@ +/* + * net/dsa/tag_trailer.c - Trailer tag format handling + * Copyright (c) 2008 Marvell Semiconductor + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/etherdevice.h> +#include <linux/list.h> +#include <linux/netdevice.h> +#include "dsa_priv.h" + +int trailer_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct sk_buff *nskb; + int padlen; + u8 *trailer; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + /* + * We have to make sure that the trailer ends up as the very + * last 4 bytes of the packet. This means that we have to pad + * the packet to the minimum ethernet frame size, if necessary, + * before adding the trailer. + */ + padlen = 0; + if (skb->len < 60) + padlen = 60 - skb->len; + + nskb = alloc_skb(NET_IP_ALIGN + skb->len + padlen + 4, GFP_ATOMIC); + if (nskb == NULL) { + kfree_skb(skb); + return NETDEV_TX_OK; + } + skb_reserve(nskb, NET_IP_ALIGN); + + skb_reset_mac_header(nskb); + skb_set_network_header(nskb, skb_network_header(skb) - skb->head); + skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head); + skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); + kfree_skb(skb); + + if (padlen) { + u8 *pad = skb_put(nskb, padlen); + memset(pad, 0, padlen); + } + + trailer = skb_put(nskb, 4); + trailer[0] = 0x80; + trailer[1] = 1 << p->port; + trailer[2] = 0x10; + trailer[3] = 0x00; + + nskb->protocol = htons(ETH_P_TRAILER); + + nskb->dev = p->parent->master_netdev; + dev_queue_xmit(nskb); + + return NETDEV_TX_OK; +} + +static int trailer_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch *ds = dev->dsa_ptr; + u8 *trailer; + int source_port; + + if (unlikely(ds == NULL)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (skb == NULL) + goto out; + + if (skb_linearize(skb)) + goto out_drop; + + trailer = skb_tail_pointer(skb) - 4; + if (trailer[0] != 0x80 || (trailer[1] & 0xf8) != 0x00 || + (trailer[3] & 0xef) != 0x00 || trailer[3] != 0x00) + goto out_drop; + + source_port = trailer[1] & 7; + if (source_port >= DSA_MAX_PORTS || ds->ports[source_port] == NULL) + goto out_drop; + + pskb_trim_rcsum(skb, skb->len - 4); + + skb->dev = ds->ports[source_port]; + skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->last_rx = jiffies; + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +static struct packet_type trailer_packet_type = { + .type = __constant_htons(ETH_P_TRAILER), + .func = trailer_rcv, +}; + +static int __init trailer_init_module(void) +{ + dev_add_pack(&trailer_packet_type); + return 0; +} +module_init(trailer_init_module); + +static void __exit trailer_cleanup_module(void) +{ + dev_remove_pack(&trailer_packet_type); +} +module_exit(trailer_cleanup_module); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index a80839b02e3f..b9d85af2dd31 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -57,6 +57,7 @@ #include <net/sock.h> #include <net/ipv6.h> #include <net/ip.h> +#include <net/dsa.h> #include <asm/uaccess.h> #include <asm/system.h> @@ -129,7 +130,7 @@ int eth_rebuild_header(struct sk_buff *skb) switch (eth->h_proto) { #ifdef CONFIG_INET - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return arp_find(eth->h_dest, skb); #endif default: @@ -184,6 +185,17 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) skb->pkt_type = PACKET_OTHERHOST; } + /* + * Some variants of DSA tagging don't have an ethertype field + * at all, so we check here whether one of those tagging + * variants has been configured on the receiving interface, + * and if so, set skb->protocol without looking at the packet. + */ + if (netdev_uses_dsa_tags(dev)) + return htons(ETH_P_DSA); + if (netdev_uses_trailer_tags(dev)) + return htons(ETH_P_TRAILER); + if (ntohs(eth->h_proto) >= 1536) return eth->h_proto; diff --git a/net/ieee80211/ieee80211_module.c b/net/ieee80211/ieee80211_module.c index 3bca97f55d47..949772a5a7dc 100644 --- a/net/ieee80211/ieee80211_module.c +++ b/net/ieee80211/ieee80211_module.c @@ -157,7 +157,7 @@ struct net_device *alloc_ieee80211(int sizeof_priv) err = ieee80211_networks_allocate(ieee); if (err) { IEEE80211_ERROR("Unable to allocate beacon storage: %d\n", err); - goto failed; + goto failed_free_netdev; } ieee80211_networks_initialize(ieee); @@ -193,9 +193,9 @@ struct net_device *alloc_ieee80211(int sizeof_priv) return dev; - failed: - if (dev) - free_netdev(dev); +failed_free_netdev: + free_netdev(dev); +failed: return NULL; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 591ea23639ca..691268f3a359 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -630,5 +630,3 @@ config TCP_MD5SIG If unsure, say N. -source "net/ipv4/ipvs/Kconfig" - diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ad40ef3f9ebc..80ff87ce43aa 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -33,7 +33,6 @@ obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o obj-$(CONFIG_IP_PNP) += ipconfig.o obj-$(CONFIG_NETFILTER) += netfilter.o netfilter/ -obj-$(CONFIG_IP_VS) += ipvs/ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 8a3ac1fa71a9..1aa2dc9e380e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -469,7 +469,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) */ err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && - !inet->freebind && + !(inet->freebind || inet->transparent) && addr->sin_addr.s_addr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && @@ -1117,6 +1117,7 @@ int inet_sk_rebuild_header(struct sock *sk) }, }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet->sport, diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index b043eda60b04..1a9dd66511fc 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -663,7 +663,7 @@ out: void arp_xmit(struct sk_buff *skb) { /* Send it off, maybe filter it using firewalling first. */ - NF_HOOK(NF_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit); + NF_HOOK(NFPROTO_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit); } /* @@ -928,7 +928,7 @@ static int arp_rcv(struct sk_buff *skb, struct net_device *dev, memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb)); - return NF_HOOK(NF_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); + return NF_HOOK(NFPROTO_ARP, NF_ARP_IN, skb, dev, NULL, arp_process); freeskb: kfree_skb(skb); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 2c0e4572cc90..2e78f6bd9775 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -13,7 +13,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -47,17 +47,7 @@ #include <asm/bug.h> #include <asm/unaligned.h> -struct cipso_v4_domhsh_entry { - char *domain; - u32 valid; - struct list_head list; - struct rcu_head rcu; -}; - /* List of available DOI definitions */ -/* XXX - Updates should be minimal so having a single lock for the - * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be - * okay. */ /* XXX - This currently assumes a minimal number of different DOIs in use, * if in practice there are a lot of different DOIs this list should * probably be turned into a hash table or something similar so we @@ -119,6 +109,19 @@ int cipso_v4_rbm_strictvalid = 1; * be omitted. */ #define CIPSO_V4_TAG_RNG_CAT_MAX 8 +/* Base length of the local tag (non-standard tag). + * Tag definition (may change between kernel versions) + * + * 0 8 16 24 32 + * +----------+----------+----------+----------+ + * | 10000000 | 00000110 | 32-bit secid value | + * +----------+----------+----------+----------+ + * | in (host byte order)| + * +----------+----------+ + * + */ +#define CIPSO_V4_TAG_LOC_BLEN 6 + /* * Helper Functions */ @@ -194,25 +197,6 @@ static void cipso_v4_bitmap_setbit(unsigned char *bitmap, } /** - * cipso_v4_doi_domhsh_free - Frees a domain list entry - * @entry: the entry's RCU field - * - * Description: - * This function is designed to be used as a callback to the call_rcu() - * function so that the memory allocated to a domain list entry can be released - * safely. - * - */ -static void cipso_v4_doi_domhsh_free(struct rcu_head *entry) -{ - struct cipso_v4_domhsh_entry *ptr; - - ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu); - kfree(ptr->domain); - kfree(ptr); -} - -/** * cipso_v4_cache_entry_free - Frees a cache entry * @entry: the entry to free * @@ -457,7 +441,7 @@ static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) struct cipso_v4_doi *iter; list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->doi == doi && iter->valid) + if (iter->doi == doi && atomic_read(&iter->refcount)) return iter; return NULL; } @@ -496,14 +480,17 @@ int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) if (doi_def->type != CIPSO_V4_MAP_PASS) return -EINVAL; break; + case CIPSO_V4_TAG_LOCAL: + if (doi_def->type != CIPSO_V4_MAP_LOCAL) + return -EINVAL; + break; default: return -EINVAL; } } - doi_def->valid = 1; + atomic_set(&doi_def->refcount, 1); INIT_RCU_HEAD(&doi_def->rcu); - INIT_LIST_HEAD(&doi_def->dom_list); spin_lock(&cipso_v4_doi_list_lock); if (cipso_v4_doi_search(doi_def->doi) != NULL) @@ -519,59 +506,129 @@ doi_add_failure: } /** + * cipso_v4_doi_free - Frees a DOI definition + * @entry: the entry's RCU field + * + * Description: + * This function frees all of the memory associated with a DOI definition. + * + */ +void cipso_v4_doi_free(struct cipso_v4_doi *doi_def) +{ + if (doi_def == NULL) + return; + + switch (doi_def->type) { + case CIPSO_V4_MAP_TRANS: + kfree(doi_def->map.std->lvl.cipso); + kfree(doi_def->map.std->lvl.local); + kfree(doi_def->map.std->cat.cipso); + kfree(doi_def->map.std->cat.local); + break; + } + kfree(doi_def); +} + +/** + * cipso_v4_doi_free_rcu - Frees a DOI definition via the RCU pointer + * @entry: the entry's RCU field + * + * Description: + * This function is designed to be used as a callback to the call_rcu() + * function so that the memory allocated to the DOI definition can be released + * safely. + * + */ +static void cipso_v4_doi_free_rcu(struct rcu_head *entry) +{ + struct cipso_v4_doi *doi_def; + + doi_def = container_of(entry, struct cipso_v4_doi, rcu); + cipso_v4_doi_free(doi_def); +} + +/** * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine * @doi: the DOI value * @audit_secid: the LSM secid to use in the audit message - * @callback: the DOI cleanup/free callback * * Description: - * Removes a DOI definition from the CIPSO engine, @callback is called to - * free any memory. The NetLabel routines will be called to release their own - * LSM domain mappings as well as our own domain list. Returns zero on - * success and negative values on failure. + * Removes a DOI definition from the CIPSO engine. The NetLabel routines will + * be called to release their own LSM domain mappings as well as our own + * domain list. Returns zero on success and negative values on failure. * */ -int cipso_v4_doi_remove(u32 doi, - struct netlbl_audit *audit_info, - void (*callback) (struct rcu_head * head)) +int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info) { struct cipso_v4_doi *doi_def; - struct cipso_v4_domhsh_entry *dom_iter; spin_lock(&cipso_v4_doi_list_lock); doi_def = cipso_v4_doi_search(doi); - if (doi_def != NULL) { - doi_def->valid = 0; - list_del_rcu(&doi_def->list); + if (doi_def == NULL) { spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_lock(); - list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) - if (dom_iter->valid) - netlbl_cfg_map_del(dom_iter->domain, - audit_info); - rcu_read_unlock(); - cipso_v4_cache_invalidate(); - call_rcu(&doi_def->rcu, callback); - return 0; + return -ENOENT; + } + if (!atomic_dec_and_test(&doi_def->refcount)) { + spin_unlock(&cipso_v4_doi_list_lock); + return -EBUSY; } + list_del_rcu(&doi_def->list); spin_unlock(&cipso_v4_doi_list_lock); - return -ENOENT; + cipso_v4_cache_invalidate(); + call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); + + return 0; } /** - * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition + * cipso_v4_doi_getdef - Returns a reference to a valid DOI definition * @doi: the DOI value * * Description: * Searches for a valid DOI definition and if one is found it is returned to * the caller. Otherwise NULL is returned. The caller must ensure that - * rcu_read_lock() is held while accessing the returned definition. + * rcu_read_lock() is held while accessing the returned definition and the DOI + * definition reference count is decremented when the caller is done. * */ struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) { - return cipso_v4_doi_search(doi); + struct cipso_v4_doi *doi_def; + + rcu_read_lock(); + doi_def = cipso_v4_doi_search(doi); + if (doi_def == NULL) + goto doi_getdef_return; + if (!atomic_inc_not_zero(&doi_def->refcount)) + doi_def = NULL; + +doi_getdef_return: + rcu_read_unlock(); + return doi_def; +} + +/** + * cipso_v4_doi_putdef - Releases a reference for the given DOI definition + * @doi_def: the DOI definition + * + * Description: + * Releases a DOI definition reference obtained from cipso_v4_doi_getdef(). + * + */ +void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def) +{ + if (doi_def == NULL) + return; + + if (!atomic_dec_and_test(&doi_def->refcount)) + return; + spin_lock(&cipso_v4_doi_list_lock); + list_del_rcu(&doi_def->list); + spin_unlock(&cipso_v4_doi_list_lock); + + cipso_v4_cache_invalidate(); + call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu); } /** @@ -597,7 +654,7 @@ int cipso_v4_doi_walk(u32 *skip_cnt, rcu_read_lock(); list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list) - if (iter_doi->valid) { + if (atomic_read(&iter_doi->refcount) > 0) { if (doi_cnt++ < *skip_cnt) continue; ret_val = callback(iter_doi, cb_arg); @@ -613,85 +670,6 @@ doi_walk_return: return ret_val; } -/** - * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition - * @doi_def: the DOI definition - * @domain: the domain to add - * - * Description: - * Adds the @domain to the DOI specified by @doi_def, this function - * should only be called by external functions (i.e. NetLabel). This function - * does allocate memory. Returns zero on success, negative values on failure. - * - */ -int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain) -{ - struct cipso_v4_domhsh_entry *iter; - struct cipso_v4_domhsh_entry *new_dom; - - new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL); - if (new_dom == NULL) - return -ENOMEM; - if (domain) { - new_dom->domain = kstrdup(domain, GFP_KERNEL); - if (new_dom->domain == NULL) { - kfree(new_dom); - return -ENOMEM; - } - } - new_dom->valid = 1; - INIT_RCU_HEAD(&new_dom->rcu); - - spin_lock(&cipso_v4_doi_list_lock); - list_for_each_entry(iter, &doi_def->dom_list, list) - if (iter->valid && - ((domain != NULL && iter->domain != NULL && - strcmp(iter->domain, domain) == 0) || - (domain == NULL && iter->domain == NULL))) { - spin_unlock(&cipso_v4_doi_list_lock); - kfree(new_dom->domain); - kfree(new_dom); - return -EEXIST; - } - list_add_tail_rcu(&new_dom->list, &doi_def->dom_list); - spin_unlock(&cipso_v4_doi_list_lock); - - return 0; -} - -/** - * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition - * @doi_def: the DOI definition - * @domain: the domain to remove - * - * Description: - * Removes the @domain from the DOI specified by @doi_def, this function - * should only be called by external functions (i.e. NetLabel). Returns zero - * on success and negative values on error. - * - */ -int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, - const char *domain) -{ - struct cipso_v4_domhsh_entry *iter; - - spin_lock(&cipso_v4_doi_list_lock); - list_for_each_entry(iter, &doi_def->dom_list, list) - if (iter->valid && - ((domain != NULL && iter->domain != NULL && - strcmp(iter->domain, domain) == 0) || - (domain == NULL && iter->domain == NULL))) { - iter->valid = 0; - list_del_rcu(&iter->list); - spin_unlock(&cipso_v4_doi_list_lock); - call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free); - return 0; - } - spin_unlock(&cipso_v4_doi_list_lock); - - return -ENOENT; -} - /* * Label Mapping Functions */ @@ -712,7 +690,7 @@ static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) switch (doi_def->type) { case CIPSO_V4_MAP_PASS: return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) return 0; break; @@ -741,7 +719,7 @@ static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: *net_lvl = host_lvl; return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (host_lvl < doi_def->map.std->lvl.local_size && doi_def->map.std->lvl.local[host_lvl] < CIPSO_V4_INV_LVL) { *net_lvl = doi_def->map.std->lvl.local[host_lvl]; @@ -775,7 +753,7 @@ static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: *host_lvl = net_lvl; return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: map_tbl = doi_def->map.std; if (net_lvl < map_tbl->lvl.cipso_size && map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) { @@ -812,7 +790,7 @@ static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def, switch (doi_def->type) { case CIPSO_V4_MAP_PASS: return 0; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: cipso_cat_size = doi_def->map.std->cat.cipso_size; cipso_array = doi_def->map.std->cat.cipso; for (;;) { @@ -860,7 +838,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, u32 host_cat_size = 0; u32 *host_cat_array = NULL; - if (doi_def->type == CIPSO_V4_MAP_STD) { + if (doi_def->type == CIPSO_V4_MAP_TRANS) { host_cat_size = doi_def->map.std->cat.local_size; host_cat_array = doi_def->map.std->cat.local; } @@ -875,7 +853,7 @@ static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: net_spot = host_spot; break; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (host_spot >= host_cat_size) return -EPERM; net_spot = host_cat_array[host_spot]; @@ -921,7 +899,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, u32 net_cat_size = 0; u32 *net_cat_array = NULL; - if (doi_def->type == CIPSO_V4_MAP_STD) { + if (doi_def->type == CIPSO_V4_MAP_TRANS) { net_cat_size = doi_def->map.std->cat.cipso_size; net_cat_array = doi_def->map.std->cat.cipso; } @@ -941,7 +919,7 @@ static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, case CIPSO_V4_MAP_PASS: host_spot = net_spot; break; - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: if (net_spot >= net_cat_size) return -EPERM; host_spot = net_cat_array[net_spot]; @@ -1277,7 +1255,7 @@ static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, } else tag_len = 4; - buffer[0] = 0x01; + buffer[0] = CIPSO_V4_TAG_RBITMAP; buffer[1] = tag_len; buffer[3] = level; @@ -1373,7 +1351,7 @@ static int cipso_v4_gentag_enum(const struct cipso_v4_doi *doi_def, } else tag_len = 4; - buffer[0] = 0x02; + buffer[0] = CIPSO_V4_TAG_ENUM; buffer[1] = tag_len; buffer[3] = level; @@ -1469,7 +1447,7 @@ static int cipso_v4_gentag_rng(const struct cipso_v4_doi *doi_def, } else tag_len = 4; - buffer[0] = 0x05; + buffer[0] = CIPSO_V4_TAG_RANGE; buffer[1] = tag_len; buffer[3] = level; @@ -1523,6 +1501,54 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, } /** + * cipso_v4_gentag_loc - Generate a CIPSO local tag (non-standard) + * @doi_def: the DOI definition + * @secattr: the security attributes + * @buffer: the option buffer + * @buffer_len: length of buffer in bytes + * + * Description: + * Generate a CIPSO option using the local tag. Returns the size of the tag + * on success, negative values on failure. + * + */ +static int cipso_v4_gentag_loc(const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr, + unsigned char *buffer, + u32 buffer_len) +{ + if (!(secattr->flags & NETLBL_SECATTR_SECID)) + return -EPERM; + + buffer[0] = CIPSO_V4_TAG_LOCAL; + buffer[1] = CIPSO_V4_TAG_LOC_BLEN; + *(u32 *)&buffer[2] = secattr->attr.secid; + + return CIPSO_V4_TAG_LOC_BLEN; +} + +/** + * cipso_v4_parsetag_loc - Parse a CIPSO local tag + * @doi_def: the DOI definition + * @tag: the CIPSO tag + * @secattr: the security attributes + * + * Description: + * Parse a CIPSO local tag and return the security attributes in @secattr. + * Return zero on success, negatives values on failure. + * + */ +static int cipso_v4_parsetag_loc(const struct cipso_v4_doi *doi_def, + const unsigned char *tag, + struct netlbl_lsm_secattr *secattr) +{ + secattr->attr.secid = *(u32 *)&tag[2]; + secattr->flags |= NETLBL_SECATTR_SECID; + + return 0; +} + +/** * cipso_v4_validate - Validate a CIPSO option * @option: the start of the option, on error it is set to point to the error * @@ -1541,7 +1567,7 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, * that is unrecognized." * */ -int cipso_v4_validate(unsigned char **option) +int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option) { unsigned char *opt = *option; unsigned char *tag; @@ -1566,7 +1592,7 @@ int cipso_v4_validate(unsigned char **option) goto validate_return_locked; } - opt_iter = 6; + opt_iter = CIPSO_V4_HDR_LEN; tag = opt + opt_iter; while (opt_iter < opt_len) { for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];) @@ -1584,7 +1610,7 @@ int cipso_v4_validate(unsigned char **option) switch (tag[0]) { case CIPSO_V4_TAG_RBITMAP: - if (tag_len < 4) { + if (tag_len < CIPSO_V4_TAG_RBM_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } @@ -1602,7 +1628,7 @@ int cipso_v4_validate(unsigned char **option) err_offset = opt_iter + 3; goto validate_return_locked; } - if (tag_len > 4 && + if (tag_len > CIPSO_V4_TAG_RBM_BLEN && cipso_v4_map_cat_rbm_valid(doi_def, &tag[4], tag_len - 4) < 0) { @@ -1612,7 +1638,7 @@ int cipso_v4_validate(unsigned char **option) } break; case CIPSO_V4_TAG_ENUM: - if (tag_len < 4) { + if (tag_len < CIPSO_V4_TAG_ENUM_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } @@ -1622,7 +1648,7 @@ int cipso_v4_validate(unsigned char **option) err_offset = opt_iter + 3; goto validate_return_locked; } - if (tag_len > 4 && + if (tag_len > CIPSO_V4_TAG_ENUM_BLEN && cipso_v4_map_cat_enum_valid(doi_def, &tag[4], tag_len - 4) < 0) { @@ -1631,7 +1657,7 @@ int cipso_v4_validate(unsigned char **option) } break; case CIPSO_V4_TAG_RANGE: - if (tag_len < 4) { + if (tag_len < CIPSO_V4_TAG_RNG_BLEN) { err_offset = opt_iter + 1; goto validate_return_locked; } @@ -1641,7 +1667,7 @@ int cipso_v4_validate(unsigned char **option) err_offset = opt_iter + 3; goto validate_return_locked; } - if (tag_len > 4 && + if (tag_len > CIPSO_V4_TAG_RNG_BLEN && cipso_v4_map_cat_rng_valid(doi_def, &tag[4], tag_len - 4) < 0) { @@ -1649,6 +1675,19 @@ int cipso_v4_validate(unsigned char **option) goto validate_return_locked; } break; + case CIPSO_V4_TAG_LOCAL: + /* This is a non-standard tag that we only allow for + * local connections, so if the incoming interface is + * not the loopback device drop the packet. */ + if (!(skb->dev->flags & IFF_LOOPBACK)) { + err_offset = opt_iter; + goto validate_return_locked; + } + if (tag_len != CIPSO_V4_TAG_LOC_BLEN) { + err_offset = opt_iter + 1; + goto validate_return_locked; + } + break; default: err_offset = opt_iter; goto validate_return_locked; @@ -1704,48 +1743,27 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) } /** - * cipso_v4_sock_setattr - Add a CIPSO option to a socket - * @sk: the socket + * cipso_v4_genopt - Generate a CIPSO option + * @buf: the option buffer + * @buf_len: the size of opt_buf * @doi_def: the CIPSO DOI to use - * @secattr: the specific security attributes of the socket + * @secattr: the security attributes * * Description: - * Set the CIPSO option on the given socket using the DOI definition and - * security attributes passed to the function. This function requires - * exclusive access to @sk, which means it either needs to be in the - * process of being created or locked. Returns zero on success and negative - * values on failure. + * Generate a CIPSO option using the DOI definition and security attributes + * passed to the function. Returns the length of the option on success and + * negative values on failure. * */ -int cipso_v4_sock_setattr(struct sock *sk, - const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr) +static int cipso_v4_genopt(unsigned char *buf, u32 buf_len, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) { - int ret_val = -EPERM; + int ret_val; u32 iter; - unsigned char *buf; - u32 buf_len = 0; - u32 opt_len; - struct ip_options *opt = NULL; - struct inet_sock *sk_inet; - struct inet_connection_sock *sk_conn; - - /* In the case of sock_create_lite(), the sock->sk field is not - * defined yet but it is not a problem as the only users of these - * "lite" PF_INET sockets are functions which do an accept() call - * afterwards so we will label the socket as part of the accept(). */ - if (sk == NULL) - return 0; - /* We allocate the maximum CIPSO option size here so we are probably - * being a little wasteful, but it makes our life _much_ easier later - * on and after all we are only talking about 40 bytes. */ - buf_len = CIPSO_V4_OPT_LEN_MAX; - buf = kmalloc(buf_len, GFP_ATOMIC); - if (buf == NULL) { - ret_val = -ENOMEM; - goto socket_setattr_failure; - } + if (buf_len <= CIPSO_V4_HDR_LEN) + return -ENOSPC; /* XXX - This code assumes only one tag per CIPSO option which isn't * really a good assumption to make but since we only support the MAC @@ -1772,9 +1790,14 @@ int cipso_v4_sock_setattr(struct sock *sk, &buf[CIPSO_V4_HDR_LEN], buf_len - CIPSO_V4_HDR_LEN); break; + case CIPSO_V4_TAG_LOCAL: + ret_val = cipso_v4_gentag_loc(doi_def, + secattr, + &buf[CIPSO_V4_HDR_LEN], + buf_len - CIPSO_V4_HDR_LEN); + break; default: - ret_val = -EPERM; - goto socket_setattr_failure; + return -EPERM; } iter++; @@ -1782,9 +1805,58 @@ int cipso_v4_sock_setattr(struct sock *sk, iter < CIPSO_V4_TAG_MAXCNT && doi_def->tags[iter] != CIPSO_V4_TAG_INVALID); if (ret_val < 0) - goto socket_setattr_failure; + return ret_val; cipso_v4_gentag_hdr(doi_def, buf, ret_val); - buf_len = CIPSO_V4_HDR_LEN + ret_val; + return CIPSO_V4_HDR_LEN + ret_val; +} + +/** + * cipso_v4_sock_setattr - Add a CIPSO option to a socket + * @sk: the socket + * @doi_def: the CIPSO DOI to use + * @secattr: the specific security attributes of the socket + * + * Description: + * Set the CIPSO option on the given socket using the DOI definition and + * security attributes passed to the function. This function requires + * exclusive access to @sk, which means it either needs to be in the + * process of being created or locked. Returns zero on success and negative + * values on failure. + * + */ +int cipso_v4_sock_setattr(struct sock *sk, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val = -EPERM; + unsigned char *buf = NULL; + u32 buf_len; + u32 opt_len; + struct ip_options *opt = NULL; + struct inet_sock *sk_inet; + struct inet_connection_sock *sk_conn; + + /* In the case of sock_create_lite(), the sock->sk field is not + * defined yet but it is not a problem as the only users of these + * "lite" PF_INET sockets are functions which do an accept() call + * afterwards so we will label the socket as part of the accept(). */ + if (sk == NULL) + return 0; + + /* We allocate the maximum CIPSO option size here so we are probably + * being a little wasteful, but it makes our life _much_ easier later + * on and after all we are only talking about 40 bytes. */ + buf_len = CIPSO_V4_OPT_LEN_MAX; + buf = kmalloc(buf_len, GFP_ATOMIC); + if (buf == NULL) { + ret_val = -ENOMEM; + goto socket_setattr_failure; + } + + ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr); + if (ret_val < 0) + goto socket_setattr_failure; + buf_len = ret_val; /* We can't use ip_options_get() directly because it makes a call to * ip_options_get_alloc() which allocates memory with GFP_KERNEL and @@ -1822,6 +1894,80 @@ socket_setattr_failure: } /** + * cipso_v4_sock_delattr - Delete the CIPSO option from a socket + * @sk: the socket + * + * Description: + * Removes the CIPSO option from a socket, if present. + * + */ +void cipso_v4_sock_delattr(struct sock *sk) +{ + u8 hdr_delta; + struct ip_options *opt; + struct inet_sock *sk_inet; + + sk_inet = inet_sk(sk); + opt = sk_inet->opt; + if (opt == NULL || opt->cipso == 0) + return; + + if (opt->srr || opt->rr || opt->ts || opt->router_alert) { + u8 cipso_len; + u8 cipso_off; + unsigned char *cipso_ptr; + int iter; + int optlen_new; + + cipso_off = opt->cipso - sizeof(struct iphdr); + cipso_ptr = &opt->__data[cipso_off]; + cipso_len = cipso_ptr[1]; + + if (opt->srr > opt->cipso) + opt->srr -= cipso_len; + if (opt->rr > opt->cipso) + opt->rr -= cipso_len; + if (opt->ts > opt->cipso) + opt->ts -= cipso_len; + if (opt->router_alert > opt->cipso) + opt->router_alert -= cipso_len; + opt->cipso = 0; + + memmove(cipso_ptr, cipso_ptr + cipso_len, + opt->optlen - cipso_off - cipso_len); + + /* determining the new total option length is tricky because of + * the padding necessary, the only thing i can think to do at + * this point is walk the options one-by-one, skipping the + * padding at the end to determine the actual option size and + * from there we can determine the new total option length */ + iter = 0; + optlen_new = 0; + while (iter < opt->optlen) + if (opt->__data[iter] != IPOPT_NOP) { + iter += opt->__data[iter + 1]; + optlen_new = iter; + } else + iter++; + hdr_delta = opt->optlen; + opt->optlen = (optlen_new + 3) & ~3; + hdr_delta -= opt->optlen; + } else { + /* only the cipso option was present on the socket so we can + * remove the entire option struct */ + sk_inet->opt = NULL; + hdr_delta = opt->optlen; + kfree(opt); + } + + if (sk_inet->is_icsk && hdr_delta > 0) { + struct inet_connection_sock *sk_conn = inet_csk(sk); + sk_conn->icsk_ext_hdr_len -= hdr_delta; + sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); + } +} + +/** * cipso_v4_getattr - Helper function for the cipso_v4_*_getattr functions * @cipso: the CIPSO v4 option * @secattr: the security attributes @@ -1859,6 +2005,9 @@ static int cipso_v4_getattr(const unsigned char *cipso, case CIPSO_V4_TAG_RANGE: ret_val = cipso_v4_parsetag_rng(doi_def, &cipso[6], secattr); break; + case CIPSO_V4_TAG_LOCAL: + ret_val = cipso_v4_parsetag_loc(doi_def, &cipso[6], secattr); + break; } if (ret_val == 0) secattr->type = NETLBL_NLTYPE_CIPSOV4; @@ -1893,6 +2042,124 @@ int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) } /** + * cipso_v4_skbuff_setattr - Set the CIPSO option on a packet + * @skb: the packet + * @secattr: the security attributes + * + * Description: + * Set the CIPSO option on the given packet based on the security attributes. + * Returns a pointer to the IP header on success and NULL on failure. + * + */ +int cipso_v4_skbuff_setattr(struct sk_buff *skb, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct iphdr *iph; + struct ip_options *opt = &IPCB(skb)->opt; + unsigned char buf[CIPSO_V4_OPT_LEN_MAX]; + u32 buf_len = CIPSO_V4_OPT_LEN_MAX; + u32 opt_len; + int len_delta; + + ret_val = cipso_v4_genopt(buf, buf_len, doi_def, secattr); + if (ret_val < 0) + return ret_val; + buf_len = ret_val; + opt_len = (buf_len + 3) & ~3; + + /* we overwrite any existing options to ensure that we have enough + * room for the CIPSO option, the reason is that we _need_ to guarantee + * that the security label is applied to the packet - we do the same + * thing when using the socket options and it hasn't caused a problem, + * if we need to we can always revisit this choice later */ + + len_delta = opt_len - opt->optlen; + /* if we don't ensure enough headroom we could panic on the skb_push() + * call below so make sure we have enough, we are also "mangling" the + * packet so we should probably do a copy-on-write call anyway */ + ret_val = skb_cow(skb, skb_headroom(skb) + len_delta); + if (ret_val < 0) + return ret_val; + + if (len_delta > 0) { + /* we assume that the header + opt->optlen have already been + * "pushed" in ip_options_build() or similar */ + iph = ip_hdr(skb); + skb_push(skb, len_delta); + memmove((char *)iph - len_delta, iph, iph->ihl << 2); + skb_reset_network_header(skb); + iph = ip_hdr(skb); + } else if (len_delta < 0) { + iph = ip_hdr(skb); + memset(iph + 1, IPOPT_NOP, opt->optlen); + } else + iph = ip_hdr(skb); + + if (opt->optlen > 0) + memset(opt, 0, sizeof(*opt)); + opt->optlen = opt_len; + opt->cipso = sizeof(struct iphdr); + opt->is_changed = 1; + + /* we have to do the following because we are being called from a + * netfilter hook which means the packet already has had the header + * fields populated and the checksum calculated - yes this means we + * are doing more work than needed but we do it to keep the core + * stack clean and tidy */ + memcpy(iph + 1, buf, buf_len); + if (opt_len > buf_len) + memset((char *)(iph + 1) + buf_len, 0, opt_len - buf_len); + if (len_delta != 0) { + iph->ihl = 5 + (opt_len >> 2); + iph->tot_len = htons(skb->len); + } + ip_send_check(iph); + + return 0; +} + +/** + * cipso_v4_skbuff_delattr - Delete any CIPSO options from a packet + * @skb: the packet + * + * Description: + * Removes any and all CIPSO options from the given packet. Returns zero on + * success, negative values on failure. + * + */ +int cipso_v4_skbuff_delattr(struct sk_buff *skb) +{ + int ret_val; + struct iphdr *iph; + struct ip_options *opt = &IPCB(skb)->opt; + unsigned char *cipso_ptr; + + if (opt->cipso == 0) + return 0; + + /* since we are changing the packet we should make a copy */ + ret_val = skb_cow(skb, skb_headroom(skb)); + if (ret_val < 0) + return ret_val; + + /* the easiest thing to do is just replace the cipso option with noop + * options since we don't change the size of the packet, although we + * still need to recalculate the checksum */ + + iph = ip_hdr(skb); + cipso_ptr = (unsigned char *)iph + opt->cipso; + memset(cipso_ptr, IPOPT_NOOP, cipso_ptr[1]); + opt->cipso = 0; + opt->is_changed = 1; + + ip_send_check(iph); + + return 0; +} + +/** * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option * @skb: the packet * @secattr: the security attributes diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index b12dae2b0b2d..56fce3ab6c55 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -613,9 +613,7 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (colon) *colon = 0; -#ifdef CONFIG_KMOD dev_load(net, ifr.ifr_name); -#endif switch (cmd) { case SIOCGIFADDR: /* Get interface address */ @@ -1283,7 +1281,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, return ret; } -static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen, +static int devinet_conf_sysctl(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -1379,12 +1377,11 @@ int ipv4_doint_and_flush(ctl_table *ctl, int write, return ret; } -int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen, +int ipv4_doint_and_flush_strategy(ctl_table *table, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { - int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp, - newval, newlen); + int ret = devinet_conf_sysctl(table, oldval, oldlenp, newval, newlen); struct net *net = table->extra2; if (ret == 1) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 55c355e63234..72b2de76f1cd 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -1,7 +1,7 @@ /* * NET3: Implementation of the ICMP protocol layer. * - * Alan Cox, <alan@redhat.com> + * Alan Cox, <alan@lxorguk.ukuu.org.uk> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index f70fac612596..a0d86455c53e 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -9,7 +9,7 @@ * seems to fall out with gcc 2.6.2. * * Authors: - * Alan Cox <Alan.Cox@linux.org> + * Alan Cox <alan@lxorguk.ukuu.org.uk> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -1234,6 +1234,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) write_lock_bh(&in_dev->mc_list_lock); im->next=in_dev->mc_list; in_dev->mc_list=im; + in_dev->mc_count++; write_unlock_bh(&in_dev->mc_list_lock); #ifdef CONFIG_IP_MULTICAST igmpv3_del_delrec(in_dev, im->multiaddr); @@ -1282,6 +1283,7 @@ void ip_mc_dec_group(struct in_device *in_dev, __be32 addr) if (--i->users == 0) { write_lock_bh(&in_dev->mc_list_lock); *ip = i->next; + in_dev->mc_count--; write_unlock_bh(&in_dev->mc_list_lock); igmp_group_dropped(i); @@ -1330,6 +1332,7 @@ void ip_mc_init_dev(struct in_device *in_dev) setup_timer(&in_dev->mr_gq_timer, igmp_gq_timer_expire, (unsigned long)in_dev); in_dev->mr_ifc_count = 0; + in_dev->mc_count = 0; setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, (unsigned long)in_dev); in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; @@ -1369,8 +1372,8 @@ void ip_mc_destroy_dev(struct in_device *in_dev) write_lock_bh(&in_dev->mc_list_lock); while ((i = in_dev->mc_list) != NULL) { in_dev->mc_list = i->next; + in_dev->mc_count--; write_unlock_bh(&in_dev->mc_list_lock); - igmp_group_dropped(i); ip_ma_put(i); @@ -2383,7 +2386,7 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) if (state->in_dev->mc_list == im) { seq_printf(seq, "%d\t%-10s: %5d %7s\n", - state->dev->ifindex, state->dev->name, state->dev->mc_count, querier); + state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } seq_printf(seq, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 0c1ae68ee84b..bd1278a2d828 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -30,20 +30,22 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); #endif /* - * This array holds the first and last local port number. + * This struct holds the first and last local port number. */ -int sysctl_local_port_range[2] = { 32768, 61000 }; -DEFINE_SEQLOCK(sysctl_port_range_lock); +struct local_ports sysctl_local_ports __read_mostly = { + .lock = SEQLOCK_UNLOCKED, + .range = { 32768, 61000 }, +}; void inet_get_local_port_range(int *low, int *high) { unsigned seq; do { - seq = read_seqbegin(&sysctl_port_range_lock); + seq = read_seqbegin(&sysctl_local_ports.lock); - *low = sysctl_local_port_range[0]; - *high = sysctl_local_port_range[1]; - } while (read_seqretry(&sysctl_port_range_lock, seq)); + *low = sysctl_local_ports.range[0]; + *high = sysctl_local_ports.range[1]; + } while (read_seqretry(&sysctl_local_ports.lock, seq)); } EXPORT_SYMBOL(inet_get_local_port_range); @@ -335,6 +337,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; @@ -515,6 +518,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, newicsk->icsk_bind_hash = NULL; inet_sk(newsk)->dport = inet_rsk(req)->rmt_port; + inet_sk(newsk)->num = ntohs(inet_rsk(req)->loc_port); + inet_sk(newsk)->sport = inet_rsk(req)->loc_port; newsk->sk_write_space = sk_stream_write_space; newicsk->icsk_retransmits = 0; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index c10036e7a463..564230dabcb8 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -53,11 +53,9 @@ static DEFINE_MUTEX(inet_diag_table_mutex); static const struct inet_diag_handler *inet_diag_lock_handler(int type) { -#ifdef CONFIG_KMOD if (!inet_diag_table[type]) request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK, NETLINK_INET_DIAG, type); -#endif mutex_lock(&inet_diag_table_mutex); if (!inet_diag_table[type]) @@ -782,11 +780,15 @@ skip_listen_ht: struct sock *sk; struct hlist_node *node; + num = 0; + + if (hlist_empty(&head->chain) && hlist_empty(&head->twchain)) + continue; + if (i > s_i) s_num = 0; read_lock_bh(lock); - num = 0; sk_for_each(sk, node, &head->chain) { struct inet_sock *inet = inet_sk(sk); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 743f011b9a84..1c5fd38f8824 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -126,6 +126,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_reuse = sk->sk_reuse; tw->tw_hash = sk->sk_hash; tw->tw_ipv6only = 0; + tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; twsk_net_set(tw, hold_net(sock_net(sk))); atomic_set(&tw->tw_refcnt, 1); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 2152d222b954..e4f81f54befe 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -6,7 +6,7 @@ * The IP fragmentation functionality. * * Authors: Fred N. van Kempen <waltje@uWalt.NL.Mugnet.ORG> - * Alan Cox <Alan.Cox@linux.org> + * Alan Cox <alan@lxorguk.ukuu.org.uk> * * Fixes: * Alan Cox : Split from ip.c , see ip_input.c for history. diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2a61158ea722..85c487b8572b 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -27,6 +27,7 @@ #include <linux/inetdevice.h> #include <linux/igmp.h> #include <linux/netfilter_ipv4.h> +#include <linux/etherdevice.h> #include <linux/if_ether.h> #include <net/sock.h> @@ -41,6 +42,7 @@ #include <net/xfrm.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <net/rtnetlink.h> #ifdef CONFIG_IPV6 #include <net/ipv6.h> @@ -117,8 +119,10 @@ Alexey Kuznetsov. */ +static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); static void ipgre_tunnel_setup(struct net_device *dev); +static int ipgre_tunnel_bind_dev(struct net_device *dev); /* Fallback tunnel: no source, no destination, no key, no options */ @@ -163,38 +167,64 @@ static DEFINE_RWLOCK(ipgre_lock); /* Given src, dst and key, find appropriate for input tunnel. */ static struct ip_tunnel * ipgre_tunnel_lookup(struct net *net, - __be32 remote, __be32 local, __be32 key) + __be32 remote, __be32 local, + __be32 key, __be16 gre_proto) { unsigned h0 = HASH(remote); unsigned h1 = HASH(key); struct ip_tunnel *t; + struct ip_tunnel *t2 = NULL; struct ipgre_net *ign = net_generic(net, ipgre_net_id); + int dev_type = (gre_proto == htons(ETH_P_TEB)) ? + ARPHRD_ETHER : ARPHRD_IPGRE; for (t = ign->tunnels_r_l[h0^h1]; t; t = t->next) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { - if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) - return t; + if (t->parms.i_key == key && t->dev->flags & IFF_UP) { + if (t->dev->type == dev_type) + return t; + if (t->dev->type == ARPHRD_IPGRE && !t2) + t2 = t; + } } } + for (t = ign->tunnels_r[h0^h1]; t; t = t->next) { if (remote == t->parms.iph.daddr) { - if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) - return t; + if (t->parms.i_key == key && t->dev->flags & IFF_UP) { + if (t->dev->type == dev_type) + return t; + if (t->dev->type == ARPHRD_IPGRE && !t2) + t2 = t; + } } } + for (t = ign->tunnels_l[h1]; t; t = t->next) { if (local == t->parms.iph.saddr || (local == t->parms.iph.daddr && ipv4_is_multicast(local))) { - if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) - return t; + if (t->parms.i_key == key && t->dev->flags & IFF_UP) { + if (t->dev->type == dev_type) + return t; + if (t->dev->type == ARPHRD_IPGRE && !t2) + t2 = t; + } } } + for (t = ign->tunnels_wc[h1]; t; t = t->next) { - if (t->parms.i_key == key && (t->dev->flags&IFF_UP)) - return t; + if (t->parms.i_key == key && t->dev->flags & IFF_UP) { + if (t->dev->type == dev_type) + return t; + if (t->dev->type == ARPHRD_IPGRE && !t2) + t2 = t; + } } + if (t2) + return t2; + if (ign->fb_tunnel_dev->flags&IFF_UP) return netdev_priv(ign->fb_tunnel_dev); return NULL; @@ -249,25 +279,37 @@ static void ipgre_tunnel_unlink(struct ipgre_net *ign, struct ip_tunnel *t) } } -static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, - struct ip_tunnel_parm *parms, int create) +static struct ip_tunnel *ipgre_tunnel_find(struct net *net, + struct ip_tunnel_parm *parms, + int type) { __be32 remote = parms->iph.daddr; __be32 local = parms->iph.saddr; __be32 key = parms->i_key; - struct ip_tunnel *t, **tp, *nt; + struct ip_tunnel *t, **tp; + struct ipgre_net *ign = net_generic(net, ipgre_net_id); + + for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) + if (local == t->parms.iph.saddr && + remote == t->parms.iph.daddr && + key == t->parms.i_key && + type == t->dev->type) + break; + + return t; +} + +static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, + struct ip_tunnel_parm *parms, int create) +{ + struct ip_tunnel *t, *nt; struct net_device *dev; char name[IFNAMSIZ]; struct ipgre_net *ign = net_generic(net, ipgre_net_id); - for (tp = __ipgre_bucket(ign, parms); (t = *tp) != NULL; tp = &t->next) { - if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) { - if (key == t->parms.i_key) - return t; - } - } - if (!create) - return NULL; + t = ipgre_tunnel_find(net, parms, ARPHRD_IPGRE); + if (t || !create) + return t; if (parms->name[0]) strlcpy(name, parms->name, IFNAMSIZ); @@ -285,9 +327,11 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct net *net, goto failed_free; } - dev->init = ipgre_tunnel_init; nt = netdev_priv(dev); nt->parms = *parms; + dev->rtnl_link_ops = &ipgre_link_ops; + + dev->mtu = ipgre_tunnel_bind_dev(dev); if (register_netdevice(dev) < 0) goto failed_free; @@ -380,8 +424,9 @@ static void ipgre_err(struct sk_buff *skb, u32 info) read_lock(&ipgre_lock); t = ipgre_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr, - (flags&GRE_KEY) ? - *(((__be32*)p) + (grehlen>>2) - 1) : 0); + flags & GRE_KEY ? + *(((__be32 *)p) + (grehlen / 4) - 1) : 0, + p[1]); if (t == NULL || t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) goto out; @@ -431,6 +476,8 @@ static int ipgre_rcv(struct sk_buff *skb) u32 seqno = 0; struct ip_tunnel *tunnel; int offset = 4; + __be16 gre_proto; + unsigned int len; if (!pskb_may_pull(skb, 16)) goto drop_nolock; @@ -470,20 +517,22 @@ static int ipgre_rcv(struct sk_buff *skb) } } + gre_proto = *(__be16 *)(h + 2); + read_lock(&ipgre_lock); if ((tunnel = ipgre_tunnel_lookup(dev_net(skb->dev), - iph->saddr, iph->daddr, key)) != NULL) { + iph->saddr, iph->daddr, key, + gre_proto))) { struct net_device_stats *stats = &tunnel->dev->stats; secpath_reset(skb); - skb->protocol = *(__be16*)(h + 2); + skb->protocol = gre_proto; /* WCCP version 1 and 2 protocol decoding. * - Change protocol to IP * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header */ - if (flags == 0 && - skb->protocol == htons(ETH_P_WCCP)) { + if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { skb->protocol = htons(ETH_P_IP); if ((*(h + offset) & 0xF0) != 0x40) offset += 4; @@ -491,7 +540,6 @@ static int ipgre_rcv(struct sk_buff *skb) skb->mac_header = skb->network_header; __pskb_pull(skb, offset); - skb_reset_network_header(skb); skb_postpull_rcsum(skb, skb_transport_header(skb), offset); skb->pkt_type = PACKET_HOST; #ifdef CONFIG_NET_IPGRE_BROADCAST @@ -519,13 +567,32 @@ static int ipgre_rcv(struct sk_buff *skb) } tunnel->i_seqno = seqno + 1; } + + len = skb->len; + + /* Warning: All skb pointers will be invalidated! */ + if (tunnel->dev->type == ARPHRD_ETHER) { + if (!pskb_may_pull(skb, ETH_HLEN)) { + stats->rx_length_errors++; + stats->rx_errors++; + goto drop; + } + + iph = ip_hdr(skb); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } + stats->rx_packets++; - stats->rx_bytes += skb->len; + stats->rx_bytes += len; skb->dev = tunnel->dev; dst_release(skb->dst); skb->dst = NULL; nf_reset(skb); + + skb_reset_network_header(skb); ipgre_ecn_decapsulate(iph, skb); + netif_rx(skb); read_unlock(&ipgre_lock); return(0); @@ -560,7 +627,10 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_error; } - if (dev->header_ops) { + if (dev->type == ARPHRD_ETHER) + IPCB(skb)->flags = 0; + + if (dev->header_ops && dev->type == ARPHRD_IPGRE) { gre_hlen = 0; tiph = (struct iphdr*)skb->data; } else { @@ -637,7 +707,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) df = tiph->frag_off; if (df) - mtu = dst_mtu(&rt->u.dst) - tunnel->hlen; + mtu = dst_mtu(&rt->u.dst) - dev->hard_header_len - tunnel->hlen; else mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu; @@ -703,7 +773,7 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) old_iph = ip_hdr(skb); } - skb->transport_header = skb->network_header; + skb_reset_transport_header(skb); skb_push(skb, gre_hlen); skb_reset_network_header(skb); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); @@ -736,8 +806,9 @@ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT); } - ((__be16*)(iph+1))[0] = tunnel->parms.o_flags; - ((__be16*)(iph+1))[1] = skb->protocol; + ((__be16 *)(iph + 1))[0] = tunnel->parms.o_flags; + ((__be16 *)(iph + 1))[1] = (dev->type == ARPHRD_ETHER) ? + htons(ETH_P_TEB) : skb->protocol; if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { __be32 *ptr = (__be32*)(((u8*)iph) + tunnel->hlen - 4); @@ -773,7 +844,7 @@ tx_error: return 0; } -static void ipgre_tunnel_bind_dev(struct net_device *dev) +static int ipgre_tunnel_bind_dev(struct net_device *dev) { struct net_device *tdev = NULL; struct ip_tunnel *tunnel; @@ -785,7 +856,7 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev) tunnel = netdev_priv(dev); iph = &tunnel->parms.iph; - /* Guess output device to choose reasonable mtu and hard_header_len */ + /* Guess output device to choose reasonable mtu and needed_headroom */ if (iph->daddr) { struct flowi fl = { .oif = tunnel->parms.link, @@ -799,14 +870,16 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev) tdev = rt->u.dst.dev; ip_rt_put(rt); } - dev->flags |= IFF_POINTOPOINT; + + if (dev->type != ARPHRD_ETHER) + dev->flags |= IFF_POINTOPOINT; } if (!tdev && tunnel->parms.link) tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); if (tdev) { - hlen = tdev->hard_header_len; + hlen = tdev->hard_header_len + tdev->needed_headroom; mtu = tdev->mtu; } dev->iflink = tunnel->parms.link; @@ -820,10 +893,15 @@ static void ipgre_tunnel_bind_dev(struct net_device *dev) if (tunnel->parms.o_flags&GRE_SEQ) addend += 4; } - dev->hard_header_len = hlen + addend; - dev->mtu = mtu - addend; + dev->needed_headroom = addend + hlen; + mtu -= dev->hard_header_len - addend; + + if (mtu < 68) + mtu = 68; + tunnel->hlen = addend; + return mtu; } static int @@ -917,7 +995,7 @@ ipgre_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t->parms.iph.frag_off = p.iph.frag_off; if (t->parms.link != p.link) { t->parms.link = p.link; - ipgre_tunnel_bind_dev(dev); + dev->mtu = ipgre_tunnel_bind_dev(dev); netdev_state_change(dev); } } @@ -959,7 +1037,8 @@ done: static int ipgre_tunnel_change_mtu(struct net_device *dev, int new_mtu) { struct ip_tunnel *tunnel = netdev_priv(dev); - if (new_mtu < 68 || new_mtu > 0xFFF8 - tunnel->hlen) + if (new_mtu < 68 || + new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) return -EINVAL; dev->mtu = new_mtu; return 0; @@ -1078,6 +1157,7 @@ static int ipgre_close(struct net_device *dev) static void ipgre_tunnel_setup(struct net_device *dev) { + dev->init = ipgre_tunnel_init; dev->uninit = ipgre_tunnel_uninit; dev->destructor = free_netdev; dev->hard_start_xmit = ipgre_tunnel_xmit; @@ -1085,7 +1165,7 @@ static void ipgre_tunnel_setup(struct net_device *dev) dev->change_mtu = ipgre_tunnel_change_mtu; dev->type = ARPHRD_IPGRE; - dev->hard_header_len = LL_MAX_HEADER + sizeof(struct iphdr) + 4; + dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4; dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4; dev->flags = IFF_NOARP; dev->iflink = 0; @@ -1107,8 +1187,6 @@ static int ipgre_tunnel_init(struct net_device *dev) memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4); memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); - ipgre_tunnel_bind_dev(dev); - if (iph->daddr) { #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { @@ -1189,6 +1267,7 @@ static int ipgre_init_net(struct net *net) ign->fb_tunnel_dev->init = ipgre_fb_tunnel_init; dev_net_set(ign->fb_tunnel_dev, net); + ign->fb_tunnel_dev->rtnl_link_ops = &ipgre_link_ops; if ((err = register_netdev(ign->fb_tunnel_dev))) goto err_reg_dev; @@ -1221,6 +1300,298 @@ static struct pernet_operations ipgre_net_ops = { .exit = ipgre_exit_net, }; +static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + __be16 flags; + + if (!data) + return 0; + + flags = 0; + if (data[IFLA_GRE_IFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); + if (data[IFLA_GRE_OFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); + if (flags & (GRE_VERSION|GRE_ROUTING)) + return -EINVAL; + + return 0; +} + +static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + __be32 daddr; + + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + + if (!data) + goto out; + + if (data[IFLA_GRE_REMOTE]) { + memcpy(&daddr, nla_data(data[IFLA_GRE_REMOTE]), 4); + if (!daddr) + return -EINVAL; + } + +out: + return ipgre_tunnel_validate(tb, data); +} + +static void ipgre_netlink_parms(struct nlattr *data[], + struct ip_tunnel_parm *parms) +{ + memset(parms, 0, sizeof(*parms)); + + parms->iph.protocol = IPPROTO_GRE; + + if (!data) + return; + + if (data[IFLA_GRE_LINK]) + parms->link = nla_get_u32(data[IFLA_GRE_LINK]); + + if (data[IFLA_GRE_IFLAGS]) + parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); + + if (data[IFLA_GRE_OFLAGS]) + parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); + + if (data[IFLA_GRE_IKEY]) + parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); + + if (data[IFLA_GRE_OKEY]) + parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); + + if (data[IFLA_GRE_LOCAL]) + parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]); + + if (data[IFLA_GRE_REMOTE]) + parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]); + + if (data[IFLA_GRE_TTL]) + parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]); + + if (data[IFLA_GRE_TOS]) + parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]); + + if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) + parms->iph.frag_off = htons(IP_DF); +} + +static int ipgre_tap_init(struct net_device *dev) +{ + struct ip_tunnel *tunnel; + + tunnel = netdev_priv(dev); + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + + ipgre_tunnel_bind_dev(dev); + + return 0; +} + +static void ipgre_tap_setup(struct net_device *dev) +{ + + ether_setup(dev); + + dev->init = ipgre_tap_init; + dev->uninit = ipgre_tunnel_uninit; + dev->destructor = free_netdev; + dev->hard_start_xmit = ipgre_tunnel_xmit; + dev->change_mtu = ipgre_tunnel_change_mtu; + + dev->iflink = 0; + dev->features |= NETIF_F_NETNS_LOCAL; +} + +static int ipgre_newlink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct ip_tunnel *nt; + struct net *net = dev_net(dev); + struct ipgre_net *ign = net_generic(net, ipgre_net_id); + int mtu; + int err; + + nt = netdev_priv(dev); + ipgre_netlink_parms(data, &nt->parms); + + if (ipgre_tunnel_find(net, &nt->parms, dev->type)) + return -EEXIST; + + if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) + random_ether_addr(dev->dev_addr); + + mtu = ipgre_tunnel_bind_dev(dev); + if (!tb[IFLA_MTU]) + dev->mtu = mtu; + + err = register_netdevice(dev); + if (err) + goto out; + + dev_hold(dev); + ipgre_tunnel_link(ign, nt); + +out: + return err; +} + +static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct ip_tunnel *t, *nt; + struct net *net = dev_net(dev); + struct ipgre_net *ign = net_generic(net, ipgre_net_id); + struct ip_tunnel_parm p; + int mtu; + + if (dev == ign->fb_tunnel_dev) + return -EINVAL; + + nt = netdev_priv(dev); + ipgre_netlink_parms(data, &p); + + t = ipgre_tunnel_locate(net, &p, 0); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else { + unsigned nflags = 0; + + t = nt; + + if (ipv4_is_multicast(p.iph.daddr)) + nflags = IFF_BROADCAST; + else if (p.iph.daddr) + nflags = IFF_POINTOPOINT; + + if ((dev->flags ^ nflags) & + (IFF_POINTOPOINT | IFF_BROADCAST)) + return -EINVAL; + + ipgre_tunnel_unlink(ign, t); + t->parms.iph.saddr = p.iph.saddr; + t->parms.iph.daddr = p.iph.daddr; + t->parms.i_key = p.i_key; + memcpy(dev->dev_addr, &p.iph.saddr, 4); + memcpy(dev->broadcast, &p.iph.daddr, 4); + ipgre_tunnel_link(ign, t); + netdev_state_change(dev); + } + + t->parms.o_key = p.o_key; + t->parms.iph.ttl = p.iph.ttl; + t->parms.iph.tos = p.iph.tos; + t->parms.iph.frag_off = p.iph.frag_off; + + if (t->parms.link != p.link) { + t->parms.link = p.link; + mtu = ipgre_tunnel_bind_dev(dev); + if (!tb[IFLA_MTU]) + dev->mtu = mtu; + netdev_state_change(dev); + } + + return 0; +} + +static size_t ipgre_get_size(const struct net_device *dev) +{ + return + /* IFLA_GRE_LINK */ + nla_total_size(4) + + /* IFLA_GRE_IFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_OFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_IKEY */ + nla_total_size(4) + + /* IFLA_GRE_OKEY */ + nla_total_size(4) + + /* IFLA_GRE_LOCAL */ + nla_total_size(4) + + /* IFLA_GRE_REMOTE */ + nla_total_size(4) + + /* IFLA_GRE_TTL */ + nla_total_size(1) + + /* IFLA_GRE_TOS */ + nla_total_size(1) + + /* IFLA_GRE_PMTUDISC */ + nla_total_size(1) + + 0; +} + +static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip_tunnel *t = netdev_priv(dev); + struct ip_tunnel_parm *p = &t->parms; + + NLA_PUT_U32(skb, IFLA_GRE_LINK, p->link); + NLA_PUT_BE16(skb, IFLA_GRE_IFLAGS, p->i_flags); + NLA_PUT_BE16(skb, IFLA_GRE_OFLAGS, p->o_flags); + NLA_PUT_BE32(skb, IFLA_GRE_IKEY, p->i_key); + NLA_PUT_BE32(skb, IFLA_GRE_OKEY, p->o_key); + NLA_PUT_BE32(skb, IFLA_GRE_LOCAL, p->iph.saddr); + NLA_PUT_BE32(skb, IFLA_GRE_REMOTE, p->iph.daddr); + NLA_PUT_U8(skb, IFLA_GRE_TTL, p->iph.ttl); + NLA_PUT_U8(skb, IFLA_GRE_TOS, p->iph.tos); + NLA_PUT_U8(skb, IFLA_GRE_PMTUDISC, !!(p->iph.frag_off & htons(IP_DF))); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { + [IFLA_GRE_LINK] = { .type = NLA_U32 }, + [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_IKEY] = { .type = NLA_U32 }, + [IFLA_GRE_OKEY] = { .type = NLA_U32 }, + [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) }, + [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) }, + [IFLA_GRE_TTL] = { .type = NLA_U8 }, + [IFLA_GRE_TOS] = { .type = NLA_U8 }, + [IFLA_GRE_PMTUDISC] = { .type = NLA_U8 }, +}; + +static struct rtnl_link_ops ipgre_link_ops __read_mostly = { + .kind = "gre", + .maxtype = IFLA_GRE_MAX, + .policy = ipgre_policy, + .priv_size = sizeof(struct ip_tunnel), + .setup = ipgre_tunnel_setup, + .validate = ipgre_tunnel_validate, + .newlink = ipgre_newlink, + .changelink = ipgre_changelink, + .get_size = ipgre_get_size, + .fill_info = ipgre_fill_info, +}; + +static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { + .kind = "gretap", + .maxtype = IFLA_GRE_MAX, + .policy = ipgre_policy, + .priv_size = sizeof(struct ip_tunnel), + .setup = ipgre_tap_setup, + .validate = ipgre_tap_validate, + .newlink = ipgre_newlink, + .changelink = ipgre_changelink, + .get_size = ipgre_get_size, + .fill_info = ipgre_fill_info, +}; + /* * And now the modules code and kernel interface. */ @@ -1238,19 +1609,39 @@ static int __init ipgre_init(void) err = register_pernet_gen_device(&ipgre_net_id, &ipgre_net_ops); if (err < 0) - inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); + goto gen_device_failed; + err = rtnl_link_register(&ipgre_link_ops); + if (err < 0) + goto rtnl_link_failed; + + err = rtnl_link_register(&ipgre_tap_ops); + if (err < 0) + goto tap_ops_failed; + +out: return err; + +tap_ops_failed: + rtnl_link_unregister(&ipgre_link_ops); +rtnl_link_failed: + unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); +gen_device_failed: + inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); + goto out; } static void __exit ipgre_fini(void) { + rtnl_link_unregister(&ipgre_tap_ops); + rtnl_link_unregister(&ipgre_link_ops); + unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); - - unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops); } module_init(ipgre_init); module_exit(ipgre_fini); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("gre"); +MODULE_ALIAS_RTNL_LINK("gretap"); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index e0bed56c51f1..cfb38ac9d698 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -8,7 +8,7 @@ * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> - * Alan Cox, <Alan.Cox@linux.org> + * Alan Cox, <alan@lxorguk.ukuu.org.uk> * Richard Underwood * Stefan Becker, <stefanb@yello.ping.de> * Jorge Cwik, <jorge@laser.satlink.net> @@ -209,9 +209,17 @@ static int ip_local_deliver_finish(struct sk_buff *skb) hash = protocol & (MAX_INET_PROTOS - 1); ipprot = rcu_dereference(inet_protos[hash]); - if (ipprot != NULL && (net == &init_net || ipprot->netns_ok)) { + if (ipprot != NULL) { int ret; + if (!net_eq(net, &init_net) && !ipprot->netns_ok) { + if (net_ratelimit()) + printk("%s: proto %d isn't netns-ready\n", + __func__, protocol); + kfree_skb(skb); + goto out; + } + if (!ipprot->no_policy) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { kfree_skb(skb); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index be3f18a7a40e..2c88da6e7862 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -438,7 +438,7 @@ int ip_options_compile(struct net *net, goto error; } opt->cipso = optptr - iph; - if (cipso_v4_validate(&optptr)) { + if (cipso_v4_validate(skb, &optptr)) { pp_ptr = optptr; goto error; } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d533a89e08de..d2a8f8bb78a6 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -340,6 +340,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) .saddr = inet->saddr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet->sport, .dport = inet->dport } } }; @@ -1371,7 +1372,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .uli_u = { .ports = { .sport = tcp_hdr(skb)->dest, .dport = tcp_hdr(skb)->source } }, - .proto = sk->sk_protocol }; + .proto = sk->sk_protocol, + .flags = ip_reply_arg_flowi_flags(arg) }; security_skb_classify_flow(skb, &fl); if (ip_route_output_key(sock_net(sk), &rt, &fl)) return; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 105d92a039b9..465abf0a9869 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -419,7 +419,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<<IP_TTL) | (1<<IP_HDRINCL) | (1<<IP_MTU_DISCOVER) | (1<<IP_RECVERR) | (1<<IP_ROUTER_ALERT) | (1<<IP_FREEBIND) | - (1<<IP_PASSSEC))) || + (1<<IP_PASSSEC) | (1<<IP_TRANSPARENT))) || optname == IP_MULTICAST_TTL || optname == IP_MULTICAST_LOOP) { if (optlen >= sizeof(int)) { @@ -878,6 +878,16 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = xfrm_user_policy(sk, optname, optval, optlen); break; + case IP_TRANSPARENT: + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + break; + } + if (optlen < 1) + goto e_inval; + inet->transparent = !!val; + break; + default: err = -ENOPROTOOPT; break; @@ -1130,6 +1140,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_FREEBIND: val = inet->freebind; break; + case IP_TRANSPARENT: + val = inet->transparent; + break; default: release_sock(sk); return -ENOPROTOOPT; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 4c6d2caf9203..29609d29df76 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -41,7 +41,7 @@ Made the tunnels use dev->name not tunnel: when error reporting. Added tx_dropped stat - -Alan Cox (Alan.Cox@linux.org) 21 March 95 + -Alan Cox (alan@lxorguk.ukuu.org.uk) 21 March 95 Reworked: Changed to tunnel to destination gateway in addition to the diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c519b8d30eee..25924b1eb2ef 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1,7 +1,7 @@ /* * IP multicast routing support for mrouted 3.6/3.8 * - * (c) 1995 Alan Cox, <alan@redhat.com> + * (c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk> * Linux Consultancy and Custom Driver Development * * This program is free software; you can redistribute it and/or @@ -1945,13 +1945,14 @@ int __init ip_mr_init(void) goto proc_cache_fail; #endif return 0; -reg_notif_fail: - kmem_cache_destroy(mrt_cachep); #ifdef CONFIG_PROC_FS -proc_vif_fail: - unregister_netdevice_notifier(&ip_mr_notifier); proc_cache_fail: proc_net_remove(&init_net, "ip_mr_vif"); +proc_vif_fail: + unregister_netdevice_notifier(&ip_mr_notifier); #endif +reg_notif_fail: + del_timer(&ipmr_expire_timer); + kmem_cache_destroy(mrt_cachep); return err; } diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c deleted file mode 100644 index 73e0ea87c1f5..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto_ah.c +++ /dev/null @@ -1,178 +0,0 @@ -/* - * ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS - * - * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 - * Wensong Zhang <wensong@linuxvirtualserver.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation; - * - */ - -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> - -#include <net/ip_vs.h> - - -/* TODO: - -struct isakmp_hdr { - __u8 icookie[8]; - __u8 rcookie[8]; - __u8 np; - __u8 version; - __u8 xchgtype; - __u8 flags; - __u32 msgid; - __u32 length; -}; - -*/ - -#define PORT_ISAKMP 500 - - -static struct ip_vs_conn * -ah_conn_in_get(const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct iphdr *iph, - unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; - - if (likely(!inverse)) { - cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->saddr, - htons(PORT_ISAKMP), - iph->daddr, - htons(PORT_ISAKMP)); - } else { - cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->daddr, - htons(PORT_ISAKMP), - iph->saddr, - htons(PORT_ISAKMP)); - } - - if (!cp) { - /* - * We are not sure if the packet is from our - * service, so our conn_schedule hook should return NF_ACCEPT - */ - IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet " - "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", - inverse ? "ICMP+" : "", - pp->name, - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); - } - - return cp; -} - - -static struct ip_vs_conn * -ah_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) -{ - struct ip_vs_conn *cp; - - if (likely(!inverse)) { - cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->saddr, - htons(PORT_ISAKMP), - iph->daddr, - htons(PORT_ISAKMP)); - } else { - cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->daddr, - htons(PORT_ISAKMP), - iph->saddr, - htons(PORT_ISAKMP)); - } - - if (!cp) { - IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet " - "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", - inverse ? "ICMP+" : "", - pp->name, - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); - } - - return cp; -} - - -static int -ah_conn_schedule(struct sk_buff *skb, - struct ip_vs_protocol *pp, - int *verdict, struct ip_vs_conn **cpp) -{ - /* - * AH is only related traffic. Pass the packet to IP stack. - */ - *verdict = NF_ACCEPT; - return 0; -} - - -static void -ah_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, - int offset, const char *msg) -{ - char buf[256]; - struct iphdr _iph, *ih; - - ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); - if (ih == NULL) - sprintf(buf, "%s TRUNCATED", pp->name); - else - sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", - pp->name, NIPQUAD(ih->saddr), - NIPQUAD(ih->daddr)); - - printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); -} - - -static void ah_init(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - -static void ah_exit(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - -struct ip_vs_protocol ip_vs_protocol_ah = { - .name = "AH", - .protocol = IPPROTO_AH, - .num_states = 1, - .dont_defrag = 1, - .init = ah_init, - .exit = ah_exit, - .conn_schedule = ah_conn_schedule, - .conn_in_get = ah_conn_in_get, - .conn_out_get = ah_conn_out_get, - .snat_handler = NULL, - .dnat_handler = NULL, - .csum_check = NULL, - .state_transition = NULL, - .register_app = NULL, - .unregister_app = NULL, - .app_conn_bind = NULL, - .debug_packet = ah_debug_packet, - .timeout_change = NULL, /* ISAKMP */ - .set_state_timeout = NULL, -}; diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c deleted file mode 100644 index 21d70c8ffa54..000000000000 --- a/net/ipv4/ipvs/ip_vs_proto_esp.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS - * - * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 - * Wensong Zhang <wensong@linuxvirtualserver.org> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation; - * - */ - -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> - -#include <net/ip_vs.h> - - -/* TODO: - -struct isakmp_hdr { - __u8 icookie[8]; - __u8 rcookie[8]; - __u8 np; - __u8 version; - __u8 xchgtype; - __u8 flags; - __u32 msgid; - __u32 length; -}; - -*/ - -#define PORT_ISAKMP 500 - - -static struct ip_vs_conn * -esp_conn_in_get(const struct sk_buff *skb, - struct ip_vs_protocol *pp, - const struct iphdr *iph, - unsigned int proto_off, - int inverse) -{ - struct ip_vs_conn *cp; - - if (likely(!inverse)) { - cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->saddr, - htons(PORT_ISAKMP), - iph->daddr, - htons(PORT_ISAKMP)); - } else { - cp = ip_vs_conn_in_get(IPPROTO_UDP, - iph->daddr, - htons(PORT_ISAKMP), - iph->saddr, - htons(PORT_ISAKMP)); - } - - if (!cp) { - /* - * We are not sure if the packet is from our - * service, so our conn_schedule hook should return NF_ACCEPT - */ - IP_VS_DBG(12, "Unknown ISAKMP entry for outin packet " - "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", - inverse ? "ICMP+" : "", - pp->name, - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); - } - - return cp; -} - - -static struct ip_vs_conn * -esp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) -{ - struct ip_vs_conn *cp; - - if (likely(!inverse)) { - cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->saddr, - htons(PORT_ISAKMP), - iph->daddr, - htons(PORT_ISAKMP)); - } else { - cp = ip_vs_conn_out_get(IPPROTO_UDP, - iph->daddr, - htons(PORT_ISAKMP), - iph->saddr, - htons(PORT_ISAKMP)); - } - - if (!cp) { - IP_VS_DBG(12, "Unknown ISAKMP entry for inout packet " - "%s%s %u.%u.%u.%u->%u.%u.%u.%u\n", - inverse ? "ICMP+" : "", - pp->name, - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); - } - - return cp; -} - - -static int -esp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, - int *verdict, struct ip_vs_conn **cpp) -{ - /* - * ESP is only related traffic. Pass the packet to IP stack. - */ - *verdict = NF_ACCEPT; - return 0; -} - - -static void -esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, - int offset, const char *msg) -{ - char buf[256]; - struct iphdr _iph, *ih; - - ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); - if (ih == NULL) - sprintf(buf, "%s TRUNCATED", pp->name); - else - sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", - pp->name, NIPQUAD(ih->saddr), - NIPQUAD(ih->daddr)); - - printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); -} - - -static void esp_init(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - -static void esp_exit(struct ip_vs_protocol *pp) -{ - /* nothing to do now */ -} - - -struct ip_vs_protocol ip_vs_protocol_esp = { - .name = "ESP", - .protocol = IPPROTO_ESP, - .num_states = 1, - .dont_defrag = 1, - .init = esp_init, - .exit = esp_exit, - .conn_schedule = esp_conn_schedule, - .conn_in_get = esp_conn_in_get, - .conn_out_get = esp_conn_out_get, - .snat_handler = NULL, - .dnat_handler = NULL, - .csum_check = NULL, - .state_transition = NULL, - .register_app = NULL, - .unregister_app = NULL, - .app_conn_bind = NULL, - .debug_packet = esp_debug_packet, - .timeout_change = NULL, /* ISAKMP */ -}; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index f8edacdf991d..6efdb70b3eb2 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -12,6 +12,7 @@ /* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) { + struct net *net = dev_net(skb->dst->dev); const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; struct flowi fl = {}; @@ -19,7 +20,9 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) unsigned int hh_len; unsigned int type; - type = inet_addr_type(&init_net, iph->saddr); + type = inet_addr_type(net, iph->saddr); + if (skb->sk && inet_sk(skb->sk)->transparent) + type = RTN_LOCAL; if (addr_type == RTN_UNSPEC) addr_type = type; @@ -33,7 +36,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl.mark = skb->mark; - if (ip_route_output_key(&init_net, &rt, &fl) != 0) + fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; + if (ip_route_output_key(net, &rt, &fl) != 0) return -1; /* Drop old route. */ @@ -43,7 +47,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) /* non-local src, find valid iif to satisfy * rp-filter when calling ip_route_input. */ fl.nl_u.ip4_u.daddr = iph->saddr; - if (ip_route_output_key(&init_net, &rt, &fl) != 0) + if (ip_route_output_key(net, &rt, &fl) != 0) return -1; odst = skb->dst; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 90eb7cb47e77..3816e1dc9295 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -5,10 +5,15 @@ menu "IP: Netfilter Configuration" depends on INET && NETFILTER +config NF_DEFRAG_IPV4 + tristate + default n + config NF_CONNTRACK_IPV4 tristate "IPv4 connection tracking support (required for NAT)" depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n + select NF_DEFRAG_IPV4 ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related @@ -56,23 +61,30 @@ config IP_NF_IPTABLES To compile it as a module, choose M here. If unsure, say N. +if IP_NF_IPTABLES + # The matches. -config IP_NF_MATCH_RECENT - tristate '"recent" match support' - depends on IP_NF_IPTABLES +config IP_NF_MATCH_ADDRTYPE + tristate '"addrtype" address type match support' depends on NETFILTER_ADVANCED help - This match is used for creating one or many lists of recently - used addresses and then matching against that/those list(s). + This option allows you to match what routing thinks of an address, + eg. UNICAST, LOCAL, BROADCAST, ... - Short options are available by using 'iptables -m recent -h' - Official Website: <http://snowman.net/projects/ipt_recent/> + If you want to compile it as a module, say M here and read + <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. + +config IP_NF_MATCH_AH + tristate '"ah" match support' + depends on NETFILTER_ADVANCED + help + This match extension allows you to match a range of SPIs + inside AH header of IPSec packets. To compile it as a module, choose M here. If unsure, say N. config IP_NF_MATCH_ECN tristate '"ecn" match support' - depends on IP_NF_IPTABLES depends on NETFILTER_ADVANCED help This option adds a `ECN' match, which allows you to match against @@ -80,19 +92,8 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_AH - tristate '"ah" match support' - depends on IP_NF_IPTABLES - depends on NETFILTER_ADVANCED - help - This match extension allows you to match a range of SPIs - inside AH header of IPSec packets. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_MATCH_TTL tristate '"ttl" match support' - depends on IP_NF_IPTABLES depends on NETFILTER_ADVANCED help This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user @@ -100,21 +101,9 @@ config IP_NF_MATCH_TTL To compile it as a module, choose M here. If unsure, say N. -config IP_NF_MATCH_ADDRTYPE - tristate '"addrtype" address type match support' - depends on IP_NF_IPTABLES - depends on NETFILTER_ADVANCED - help - This option allows you to match what routing thinks of an address, - eg. UNICAST, LOCAL, BROADCAST, ... - - If you want to compile it as a module, say M here and read - <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. - # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n help Packet filtering defines a table `filter', which has a series of @@ -136,7 +125,6 @@ config IP_NF_TARGET_REJECT config IP_NF_TARGET_LOG tristate "LOG target support" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n help This option adds a `LOG' target, which allows you to create rules in @@ -146,7 +134,6 @@ config IP_NF_TARGET_LOG config IP_NF_TARGET_ULOG tristate "ULOG target support" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n ---help--- @@ -167,7 +154,7 @@ config IP_NF_TARGET_ULOG # NAT + specific targets: nf_conntrack config NF_NAT tristate "Full NAT" - depends on IP_NF_IPTABLES && NF_CONNTRACK_IPV4 + depends on NF_CONNTRACK_IPV4 default m if NETFILTER_ADVANCED=n help The Full NAT option allows masquerading, port forwarding and other @@ -194,26 +181,26 @@ config IP_NF_TARGET_MASQUERADE To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_REDIRECT - tristate "REDIRECT target support" +config IP_NF_TARGET_NETMAP + tristate "NETMAP target support" depends on NF_NAT depends on NETFILTER_ADVANCED help - REDIRECT is a special case of NAT: all incoming connections are - mapped onto the incoming interface's address, causing the packets to - come to the local machine instead of passing through. This is - useful for transparent proxies. + NETMAP is an implementation of static 1:1 NAT mapping of network + addresses. It maps the network address part, while keeping the host + address part intact. To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_NETMAP - tristate "NETMAP target support" +config IP_NF_TARGET_REDIRECT + tristate "REDIRECT target support" depends on NF_NAT depends on NETFILTER_ADVANCED help - NETMAP is an implementation of static 1:1 NAT mapping of network - addresses. It maps the network address part, while keeping the host - address part intact. + REDIRECT is a special case of NAT: all incoming connections are + mapped onto the incoming interface's address, causing the packets to + come to the local machine instead of passing through. This is + useful for transparent proxies. To compile it as a module, choose M here. If unsure, say N. @@ -262,44 +249,43 @@ config NF_NAT_PROTO_SCTP config NF_NAT_FTP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_FTP config NF_NAT_IRC tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_IRC config NF_NAT_TFTP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_TFTP config NF_NAT_AMANDA tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_AMANDA config NF_NAT_PPTP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_PPTP select NF_NAT_PROTO_GRE config NF_NAT_H323 tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_H323 config NF_NAT_SIP tristate - depends on IP_NF_IPTABLES && NF_CONNTRACK && NF_NAT + depends on NF_CONNTRACK && NF_NAT default NF_NAT && NF_CONNTRACK_SIP # mangle + specific targets config IP_NF_MANGLE tristate "Packet mangling" - depends on IP_NF_IPTABLES default m if NETFILTER_ADVANCED=n help This option adds a `mangle' table to iptables: see the man page for @@ -308,6 +294,19 @@ config IP_NF_MANGLE To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_CLUSTERIP + tristate "CLUSTERIP target support (EXPERIMENTAL)" + depends on IP_NF_MANGLE && EXPERIMENTAL + depends on NF_CONNTRACK_IPV4 + depends on NETFILTER_ADVANCED + select NF_CONNTRACK_MARK + help + The CLUSTERIP target allows you to build load-balancing clusters of + network servers without having a dedicated load-balancing + router/server/switch. + + To compile it as a module, choose M here. If unsure, say N. + config IP_NF_TARGET_ECN tristate "ECN target support" depends on IP_NF_MANGLE @@ -338,23 +337,9 @@ config IP_NF_TARGET_TTL To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_CLUSTERIP - tristate "CLUSTERIP target support (EXPERIMENTAL)" - depends on IP_NF_MANGLE && EXPERIMENTAL - depends on NF_CONNTRACK_IPV4 - depends on NETFILTER_ADVANCED - select NF_CONNTRACK_MARK - help - The CLUSTERIP target allows you to build load-balancing clusters of - network servers without having a dedicated load-balancing - router/server/switch. - - To compile it as a module, choose M here. If unsure, say N. - # raw + specific targets config IP_NF_RAW tristate 'raw table support (required for NOTRACK/TRACE)' - depends on IP_NF_IPTABLES depends on NETFILTER_ADVANCED help This option adds a `raw' table to iptables. This table is the very @@ -367,7 +352,6 @@ config IP_NF_RAW # security table for MAC policy config IP_NF_SECURITY tristate "Security table" - depends on IP_NF_IPTABLES depends on SECURITY depends on NETFILTER_ADVANCED help @@ -376,6 +360,8 @@ config IP_NF_SECURITY If unsure, say N. +endif # IP_NF_IPTABLES + # ARP tables config IP_NF_ARPTABLES tristate "ARP tables support" @@ -388,9 +374,10 @@ config IP_NF_ARPTABLES To compile it as a module, choose M here. If unsure, say N. +if IP_NF_ARPTABLES + config IP_NF_ARPFILTER tristate "ARP packet filtering" - depends on IP_NF_ARPTABLES help ARP packet filtering defines a table `filter', which has a series of rules for simple ARP packet filtering at local input and @@ -401,10 +388,11 @@ config IP_NF_ARPFILTER config IP_NF_ARP_MANGLE tristate "ARP payload mangling" - depends on IP_NF_ARPTABLES help Allows altering the ARP packet payload: source and destination hardware and network addresses. +endif # IP_NF_ARPTABLES + endmenu diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 3f31291f37ce..5f9b650d90fc 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -18,6 +18,9 @@ obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o obj-$(CONFIG_NF_NAT) += nf_nat.o +# defrag +obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o + # NAT helpers (nf_conntrack) obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o @@ -48,7 +51,6 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o # targets diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 03e83a65aec5..8d70d29f1ccf 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -200,15 +200,12 @@ static inline int arp_checkentry(const struct arpt_arp *arp) return 1; } -static unsigned int arpt_error(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +arpt_error(struct sk_buff *skb, const struct xt_target_param *par) { if (net_ratelimit()) - printk("arp_tables: error: '%s'\n", (char *)targinfo); + printk("arp_tables: error: '%s'\n", + (const char *)par->targinfo); return NF_DROP; } @@ -232,6 +229,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, const char *indev, *outdev; void *table_base; const struct xt_table_info *private; + struct xt_target_param tgpar; if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return NF_DROP; @@ -245,6 +243,11 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); + tgpar.in = in; + tgpar.out = out; + tgpar.hooknum = hook; + tgpar.family = NFPROTO_ARP; + arp = arp_hdr(skb); do { if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { @@ -290,11 +293,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, /* Targets which reenter must return * abs. verdicts */ + tgpar.target = t->u.kernel.target; + tgpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, - in, out, - hook, - t->u.kernel.target, - t->data); + &tgpar); /* Target might have changed stuff. */ arp = arp_hdr(skb); @@ -456,23 +458,24 @@ static inline int check_entry(struct arpt_entry *e, const char *name) static inline int check_target(struct arpt_entry *e, const char *name) { - struct arpt_entry_target *t; - struct xt_target *target; + struct arpt_entry_target *t = arpt_get_target(e); int ret; - - t = arpt_get_target(e); - target = t->u.kernel.target; - - ret = xt_check_target(target, NF_ARP, t->u.target_size - sizeof(*t), - name, e->comefrom, 0, 0); - if (!ret && t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { + struct xt_tgchk_param par = { + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_ARP, + }; + + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); + if (ret < 0) { duprintf("arp_tables: check failed for `%s'.\n", t->u.kernel.target->name); - ret = -EINVAL; + return ret; } - return ret; + return 0; } static inline int @@ -488,7 +491,8 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, return ret; t = arpt_get_target(e); - target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name, + target = try_then_request_module(xt_find_target(NFPROTO_ARP, + t->u.user.name, t->u.user.revision), "arpt_%s", t->u.user.name); if (IS_ERR(target) || !target) { @@ -554,15 +558,19 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) { + struct xt_tgdtor_param par; struct arpt_entry_target *t; if (i && (*i)-- == 0) return 1; t = arpt_get_target(e); - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); - module_put(t->u.kernel.target->me); + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_ARP; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } @@ -788,7 +796,7 @@ static void compat_standard_from_user(void *dst, void *src) int v = *(compat_int_t *)src; if (v > 0) - v += xt_compat_calc_jump(NF_ARP, v); + v += xt_compat_calc_jump(NFPROTO_ARP, v); memcpy(dst, &v, sizeof(v)); } @@ -797,7 +805,7 @@ static int compat_standard_to_user(void __user *dst, void *src) compat_int_t cv = *(int *)src; if (cv > 0) - cv -= xt_compat_calc_jump(NF_ARP, cv); + cv -= xt_compat_calc_jump(NFPROTO_ARP, cv); return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; } @@ -815,7 +823,7 @@ static int compat_calc_entry(struct arpt_entry *e, t = arpt_get_target(e); off += xt_compat_target_offset(t->u.kernel.target); newinfo->size -= off; - ret = xt_compat_add_offset(NF_ARP, entry_offset, off); + ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); if (ret) return ret; @@ -866,9 +874,9 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; #ifdef CONFIG_COMPAT if (compat) - xt_compat_lock(NF_ARP); + xt_compat_lock(NFPROTO_ARP); #endif - t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); if (t && !IS_ERR(t)) { struct arpt_getinfo info; @@ -878,7 +886,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) if (compat) { struct xt_table_info tmp; ret = compat_table_info(private, &tmp); - xt_compat_flush_offsets(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); private = &tmp; } #endif @@ -901,7 +909,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat) ret = t ? PTR_ERR(t) : -ENOENT; #ifdef CONFIG_COMPAT if (compat) - xt_compat_unlock(NF_ARP); + xt_compat_unlock(NFPROTO_ARP); #endif return ret; } @@ -925,7 +933,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, return -EINVAL; } - t = xt_find_table_lock(net, NF_ARP, get.name); + t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (t && !IS_ERR(t)) { const struct xt_table_info *private = t->private; @@ -967,7 +975,7 @@ static int __do_replace(struct net *net, const char *name, goto out; } - t = try_then_request_module(xt_find_table_lock(net, NF_ARP, name), + t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), "arptable_%s", name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; @@ -1134,7 +1142,7 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - t = xt_find_table_lock(net, NF_ARP, name); + t = xt_find_table_lock(net, NFPROTO_ARP, name); if (!t || IS_ERR(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; @@ -1218,7 +1226,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, entry_offset = (void *)e - (void *)base; t = compat_arpt_get_target(e); - target = try_then_request_module(xt_find_target(NF_ARP, + target = try_then_request_module(xt_find_target(NFPROTO_ARP, t->u.user.name, t->u.user.revision), "arpt_%s", t->u.user.name); @@ -1232,7 +1240,7 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, off += xt_compat_target_offset(target); *size += off; - ret = xt_compat_add_offset(NF_ARP, entry_offset, off); + ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); if (ret) goto release_target; @@ -1333,7 +1341,7 @@ static int translate_compat_table(const char *name, duprintf("translate_compat_table: size %u\n", info->size); j = 0; - xt_compat_lock(NF_ARP); + xt_compat_lock(NFPROTO_ARP); /* Walk through entries, checking offsets. */ ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, check_compat_entry_size_and_hooks, @@ -1383,8 +1391,8 @@ static int translate_compat_table(const char *name, ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_copy_entry_from_user, &pos, &size, name, newinfo, entry1); - xt_compat_flush_offsets(NF_ARP); - xt_compat_unlock(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); if (ret) goto free_newinfo; @@ -1420,8 +1428,8 @@ out: COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j); return ret; out_unlock: - xt_compat_flush_offsets(NF_ARP); - xt_compat_unlock(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); goto out; } @@ -1607,8 +1615,8 @@ static int compat_get_entries(struct net *net, return -EINVAL; } - xt_compat_lock(NF_ARP); - t = xt_find_table_lock(net, NF_ARP, get.name); + xt_compat_lock(NFPROTO_ARP); + t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (t && !IS_ERR(t)) { const struct xt_table_info *private = t->private; struct xt_table_info info; @@ -1623,13 +1631,13 @@ static int compat_get_entries(struct net *net, private->size, get.size); ret = -EAGAIN; } - xt_compat_flush_offsets(NF_ARP); + xt_compat_flush_offsets(NFPROTO_ARP); module_put(t->me); xt_table_unlock(t); } else ret = t ? PTR_ERR(t) : -ENOENT; - xt_compat_unlock(NF_ARP); + xt_compat_unlock(NFPROTO_ARP); return ret; } @@ -1709,7 +1717,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len break; } - try_then_request_module(xt_find_revision(NF_ARP, rev.name, + try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, rev.revision, 1, &ret), "arpt_%s", rev.name); break; @@ -1787,7 +1795,7 @@ void arpt_unregister_table(struct xt_table *table) static struct xt_target arpt_standard_target __read_mostly = { .name = ARPT_STANDARD_TARGET, .targetsize = sizeof(int), - .family = NF_ARP, + .family = NFPROTO_ARP, #ifdef CONFIG_COMPAT .compatsize = sizeof(compat_int_t), .compat_from_user = compat_standard_from_user, @@ -1799,7 +1807,7 @@ static struct xt_target arpt_error_target __read_mostly = { .name = ARPT_ERROR_TARGET, .target = arpt_error, .targetsize = ARPT_FUNCTION_MAXNAMELEN, - .family = NF_ARP, + .family = NFPROTO_ARP, }; static struct nf_sockopt_ops arpt_sockopts = { @@ -1821,12 +1829,12 @@ static struct nf_sockopt_ops arpt_sockopts = { static int __net_init arp_tables_net_init(struct net *net) { - return xt_proto_init(net, NF_ARP); + return xt_proto_init(net, NFPROTO_ARP); } static void __net_exit arp_tables_net_exit(struct net *net) { - xt_proto_fini(net, NF_ARP); + xt_proto_fini(net, NFPROTO_ARP); } static struct pernet_operations arp_tables_net_ops = { diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index a385959d2655..b0d5b1d0a769 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -9,12 +9,9 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); MODULE_DESCRIPTION("arptables arp payload mangle target"); static unsigned int -target(struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - unsigned int hooknum, const struct xt_target *target, - const void *targinfo) +target(struct sk_buff *skb, const struct xt_target_param *par) { - const struct arpt_mangle *mangle = targinfo; + const struct arpt_mangle *mangle = par->targinfo; const struct arphdr *arp; unsigned char *arpptr; int pln, hln; @@ -57,11 +54,9 @@ target(struct sk_buff *skb, return mangle->target; } -static bool -checkentry(const char *tablename, const void *e, const struct xt_target *target, - void *targinfo, unsigned int hook_mask) +static bool checkentry(const struct xt_tgchk_param *par) { - const struct arpt_mangle *mangle = targinfo; + const struct arpt_mangle *mangle = par->targinfo; if (mangle->flags & ~ARPT_MANGLE_MASK || !(mangle->flags & ARPT_MANGLE_MASK)) @@ -75,7 +70,7 @@ checkentry(const char *tablename, const void *e, const struct xt_target *target, static struct xt_target arpt_mangle_reg __read_mostly = { .name = "mangle", - .family = NF_ARP, + .family = NFPROTO_ARP, .target = target, .targetsize = sizeof(struct arpt_mangle), .checkentry = checkentry, diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index 082f5dd3156c..bee3d117661a 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -51,7 +51,7 @@ static struct xt_table packet_filter = { .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), .private = NULL, .me = THIS_MODULE, - .af = NF_ARP, + .af = NFPROTO_ARP, }; /* The work comes in here from netfilter.c */ @@ -89,21 +89,21 @@ static struct nf_hook_ops arpt_ops[] __read_mostly = { { .hook = arpt_in_hook, .owner = THIS_MODULE, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_IN, .priority = NF_IP_PRI_FILTER, }, { .hook = arpt_out_hook, .owner = THIS_MODULE, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_OUT, .priority = NF_IP_PRI_FILTER, }, { .hook = arpt_forward_hook, .owner = THIS_MODULE, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_FORWARD, .priority = NF_IP_PRI_FILTER, }, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 4e7c719445c2..213fb27debc1 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -171,31 +171,25 @@ ip_checkentry(const struct ipt_ip *ip) } static unsigned int -ipt_error(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +ipt_error(struct sk_buff *skb, const struct xt_target_param *par) { if (net_ratelimit()) - printk("ip_tables: error: `%s'\n", (char *)targinfo); + printk("ip_tables: error: `%s'\n", + (const char *)par->targinfo); return NF_DROP; } /* Performance critical - called for every packet */ static inline bool -do_match(struct ipt_entry_match *m, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int offset, - bool *hotdrop) +do_match(struct ipt_entry_match *m, const struct sk_buff *skb, + struct xt_match_param *par) { + par->match = m->u.kernel.match; + par->matchinfo = m->data; + /* Stop iteration if it doesn't match */ - if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, - offset, ip_hdrlen(skb), hotdrop)) + if (!m->u.kernel.match->match(skb, par)) return true; else return false; @@ -326,7 +320,6 @@ ipt_do_table(struct sk_buff *skb, struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); - u_int16_t offset; const struct iphdr *ip; u_int16_t datalen; bool hotdrop = false; @@ -336,6 +329,8 @@ ipt_do_table(struct sk_buff *skb, void *table_base; struct ipt_entry *e, *back; struct xt_table_info *private; + struct xt_match_param mtpar; + struct xt_target_param tgpar; /* Initialization */ ip = ip_hdr(skb); @@ -348,7 +343,13 @@ ipt_do_table(struct sk_buff *skb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. */ - offset = ntohs(ip->frag_off) & IP_OFFSET; + mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; + mtpar.thoff = ip_hdrlen(skb); + mtpar.hotdrop = &hotdrop; + mtpar.in = tgpar.in = in; + mtpar.out = tgpar.out = out; + mtpar.family = tgpar.family = NFPROTO_IPV4; + tgpar.hooknum = hook; read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); @@ -362,12 +363,11 @@ ipt_do_table(struct sk_buff *skb, do { IP_NF_ASSERT(e); IP_NF_ASSERT(back); - if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { + if (ip_packet_match(ip, indev, outdev, + &e->ip, mtpar.fragoff)) { struct ipt_entry_target *t; - if (IPT_MATCH_ITERATE(e, do_match, - skb, in, out, - offset, &hotdrop) != 0) + if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) goto no_match; ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); @@ -413,16 +413,14 @@ ipt_do_table(struct sk_buff *skb, } else { /* Targets which reenter must return abs. verdicts */ + tgpar.target = t->u.kernel.target; + tgpar.targinfo = t->data; #ifdef CONFIG_NETFILTER_DEBUG ((struct ipt_entry *)table_base)->comefrom = 0xeeeeeeec; #endif verdict = t->u.kernel.target->target(skb, - in, out, - hook, - t->u.kernel.target, - t->data); - + &tgpar); #ifdef CONFIG_NETFILTER_DEBUG if (((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec @@ -575,12 +573,17 @@ mark_source_chains(struct xt_table_info *newinfo, static int cleanup_match(struct ipt_entry_match *m, unsigned int *i) { + struct xt_mtdtor_param par; + if (i && (*i)-- == 0) return 1; - if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data); - module_put(m->u.kernel.match->me); + par.match = m->u.kernel.match; + par.matchinfo = m->data; + par.family = NFPROTO_IPV4; + if (par.match->destroy != NULL) + par.match->destroy(&par); + module_put(par.match->me); return 0; } @@ -606,34 +609,28 @@ check_entry(struct ipt_entry *e, const char *name) } static int -check_match(struct ipt_entry_match *m, const char *name, - const struct ipt_ip *ip, - unsigned int hookmask, unsigned int *i) +check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, + unsigned int *i) { - struct xt_match *match; + const struct ipt_ip *ip = par->entryinfo; int ret; - match = m->u.kernel.match; - ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m), - name, hookmask, ip->proto, - ip->invflags & IPT_INV_PROTO); - if (!ret && m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ip, match, m->data, - hookmask)) { + par->match = m->u.kernel.match; + par->matchinfo = m->data; + + ret = xt_check_match(par, m->u.match_size - sizeof(*m), + ip->proto, ip->invflags & IPT_INV_PROTO); + if (ret < 0) { duprintf("ip_tables: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; + par.match->name); + return ret; } - if (!ret) - (*i)++; - return ret; + ++*i; + return 0; } static int -find_check_match(struct ipt_entry_match *m, - const char *name, - const struct ipt_ip *ip, - unsigned int hookmask, +find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par, unsigned int *i) { struct xt_match *match; @@ -648,7 +645,7 @@ find_check_match(struct ipt_entry_match *m, } m->u.kernel.match = match; - ret = check_match(m, name, ip, hookmask, i); + ret = check_match(m, par, i); if (ret) goto err; @@ -660,23 +657,25 @@ err: static int check_target(struct ipt_entry *e, const char *name) { - struct ipt_entry_target *t; - struct xt_target *target; + struct ipt_entry_target *t = ipt_get_target(e); + struct xt_tgchk_param par = { + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_IPV4, + }; int ret; - t = ipt_get_target(e); - target = t->u.kernel.target; - ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), - name, e->comefrom, e->ip.proto, - e->ip.invflags & IPT_INV_PROTO); - if (!ret && t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), + e->ip.proto, e->ip.invflags & IPT_INV_PROTO); + if (ret < 0) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); - ret = -EINVAL; + return ret; } - return ret; + return 0; } static int @@ -687,14 +686,18 @@ find_check_entry(struct ipt_entry *e, const char *name, unsigned int size, struct xt_target *target; int ret; unsigned int j; + struct xt_mtchk_param mtpar; ret = check_entry(e, name); if (ret) return ret; j = 0; - ret = IPT_MATCH_ITERATE(e, find_check_match, name, &e->ip, - e->comefrom, &j); + mtpar.table = name; + mtpar.entryinfo = &e->ip; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV4; + ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j); if (ret != 0) goto cleanup_matches; @@ -769,6 +772,7 @@ check_entry_size_and_hooks(struct ipt_entry *e, static int cleanup_entry(struct ipt_entry *e, unsigned int *i) { + struct xt_tgdtor_param par; struct ipt_entry_target *t; if (i && (*i)-- == 0) @@ -777,9 +781,13 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) /* Cleanup all matches */ IPT_MATCH_ITERATE(e, cleanup_match, NULL); t = ipt_get_target(e); - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); - module_put(t->u.kernel.target->me); + + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_IPV4; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } @@ -1648,12 +1656,16 @@ static int compat_check_entry(struct ipt_entry *e, const char *name, unsigned int *i) { + struct xt_mtchk_param mtpar; unsigned int j; int ret; j = 0; - ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, - e->comefrom, &j); + mtpar.table = name; + mtpar.entryinfo = &e->ip; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV4; + ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j); if (ret) goto cleanup_matches; @@ -2121,30 +2133,23 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, } static bool -icmp_match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +icmp_match(const struct sk_buff *skb, const struct xt_match_param *par) { const struct icmphdr *ic; struct icmphdr _icmph; - const struct ipt_icmp *icmpinfo = matchinfo; + const struct ipt_icmp *icmpinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph); + ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); if (ic == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("Dropping evil ICMP tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -2155,15 +2160,9 @@ icmp_match(const struct sk_buff *skb, !!(icmpinfo->invflags&IPT_ICMP_INV)); } -/* Called when user tries to insert an entry of this type. */ -static bool -icmp_checkentry(const char *tablename, - const void *entry, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool icmp_checkentry(const struct xt_mtchk_param *par) { - const struct ipt_icmp *icmpinfo = matchinfo; + const struct ipt_icmp *icmpinfo = par->matchinfo; /* Must specify no unknown invflags */ return !(icmpinfo->invflags & ~IPT_ICMP_INV); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index fafe8ebb4c55..7ac1677419a9 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -281,11 +281,9 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) ***********************************************************************/ static unsigned int -clusterip_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_clusterip_tgt_info *cipinfo = targinfo; + const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; struct nf_conn *ct; enum ip_conntrack_info ctinfo; u_int32_t hash; @@ -349,13 +347,10 @@ clusterip_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -clusterip_tg_check(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool clusterip_tg_check(const struct xt_tgchk_param *par) { - struct ipt_clusterip_tgt_info *cipinfo = targinfo; - const struct ipt_entry *e = e_void; + struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; struct clusterip_config *config; @@ -406,9 +401,9 @@ clusterip_tg_check(const char *tablename, const void *e_void, } cipinfo->config = config; - if (nf_ct_l3proto_try_module_get(target->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->target->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", target->family); + "proto=%u\n", par->target->family); return false; } @@ -416,9 +411,9 @@ clusterip_tg_check(const char *tablename, const void *e_void, } /* drop reference count of cluster config when rule is deleted */ -static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo) +static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) { - const struct ipt_clusterip_tgt_info *cipinfo = targinfo; + const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; /* if no more entries are referencing the config, remove it * from the list and destroy the proc entry */ @@ -426,7 +421,7 @@ static void clusterip_tg_destroy(const struct xt_target *target, void *targinfo) clusterip_config_put(cipinfo->config); - nf_ct_l3proto_module_put(target->family); + nf_ct_l3proto_module_put(par->target->family); } #ifdef CONFIG_COMPAT @@ -445,7 +440,7 @@ struct compat_ipt_clusterip_tgt_info static struct xt_target clusterip_tg_reg __read_mostly = { .name = "CLUSTERIP", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = clusterip_tg, .checkentry = clusterip_tg_check, .destroy = clusterip_tg_destroy, @@ -546,7 +541,7 @@ arp_mangle(unsigned int hook, static struct nf_hook_ops cip_arp_ops __read_mostly = { .hook = arp_mangle, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_OUT, .priority = -1 }; diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index d60139c134ca..f7e2fa0974dc 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -77,11 +77,9 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) } static unsigned int -ecn_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +ecn_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_ECN_info *einfo = targinfo; + const struct ipt_ECN_info *einfo = par->targinfo; if (einfo->operation & IPT_ECN_OP_SET_IP) if (!set_ect_ip(skb, einfo)) @@ -95,13 +93,10 @@ ecn_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -ecn_tg_check(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool ecn_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_ECN_info *einfo = targinfo; - const struct ipt_entry *e = e_void; + const struct ipt_ECN_info *einfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; if (einfo->operation & IPT_ECN_OP_MASK) { printk(KERN_WARNING "ECN: unsupported ECN operation %x\n", @@ -124,7 +119,7 @@ ecn_tg_check(const char *tablename, const void *e_void, static struct xt_target ecn_tg_reg __read_mostly = { .name = "ECN", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = ecn_tg, .targetsize = sizeof(struct ipt_ECN_info), .table = "mangle", diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 0af14137137b..fc6ce04a3e35 100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -375,7 +375,7 @@ static struct nf_loginfo default_loginfo = { }; static void -ipt_log_packet(unsigned int pf, +ipt_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -426,28 +426,23 @@ ipt_log_packet(unsigned int pf, } static unsigned int -log_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +log_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_log_info *loginfo = targinfo; + const struct ipt_log_info *loginfo = par->targinfo; struct nf_loginfo li; li.type = NF_LOG_TYPE_LOG; li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - ipt_log_packet(PF_INET, hooknum, skb, in, out, &li, + ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, par->out, &li, loginfo->prefix); return XT_CONTINUE; } -static bool -log_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool log_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_log_info *loginfo = targinfo; + const struct ipt_log_info *loginfo = par->targinfo; if (loginfo->level >= 8) { pr_debug("LOG: level %u >= 8\n", loginfo->level); @@ -463,7 +458,7 @@ log_tg_check(const char *tablename, const void *e, static struct xt_target log_tg_reg __read_mostly = { .name = "LOG", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = log_tg, .targetsize = sizeof(struct ipt_log_info), .checkentry = log_tg_check, @@ -483,7 +478,7 @@ static int __init log_tg_init(void) ret = xt_register_target(&log_tg_reg); if (ret < 0) return ret; - nf_log_register(PF_INET, &ipt_log_logger); + nf_log_register(NFPROTO_IPV4, &ipt_log_logger); return 0; } diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 0841aefaa503..f389f60cb105 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -31,12 +31,9 @@ MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); static DEFINE_RWLOCK(masq_lock); /* FIXME: Multiple targets. --RR */ -static bool -masquerade_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool masquerade_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { pr_debug("masquerade_check: bad MAP_IPS.\n"); @@ -50,9 +47,7 @@ masquerade_tg_check(const char *tablename, const void *e, } static unsigned int -masquerade_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; struct nf_conn_nat *nat; @@ -62,7 +57,7 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in, const struct rtable *rt; __be32 newsrc; - NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING); + NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); ct = nf_ct_get(skb, &ctinfo); nat = nfct_nat(ct); @@ -76,16 +71,16 @@ masquerade_tg(struct sk_buff *skb, const struct net_device *in, if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) return NF_ACCEPT; - mr = targinfo; + mr = par->targinfo; rt = skb->rtable; - newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); + newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); if (!newsrc) { - printk("MASQUERADE: %s ate my IP address\n", out->name); + printk("MASQUERADE: %s ate my IP address\n", par->out->name); return NF_DROP; } write_lock_bh(&masq_lock); - nat->masq_index = out->ifindex; + nat->masq_index = par->out->ifindex; write_unlock_bh(&masq_lock); /* Transfer from original range. */ @@ -119,9 +114,7 @@ static int masq_device_event(struct notifier_block *this, void *ptr) { const struct net_device *dev = ptr; - - if (!net_eq(dev_net(dev), &init_net)) - return NOTIFY_DONE; + struct net *net = dev_net(dev); if (event == NETDEV_DOWN) { /* Device was downed. Search entire table for @@ -129,7 +122,8 @@ static int masq_device_event(struct notifier_block *this, and forget them. */ NF_CT_ASSERT(dev->ifindex != 0); - nf_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex); + nf_ct_iterate_cleanup(net, device_cmp, + (void *)(long)dev->ifindex); } return NOTIFY_DONE; @@ -153,7 +147,7 @@ static struct notifier_block masq_inet_notifier = { static struct xt_target masquerade_tg_reg __read_mostly = { .name = "MASQUERADE", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = masquerade_tg, .targetsize = sizeof(struct nf_nat_multi_range_compat), .table = "nat", diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 6739abfd1521..7c29582d4ec8 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -22,12 +22,9 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>"); MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); -static bool -netmap_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool netmap_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { pr_debug("NETMAP:check: bad MAP_IPS.\n"); @@ -41,24 +38,23 @@ netmap_tg_check(const char *tablename, const void *e, } static unsigned int -netmap_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +netmap_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; __be32 new_ip, netmask; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; struct nf_nat_range newrange; - NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING - || hooknum == NF_INET_POST_ROUTING - || hooknum == NF_INET_LOCAL_OUT); + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_POST_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT); ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); - if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_OUT) + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT) new_ip = ip_hdr(skb)->daddr & ~netmask; else new_ip = ip_hdr(skb)->saddr & ~netmask; @@ -70,12 +66,12 @@ netmap_tg(struct sk_buff *skb, const struct net_device *in, mr->range[0].min, mr->range[0].max }); /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(hooknum)); + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); } static struct xt_target netmap_tg_reg __read_mostly = { .name = "NETMAP", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = netmap_tg, .targetsize = sizeof(struct nf_nat_multi_range_compat), .table = "nat", diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 5c6292449d13..698e5e78685b 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -26,12 +26,9 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); /* FIXME: Take multiple ranges --RR */ -static bool -redirect_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool redirect_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { pr_debug("redirect_check: bad MAP_IPS.\n"); @@ -45,24 +42,22 @@ redirect_tg_check(const char *tablename, const void *e, } static unsigned int -redirect_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +redirect_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; __be32 newdst; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; struct nf_nat_range newrange; - NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING - || hooknum == NF_INET_LOCAL_OUT); + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT); ct = nf_ct_get(skb, &ctinfo); NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); /* Local packets: make them go to loopback */ - if (hooknum == NF_INET_LOCAL_OUT) + if (par->hooknum == NF_INET_LOCAL_OUT) newdst = htonl(0x7F000001); else { struct in_device *indev; @@ -92,7 +87,7 @@ redirect_tg(struct sk_buff *skb, const struct net_device *in, static struct xt_target redirect_tg_reg __read_mostly = { .name = "REDIRECT", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = redirect_tg, .targetsize = sizeof(struct nf_nat_multi_range_compat), .table = "nat", diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 2639872849da..0b4b6e0ff2b9 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -136,11 +136,9 @@ static inline void send_unreach(struct sk_buff *skb_in, int code) } static unsigned int -reject_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +reject_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_reject_info *reject = targinfo; + const struct ipt_reject_info *reject = par->targinfo; /* WARNING: This code causes reentry within iptables. This means that the iptables jump stack is now crap. We @@ -168,7 +166,7 @@ reject_tg(struct sk_buff *skb, const struct net_device *in, send_unreach(skb, ICMP_PKT_FILTERED); break; case IPT_TCP_RESET: - send_reset(skb, hooknum); + send_reset(skb, par->hooknum); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; @@ -177,13 +175,10 @@ reject_tg(struct sk_buff *skb, const struct net_device *in, return NF_DROP; } -static bool -reject_tg_check(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool reject_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_reject_info *rejinfo = targinfo; - const struct ipt_entry *e = e_void; + const struct ipt_reject_info *rejinfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; if (rejinfo->with == IPT_ICMP_ECHOREPLY) { printk("ipt_REJECT: ECHOREPLY no longer supported.\n"); @@ -201,7 +196,7 @@ reject_tg_check(const char *tablename, const void *e_void, static struct xt_target reject_tg_reg __read_mostly = { .name = "REJECT", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = reject_tg, .targetsize = sizeof(struct ipt_reject_info), .table = "filter", diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c index 30eed65e7338..6d76aae90cc0 100644 --- a/net/ipv4/netfilter/ipt_TTL.c +++ b/net/ipv4/netfilter/ipt_TTL.c @@ -20,12 +20,10 @@ MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target"); MODULE_LICENSE("GPL"); static unsigned int -ttl_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) { struct iphdr *iph; - const struct ipt_TTL_info *info = targinfo; + const struct ipt_TTL_info *info = par->targinfo; int new_ttl; if (!skb_make_writable(skb, skb->len)) @@ -61,12 +59,9 @@ ttl_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -ttl_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool ttl_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_TTL_info *info = targinfo; + const struct ipt_TTL_info *info = par->targinfo; if (info->mode > IPT_TTL_MAXMODE) { printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", @@ -80,7 +75,7 @@ ttl_tg_check(const char *tablename, const void *e, static struct xt_target ttl_tg_reg __read_mostly = { .name = "TTL", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = ttl_tg, .targetsize = sizeof(struct ipt_TTL_info), .table = "mangle", diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index b192756c6d0d..18a2826b57c6 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -281,18 +281,14 @@ alloc_failure: } static unsigned int -ulog_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +ulog_tg(struct sk_buff *skb, const struct xt_target_param *par) { - struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; - - ipt_ulog_packet(hooknum, skb, in, out, loginfo, NULL); - + ipt_ulog_packet(par->hooknum, skb, par->in, par->out, + par->targinfo, NULL); return XT_CONTINUE; } -static void ipt_logfn(unsigned int pf, +static void ipt_logfn(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -317,12 +313,9 @@ static void ipt_logfn(unsigned int pf, ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); } -static bool -ulog_tg_check(const char *tablename, const void *e, - const struct xt_target *target, void *targinfo, - unsigned int hookmask) +static bool ulog_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_ulog_info *loginfo = targinfo; + const struct ipt_ulog_info *loginfo = par->targinfo; if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { pr_debug("ipt_ULOG: prefix term %i\n", @@ -374,7 +367,7 @@ static int ulog_tg_compat_to_user(void __user *dst, void *src) static struct xt_target ulog_tg_reg __read_mostly = { .name = "ULOG", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = ulog_tg, .targetsize = sizeof(struct ipt_ulog_info), .checkentry = ulog_tg_check, @@ -419,7 +412,7 @@ static int __init ulog_tg_init(void) return ret; } if (nflog) - nf_log_register(PF_INET, &ipt_ulog_logger); + nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); return 0; } diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 462a22c97877..88762f02779d 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -30,12 +30,9 @@ static inline bool match_type(const struct net_device *dev, __be32 addr, } static bool -addrtype_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_addrtype_info *info = matchinfo; + const struct ipt_addrtype_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool ret = true; @@ -50,20 +47,17 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct net_device *in, } static bool -addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_addrtype_info_v1 *info = matchinfo; + const struct ipt_addrtype_info_v1 *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); const struct net_device *dev = NULL; bool ret = true; if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) - dev = in; + dev = par->in; else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) - dev = out; + dev = par->out; if (info->source) ret &= match_type(dev, iph->saddr, info->source) ^ @@ -74,12 +68,9 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct net_device *in, return ret; } -static bool -addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) { - struct ipt_addrtype_info_v1 *info = matchinfo; + struct ipt_addrtype_info_v1 *info = par->matchinfo; if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN && info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { @@ -88,14 +79,16 @@ addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void, return false; } - if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN) && + if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN)) && info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) { printk(KERN_ERR "ipt_addrtype: output interface limitation " "not valid in PRE_ROUTING and INPUT\n"); return false; } - if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT) && + if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT)) && info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) { printk(KERN_ERR "ipt_addrtype: input interface limitation " "not valid in POST_ROUTING and OUTPUT\n"); @@ -108,14 +101,14 @@ addrtype_mt_checkentry_v1(const char *tablename, const void *ip_void, static struct xt_match addrtype_mt_reg[] __read_mostly = { { .name = "addrtype", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = addrtype_mt_v0, .matchsize = sizeof(struct ipt_addrtype_info), .me = THIS_MODULE }, { .name = "addrtype", - .family = AF_INET, + .family = NFPROTO_IPV4, .revision = 1, .match = addrtype_mt_v1, .checkentry = addrtype_mt_checkentry_v1, diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index e977989629c7..0104c0b399de 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -36,27 +36,23 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) return r; } -static bool -ah_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par) { struct ip_auth_hdr _ahdr; const struct ip_auth_hdr *ah; - const struct ipt_ah *ahinfo = matchinfo; + const struct ipt_ah *ahinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - ah = skb_header_pointer(skb, protoff, - sizeof(_ahdr), &_ahdr); + ah = skb_header_pointer(skb, par->thoff, sizeof(_ahdr), &_ahdr); if (ah == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("Dropping evil AH tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return 0; } @@ -65,13 +61,9 @@ ah_mt(const struct sk_buff *skb, const struct net_device *in, !!(ahinfo->invflags & IPT_AH_INV_SPI)); } -/* Called when user tries to insert an entry of this type. */ -static bool -ah_mt_check(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool ah_mt_check(const struct xt_mtchk_param *par) { - const struct ipt_ah *ahinfo = matchinfo; + const struct ipt_ah *ahinfo = par->matchinfo; /* Must specify no unknown invflags */ if (ahinfo->invflags & ~IPT_AH_INV_MASK) { @@ -83,7 +75,7 @@ ah_mt_check(const char *tablename, const void *ip_void, static struct xt_match ah_mt_reg __read_mostly = { .name = "ah", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = ah_mt, .matchsize = sizeof(struct ipt_ah), .proto = IPPROTO_AH, diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c index 749de8284ce5..6289b64144c6 100644 --- a/net/ipv4/netfilter/ipt_ecn.c +++ b/net/ipv4/netfilter/ipt_ecn.c @@ -67,12 +67,9 @@ static inline bool match_tcp(const struct sk_buff *skb, return true; } -static bool -ecn_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_ecn_info *info = matchinfo; + const struct ipt_ecn_info *info = par->matchinfo; if (info->operation & IPT_ECN_OP_MATCH_IP) if (!match_ip(skb, info)) @@ -81,20 +78,17 @@ ecn_mt(const struct sk_buff *skb, const struct net_device *in, if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { if (ip_hdr(skb)->protocol != IPPROTO_TCP) return false; - if (!match_tcp(skb, info, hotdrop)) + if (!match_tcp(skb, info, par->hotdrop)) return false; } return true; } -static bool -ecn_mt_check(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool ecn_mt_check(const struct xt_mtchk_param *par) { - const struct ipt_ecn_info *info = matchinfo; - const struct ipt_ip *ip = ip_void; + const struct ipt_ecn_info *info = par->matchinfo; + const struct ipt_ip *ip = par->entryinfo; if (info->operation & IPT_ECN_OP_MATCH_MASK) return false; @@ -114,7 +108,7 @@ ecn_mt_check(const char *tablename, const void *ip_void, static struct xt_match ecn_mt_reg __read_mostly = { .name = "ecn", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = ecn_mt, .matchsize = sizeof(struct ipt_ecn_info), .checkentry = ecn_mt_check, diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c index e0b8caeb710c..297f1cbf4ff5 100644 --- a/net/ipv4/netfilter/ipt_ttl.c +++ b/net/ipv4/netfilter/ipt_ttl.c @@ -18,12 +18,9 @@ MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("Xtables: IPv4 TTL field match"); MODULE_LICENSE("GPL"); -static bool -ttl_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_ttl_info *info = matchinfo; + const struct ipt_ttl_info *info = par->matchinfo; const u8 ttl = ip_hdr(skb)->ttl; switch (info->mode) { @@ -46,7 +43,7 @@ ttl_mt(const struct sk_buff *skb, const struct net_device *in, static struct xt_match ttl_mt_reg __read_mostly = { .name = "ttl", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = ttl_mt, .matchsize = sizeof(struct ipt_ttl_info), .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 1ea677dcf845..c9224310ebae 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -70,7 +70,7 @@ ipt_local_in_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_local_in_net(in, out)->ipv4.iptable_filter); + dev_net(in)->ipv4.iptable_filter); } static unsigned int @@ -81,7 +81,7 @@ ipt_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_forward_net(in, out)->ipv4.iptable_filter); + dev_net(in)->ipv4.iptable_filter); } static unsigned int @@ -101,7 +101,7 @@ ipt_local_out_hook(unsigned int hook, } return ipt_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv4.iptable_filter); + dev_net(out)->ipv4.iptable_filter); } static struct nf_hook_ops ipt_ops[] __read_mostly = { diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index da59182f2226..69f2c4287146 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -81,7 +81,7 @@ ipt_pre_routing_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_pre_routing_net(in, out)->ipv4.iptable_mangle); + dev_net(in)->ipv4.iptable_mangle); } static unsigned int @@ -92,7 +92,7 @@ ipt_post_routing_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_post_routing_net(in, out)->ipv4.iptable_mangle); + dev_net(out)->ipv4.iptable_mangle); } static unsigned int @@ -103,7 +103,7 @@ ipt_local_in_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_local_in_net(in, out)->ipv4.iptable_mangle); + dev_net(in)->ipv4.iptable_mangle); } static unsigned int @@ -114,7 +114,7 @@ ipt_forward_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_forward_net(in, out)->ipv4.iptable_mangle); + dev_net(in)->ipv4.iptable_mangle); } static unsigned int @@ -147,7 +147,7 @@ ipt_local_hook(unsigned int hook, tos = iph->tos; ret = ipt_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv4.iptable_mangle); + dev_net(out)->ipv4.iptable_mangle); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) { iph = ip_hdr(skb); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index fddce7754b72..8faebfe638f1 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -53,7 +53,7 @@ ipt_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_pre_routing_net(in, out)->ipv4.iptable_raw); + dev_net(in)->ipv4.iptable_raw); } static unsigned int @@ -72,7 +72,7 @@ ipt_local_hook(unsigned int hook, return NF_ACCEPT; } return ipt_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv4.iptable_raw); + dev_net(out)->ipv4.iptable_raw); } /* 'raw' is the very first table. */ diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index db6d312128e1..36f3be3cc428 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -73,7 +73,7 @@ ipt_local_in_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_local_in_net(in, out)->ipv4.iptable_security); + dev_net(in)->ipv4.iptable_security); } static unsigned int @@ -84,7 +84,7 @@ ipt_forward_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ipt_do_table(skb, hook, in, out, - nf_forward_net(in, out)->ipv4.iptable_security); + dev_net(in)->ipv4.iptable_security); } static unsigned int @@ -103,7 +103,7 @@ ipt_local_out_hook(unsigned int hook, return NF_ACCEPT; } return ipt_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv4.iptable_security); + dev_net(out)->ipv4.iptable_security); } static struct nf_hook_ops ipt_ops[] __read_mostly = { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 5a955c440364..4a7c35275396 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -1,3 +1,4 @@ + /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> * @@ -24,6 +25,7 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> #include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/ipv4/nf_defrag_ipv4.h> int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, struct nf_conn *ct, @@ -63,23 +65,6 @@ static int ipv4_print_tuple(struct seq_file *s, NIPQUAD(tuple->dst.u3.ip)); } -/* Returns new sk_buff, or NULL */ -static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) -{ - int err; - - skb_orphan(skb); - - local_bh_disable(); - err = ip_defrag(skb, user); - local_bh_enable(); - - if (!err) - ip_send_check(ip_hdr(skb)); - - return err; -} - static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { @@ -144,35 +129,13 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* Previously seen (loopback)? Ignore. Do this before - fragment check. */ - if (skb->nfct) - return NF_ACCEPT; - - /* Gather fragments. */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (nf_ct_ipv4_gather_frags(skb, - hooknum == NF_INET_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT)) - return NF_STOLEN; - } - return NF_ACCEPT; -} - static unsigned int ipv4_conntrack_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return nf_conntrack_in(PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb); } static unsigned int ipv4_conntrack_local(unsigned int hooknum, @@ -188,20 +151,13 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum, printk("ipt_hook: happy cracking.\n"); return NF_ACCEPT; } - return nf_conntrack_in(PF_INET, hooknum, skb); + return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so make it the first hook. */ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, - }, - { .hook = ipv4_conntrack_in, .owner = THIS_MODULE, .pf = PF_INET, @@ -209,13 +165,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { .priority = NF_IP_PRI_CONNTRACK, }, { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, - }, - { .hook = ipv4_conntrack_local, .owner = THIS_MODULE, .pf = PF_INET, @@ -254,7 +203,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, .procname = "ip_conntrack_count", - .data = &nf_conntrack_count, + .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, .proc_handler = &proc_dointvec, @@ -270,7 +219,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM, .procname = "ip_conntrack_checksum", - .data = &nf_conntrack_checksum, + .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -278,7 +227,7 @@ static ctl_table ip_ct_sysctl_table[] = { { .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, .procname = "ip_conntrack_log_invalid", - .data = &nf_ct_log_invalid, + .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, @@ -323,7 +272,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) return -EINVAL; } - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(sock_net(sk), &tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); @@ -422,6 +371,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) int ret = 0; need_conntrack(); + nf_defrag_ipv4_enable(); ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) { diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 3a020720e40b..313ebf00ee36 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -21,18 +21,20 @@ #include <net/netfilter/nf_conntrack_acct.h> struct ct_iter_state { + struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *ct_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - n = rcu_dereference(nf_conntrack_hash[st->bucket].first); + n = rcu_dereference(net->ct.hash[st->bucket].first); if (n) return n; } @@ -42,13 +44,14 @@ static struct hlist_node *ct_get_first(struct seq_file *seq) static struct hlist_node *ct_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = rcu_dereference(nf_conntrack_hash[st->bucket].first); + head = rcu_dereference(net->ct.hash[st->bucket].first); } return head; } @@ -158,8 +161,8 @@ static const struct seq_operations ct_seq_ops = { static int ct_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ct_seq_ops, - sizeof(struct ct_iter_state)); + return seq_open_net(inode, file, &ct_seq_ops, + sizeof(struct ct_iter_state)); } static const struct file_operations ct_file_ops = { @@ -167,21 +170,23 @@ static const struct file_operations ct_file_ops = { .open = ct_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; /* expects */ struct ct_expect_iter_state { + struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + n = rcu_dereference(net->ct.expect_hash[st->bucket].first); if (n) return n; } @@ -191,13 +196,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + head = rcu_dereference(net->ct.expect_hash[st->bucket].first); } return head; } @@ -265,8 +271,8 @@ static const struct seq_operations exp_seq_ops = { static int exp_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &exp_seq_ops, - sizeof(struct ct_expect_iter_state)); + return seq_open_net(inode, file, &exp_seq_ops, + sizeof(struct ct_expect_iter_state)); } static const struct file_operations ip_exp_file_ops = { @@ -274,11 +280,12 @@ static const struct file_operations ip_exp_file_ops = { .open = exp_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq_file_net(seq); int cpu; if (*pos == 0) @@ -288,7 +295,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (!cpu_possible(cpu)) continue; *pos = cpu+1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(net->ct.stat, cpu); } return NULL; @@ -296,13 +303,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq_file_net(seq); int cpu; for (cpu = *pos; cpu < NR_CPUS; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu+1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(net->ct.stat, cpu); } return NULL; @@ -314,7 +322,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) static int ct_cpu_seq_show(struct seq_file *seq, void *v) { - unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); + struct net *net = seq_file_net(seq); + unsigned int nr_conntracks = atomic_read(&net->ct.count); const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { @@ -354,7 +363,8 @@ static const struct seq_operations ct_cpu_seq_ops = { static int ct_cpu_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &ct_cpu_seq_ops); + return seq_open_net(inode, file, &ct_cpu_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations ct_cpu_seq_fops = { @@ -362,39 +372,54 @@ static const struct file_operations ct_cpu_seq_fops = { .open = ct_cpu_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; -int __init nf_conntrack_ipv4_compat_init(void) +static int __net_init ip_conntrack_net_init(struct net *net) { struct proc_dir_entry *proc, *proc_exp, *proc_stat; - proc = proc_net_fops_create(&init_net, "ip_conntrack", 0440, &ct_file_ops); + proc = proc_net_fops_create(net, "ip_conntrack", 0440, &ct_file_ops); if (!proc) goto err1; - proc_exp = proc_net_fops_create(&init_net, "ip_conntrack_expect", 0440, + proc_exp = proc_net_fops_create(net, "ip_conntrack_expect", 0440, &ip_exp_file_ops); if (!proc_exp) goto err2; proc_stat = proc_create("ip_conntrack", S_IRUGO, - init_net.proc_net_stat, &ct_cpu_seq_fops); + net->proc_net_stat, &ct_cpu_seq_fops); if (!proc_stat) goto err3; return 0; err3: - proc_net_remove(&init_net, "ip_conntrack_expect"); + proc_net_remove(net, "ip_conntrack_expect"); err2: - proc_net_remove(&init_net, "ip_conntrack"); + proc_net_remove(net, "ip_conntrack"); err1: return -ENOMEM; } +static void __net_exit ip_conntrack_net_exit(struct net *net) +{ + remove_proc_entry("ip_conntrack", net->proc_net_stat); + proc_net_remove(net, "ip_conntrack_expect"); + proc_net_remove(net, "ip_conntrack"); +} + +static struct pernet_operations ip_conntrack_net_ops = { + .init = ip_conntrack_net_init, + .exit = ip_conntrack_net_exit, +}; + +int __init nf_conntrack_ipv4_compat_init(void) +{ + return register_pernet_subsys(&ip_conntrack_net_ops); +} + void __exit nf_conntrack_ipv4_compat_fini(void) { - remove_proc_entry("ip_conntrack", init_net.proc_net_stat); - proc_net_remove(&init_net, "ip_conntrack_expect"); - proc_net_remove(&init_net, "ip_conntrack"); + unregister_pernet_subsys(&ip_conntrack_net_ops); } diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 97791048fa9b..4e8879220222 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -79,7 +79,7 @@ static int icmp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* Try to delete connection immediately after all replies: @@ -91,7 +91,7 @@ static int icmp_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); } @@ -123,7 +123,7 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ static int -icmp_error_message(struct sk_buff *skb, +icmp_error_message(struct net *net, struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum) { @@ -155,7 +155,7 @@ icmp_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&innertuple); + h = nf_conntrack_find_get(net, &innertuple); if (!h) { pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; @@ -172,8 +172,8 @@ icmp_error_message(struct sk_buff *skb, /* Small and modified version of icmp_rcv */ static int -icmp_error(struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) +icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmphdr *icmph; struct icmphdr _ih; @@ -181,16 +181,16 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, /* Not enough header? */ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: short packet "); return -NF_ACCEPT; } /* See ip_conntrack_proto_tcp.c */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; @@ -203,7 +203,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, * discarded. */ if (icmph->type > NR_ICMP_TYPES) { - if (LOG_INVALID(IPPROTO_ICMP)) + if (LOG_INVALID(net, IPPROTO_ICMP)) nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; @@ -217,7 +217,7 @@ icmp_error(struct sk_buff *skb, unsigned int dataoff, && icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(skb, ctinfo, hooknum); + return icmp_error_message(net, skb, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c new file mode 100644 index 000000000000..fa2d6b6fc3e5 --- /dev/null +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -0,0 +1,97 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/route.h> +#include <net/ip.h> + +#include <linux/netfilter_ipv4.h> +#include <net/netfilter/ipv4/nf_defrag_ipv4.h> + +/* Returns new sk_buff, or NULL */ +static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +{ + int err; + + skb_orphan(skb); + + local_bh_disable(); + err = ip_defrag(skb, user); + local_bh_enable(); + + if (!err) + ip_send_check(ip_hdr(skb)); + + return err; +} + +static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if (skb->nfct) + return NF_ACCEPT; +#endif +#endif + /* Gather fragments. */ + if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (nf_ct_ipv4_gather_frags(skb, + hooknum == NF_INET_PRE_ROUTING ? + IP_DEFRAG_CONNTRACK_IN : + IP_DEFRAG_CONNTRACK_OUT)) + return NF_STOLEN; + } + return NF_ACCEPT; +} + +static struct nf_hook_ops ipv4_defrag_ops[] = { + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, +}; + +static int __init nf_defrag_init(void) +{ + return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +static void __exit nf_defrag_fini(void) +{ + nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +void nf_defrag_ipv4_enable(void) +{ +} +EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); + +module_init(nf_defrag_init); +module_exit(nf_defrag_fini); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 6c6a3cba8d50..a65cf692359f 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -37,9 +37,6 @@ static struct nf_conntrack_l3proto *l3proto __read_mostly; /* Calculated at init based on memory size */ static unsigned int nf_nat_htable_size __read_mostly; -static int nf_nat_vmalloced; - -static struct hlist_head *bysource __read_mostly; #define MAX_IP_NAT_PROTO 256 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] @@ -145,7 +142,8 @@ same_src(const struct nf_conn *ct, /* Only called for SRC manip */ static int -find_appropriate_src(const struct nf_conntrack_tuple *tuple, +find_appropriate_src(struct net *net, + const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { @@ -155,7 +153,7 @@ find_appropriate_src(const struct nf_conntrack_tuple *tuple, const struct hlist_node *n; rcu_read_lock(); - hlist_for_each_entry_rcu(nat, n, &bysource[h], bysource) { + hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) { ct = nat->ct; if (same_src(ct, tuple)) { /* Copy source part from reply tuple. */ @@ -231,6 +229,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { + struct net *net = nf_ct_net(ct); const struct nf_nat_protocol *proto; /* 1) If this srcip/proto/src-proto-part is currently mapped, @@ -242,7 +241,7 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, manips not an issue. */ if (maniptype == IP_NAT_MANIP_SRC && !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { - if (find_appropriate_src(orig_tuple, tuple, range)) { + if (find_appropriate_src(net, orig_tuple, tuple, range)) { pr_debug("get_unique_tuple: Found current src map\n"); if (!nf_nat_used_tuple(tuple, ct)) return; @@ -283,6 +282,7 @@ nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple curr_tuple, new_tuple; struct nf_conn_nat *nat; int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK); @@ -334,7 +334,8 @@ nf_nat_setup_info(struct nf_conn *ct, /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); nat->ct = ct; - hlist_add_head_rcu(&nat->bysource, &bysource[srchash]); + hlist_add_head_rcu(&nat->bysource, + &net->ipv4.nat_bysource[srchash]); spin_unlock_bh(&nf_nat_lock); } @@ -583,6 +584,132 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { .flags = NF_CT_EXT_F_PREALLOC, }; +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_conntrack.h> + +static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { + [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, + [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, +}; + +static int nfnetlink_parse_nat_proto(struct nlattr *attr, + const struct nf_conn *ct, + struct nf_nat_range *range) +{ + struct nlattr *tb[CTA_PROTONAT_MAX+1]; + const struct nf_nat_protocol *npt; + int err; + + err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); + if (err < 0) + return err; + + npt = nf_nat_proto_find_get(nf_ct_protonum(ct)); + if (npt->nlattr_to_range) + err = npt->nlattr_to_range(tb, range); + nf_nat_proto_put(npt); + return err; +} + +static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { + [CTA_NAT_MINIP] = { .type = NLA_U32 }, + [CTA_NAT_MAXIP] = { .type = NLA_U32 }, +}; + +static int +nfnetlink_parse_nat(struct nlattr *nat, + const struct nf_conn *ct, struct nf_nat_range *range) +{ + struct nlattr *tb[CTA_NAT_MAX+1]; + int err; + + memset(range, 0, sizeof(*range)); + + err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy); + if (err < 0) + return err; + + if (tb[CTA_NAT_MINIP]) + range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]); + + if (!tb[CTA_NAT_MAXIP]) + range->max_ip = range->min_ip; + else + range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]); + + if (range->min_ip) + range->flags |= IP_NAT_RANGE_MAP_IPS; + + if (!tb[CTA_NAT_PROTO]) + return 0; + + err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); + if (err < 0) + return err; + + return 0; +} + +static int +nfnetlink_parse_nat_setup(struct nf_conn *ct, + enum nf_nat_manip_type manip, + struct nlattr *attr) +{ + struct nf_nat_range range; + + if (nfnetlink_parse_nat(attr, ct, &range) < 0) + return -EINVAL; + if (nf_nat_initialized(ct, manip)) + return -EEXIST; + + return nf_nat_setup_info(ct, &range, manip); +} +#else +static int +nfnetlink_parse_nat_setup(struct nf_conn *ct, + enum nf_nat_manip_type manip, + struct nlattr *attr) +{ + return -EOPNOTSUPP; +} +#endif + +static int __net_init nf_nat_net_init(struct net *net) +{ + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, + &net->ipv4.nat_vmalloced); + if (!net->ipv4.nat_bysource) + return -ENOMEM; + return 0; +} + +/* Clear NAT section of all conntracks, in case we're loaded again. */ +static int clean_nat(struct nf_conn *i, void *data) +{ + struct nf_conn_nat *nat = nfct_nat(i); + + if (!nat) + return 0; + memset(nat, 0, sizeof(*nat)); + i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); + return 0; +} + +static void __net_exit nf_nat_net_exit(struct net *net) +{ + nf_ct_iterate_cleanup(net, &clean_nat, NULL); + synchronize_rcu(); + nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, + nf_nat_htable_size); +} + +static struct pernet_operations nf_nat_net_ops = { + .init = nf_nat_net_init, + .exit = nf_nat_net_exit, +}; + static int __init nf_nat_init(void) { size_t i; @@ -599,12 +726,9 @@ static int __init nf_nat_init(void) /* Leave them the same for the moment. */ nf_nat_htable_size = nf_conntrack_htable_size; - bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, - &nf_nat_vmalloced); - if (!bysource) { - ret = -ENOMEM; + ret = register_pernet_subsys(&nf_nat_net_ops); + if (ret < 0) goto cleanup_extend; - } /* Sew in builtin protocols. */ spin_lock_bh(&nf_nat_lock); @@ -622,6 +746,9 @@ static int __init nf_nat_init(void) BUG_ON(nf_nat_seq_adjust_hook != NULL); rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust); + BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); + rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, + nfnetlink_parse_nat_setup); return 0; cleanup_extend: @@ -629,30 +756,18 @@ static int __init nf_nat_init(void) return ret; } -/* Clear NAT section of all conntracks, in case we're loaded again. */ -static int clean_nat(struct nf_conn *i, void *data) -{ - struct nf_conn_nat *nat = nfct_nat(i); - - if (!nat) - return 0; - memset(nat, 0, sizeof(*nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); - return 0; -} - static void __exit nf_nat_cleanup(void) { - nf_ct_iterate_cleanup(&clean_nat, NULL); - synchronize_rcu(); - nf_ct_free_hashtable(bysource, nf_nat_vmalloced, nf_nat_htable_size); + unregister_pernet_subsys(&nf_nat_net_ops); nf_ct_l3proto_put(l3proto); nf_ct_extend_unregister(&nat_extend); rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL); + rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL); synchronize_net(); } MODULE_LICENSE("GPL"); +MODULE_ALIAS("nf-nat-ipv4"); module_init(nf_nat_init); module_exit(nf_nat_cleanup); diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index 11976ea29884..cf7a42bf9820 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -16,6 +16,7 @@ #include <linux/udp.h> #include <net/checksum.h> #include <net/tcp.h> +#include <net/route.h> #include <linux/netfilter_ipv4.h> #include <net/netfilter/nf_conntrack.h> @@ -192,7 +193,7 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb, nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, CTINFO2DIR(ctinfo)); - nf_conntrack_event_cache(IPCT_NATSEQADJ, skb); + nf_conntrack_event_cache(IPCT_NATSEQADJ, ct); } return 1; } diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index da3d91a5ef5c..9eb171056c63 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -40,6 +40,7 @@ MODULE_ALIAS("ip_nat_pptp"); static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { + struct net *net = nf_ct_net(ct); const struct nf_conn *master = ct->master; struct nf_conntrack_expect *other_exp; struct nf_conntrack_tuple t; @@ -73,7 +74,7 @@ static void pptp_nat_expected(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple_ip(&t); - other_exp = nf_ct_expect_find_get(&t); + other_exp = nf_ct_expect_find_get(net, &t); if (other_exp) { nf_ct_unexpect_related(other_exp); nf_ct_expect_put(other_exp); diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index e8b4d0d4439e..bea54a685109 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -33,7 +33,7 @@ static struct struct ipt_replace repl; struct ipt_standard entries[3]; struct ipt_error term; -} nat_initial_table __initdata = { +} nat_initial_table __net_initdata = { .repl = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, @@ -58,47 +58,42 @@ static struct .term = IPT_ERROR_INIT, /* ERROR */ }; -static struct xt_table __nat_table = { +static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, .lock = __RW_LOCK_UNLOCKED(__nat_table.lock), .me = THIS_MODULE, .af = AF_INET, }; -static struct xt_table *nat_table; /* Source NAT */ -static unsigned int ipt_snat_target(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; - NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING); + NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); ct = nf_ct_get(skb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); - NF_CT_ASSERT(out); + NF_CT_ASSERT(par->out != NULL); return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); } /* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ -static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) +static void warn_if_extra_mangle(struct net *net, __be32 dstip, __be32 srcip) { static int warned = 0; struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; struct rtable *rt; - if (ip_route_output_key(&init_net, &rt, &fl) != 0) + if (ip_route_output_key(net, &rt, &fl) != 0) return; if (rt->rt_src != srcip && !warned) { @@ -110,40 +105,32 @@ static void warn_if_extra_mangle(__be32 dstip, __be32 srcip) ip_rt_put(rt); } -static unsigned int ipt_dnat_target(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +static unsigned int +ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; - NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING || - hooknum == NF_INET_LOCAL_OUT); + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT); ct = nf_ct_get(skb, &ctinfo); /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - if (hooknum == NF_INET_LOCAL_OUT && + if (par->hooknum == NF_INET_LOCAL_OUT && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) - warn_if_extra_mangle(ip_hdr(skb)->daddr, + warn_if_extra_mangle(dev_net(par->out), ip_hdr(skb)->daddr, mr->range[0].min_ip); return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); } -static bool ipt_snat_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ipt_snat_checkentry(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { @@ -153,13 +140,9 @@ static bool ipt_snat_checkentry(const char *tablename, return true; } -static bool ipt_dnat_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool ipt_dnat_checkentry(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_multi_range_compat *mr = par->targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { @@ -194,9 +177,10 @@ int nf_nat_rule_find(struct sk_buff *skb, const struct net_device *out, struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); int ret; - ret = ipt_do_table(skb, hooknum, in, out, nat_table); + ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); if (ret == NF_ACCEPT) { if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) @@ -226,14 +210,32 @@ static struct xt_target ipt_dnat_reg __read_mostly = { .family = AF_INET, }; +static int __net_init nf_nat_rule_net_init(struct net *net) +{ + net->ipv4.nat_table = ipt_register_table(net, &nat_table, + &nat_initial_table.repl); + if (IS_ERR(net->ipv4.nat_table)) + return PTR_ERR(net->ipv4.nat_table); + return 0; +} + +static void __net_exit nf_nat_rule_net_exit(struct net *net) +{ + ipt_unregister_table(net->ipv4.nat_table); +} + +static struct pernet_operations nf_nat_rule_net_ops = { + .init = nf_nat_rule_net_init, + .exit = nf_nat_rule_net_exit, +}; + int __init nf_nat_rule_init(void) { int ret; - nat_table = ipt_register_table(&init_net, &__nat_table, - &nat_initial_table.repl); - if (IS_ERR(nat_table)) - return PTR_ERR(nat_table); + ret = register_pernet_subsys(&nf_nat_rule_net_ops); + if (ret != 0) + goto out; ret = xt_register_target(&ipt_snat_reg); if (ret != 0) goto unregister_table; @@ -247,8 +249,8 @@ int __init nf_nat_rule_init(void) unregister_snat: xt_unregister_target(&ipt_snat_reg); unregister_table: - ipt_unregister_table(nat_table); - + unregister_pernet_subsys(&nf_nat_rule_net_ops); + out: return ret; } @@ -256,5 +258,5 @@ void nf_nat_rule_cleanup(void) { xt_unregister_target(&ipt_dnat_reg); xt_unregister_target(&ipt_snat_reg); - ipt_unregister_table(nat_table); + unregister_pernet_subsys(&nf_nat_rule_net_ops); } diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index ffeaffc3fffe..8303e4b406c0 100644 --- a/net/ipv4/netfilter/nf_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -742,6 +742,7 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); if (*obj == NULL) { + kfree(p); kfree(id); if (net_ratelimit()) printk("OOM in bsalg (%d)\n", __LINE__); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8f5a403f6f6b..a631a1f110ca 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -237,43 +237,45 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_SENTINEL }; +static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals, + unsigned short *type, int count) +{ + int j; + + if (count) { + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < count; ++j) + seq_printf(seq, " %sType%u", + type[j] & 0x100 ? "Out" : "In", + type[j] & 0xff); + seq_printf(seq, "\nIcmpMsg:"); + for (j = 0; j < count; ++j) + seq_printf(seq, " %lu", vals[j]); + } +} + static void icmpmsg_put(struct seq_file *seq) { #define PERLINE 16 - int j, i, count; - static int out[PERLINE]; + int i, count; + unsigned short type[PERLINE]; + unsigned long vals[PERLINE], val; struct net *net = seq->private; count = 0; for (i = 0; i < ICMPMSG_MIB_MAX; i++) { - - if (snmp_fold_field((void **) net->mib.icmpmsg_statistics, i)) - out[count++] = i; - if (count < PERLINE) - continue; - - seq_printf(seq, "\nIcmpMsg:"); - for (j = 0; j < PERLINE; ++j) - seq_printf(seq, " %sType%u", i & 0x100 ? "Out" : "In", - i & 0xff); - seq_printf(seq, "\nIcmpMsg: "); - for (j = 0; j < PERLINE; ++j) - seq_printf(seq, " %lu", - snmp_fold_field((void **) net->mib.icmpmsg_statistics, - out[j])); - seq_putc(seq, '\n'); - } - if (count) { - seq_printf(seq, "\nIcmpMsg:"); - for (j = 0; j < count; ++j) - seq_printf(seq, " %sType%u", out[j] & 0x100 ? "Out" : - "In", out[j] & 0xff); - seq_printf(seq, "\nIcmpMsg:"); - for (j = 0; j < count; ++j) - seq_printf(seq, " %lu", snmp_fold_field((void **) - net->mib.icmpmsg_statistics, out[j])); + val = snmp_fold_field((void **) net->mib.icmpmsg_statistics, i); + if (val) { + type[count] = i; + vals[count++] = val; + } + if (count == PERLINE) { + icmpmsg_put_line(seq, vals, type, count); + count = 0; + } } + icmpmsg_put_line(seq, vals, type, count); #undef PERLINE } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6ee5354c9aa1..2ea6dcc3e2cc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -282,6 +282,8 @@ static struct rtable *rt_cache_get_first(struct seq_file *seq) struct rtable *r = NULL; for (st->bucket = rt_hash_mask; st->bucket >= 0; --st->bucket) { + if (!rt_hash_table[st->bucket].chain) + continue; rcu_read_lock_bh(); r = rcu_dereference(rt_hash_table[st->bucket].chain); while (r) { @@ -299,11 +301,14 @@ static struct rtable *__rt_cache_get_next(struct seq_file *seq, struct rtable *r) { struct rt_cache_iter_state *st = seq->private; + r = r->u.dst.rt_next; while (!r) { rcu_read_unlock_bh(); - if (--st->bucket < 0) - break; + do { + if (--st->bucket < 0) + return NULL; + } while (!rt_hash_table[st->bucket].chain); rcu_read_lock_bh(); r = rt_hash_table[st->bucket].chain; } @@ -1104,7 +1109,12 @@ restart: printk("\n"); } #endif - rt_hash_table[hash].chain = rt; + /* + * Since lookup is lockfree, we must make sure + * previous writes to rt are comitted to memory + * before making rt visible to other CPUS. + */ + rcu_assign_pointer(rt_hash_table[hash].chain, rt); spin_unlock_bh(rt_hash_lock_addr(hash)); *rp = rt; return 0; @@ -2356,11 +2366,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, ipv4_is_zeronet(oldflp->fl4_src)) goto out; - /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - dev_out = ip_dev_find(net, oldflp->fl4_src); - if (dev_out == NULL) - goto out; - /* I removed check for oif == dev_out->oif here. It was wrong for two reasons: 1. ip_dev_find(net, saddr) can return wrong iface, if saddr @@ -2372,6 +2377,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, if (oldflp->oif == 0 && (ipv4_is_multicast(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) { + /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ + dev_out = ip_dev_find(net, oldflp->fl4_src); + if (dev_out == NULL) + goto out; + /* Special hack: user can direct multicasts and limited broadcast via necessary interface without fiddling with IP_MULTICAST_IF or IP_PKTINFO. @@ -2390,9 +2400,15 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, fl.oif = dev_out->ifindex; goto make_route; } - if (dev_out) + + if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { + /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ + dev_out = ip_dev_find(net, oldflp->fl4_src); + if (dev_out == NULL) + goto out; dev_put(dev_out); - dev_out = NULL; + dev_out = NULL; + } } @@ -2840,7 +2856,9 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) if (s_h < 0) s_h = 0; s_idx = idx = cb->args[1]; - for (h = s_h; h <= rt_hash_mask; h++) { + for (h = s_h; h <= rt_hash_mask; h++, s_idx = 0) { + if (!rt_hash_table[h].chain) + continue; rcu_read_lock_bh(); for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; rt = rcu_dereference(rt->u.dst.rt_next), idx++) { @@ -2859,7 +2877,6 @@ int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) dst_release(xchg(&skb->dst, NULL)); } rcu_read_unlock_bh(); - s_idx = 0; } done: @@ -2896,8 +2913,6 @@ static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write, } static int ipv4_sysctl_rtcache_flush_strategy(ctl_table *table, - int __user *name, - int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, @@ -2960,16 +2975,13 @@ static int ipv4_sysctl_rt_secret_interval(ctl_table *ctl, int write, } static int ipv4_sysctl_rt_secret_interval_strategy(ctl_table *table, - int __user *name, - int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { int old = ip_rt_secret_interval; - int ret = sysctl_jiffies(table, name, nlen, oldval, oldlenp, newval, - newlen); + int ret = sysctl_jiffies(table, oldval, oldlenp, newval, newlen); rt_secret_reschedule(old); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 9d38005abbac..d346c22aa6ae 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -16,6 +16,7 @@ #include <linux/cryptohash.h> #include <linux/kernel.h> #include <net/tcp.h> +#include <net/route.h> /* Timestamps: lowest 9 bits store TCP options */ #define TSBITS 9 @@ -296,6 +297,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; req->mss = mss; + ireq->loc_port = th->dest; ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; @@ -337,6 +339,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, .proto = IPPROTO_TCP, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = th->dest, .dport = th->source } } }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e0689fd7b798..1bb10df8ce7d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -26,16 +26,13 @@ static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; -extern seqlock_t sysctl_port_range_lock; -extern int sysctl_local_port_range[2]; - /* Update system visible IP port range */ static void set_local_port_range(int range[2]) { - write_seqlock(&sysctl_port_range_lock); - sysctl_local_port_range[0] = range[0]; - sysctl_local_port_range[1] = range[1]; - write_sequnlock(&sysctl_port_range_lock); + write_seqlock(&sysctl_local_ports.lock); + sysctl_local_ports.range[0] = range[0]; + sysctl_local_ports.range[1] = range[1]; + write_sequnlock(&sysctl_local_ports.lock); } /* Validate changes from /proc interface. */ @@ -44,8 +41,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, size_t *lenp, loff_t *ppos) { int ret; - int range[2] = { sysctl_local_port_range[0], - sysctl_local_port_range[1] }; + int range[2]; ctl_table tmp = { .data = &range, .maxlen = sizeof(range), @@ -54,6 +50,7 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, .extra2 = &ip_local_port_range_max, }; + inet_get_local_port_range(range, range + 1); ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); if (write && ret == 0) { @@ -67,14 +64,13 @@ static int ipv4_local_port_range(ctl_table *table, int write, struct file *filp, } /* Validate changes from sysctl interface. */ -static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, - int nlen, void __user *oldval, +static int ipv4_sysctl_local_port_range(ctl_table *table, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { int ret; - int range[2] = { sysctl_local_port_range[0], - sysctl_local_port_range[1] }; + int range[2]; ctl_table tmp = { .data = &range, .maxlen = sizeof(range), @@ -83,7 +79,8 @@ static int ipv4_sysctl_local_port_range(ctl_table *table, int __user *name, .extra2 = &ip_local_port_range_max, }; - ret = sysctl_intvec(&tmp, name, nlen, oldval, oldlenp, newval, newlen); + inet_get_local_port_range(range, range + 1); + ret = sysctl_intvec(&tmp, oldval, oldlenp, newval, newlen); if (ret == 0 && newval && newlen) { if (range[1] < range[0]) ret = -EINVAL; @@ -112,8 +109,8 @@ static int proc_tcp_congestion_control(ctl_table *ctl, int write, struct file * return ret; } -static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, - int nlen, void __user *oldval, +static int sysctl_tcp_congestion_control(ctl_table *table, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { @@ -125,7 +122,7 @@ static int sysctl_tcp_congestion_control(ctl_table *table, int __user *name, int ret; tcp_get_default_congestion_control(val); - ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); + ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); if (ret == 1 && newval && newlen) ret = tcp_set_default_congestion_control(val); return ret; @@ -168,8 +165,8 @@ static int proc_allowed_congestion_control(ctl_table *ctl, return ret; } -static int strategy_allowed_congestion_control(ctl_table *table, int __user *name, - int nlen, void __user *oldval, +static int strategy_allowed_congestion_control(ctl_table *table, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) @@ -182,7 +179,7 @@ static int strategy_allowed_congestion_control(ctl_table *table, int __user *nam return -ENOMEM; tcp_get_available_congestion_control(tbl.data, tbl.maxlen); - ret = sysctl_string(&tbl, name, nlen, oldval, oldlenp, newval, newlen); + ret = sysctl_string(&tbl, oldval, oldlenp, newval, newlen); if (ret == 1 && newval && newlen) ret = tcp_set_allowed_congestion_control(tbl.data); kfree(tbl.data); @@ -396,8 +393,8 @@ static struct ctl_table ipv4_table[] = { { .ctl_name = NET_IPV4_LOCAL_PORT_RANGE, .procname = "ip_local_port_range", - .data = &sysctl_local_port_range, - .maxlen = sizeof(sysctl_local_port_range), + .data = &sysctl_local_ports.range, + .maxlen = sizeof(sysctl_local_ports.range), .mode = 0644, .proc_handler = &ipv4_local_port_range, .strategy = &ipv4_sysctl_local_port_range, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1ab341e5d3e0..c5aca0bb116a 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -384,13 +384,17 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Connected? */ if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) { + int target = sock_rcvlowat(sk, 0, INT_MAX); + + if (tp->urg_seq == tp->copied_seq && + !sock_flag(sk, SOCK_URGINLINE) && + tp->urg_data) + target--; + /* Potential race condition. If read of tp below will * escape above sk->sk_state, we can be illegally awaken * in SYN_* states. */ - if ((tp->rcv_nxt != tp->copied_seq) && - (tp->urg_seq != tp->copied_seq || - tp->rcv_nxt != tp->copied_seq + 1 || - sock_flag(sk, SOCK_URGINLINE) || !tp->urg_data)) + if (tp->rcv_nxt - tp->copied_seq >= target) mask |= POLLIN | POLLRDNORM; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { @@ -493,10 +497,8 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) static inline void tcp_mark_urg(struct tcp_sock *tp, int flags, struct sk_buff *skb) { - if (flags & MSG_OOB) { - tp->urg_mode = 1; + if (flags & MSG_OOB) tp->snd_up = tp->write_seq; - } } static inline void tcp_push(struct sock *sk, int flags, int mss_now, @@ -1157,7 +1159,7 @@ static void tcp_prequeue_process(struct sock *sk) * necessary */ local_bh_disable(); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk->sk_backlog_rcv(sk, skb); + sk_backlog_rcv(sk, skb); local_bh_enable(); /* Clear memory counter. */ @@ -1372,8 +1374,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sk->sk_state == TCP_CLOSE || (sk->sk_shutdown & RCV_SHUTDOWN) || !timeo || - signal_pending(current) || - (flags & MSG_PEEK)) + signal_pending(current)) break; } else { if (sock_flag(sk, SOCK_DONE)) diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 6a250828b767..4ec5b4e97c4e 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -115,7 +115,7 @@ int tcp_set_default_congestion_control(const char *name) spin_lock(&tcp_cong_list_lock); ca = tcp_ca_find(name); -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES if (!ca && capable(CAP_SYS_MODULE)) { spin_unlock(&tcp_cong_list_lock); @@ -244,7 +244,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name) if (ca == icsk->icsk_ca_ops) goto out; -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES /* not found attempt to autoload module */ if (!ca && capable(CAP_SYS_MODULE)) { rcu_read_unlock(); diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index af99776146ff..937549b8a921 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -69,9 +69,12 @@ static u32 htcp_cwnd_undo(struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); - ca->last_cong = ca->undo_last_cong; - ca->maxRTT = ca->undo_maxRTT; - ca->old_maxB = ca->undo_old_maxB; + if (ca->undo_last_cong) { + ca->last_cong = ca->undo_last_cong; + ca->maxRTT = ca->undo_maxRTT; + ca->old_maxB = ca->undo_old_maxB; + ca->undo_last_cong = 0; + } return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta); } @@ -268,7 +271,10 @@ static void htcp_state(struct sock *sk, u8 new_state) case TCP_CA_Open: { struct htcp *ca = inet_csk_ca(sk); - ca->last_cong = jiffies; + if (ca->undo_last_cong) { + ca->last_cong = jiffies; + ca->undo_last_cong = 0; + } } break; case TCP_CA_CWR: diff --git a/net/ipv4/tcp_hybla.c b/net/ipv4/tcp_hybla.c index bfcbd148a89d..c209e054a634 100644 --- a/net/ipv4/tcp_hybla.c +++ b/net/ipv4/tcp_hybla.c @@ -150,7 +150,11 @@ static void hybla_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) ca->snd_cwnd_cents -= 128; tp->snd_cwnd_cnt = 0; } - + /* check when cwnd has not been incremented for a while */ + if (increment == 0 && odd == 0 && tp->snd_cwnd_cnt >= tp->snd_cwnd) { + tp->snd_cwnd++; + tp->snd_cwnd_cnt = 0; + } /* clamp down slowstart cwnd to ssthresh value. */ if (is_slowstart) tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_ssthresh); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 67ccce2a96bd..d77c0d29e239 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -979,6 +979,39 @@ static void tcp_update_reordering(struct sock *sk, const int metric, } } +/* This must be called before lost_out is incremented */ +static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) +{ + if ((tp->retransmit_skb_hint == NULL) || + before(TCP_SKB_CB(skb)->seq, + TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) + tp->retransmit_skb_hint = skb; + + if (!tp->lost_out || + after(TCP_SKB_CB(skb)->end_seq, tp->retransmit_high)) + tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; +} + +static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) +{ + if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { + tcp_verify_retransmit_hint(tp, skb); + + tp->lost_out += tcp_skb_pcount(skb); + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + } +} + +void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) +{ + tcp_verify_retransmit_hint(tp, skb); + + if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { + tp->lost_out += tcp_skb_pcount(skb); + TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; + } +} + /* This procedure tags the retransmission queue when SACKs arrive. * * We have three tag bits: SACKED(S), RETRANS(R) and LOST(L). @@ -1155,13 +1188,7 @@ static void tcp_mark_lost_retrans(struct sock *sk) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - /* clear lost hint */ - tp->retransmit_skb_hint = NULL; - - if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { - tp->lost_out += tcp_skb_pcount(skb); - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - } + tcp_skb_mark_lost_uncond_verify(tp, skb); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSTRETRANSMIT); } else { if (before(ack_seq, new_low_seq)) @@ -1271,9 +1298,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); tp->lost_out -= tcp_skb_pcount(skb); tp->retrans_out -= tcp_skb_pcount(skb); - - /* clear lost hint */ - tp->retransmit_skb_hint = NULL; } } else { if (!(sacked & TCPCB_RETRANS)) { @@ -1292,9 +1316,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, if (sacked & TCPCB_LOST) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; tp->lost_out -= tcp_skb_pcount(skb); - - /* clear lost hint */ - tp->retransmit_skb_hint = NULL; } } @@ -1324,7 +1345,6 @@ static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk, if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); - tp->retransmit_skb_hint = NULL; } return flag; @@ -1726,6 +1746,8 @@ int tcp_use_frto(struct sock *sk) return 0; skb = tcp_write_queue_head(sk); + if (tcp_skb_is_last(sk, skb)) + return 1; skb = tcp_write_queue_next(sk, skb); /* Skips head */ tcp_for_write_queue_from(skb, sk) { if (skb == tcp_send_head(sk)) @@ -1867,6 +1889,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); + tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; } } tcp_verify_left_out(tp); @@ -1883,7 +1906,7 @@ static void tcp_enter_frto_loss(struct sock *sk, int allowed_segments, int flag) tp->high_seq = tp->snd_nxt; TCP_ECN_queue_cwr(tp); - tcp_clear_retrans_hints_partial(tp); + tcp_clear_all_retrans_hints(tp); } static void tcp_clear_retrans_partial(struct tcp_sock *tp) @@ -1934,12 +1957,11 @@ void tcp_enter_loss(struct sock *sk, int how) /* Push undo marker, if it was plain RTO and nothing * was retransmitted. */ tp->undo_marker = tp->snd_una; - tcp_clear_retrans_hints_partial(tp); } else { tp->sacked_out = 0; tp->fackets_out = 0; - tcp_clear_all_retrans_hints(tp); } + tcp_clear_all_retrans_hints(tp); tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) @@ -1952,6 +1974,7 @@ void tcp_enter_loss(struct sock *sk, int how) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); + tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; } } tcp_verify_left_out(tp); @@ -2157,19 +2180,6 @@ static int tcp_time_to_recover(struct sock *sk) return 0; } -/* RFC: This is from the original, I doubt that this is necessary at all: - * clear xmit_retrans hint if seq of this skb is beyond hint. How could we - * retransmitted past LOST markings in the first place? I'm not fully sure - * about undo and end of connection cases, which can cause R without L? - */ -static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) -{ - if ((tp->retransmit_skb_hint != NULL) && - before(TCP_SKB_CB(skb)->seq, - TCP_SKB_CB(tp->retransmit_skb_hint)->seq)) - tp->retransmit_skb_hint = NULL; -} - /* Mark head of queue up as lost. With RFC3517 SACK, the packets is * is against sacked "cnt", otherwise it's against facked "cnt" */ @@ -2217,11 +2227,7 @@ static void tcp_mark_head_lost(struct sock *sk, int packets) cnt = packets; } - if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - tcp_verify_retransmit_hint(tp, skb); - } + tcp_skb_mark_lost(tp, skb); } tcp_verify_left_out(tp); } @@ -2263,11 +2269,7 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) if (!tcp_skb_timedout(sk, skb)) break; - if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - tcp_verify_retransmit_hint(tp, skb); - } + tcp_skb_mark_lost(tp, skb); } tp->scoreboard_skb_hint = skb; @@ -2378,10 +2380,6 @@ static void tcp_undo_cwr(struct sock *sk, const int undo) } tcp_moderate_cwnd(tp); tp->snd_cwnd_stamp = tcp_time_stamp; - - /* There is something screwy going on with the retrans hints after - an undo */ - tcp_clear_all_retrans_hints(tp); } static inline int tcp_may_undo(struct tcp_sock *tp) @@ -2838,7 +2836,8 @@ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) * is before the ack sequence we can discard it as it's confirmed to have * arrived at the other end. */ -static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) +static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, + u32 prior_snd_una) { struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2848,6 +2847,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) int flag = 0; u32 pkts_acked = 0; u32 reord = tp->packets_out; + u32 prior_sacked = tp->sacked_out; s32 seq_rtt = -1; s32 ca_seq_rtt = -1; ktime_t last_ackt = net_invalid_timestamp(); @@ -2904,9 +2904,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) if (sacked & TCPCB_LOST) tp->lost_out -= acked_pcount; - if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up))) - tp->urg_mode = 0; - tp->packets_out -= acked_pcount; pkts_acked += acked_pcount; @@ -2929,9 +2926,16 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); - tcp_clear_all_retrans_hints(tp); + tp->scoreboard_skb_hint = NULL; + if (skb == tp->retransmit_skb_hint) + tp->retransmit_skb_hint = NULL; + if (skb == tp->lost_skb_hint) + tp->lost_skb_hint = NULL; } + if (likely(between(tp->snd_up, prior_snd_una, tp->snd_una))) + tp->snd_up = tp->snd_una; + if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; @@ -2948,6 +2952,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets) /* Non-retransmitted hole got filled? That's reordering */ if (reord < prior_fackets) tcp_update_reordering(sk, tp->fackets_out - reord, 0); + + /* No need to care for underflows here because + * the lost_skb_hint gets NULLed if we're past it + * (or something non-trivial happened) + */ + if (tcp_is_fack(tp)) + tp->lost_cnt_hint -= pkts_acked; + else + tp->lost_cnt_hint -= prior_sacked - tp->sacked_out; } tp->fackets_out -= min(pkts_acked, tp->fackets_out); @@ -3299,7 +3312,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk, prior_fackets); + flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una); if (tp->frto_counter) frto_cwnd = tcp_process_frto(sk, flag); @@ -3442,6 +3455,22 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, } } +static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th) +{ + __be32 *ptr = (__be32 *)(th + 1); + + if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) + | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { + tp->rx_opt.saw_tstamp = 1; + ++ptr; + tp->rx_opt.rcv_tsval = ntohl(*ptr); + ++ptr; + tp->rx_opt.rcv_tsecr = ntohl(*ptr); + return 1; + } + return 0; +} + /* Fast parse options. This hopes to only see timestamps. * If it is wrong it falls back on tcp_parse_options(). */ @@ -3453,16 +3482,8 @@ static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th, return 0; } else if (tp->rx_opt.tstamp_ok && th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) { - __be32 *ptr = (__be32 *)(th + 1); - if (*ptr == htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) - | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) { - tp->rx_opt.saw_tstamp = 1; - ++ptr; - tp->rx_opt.rcv_tsval = ntohl(*ptr); - ++ptr; - tp->rx_opt.rcv_tsecr = ntohl(*ptr); + if (tcp_parse_aligned_timestamp(tp, th)) return 1; - } } tcp_parse_options(skb, &tp->rx_opt, 1); return 1; @@ -4138,7 +4159,7 @@ drop: skb1 = skb1->prev; } } - __skb_insert(skb, skb1, skb1->next, &tp->out_of_order_queue); + __skb_queue_after(&tp->out_of_order_queue, skb1, skb); /* And clean segments covered by new one as whole. */ while ((skb1 = skb->next) != @@ -4161,6 +4182,18 @@ add_sack: } } +static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *list) +{ + struct sk_buff *next = skb->next; + + __skb_unlink(skb, list); + __kfree_skb(skb); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); + + return next; +} + /* Collapse contiguous sequence of skbs head..tail with * sequence numbers start..end. * Segments with FIN/SYN are not collapsed (only because this @@ -4178,11 +4211,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, for (skb = head; skb != tail;) { /* No new bits? It is possible on ofo queue. */ if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - struct sk_buff *next = skb->next; - __skb_unlink(skb, list); - __kfree_skb(skb); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); - skb = next; + skb = tcp_collapse_one(sk, skb, list); continue; } @@ -4228,7 +4257,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, memcpy(nskb->head, skb->head, header); memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; - __skb_insert(nskb, skb->prev, skb, list); + __skb_queue_before(list, skb, nskb); skb_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ @@ -4246,11 +4275,7 @@ tcp_collapse(struct sock *sk, struct sk_buff_head *list, start += size; } if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - struct sk_buff *next = skb->next; - __skb_unlink(skb, list); - __kfree_skb(skb); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); - skb = next; + skb = tcp_collapse_one(sk, skb, list); if (skb == tail || tcp_hdr(skb)->syn || tcp_hdr(skb)->fin) @@ -4436,8 +4461,8 @@ static void tcp_new_space(struct sock *sk) if (tcp_should_expand_sndbuf(sk)) { int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) + - MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), - demanded = max_t(unsigned int, tp->snd_cwnd, + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff); + int demanded = max_t(unsigned int, tp->snd_cwnd, tp->reordering + 1); sndmem *= 2 * demanded; if (sndmem > sk->sk_sndbuf) @@ -4691,6 +4716,67 @@ out: } #endif /* CONFIG_NET_DMA */ +/* Does PAWS and seqno based validation of an incoming segment, flags will + * play significant role here. + */ +static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, + struct tcphdr *th, int syn_inerr) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* RFC1323: H1. Apply PAWS check first. */ + if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && + tcp_paws_discard(sk, skb)) { + if (!th->rst) { + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); + tcp_send_dupack(sk, skb); + goto discard; + } + /* Reset is accepted even if it did not pass PAWS. */ + } + + /* Step 1: check sequence number */ + if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { + /* RFC793, page 37: "In all states except SYN-SENT, all reset + * (RST) segments are validated by checking their SEQ-fields." + * And page 69: "If an incoming segment is not acceptable, + * an acknowledgment should be sent in reply (unless the RST + * bit is set, if so drop the segment and return)". + */ + if (!th->rst) + tcp_send_dupack(sk, skb); + goto discard; + } + + /* Step 2: check RST bit */ + if (th->rst) { + tcp_reset(sk); + goto discard; + } + + /* ts_recent update must be made after we are sure that the packet + * is in window. + */ + tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); + + /* step 3: check security and precedence [ignored] */ + + /* step 4: Check for a SYN in window. */ + if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { + if (syn_inerr) + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); + tcp_reset(sk); + return -1; + } + + return 1; + +discard: + __kfree_skb(skb); + return 0; +} + /* * TCP receive function for the ESTABLISHED state. * @@ -4718,6 +4804,7 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, struct tcphdr *th, unsigned len) { struct tcp_sock *tp = tcp_sk(sk); + int res; /* * Header prediction. @@ -4756,19 +4843,10 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, /* Check timestamp */ if (tcp_header_len == sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) { - __be32 *ptr = (__be32 *)(th + 1); - /* No? Slow path! */ - if (*ptr != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) - | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP)) + if (!tcp_parse_aligned_timestamp(tp, th)) goto slow_path; - tp->rx_opt.saw_tstamp = 1; - ++ptr; - tp->rx_opt.rcv_tsval = ntohl(*ptr); - ++ptr; - tp->rx_opt.rcv_tsecr = ntohl(*ptr); - /* If PAWS failed, check it more carefully in slow path */ if ((s32)(tp->rx_opt.rcv_tsval - tp->rx_opt.ts_recent) < 0) goto slow_path; @@ -4879,7 +4957,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, goto no_ack; } - __tcp_ack_snd_check(sk, 0); + if (!copied_early || tp->rcv_nxt != tp->rcv_wup) + __tcp_ack_snd_check(sk, 0); no_ack: #ifdef CONFIG_NET_DMA if (copied_early) @@ -4899,51 +4978,12 @@ slow_path: goto csum_error; /* - * RFC1323: H1. Apply PAWS check first. - */ - if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && - tcp_paws_discard(sk, skb)) { - if (!th->rst) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); - tcp_send_dupack(sk, skb); - goto discard; - } - /* Resets are accepted even if PAWS failed. - - ts_recent update must be made after we are sure - that the packet is in window. - */ - } - - /* * Standard slow path. */ - if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { - /* RFC793, page 37: "In all states except SYN-SENT, all reset - * (RST) segments are validated by checking their SEQ-fields." - * And page 69: "If an incoming segment is not acceptable, - * an acknowledgment should be sent in reply (unless the RST bit - * is set, if so drop the segment and return)". - */ - if (!th->rst) - tcp_send_dupack(sk, skb); - goto discard; - } - - if (th->rst) { - tcp_reset(sk); - goto discard; - } - - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - - if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); - tcp_reset(sk); - return 1; - } + res = tcp_validate_incoming(sk, skb, th, 1); + if (res <= 0) + return -res; step5: if (th->ack) @@ -5225,6 +5265,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); int queued = 0; + int res; tp->rx_opt.saw_tstamp = 0; @@ -5277,42 +5318,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, return 0; } - if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp && - tcp_paws_discard(sk, skb)) { - if (!th->rst) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); - tcp_send_dupack(sk, skb); - goto discard; - } - /* Reset is accepted even if it did not pass PAWS. */ - } - - /* step 1: check sequence number */ - if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) { - if (!th->rst) - tcp_send_dupack(sk, skb); - goto discard; - } - - /* step 2: check RST bit */ - if (th->rst) { - tcp_reset(sk); - goto discard; - } - - tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq); - - /* step 3: check security and precedence [ignored] */ - - /* step 4: - * - * Check for a SYN in window. - */ - if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) { - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPABORTONSYN); - tcp_reset(sk); - return 1; - } + res = tcp_validate_incoming(sk, skb, th, 0); + if (res <= 0) + return -res; /* step 5: check the ACK field */ if (th->ack) { diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1b4fee20fc93..5c8fa7f1e327 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -583,14 +583,15 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) rep.th.doff = arg.iov[0].iov_len / 4; tcp_v4_md5_hash_hdr((__u8 *) &rep.opt[1], - key, ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr, &rep.th); + key, ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, &rep.th); } #endif arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, /* XXX */ - sizeof(struct tcphdr), IPPROTO_TCP, 0); + arg.iov[0].iov_len, IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; + arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; net = dev_net(skb->dst->dev); ip_send_reply(net->ipv4.tcp_sock, skb, @@ -606,7 +607,8 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, int oif, - struct tcp_md5sig_key *key) + struct tcp_md5sig_key *key, + int reply_flags) { struct tcphdr *th = tcp_hdr(skb); struct { @@ -618,7 +620,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ]; } rep; struct ip_reply_arg arg; - struct net *net = dev_net(skb->dev); + struct net *net = dev_net(skb->dst->dev); memset(&rep.th, 0, sizeof(struct tcphdr)); memset(&arg, 0, sizeof(arg)); @@ -659,6 +661,7 @@ static void tcp_v4_send_ack(struct sk_buff *skb, u32 seq, u32 ack, ip_hdr(skb)->daddr, &rep.th); } #endif + arg.flags = reply_flags; arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); @@ -681,7 +684,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcptw->tw_ts_recent, tw->tw_bound_dev_if, - tcp_twsk_md5_key(tcptw) + tcp_twsk_md5_key(tcptw), + tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0 ); inet_twsk_put(tw); @@ -694,7 +698,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent, 0, - tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr)); + tcp_v4_md5_do_lookup(sk, ip_hdr(skb)->daddr), + inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0); } /* @@ -1244,6 +1249,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(sk, skb); if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); @@ -1364,6 +1370,10 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric(dst, RTAX_ADVMSS); + if (tcp_sk(sk)->rx_opt.user_mss && + tcp_sk(sk)->rx_opt.user_mss < newtp->advmss) + newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; + tcp_initialize_rcv_mss(newsk); #ifdef CONFIG_TCP_MD5SIG @@ -1567,8 +1577,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = iph->tos; TCP_SKB_CB(skb)->sacked = 0; - sk = __inet_lookup(net, &tcp_hashinfo, iph->saddr, - th->source, iph->daddr, th->dest, inet_iif(skb)); + sk = __inet_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); if (!sk) goto no_tcp_socket; @@ -1946,6 +1955,12 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) return rc; } +static inline int empty_bucket(struct tcp_iter_state *st) +{ + return hlist_empty(&tcp_hashinfo.ehash[st->bucket].chain) && + hlist_empty(&tcp_hashinfo.ehash[st->bucket].twchain); +} + static void *established_get_first(struct seq_file *seq) { struct tcp_iter_state* st = seq->private; @@ -1958,6 +1973,10 @@ static void *established_get_first(struct seq_file *seq) struct inet_timewait_sock *tw; rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); + /* Lockless fast path for the common case of empty buckets */ + if (empty_bucket(st)) + continue; + read_lock_bh(lock); sk_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { if (sk->sk_family != st->family || @@ -2008,13 +2027,15 @@ get_tw: read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); st->state = TCP_SEQ_STATE_ESTABLISHED; - if (++st->bucket < tcp_hashinfo.ehash_size) { - read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); - sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); - } else { - cur = NULL; - goto out; - } + /* Look for next non empty bucket */ + while (++st->bucket < tcp_hashinfo.ehash_size && + empty_bucket(st)) + ; + if (st->bucket >= tcp_hashinfo.ehash_size) + return NULL; + + read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + sk = sk_head(&tcp_hashinfo.ehash[st->bucket].chain); } else sk = sk_next(sk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f976fc57892c..779f2e9d0689 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -395,6 +395,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->pred_flags = 0; newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1; newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1; + newtp->snd_up = treq->snt_isn + 1; tcp_prequeue_init(newtp); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8165f5aa8c71..ba85d8831893 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -345,6 +345,11 @@ static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) TCP_SKB_CB(skb)->end_seq = seq; } +static inline int tcp_urg_mode(const struct tcp_sock *tp) +{ + return tp->snd_una != tp->snd_up; +} + #define OPTION_SACK_ADVERTISE (1 << 0) #define OPTION_TS (1 << 1) #define OPTION_MD5 (1 << 2) @@ -357,6 +362,17 @@ struct tcp_out_options { __u32 tsval, tsecr; /* need to include OPTION_TS */ }; +/* Beware: Something in the Internet is very sensitive to the ordering of + * TCP options, we learned this through the hard way, so be careful here. + * Luckily we can at least blame others for their non-compliance but from + * inter-operatibility perspective it seems that we're somewhat stuck with + * the ordering which we have been using if we want to keep working with + * those broken things (not that it currently hurts anybody as there isn't + * particular reason why the ordering would need to be changed). + * + * At least SACK_PERM as the first option is known to lead to a disaster + * (but it may well be that other scenarios fail similarly). + */ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, const struct tcp_out_options *opts, __u8 **md5_hash) { @@ -371,6 +387,12 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *md5_hash = NULL; } + if (unlikely(opts->mss)) { + *ptr++ = htonl((TCPOPT_MSS << 24) | + (TCPOLEN_MSS << 16) | + opts->mss); + } + if (likely(OPTION_TS & opts->options)) { if (unlikely(OPTION_SACK_ADVERTISE & opts->options)) { *ptr++ = htonl((TCPOPT_SACK_PERM << 24) | @@ -387,12 +409,6 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, *ptr++ = htonl(opts->tsecr); } - if (unlikely(opts->mss)) { - *ptr++ = htonl((TCPOPT_MSS << 24) | - (TCPOLEN_MSS << 16) | - opts->mss); - } - if (unlikely(OPTION_SACK_ADVERTISE & opts->options && !(OPTION_TS & opts->options))) { *ptr++ = htonl((TCPOPT_NOP << 24) | @@ -427,7 +443,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, if (tp->rx_opt.dsack) { tp->rx_opt.dsack = 0; - tp->rx_opt.eff_sacks--; + tp->rx_opt.eff_sacks = tp->rx_opt.num_sacks; } } } @@ -646,7 +662,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, th->check = 0; th->urg_ptr = 0; - if (unlikely(tp->urg_mode && + /* The urg_mode check is necessary during a below snd_una win probe */ + if (unlikely(tcp_urg_mode(tp) && between(tp->snd_up, tcb->seq + 1, tcb->seq + 0xFFFF))) { th->urg_ptr = htons(tp->snd_up - tcb->seq); th->urg = 1; @@ -1012,7 +1029,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) /* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. * - * LARGESEND note: !urg_mode is overkill, only frames up to snd_up + * LARGESEND note: !tcp_urg_mode is overkill, only frames up to snd_up * cannot be large. However, taking into account rare use of URG, this * is not a big flaw. */ @@ -1029,7 +1046,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed) mss_now = tp->mss_cache; - if (large_allowed && sk_can_gso(sk) && !tp->urg_mode) + if (large_allowed && sk_can_gso(sk) && !tcp_urg_mode(tp)) doing_tso = 1; if (dst) { @@ -1193,7 +1210,7 @@ static inline int tcp_nagle_test(struct tcp_sock *tp, struct sk_buff *skb, /* Don't use the nagle rule for urgent data (or for the final FIN). * Nagle can be ignored during F-RTO too (see RFC4138). */ - if (tp->urg_mode || (tp->frto_counter == 2) || + if (tcp_urg_mode(tp) || (tp->frto_counter == 2) || (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) return 1; @@ -1824,6 +1841,8 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, /* changed transmit queue under us so clear hints */ tcp_clear_retrans_hints_partial(tp); + if (next_skb == tp->retransmit_skb_hint) + tp->retransmit_skb_hint = skb; sk_wmem_free_skb(sk, next_skb); } @@ -1838,7 +1857,7 @@ void tcp_simple_retransmit(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; unsigned int mss = tcp_current_mss(sk, 0); - int lost = 0; + u32 prior_lost = tp->lost_out; tcp_for_write_queue(skb, sk) { if (skb == tcp_send_head(sk)) @@ -1849,17 +1868,13 @@ void tcp_simple_retransmit(struct sock *sk) TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; tp->retrans_out -= tcp_skb_pcount(skb); } - if (!(TCP_SKB_CB(skb)->sacked & TCPCB_LOST)) { - TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out += tcp_skb_pcount(skb); - lost = 1; - } + tcp_skb_mark_lost_uncond_verify(tp, skb); } } - tcp_clear_all_retrans_hints(tp); + tcp_clear_retrans_hints_partial(tp); - if (!lost) + if (prior_lost == tp->lost_out) return; if (tcp_is_reno(tp)) @@ -1934,8 +1949,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) /* Collapse two adjacent packets if worthwhile and we can. */ if (!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_SYN) && (skb->len < (cur_mss >> 1)) && - (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && (!tcp_skb_is_last(sk, skb)) && + (tcp_write_queue_next(sk, skb) != tcp_send_head(sk)) && (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(tcp_write_queue_next(sk, skb))->nr_frags == 0) && (tcp_skb_pcount(skb) == 1 && @@ -1996,86 +2011,18 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) return err; } -/* This gets called after a retransmit timeout, and the initially - * retransmitted data is acknowledged. It tries to continue - * resending the rest of the retransmit queue, until either - * we've sent it all or the congestion window limit is reached. - * If doing SACK, the first ACK which comes back for a timeout - * based retransmit packet might feed us FACK information again. - * If so, we use it to avoid unnecessarily retransmissions. - */ -void tcp_xmit_retransmit_queue(struct sock *sk) +static int tcp_can_forward_retransmit(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - int packet_cnt; - - if (tp->retransmit_skb_hint) { - skb = tp->retransmit_skb_hint; - packet_cnt = tp->retransmit_cnt_hint; - } else { - skb = tcp_write_queue_head(sk); - packet_cnt = 0; - } - - /* First pass: retransmit lost packets. */ - if (tp->lost_out) { - tcp_for_write_queue_from(skb, sk) { - __u8 sacked = TCP_SKB_CB(skb)->sacked; - - if (skb == tcp_send_head(sk)) - break; - /* we could do better than to assign each time */ - tp->retransmit_skb_hint = skb; - tp->retransmit_cnt_hint = packet_cnt; - - /* Assume this retransmit will generate - * only one packet for congestion window - * calculation purposes. This works because - * tcp_retransmit_skb() will chop up the - * packet to be MSS sized and all the - * packet counting works out. - */ - if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) - return; - - if (sacked & TCPCB_LOST) { - if (!(sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS))) { - int mib_idx; - - if (tcp_retransmit_skb(sk, skb)) { - tp->retransmit_skb_hint = NULL; - return; - } - if (icsk->icsk_ca_state != TCP_CA_Loss) - mib_idx = LINUX_MIB_TCPFASTRETRANS; - else - mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; - NET_INC_STATS_BH(sock_net(sk), mib_idx); - - if (skb == tcp_write_queue_head(sk)) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - inet_csk(sk)->icsk_rto, - TCP_RTO_MAX); - } - - packet_cnt += tcp_skb_pcount(skb); - if (packet_cnt >= tp->lost_out) - break; - } - } - } - - /* OK, demanded retransmission is finished. */ /* Forward retransmissions are possible only during Recovery. */ if (icsk->icsk_ca_state != TCP_CA_Recovery) - return; + return 0; /* No forward retransmissions in Reno are possible. */ if (tcp_is_reno(tp)) - return; + return 0; /* Yeah, we have to make difficult choice between forward transmission * and retransmission... Both ways have their merits... @@ -2086,43 +2033,104 @@ void tcp_xmit_retransmit_queue(struct sock *sk) */ if (tcp_may_send_now(sk)) - return; + return 0; - /* If nothing is SACKed, highest_sack in the loop won't be valid */ - if (!tp->sacked_out) - return; + return 1; +} - if (tp->forward_skb_hint) - skb = tp->forward_skb_hint; - else +/* This gets called after a retransmit timeout, and the initially + * retransmitted data is acknowledged. It tries to continue + * resending the rest of the retransmit queue, until either + * we've sent it all or the congestion window limit is reached. + * If doing SACK, the first ACK which comes back for a timeout + * based retransmit packet might feed us FACK information again. + * If so, we use it to avoid unnecessarily retransmissions. + */ +void tcp_xmit_retransmit_queue(struct sock *sk) +{ + const struct inet_connection_sock *icsk = inet_csk(sk); + struct tcp_sock *tp = tcp_sk(sk); + struct sk_buff *skb; + struct sk_buff *hole = NULL; + u32 last_lost; + int mib_idx; + int fwd_rexmitting = 0; + + if (!tp->lost_out) + tp->retransmit_high = tp->snd_una; + + if (tp->retransmit_skb_hint) { + skb = tp->retransmit_skb_hint; + last_lost = TCP_SKB_CB(skb)->end_seq; + if (after(last_lost, tp->retransmit_high)) + last_lost = tp->retransmit_high; + } else { skb = tcp_write_queue_head(sk); + last_lost = tp->snd_una; + } + /* First pass: retransmit lost packets. */ tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - tp->forward_skb_hint = skb; + __u8 sacked = TCP_SKB_CB(skb)->sacked; - if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) + if (skb == tcp_send_head(sk)) break; + /* we could do better than to assign each time */ + if (hole == NULL) + tp->retransmit_skb_hint = skb; + /* Assume this retransmit will generate + * only one packet for congestion window + * calculation purposes. This works because + * tcp_retransmit_skb() will chop up the + * packet to be MSS sized and all the + * packet counting works out. + */ if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) - break; + return; + + if (fwd_rexmitting) { +begin_fwd: + if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) + break; + mib_idx = LINUX_MIB_TCPFORWARDRETRANS; + + } else if (!before(TCP_SKB_CB(skb)->seq, tp->retransmit_high)) { + tp->retransmit_high = last_lost; + if (!tcp_can_forward_retransmit(sk)) + break; + /* Backtrack if necessary to non-L'ed skb */ + if (hole != NULL) { + skb = hole; + hole = NULL; + } + fwd_rexmitting = 1; + goto begin_fwd; - if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) + } else if (!(sacked & TCPCB_LOST)) { + if (hole == NULL && !(sacked & TCPCB_SACKED_RETRANS)) + hole = skb; continue; - /* Ok, retransmit it. */ - if (tcp_retransmit_skb(sk, skb)) { - tp->forward_skb_hint = NULL; - break; + } else { + last_lost = TCP_SKB_CB(skb)->end_seq; + if (icsk->icsk_ca_state != TCP_CA_Loss) + mib_idx = LINUX_MIB_TCPFASTRETRANS; + else + mib_idx = LINUX_MIB_TCPSLOWSTARTRETRANS; } + if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) + continue; + + if (tcp_retransmit_skb(sk, skb)) + return; + NET_INC_STATS_BH(sock_net(sk), mib_idx); + if (skb == tcp_write_queue_head(sk)) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX); - - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFORWARDRETRANS); } } @@ -2241,6 +2249,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct sk_buff *skb; struct tcp_md5sig_key *md5; __u8 *md5_hash_location; + int mss; skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); if (skb == NULL) @@ -2251,13 +2260,17 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, skb->dst = dst_clone(dst); + mss = dst_metric(dst, RTAX_ADVMSS); + if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss) + mss = tp->rx_opt.user_mss; + if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ __u8 rcv_wscale; /* Set this up on the first call only */ req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW); /* tcp_full_space because it is guaranteed to be the first packet */ tcp_select_initial_window(tcp_full_space(sk), - dst_metric(dst, RTAX_ADVMSS) - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), + mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), &req->rcv_wnd, &req->window_clamp, ireq->wscale_ok, @@ -2266,9 +2279,13 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, } memset(&opts, 0, sizeof(opts)); +#ifdef CONFIG_SYN_COOKIES + if (unlikely(req->cookie_ts)) + TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); + else +#endif TCP_SKB_CB(skb)->when = tcp_time_stamp; - tcp_header_size = tcp_synack_options(sk, req, - dst_metric(dst, RTAX_ADVMSS), + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, &md5) + sizeof(struct tcphdr); @@ -2280,7 +2297,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->syn = 1; th->ack = 1; TCP_ECN_make_synack(req, th); - th->source = inet_sk(sk)->sport; + th->source = ireq->loc_port; th->dest = ireq->rmt_port; /* Setting of flags are superfluous here for callers (and ECE is * not even correctly set) @@ -2292,11 +2309,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rcv_wnd, 65535U)); -#ifdef CONFIG_SYN_COOKIES - if (unlikely(req->cookie_ts)) - TCP_SKB_CB(skb)->when = cookie_init_timestamp(req); - else -#endif tcp_options_write((__be32 *)(th + 1), tp, &opts, &md5_hash_location); th->doff = (tcp_header_size >> 2); TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS); @@ -2342,6 +2354,9 @@ static void tcp_connect_init(struct sock *sk) if (!tp->window_clamp) tp->window_clamp = dst_metric(dst, RTAX_WINDOW); tp->advmss = dst_metric(dst, RTAX_ADVMSS); + if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < tp->advmss) + tp->advmss = tp->rx_opt.user_mss; + tcp_initialize_rcv_mss(sk); tcp_select_initial_window(tcp_full_space(sk), @@ -2360,6 +2375,7 @@ static void tcp_connect_init(struct sock *sk) tcp_init_wl(tp, tp->write_seq, 0); tp->snd_una = tp->write_seq; tp->snd_sml = tp->write_seq; + tp->snd_up = tp->write_seq; tp->rcv_nxt = 0; tp->rcv_wup = 0; tp->copied_seq = 0; @@ -2569,8 +2585,7 @@ int tcp_write_wakeup(struct sock *sk) tcp_event_new_data_sent(sk, skb); return err; } else { - if (tp->urg_mode && - between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) + if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF)) tcp_xmit_probe_skb(sk, 1); return tcp_xmit_probe_skb(sk, 0); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 5ab6ba19c3ce..6b6dff1164b9 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -201,7 +201,7 @@ static void tcp_delack_timer(unsigned long data) NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk->sk_backlog_rcv(sk, skb); + sk_backlog_rcv(sk, skb); tp->ucopy.memory = 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 57e26fa66185..98c1fd09be88 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -8,7 +8,7 @@ * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Arnt Gulbrandsen, <agulbra@nvg.unit.no> - * Alan Cox, <Alan.Cox@linux.org> + * Alan Cox, <alan@lxorguk.ukuu.org.uk> * Hirokazu Takahashi, <taka@valinux.co.jp> * * Fixes: @@ -108,9 +108,6 @@ * Snmp MIB for the UDP layer */ -DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; -EXPORT_SYMBOL(udp_stats_in6); - struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); @@ -125,14 +122,23 @@ EXPORT_SYMBOL(sysctl_udp_wmem_min); atomic_t udp_memory_allocated; EXPORT_SYMBOL(udp_memory_allocated); -static inline int __udp_lib_lport_inuse(struct net *net, __u16 num, - const struct hlist_head udptable[]) +static int udp_lib_lport_inuse(struct net *net, __u16 num, + const struct hlist_head udptable[], + struct sock *sk, + int (*saddr_comp)(const struct sock *sk1, + const struct sock *sk2)) { - struct sock *sk; + struct sock *sk2; struct hlist_node *node; - sk_for_each(sk, node, &udptable[udp_hashfn(net, num)]) - if (net_eq(sock_net(sk), net) && sk->sk_hash == num) + sk_for_each(sk2, node, &udptable[udp_hashfn(net, num)]) + if (net_eq(sock_net(sk2), net) && + sk2 != sk && + sk2->sk_hash == num && + (!sk2->sk_reuse || !sk->sk_reuse) && + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if + || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (*saddr_comp)(sk, sk2)) return 1; return 0; } @@ -149,83 +155,37 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum, const struct sock *sk2 ) ) { struct hlist_head *udptable = sk->sk_prot->h.udp_hash; - struct hlist_node *node; - struct hlist_head *head; - struct sock *sk2; int error = 1; struct net *net = sock_net(sk); write_lock_bh(&udp_hash_lock); if (!snum) { - int i, low, high, remaining; - unsigned rover, best, best_size_so_far; + int low, high, remaining; + unsigned rand; + unsigned short first; inet_get_local_port_range(&low, &high); remaining = (high - low) + 1; - best_size_so_far = UINT_MAX; - best = rover = net_random() % remaining + low; - - /* 1st pass: look for empty (or shortest) hash chain */ - for (i = 0; i < UDP_HTABLE_SIZE; i++) { - int size = 0; - - head = &udptable[udp_hashfn(net, rover)]; - if (hlist_empty(head)) - goto gotit; - - sk_for_each(sk2, node, head) { - if (++size >= best_size_so_far) - goto next; - } - best_size_so_far = size; - best = rover; - next: - /* fold back if end of range */ - if (++rover > high) - rover = low + ((rover - low) - & (UDP_HTABLE_SIZE - 1)); - - - } - - /* 2nd pass: find hole in shortest hash chain */ - rover = best; - for (i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++) { - if (! __udp_lib_lport_inuse(net, rover, udptable)) - goto gotit; - rover += UDP_HTABLE_SIZE; - if (rover > high) - rover = low + ((rover - low) - & (UDP_HTABLE_SIZE - 1)); + rand = net_random(); + snum = first = rand % remaining + low; + rand |= 1; + while (udp_lib_lport_inuse(net, snum, udptable, sk, + saddr_comp)) { + do { + snum = snum + rand; + } while (snum < low || snum > high); + if (snum == first) + goto fail; } - - - /* All ports in use! */ + } else if (udp_lib_lport_inuse(net, snum, udptable, sk, saddr_comp)) goto fail; -gotit: - snum = rover; - } else { - head = &udptable[udp_hashfn(net, snum)]; - - sk_for_each(sk2, node, head) - if (sk2->sk_hash == snum && - sk2 != sk && - net_eq(sock_net(sk2), net) && - (!sk2->sk_reuse || !sk->sk_reuse) && - (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if - || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (*saddr_comp)(sk, sk2) ) - goto fail; - } - inet_sk(sk)->num = snum; sk->sk_hash = snum; if (sk_unhashed(sk)) { - head = &udptable[udp_hashfn(net, snum)]; - sk_add_node(sk, head); + sk_add_node(sk, &udptable[udp_hashfn(net, snum)]); sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); } error = 0; @@ -302,7 +262,29 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, return result; } -static inline struct sock *udp_v4_mcast_next(struct sock *sk, +static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport, + struct hlist_head udptable[]) +{ + struct sock *sk; + const struct iphdr *iph = ip_hdr(skb); + + if (unlikely(sk = skb_steal_sock(skb))) + return sk; + else + return __udp4_lib_lookup(dev_net(skb->dst->dev), iph->saddr, sport, + iph->daddr, dport, inet_iif(skb), + udptable); +} + +struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, int dif) +{ + return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash); +} +EXPORT_SYMBOL_GPL(udp4_lib_lookup); + +static inline struct sock *udp_v4_mcast_next(struct net *net, struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, int dif) @@ -314,7 +296,8 @@ static inline struct sock *udp_v4_mcast_next(struct sock *sk, sk_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); - if (s->sk_hash != hnum || + if (!net_eq(sock_net(s), net) || + s->sk_hash != hnum || (inet->daddr && inet->daddr != rmt_addr) || (inet->dport != rmt_port && inet->dport) || (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || @@ -650,6 +633,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .saddr = saddr, .tos = tos } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; @@ -1097,15 +1081,16 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, read_lock(&udp_hash_lock); sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); dif = skb->dev->ifindex; - sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); if (sk) { struct sock *sknext = NULL; do { struct sk_buff *skb1 = skb; - sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, - uh->source, saddr, dif); + sknext = udp_v4_mcast_next(net, sk_next(sk), uh->dest, + daddr, uh->source, saddr, + dif); if (sknext) skb1 = skb_clone(skb, GFP_ATOMIC); @@ -1201,8 +1186,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], return __udp4_lib_mcast_deliver(net, skb, uh, saddr, daddr, udptable); - sk = __udp4_lib_lookup(net, saddr, uh->source, daddr, - uh->dest, inet_iif(skb), udptable); + sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk != NULL) { int ret = udp_queue_rcv_skb(sk, skb); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 07735ed280d7..55dc6beab9aa 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -33,6 +33,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); x->sel.sport_mask = htons(0xffff); + x->sel.family = AF_INET; x->sel.prefixlen_d = 32; x->sel.prefixlen_s = 32; x->sel.proto = fl->proto; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7b6a584b62dd..d9da5eb9dcb2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2483,8 +2483,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, if (!idev && dev->mtu >= IPV6_MIN_MTU) idev = ipv6_add_dev(dev); - if (idev) + if (idev) { idev->if_flags |= IF_READY; + run_pending = 1; + } } else { if (!addrconf_qdisc_ok(dev)) { /* device is still not ready. */ @@ -3982,7 +3984,6 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, } static int addrconf_sysctl_forward_strategy(ctl_table *table, - int __user *name, int nlen, void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 95055f8c3f35..01edac888510 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -50,6 +50,7 @@ #include <net/ipip.h> #include <net/protocol.h> #include <net/inet_common.h> +#include <net/route.h> #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/addrconf.h> @@ -794,61 +795,55 @@ static void ipv6_packet_cleanup(void) dev_remove_pack(&ipv6_packet_type); } -static int __init init_ipv6_mibs(void) +static int __net_init ipv6_init_mibs(struct net *net) { - if (snmp_mib_init((void **)ipv6_statistics, + if (snmp_mib_init((void **)net->mib.udp_stats_in6, + sizeof (struct udp_mib)) < 0) + return -ENOMEM; + if (snmp_mib_init((void **)net->mib.udplite_stats_in6, + sizeof (struct udp_mib)) < 0) + goto err_udplite_mib; + if (snmp_mib_init((void **)net->mib.ipv6_statistics, sizeof(struct ipstats_mib)) < 0) goto err_ip_mib; - if (snmp_mib_init((void **)icmpv6_statistics, + if (snmp_mib_init((void **)net->mib.icmpv6_statistics, sizeof(struct icmpv6_mib)) < 0) goto err_icmp_mib; - if (snmp_mib_init((void **)icmpv6msg_statistics, + if (snmp_mib_init((void **)net->mib.icmpv6msg_statistics, sizeof(struct icmpv6msg_mib)) < 0) goto err_icmpmsg_mib; - if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0) - goto err_udp_mib; - if (snmp_mib_init((void **)udplite_stats_in6, - sizeof (struct udp_mib)) < 0) - goto err_udplite_mib; return 0; -err_udplite_mib: - snmp_mib_free((void **)udp_stats_in6); -err_udp_mib: - snmp_mib_free((void **)icmpv6msg_statistics); err_icmpmsg_mib: - snmp_mib_free((void **)icmpv6_statistics); + snmp_mib_free((void **)net->mib.icmpv6_statistics); err_icmp_mib: - snmp_mib_free((void **)ipv6_statistics); + snmp_mib_free((void **)net->mib.ipv6_statistics); err_ip_mib: + snmp_mib_free((void **)net->mib.udplite_stats_in6); +err_udplite_mib: + snmp_mib_free((void **)net->mib.udp_stats_in6); return -ENOMEM; - } -static void cleanup_ipv6_mibs(void) +static void __net_exit ipv6_cleanup_mibs(struct net *net) { - snmp_mib_free((void **)ipv6_statistics); - snmp_mib_free((void **)icmpv6_statistics); - snmp_mib_free((void **)icmpv6msg_statistics); - snmp_mib_free((void **)udp_stats_in6); - snmp_mib_free((void **)udplite_stats_in6); + snmp_mib_free((void **)net->mib.udp_stats_in6); + snmp_mib_free((void **)net->mib.udplite_stats_in6); + snmp_mib_free((void **)net->mib.ipv6_statistics); + snmp_mib_free((void **)net->mib.icmpv6_statistics); + snmp_mib_free((void **)net->mib.icmpv6msg_statistics); } -static int inet6_net_init(struct net *net) +static int __net_init inet6_net_init(struct net *net) { int err = 0; net->ipv6.sysctl.bindv6only = 0; - net->ipv6.sysctl.flush_delay = 0; - net->ipv6.sysctl.ip6_rt_max_size = 4096; - net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; - net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; - net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; - net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; - net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; - net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; net->ipv6.sysctl.icmpv6_time = 1*HZ; + err = ipv6_init_mibs(net); + if (err) + return err; #ifdef CONFIG_PROC_FS err = udp6_proc_init(net); if (err) @@ -859,7 +854,6 @@ static int inet6_net_init(struct net *net) err = ac6_proc_init(net); if (err) goto proc_ac6_fail; -out: #endif return err; @@ -868,7 +862,9 @@ proc_ac6_fail: tcp6_proc_exit(net); proc_tcp6_fail: udp6_proc_exit(net); - goto out; +out: + ipv6_cleanup_mibs(net); + return err; #endif } @@ -879,6 +875,7 @@ static void inet6_net_exit(struct net *net) tcp6_proc_exit(net); ac6_proc_exit(net); #endif + ipv6_cleanup_mibs(net); } static struct pernet_operations inet6_net_ops = { @@ -929,11 +926,6 @@ static int __init inet6_init(void) if (err) goto out_sock_register_fail; - /* Initialise ipv6 mibs */ - err = init_ipv6_mibs(); - if (err) - goto out_unregister_sock; - #ifdef CONFIG_SYSCTL err = ipv6_static_sysctl_register(); if (err) @@ -1067,8 +1059,6 @@ register_pernet_fail: ipv6_static_sysctl_unregister(); static_sysctl_fail: #endif - cleanup_ipv6_mibs(); -out_unregister_sock: sock_unregister(PF_INET6); rtnl_unregister_all(PF_INET6); out_sock_register_fail: @@ -1125,7 +1115,6 @@ static void __exit inet6_exit(void) #ifdef CONFIG_SYSCTL ipv6_static_sysctl_unregister(); #endif - cleanup_ipv6_mibs(); proto_unregister(&rawv6_prot); proto_unregister(&udplitev6_prot); proto_unregister(&udpv6_prot); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 410046a8cc91..e44deb8d4df2 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -661,6 +661,11 @@ int datagram_send_ctl(struct net *net, switch (rthdr->type) { #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: + if (rthdr->hdrlen != 2 || + rthdr->segments_left != 1) { + err = -EINVAL; + goto exit_f; + } break; #endif default: diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 837c830d6d8e..6bfffec2371c 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -277,7 +277,7 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; @@ -301,7 +301,8 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) return 1; } - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); dst_release(dst); return -1; } @@ -319,7 +320,8 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) int n, i; struct ipv6_rt_hdr *hdr; struct rt0_hdr *rthdr; - int accept_source_route = dev_net(skb->dev)->ipv6.devconf_all->accept_source_route; + struct net *net = dev_net(skb->dev); + int accept_source_route = net->ipv6.devconf_all->accept_source_route; idev = in6_dev_get(skb->dev); if (idev) { @@ -331,7 +333,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; @@ -341,7 +343,7 @@ static int ipv6_rthdr_rcv(struct sk_buff *skb) if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) || skb->pkt_type != PACKET_HOST) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -356,7 +358,7 @@ looped_back: * processed by own */ if (!addr) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -382,7 +384,7 @@ looped_back: goto unknown_rh; /* Silently discard invalid RTH type 2 */ if (hdr->hdrlen != 2 || hdr->segments_left != 1) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -1; @@ -401,7 +403,7 @@ looped_back: n = hdr->hdrlen >> 1; if (hdr->segments_left > n) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((&hdr->segments_left) - @@ -415,7 +417,7 @@ looped_back: if (skb_cloned(skb)) { /* the copy is a forwarded packet */ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -1; @@ -438,13 +440,13 @@ looped_back: if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, (xfrm_address_t *)&ipv6_hdr(skb)->saddr, IPPROTO_ROUTING) < 0) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; } if (!ipv6_chk_home_addr(dev_net(skb->dst->dev), addr)) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -456,7 +458,7 @@ looped_back: } if (ipv6_addr_is_multicast(addr)) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); return -1; @@ -476,7 +478,7 @@ looped_back: if (skb->dst->dev->flags&IFF_LOOPBACK) { if (ipv6_hdr(skb)->hop_limit <= 1) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); @@ -492,7 +494,7 @@ looped_back: return -1; unknown_rh: - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb_network_header(skb)); return -1; @@ -579,29 +581,33 @@ static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { const unsigned char *nh = skb_network_header(skb); u32 pkt_len; + struct net *net = dev_net(skb->dst->dev); if (nh[optoff + 1] != 4 || (optoff & 3) != 2) { LIMIT_NETDEBUG(KERN_DEBUG "ipv6_hop_jumbo: wrong jumbo opt length/alignment %d\n", nh[optoff+1]); - IP6_INC_STATS_BH(ipv6_skb_idev(skb), + IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); goto drop; } pkt_len = ntohl(*(__be32 *)(nh + optoff + 2)); if (pkt_len <= IPV6_MAXPLEN) { - IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2); return 0; } if (ipv6_hdr(skb)->payload_len) { - IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff); return 0; } if (pkt_len > skb->len - sizeof(struct ipv6hdr)) { - IP6_INC_STATS_BH(ipv6_skb_idev(skb), IPSTATS_MIB_INTRUNCATEDPKTS); + IP6_INC_STATS_BH(net, ipv6_skb_idev(skb), + IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b3157a0cc15d..9b7d19ae5ced 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -183,7 +183,7 @@ static inline int icmpv6_xrlim_allow(struct sock *sk, int type, */ dst = ip6_route_output(net, sk, fl); if (dst->error) { - IP6_INC_STATS(ip6_dst_idev(dst), + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { res = 1; @@ -664,7 +664,7 @@ static int icmpv6_rcv(struct sk_buff *skb) skb_set_network_header(skb, nh); } - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INMSGS); + ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INMSGS); saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; @@ -693,7 +693,7 @@ static int icmpv6_rcv(struct sk_buff *skb) type = hdr->icmp6_type; - ICMP6MSGIN_INC_STATS_BH(idev, type); + ICMP6MSGIN_INC_STATS_BH(dev_net(dev), idev, type); switch (type) { case ICMPV6_ECHO_REQUEST: @@ -772,7 +772,7 @@ static int icmpv6_rcv(struct sk_buff *skb) return 0; discard_it: - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(dev_net(dev), idev, ICMP6_MIB_INERRORS); drop_no_count: kfree_skb(skb); return 0; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 7e14cccd0561..936f48946e20 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -59,6 +59,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt struct ipv6hdr *hdr; u32 pkt_len; struct inet6_dev *idev; + struct net *net = dev_net(skb->dev); if (skb->pkt_type == PACKET_OTHERHOST) { kfree_skb(skb); @@ -69,11 +70,11 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt idev = __in6_dev_get(skb->dev); - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INRECEIVES); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INRECEIVES); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL || !idev || unlikely(idev->cnf.disable_ipv6)) { - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); rcu_read_unlock(); goto out; } @@ -118,11 +119,12 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt /* pkt_len may be zero if Jumbo payload option is present */ if (pkt_len || hdr->nexthdr != NEXTHDR_HOP) { if (pkt_len + sizeof(struct ipv6hdr) > skb->len) { - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INTRUNCATEDPKTS); + IP6_INC_STATS_BH(net, + idev, IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) { - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); goto drop; } hdr = ipv6_hdr(skb); @@ -130,7 +132,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt if (hdr->nexthdr == NEXTHDR_HOP) { if (ipv6_parse_hopopts(skb) < 0) { - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); rcu_read_unlock(); return 0; } @@ -141,7 +143,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt return NF_HOOK(PF_INET6, NF_INET_PRE_ROUTING, skb, dev, NULL, ip6_rcv_finish); err: - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); drop: rcu_read_unlock(); kfree_skb(skb); @@ -161,6 +163,7 @@ static int ip6_input_finish(struct sk_buff *skb) int nexthdr, raw; u8 hash; struct inet6_dev *idev; + struct net *net = dev_net(skb->dst->dev); /* * Parse extension headers @@ -205,24 +208,25 @@ resubmit: if (ret > 0) goto resubmit; else if (ret == 0) - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); } else { if (!raw) { if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INUNKNOWNPROTOS); + IP6_INC_STATS_BH(net, idev, + IPSTATS_MIB_INUNKNOWNPROTOS); icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_UNK_NEXTHDR, nhoff, skb->dev); } } else - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDELIVERS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); kfree_skb(skb); } rcu_read_unlock(); return 0; discard: - IP6_INC_STATS_BH(idev, IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDISCARDS); rcu_read_unlock(); kfree_skb(skb); return 0; @@ -240,7 +244,8 @@ int ip6_mc_input(struct sk_buff *skb) struct ipv6hdr *hdr; int deliver; - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS); + IP6_INC_STATS_BH(dev_net(skb->dst->dev), + ip6_dst_idev(skb->dst), IPSTATS_MIB_INMCASTPKTS); hdr = ipv6_hdr(skb); deliver = ipv6_chk_mcast_addr(skb->dev, &hdr->daddr, NULL); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3df2c442d90b..c77db0b95e26 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -103,7 +103,8 @@ static int ip6_output_finish(struct sk_buff *skb) else if (dst->neighbour) return dst->neighbour->output(skb); - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS_BH(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; @@ -150,13 +151,14 @@ static int ip6_output2(struct sk_buff *skb) ip6_dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { - IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(dev_net(dev), idev, + IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } } - IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); + IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCASTPKTS); } return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, @@ -175,7 +177,8 @@ int ip6_output(struct sk_buff *skb) { struct inet6_dev *idev = ip6_dst_idev(skb->dst); if (unlikely(idev->cnf.disable_ipv6)) { - IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(dev_net(skb->dst->dev), idev, + IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } @@ -194,6 +197,7 @@ int ip6_output(struct sk_buff *skb) int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, struct ipv6_txoptions *opt, int ipfragok) { + struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct in6_addr *first_hop = &fl->fl6_dst; struct dst_entry *dst = skb->dst; @@ -216,7 +220,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (skb_headroom(skb) < head_room) { struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); if (skb2 == NULL) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return -ENOBUFS; @@ -270,7 +274,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, mtu = dst_mtu(dst); if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTREQUESTS); return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); @@ -280,7 +284,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } @@ -422,7 +426,7 @@ int ip6_forward(struct sk_buff *skb) goto drop; if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { - IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } @@ -455,7 +459,8 @@ int ip6_forward(struct sk_buff *skb) skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0, skb->dev); - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, + ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; @@ -468,13 +473,14 @@ int ip6_forward(struct sk_buff *skb) if (proxied > 0) return ip6_input(skb); else if (proxied < 0) { - IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } } if (!xfrm6_route_forward(skb)) { - IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); goto drop; } dst = skb->dst; @@ -523,14 +529,16 @@ int ip6_forward(struct sk_buff *skb) /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev); - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS_BH(net, + ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); + IP6_INC_STATS_BH(net, + ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } if (skb_cow(skb, dst->dev->hard_header_len)) { - IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); goto drop; } @@ -540,12 +548,12 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); error: - IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS); drop: kfree_skb(skb); return -EINVAL; @@ -613,7 +621,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { - struct net_device *dev; struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info*)skb->dst; struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; @@ -623,8 +630,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) __be32 frag_id = 0; int ptr, offset = 0, err=0; u8 *prevhdr, nexthdr = 0; + struct net *net = dev_net(skb->dst->dev); - dev = rt->u.dst.dev; hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; @@ -637,7 +644,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (!skb->local_df) { skb->dev = skb->dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } @@ -686,7 +694,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) *prevhdr = NEXTHDR_FRAGMENT; tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); if (!tmp_hdr) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IPSTATS_MIB_FRAGFAILS); return -ENOMEM; } @@ -737,7 +746,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) err = output(skb); if(!err) - IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES); + IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IPSTATS_MIB_FRAGCREATES); if (err || !frag) break; @@ -750,7 +760,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) kfree(tmp_hdr); if (err == 0) { - IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS); + IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IPSTATS_MIB_FRAGOKS); dst_release(&rt->u.dst); return 0; } @@ -761,7 +772,8 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) frag = skb; } - IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IPSTATS_MIB_FRAGFAILS); dst_release(&rt->u.dst); return err; } @@ -795,7 +807,7 @@ slow_path: if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; @@ -859,15 +871,16 @@ slow_path: if (err) goto fail; - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES); + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IPSTATS_MIB_FRAGCREATES); } - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGOKS); kfree_skb(skb); return err; fail: - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return err; @@ -982,7 +995,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, out_err_release: if (err == -ENETUNREACH) - IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); dst_release(*dst); *dst = NULL; return err; @@ -1387,7 +1400,7 @@ alloc_new_skb: return 0; error: inet->cork.length -= length; - IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } @@ -1411,6 +1424,7 @@ int ip6_push_pending_frames(struct sock *sk) struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = np->cork.opt; struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; @@ -1464,12 +1478,12 @@ int ip6_push_pending_frames(struct sock *sk) skb->mark = sk->sk_mark; skb->dst = dst_clone(&rt->u.dst); - IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb->dst); - ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type); - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); + ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); } err = ip6_local_out(skb); @@ -1493,7 +1507,7 @@ void ip6_flush_pending_frames(struct sock *sk) while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) { if (skb->dst) - IP6_INC_STATS(ip6_dst_idev(skb->dst), + IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 17c7b098cdb0..64ce3d33d9c6 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1050,10 +1050,10 @@ ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) } switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): ret = ip4ip6_tnl_xmit(skb, dev); break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): ret = ip6ip6_tnl_xmit(skb, dev); break; default: diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 095bc453ff4c..0524769632e7 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -224,7 +224,7 @@ static struct file_operations ip6mr_vif_fops = { .open = ip6mr_vif_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_private, }; static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) @@ -338,7 +338,7 @@ static struct file_operations ip6mr_mfc_fops = { .open = ipmr_mfc_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_private, }; #endif @@ -981,14 +981,15 @@ int __init ip6_mr_init(void) goto proc_cache_fail; #endif return 0; -reg_notif_fail: - kmem_cache_destroy(mrt_cachep); #ifdef CONFIG_PROC_FS -proc_vif_fail: - unregister_netdevice_notifier(&ip6_mr_notifier); proc_cache_fail: proc_net_remove(&init_net, "ip6_mr_vif"); +proc_vif_fail: + unregister_netdevice_notifier(&ip6_mr_notifier); #endif +reg_notif_fail: + del_timer(&ipmr_expire_timer); + kmem_cache_destroy(mrt_cachep); return err; } @@ -1383,7 +1384,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) static inline int ip6mr_forward2_finish(struct sk_buff *skb) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst), + IPSTATS_MIB_OUTFORWDATAGRAMS); return dst_output(skb); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 4e5eac301f91..2aa294be0c79 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -366,11 +366,16 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } /* routing header option needs extra check */ + retv = -EINVAL; if (optname == IPV6_RTHDR && opt && opt->srcrt) { struct ipv6_rt_hdr *rthdr = opt->srcrt; switch (rthdr->type) { #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) case IPV6_SRCRT_TYPE_2: + if (rthdr->hdrlen != 2 || + rthdr->segments_left != 1) + goto sticky_done; + break; #endif default: diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index e7c03bcc2788..d7b3c6d398ae 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1446,7 +1446,7 @@ static void mld_sendpack(struct sk_buff *skb) int err; struct flowi fl; - IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); payload_len = (skb->tail - skb->network_header) - sizeof(*pip6); mldlen = skb->tail - skb->transport_header; pip6->payload_len = htons(payload_len); @@ -1474,11 +1474,11 @@ static void mld_sendpack(struct sk_buff *skb) dst_output); out: if (!err) { - ICMP6MSGOUT_INC_STATS_BH(idev, ICMPV6_MLD2_REPORT); - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS); - IP6_INC_STATS_BH(idev, IPSTATS_MIB_OUTMCASTPKTS); + ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT); + ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTMCASTPKTS); } else - IP6_INC_STATS_BH(idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); if (likely(idev != NULL)) in6_dev_put(idev); @@ -1771,7 +1771,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) struct flowi fl; rcu_read_lock(); - IP6_INC_STATS(__in6_dev_get(dev), + IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTREQUESTS); rcu_read_unlock(); if (type == ICMPV6_MGM_REDUCTION) @@ -1787,7 +1787,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) if (skb == NULL) { rcu_read_lock(); - IP6_INC_STATS(__in6_dev_get(dev), + IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTDISCARDS); rcu_read_unlock(); return; @@ -1839,11 +1839,11 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) dst_output); out: if (!err) { - ICMP6MSGOUT_INC_STATS(idev, type); - ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); - IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS); + ICMP6MSGOUT_INC_STATS(net, idev, type); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTMCASTPKTS); } else - IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); if (likely(idev != NULL)) in6_dev_put(idev); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f1c62ba0f56b..172438320eec 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -516,13 +516,13 @@ static void __ndisc_send(struct net_device *dev, skb->dst = dst; idev = in6_dev_get(dst->dev); - IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, dst_output); if (!err) { - ICMP6MSGOUT_INC_STATS(idev, type); - ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); + ICMP6MSGOUT_INC_STATS(net, idev, type); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } if (likely(idev != NULL)) @@ -1199,7 +1199,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) } neigh->flags |= NTF_ROUTER; } else if (rt) { - rt->rt6i_flags |= (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); + rt->rt6i_flags = (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); } if (rt) @@ -1581,12 +1581,12 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, buff->dst = dst; idev = in6_dev_get(dst->dev); - IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev, dst_output); if (!err) { - ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT); - ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS); + ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } if (likely(idev != NULL)) @@ -1730,9 +1730,8 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f return ret; } -int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, - int nlen, void __user *oldval, - size_t __user *oldlenp, +int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, + void __user *oldval, size_t __user *oldlenp, void __user *newval, size_t newlen) { struct net_device *dev = ctl->extra1; @@ -1745,13 +1744,11 @@ int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name, switch (ctl->ctl_name) { case NET_NEIGH_REACHABLE_TIME: - ret = sysctl_jiffies(ctl, name, nlen, - oldval, oldlenp, newval, newlen); + ret = sysctl_jiffies(ctl, oldval, oldlenp, newval, newlen); break; case NET_NEIGH_RETRANS_TIME_MS: case NET_NEIGH_REACHABLE_TIME_MS: - ret = sysctl_ms_jiffies(ctl, name, nlen, - oldval, oldlenp, newval, newlen); + ret = sysctl_ms_jiffies(ctl, oldval, oldlenp, newval, newlen); break; default: ret = 0; diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 8c6c5e71f210..fd5b3a4e3329 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -12,6 +12,7 @@ int ip6_route_me_harder(struct sk_buff *skb) { + struct net *net = dev_net(skb->dst->dev); struct ipv6hdr *iph = ipv6_hdr(skb); struct dst_entry *dst; struct flowi fl = { @@ -23,7 +24,7 @@ int ip6_route_me_harder(struct sk_buff *skb) .saddr = iph->saddr, } }, }; - dst = ip6_route_output(&init_net, skb->sk, &fl); + dst = ip6_route_output(net, skb->sk, &fl); #ifdef CONFIG_XFRM if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -33,7 +34,7 @@ int ip6_route_me_harder(struct sk_buff *skb) #endif if (dst->error) { - IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); dst_release(dst); return -EINVAL; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 0cfcce7b18d8..53ea512c4608 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -55,30 +55,29 @@ config IP6_NF_IPTABLES To compile it as a module, choose M here. If unsure, say N. +if IP6_NF_IPTABLES + # The simple matches. -config IP6_NF_MATCH_RT - tristate '"rt" Routing header match support' - depends on IP6_NF_IPTABLES +config IP6_NF_MATCH_AH + tristate '"ah" match support' depends on NETFILTER_ADVANCED help - rt matching allows you to match packets based on the routing - header of the packet. + This module allows one to match AH packets. To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_MATCH_OPTS - tristate '"hopbyhop" and "dst" opts header match support' - depends on IP6_NF_IPTABLES +config IP6_NF_MATCH_EUI64 + tristate '"eui64" address check' depends on NETFILTER_ADVANCED help - This allows one to match packets based on the hop-by-hop - and destination options headers of a packet. + This module performs checking on the IPv6 source address + Compares the last 64 bits with the EUI64 (delivered + from the MAC address) address To compile it as a module, choose M here. If unsure, say N. config IP6_NF_MATCH_FRAG tristate '"frag" Fragmentation header match support' - depends on IP6_NF_IPTABLES depends on NETFILTER_ADVANCED help frag matching allows you to match packets based on the fragmentation @@ -86,9 +85,17 @@ config IP6_NF_MATCH_FRAG To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_MATCH_OPTS + tristate '"hbh" hop-by-hop and "dst" opts header match support' + depends on NETFILTER_ADVANCED + help + This allows one to match packets based on the hop-by-hop + and destination options headers of a packet. + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_MATCH_HL tristate '"hl" match support' - depends on IP6_NF_IPTABLES depends on NETFILTER_ADVANCED help HL matching allows you to match packets based on the hop @@ -98,7 +105,6 @@ config IP6_NF_MATCH_HL config IP6_NF_MATCH_IPV6HEADER tristate '"ipv6header" IPv6 Extension Headers Match' - depends on IP6_NF_IPTABLES default m if NETFILTER_ADVANCED=n help This module allows one to match packets based upon @@ -106,54 +112,40 @@ config IP6_NF_MATCH_IPV6HEADER To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_MATCH_AH - tristate '"ah" match support' - depends on IP6_NF_IPTABLES - depends on NETFILTER_ADVANCED - help - This module allows one to match AH packets. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_MATCH_MH tristate '"mh" match support' - depends on IP6_NF_IPTABLES depends on NETFILTER_ADVANCED help This module allows one to match MH packets. To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_MATCH_EUI64 - tristate '"eui64" address check' - depends on IP6_NF_IPTABLES +config IP6_NF_MATCH_RT + tristate '"rt" Routing header match support' depends on NETFILTER_ADVANCED help - This module performs checking on the IPv6 source address - Compares the last 64 bits with the EUI64 (delivered - from the MAC address) address + rt matching allows you to match packets based on the routing + header of the packet. To compile it as a module, choose M here. If unsure, say N. # The targets -config IP6_NF_FILTER - tristate "Packet filtering" - depends on IP6_NF_IPTABLES +config IP6_NF_TARGET_LOG + tristate "LOG target support" default m if NETFILTER_ADVANCED=n help - Packet filtering defines a table `filter', which has a series of - rules for simple packet filtering at local input, forwarding and - local output. See the man page for iptables(8). + This option adds a `LOG' target, which allows you to create rules in + any iptables table which records the packet header to the syslog. To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_TARGET_LOG - tristate "LOG target support" - depends on IP6_NF_FILTER +config IP6_NF_FILTER + tristate "Packet filtering" default m if NETFILTER_ADVANCED=n help - This option adds a `LOG' target, which allows you to create rules in - any iptables table which records the packet header to the syslog. + Packet filtering defines a table `filter', which has a series of + rules for simple packet filtering at local input, forwarding and + local output. See the man page for iptables(8). To compile it as a module, choose M here. If unsure, say N. @@ -170,7 +162,6 @@ config IP6_NF_TARGET_REJECT config IP6_NF_MANGLE tristate "Packet mangling" - depends on IP6_NF_IPTABLES default m if NETFILTER_ADVANCED=n help This option adds a `mangle' table to iptables: see the man page for @@ -198,7 +189,6 @@ config IP6_NF_TARGET_HL config IP6_NF_RAW tristate 'raw table support (required for TRACE)' - depends on IP6_NF_IPTABLES depends on NETFILTER_ADVANCED help This option adds a `raw' table to ip6tables. This table is the very @@ -211,7 +201,6 @@ config IP6_NF_RAW # security table for MAC policy config IP6_NF_SECURITY tristate "Security table" - depends on IP6_NF_IPTABLES depends on SECURITY depends on NETFILTER_ADVANCED help @@ -220,5 +209,7 @@ config IP6_NF_SECURITY If unsure, say N. +endif # IP6_NF_IPTABLES + endmenu diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 0b4557e03431..a33485dc81cb 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -200,32 +200,25 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6) } static unsigned int -ip6t_error(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +ip6t_error(struct sk_buff *skb, const struct xt_target_param *par) { if (net_ratelimit()) - printk("ip6_tables: error: `%s'\n", (char *)targinfo); + printk("ip6_tables: error: `%s'\n", + (const char *)par->targinfo); return NF_DROP; } /* Performance critical - called for every packet */ static inline bool -do_match(struct ip6t_entry_match *m, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int offset, - unsigned int protoff, - bool *hotdrop) +do_match(struct ip6t_entry_match *m, const struct sk_buff *skb, + struct xt_match_param *par) { + par->match = m->u.kernel.match; + par->matchinfo = m->data; + /* Stop iteration if it doesn't match */ - if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data, - offset, protoff, hotdrop)) + if (!m->u.kernel.match->match(skb, par)) return true; else return false; @@ -355,8 +348,6 @@ ip6t_do_table(struct sk_buff *skb, struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); - int offset = 0; - unsigned int protoff = 0; bool hotdrop = false; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; @@ -364,6 +355,8 @@ ip6t_do_table(struct sk_buff *skb, void *table_base; struct ip6t_entry *e, *back; struct xt_table_info *private; + struct xt_match_param mtpar; + struct xt_target_param tgpar; /* Initialization */ indev = in ? in->name : nulldevname; @@ -374,6 +367,11 @@ ip6t_do_table(struct sk_buff *skb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. */ + mtpar.hotdrop = &hotdrop; + mtpar.in = tgpar.in = in; + mtpar.out = tgpar.out = out; + mtpar.family = tgpar.family = NFPROTO_IPV6; + tgpar.hooknum = hook; read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); @@ -388,12 +386,10 @@ ip6t_do_table(struct sk_buff *skb, IP_NF_ASSERT(e); IP_NF_ASSERT(back); if (ip6_packet_match(skb, indev, outdev, &e->ipv6, - &protoff, &offset, &hotdrop)) { + &mtpar.thoff, &mtpar.fragoff, &hotdrop)) { struct ip6t_entry_target *t; - if (IP6T_MATCH_ITERATE(e, do_match, - skb, in, out, - offset, protoff, &hotdrop) != 0) + if (IP6T_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0) goto no_match; ADD_COUNTER(e->counters, @@ -441,15 +437,15 @@ ip6t_do_table(struct sk_buff *skb, } else { /* Targets which reenter must return abs. verdicts */ + tgpar.target = t->u.kernel.target; + tgpar.targinfo = t->data; + #ifdef CONFIG_NETFILTER_DEBUG ((struct ip6t_entry *)table_base)->comefrom = 0xeeeeeeec; #endif verdict = t->u.kernel.target->target(skb, - in, out, - hook, - t->u.kernel.target, - t->data); + &tgpar); #ifdef CONFIG_NETFILTER_DEBUG if (((struct ip6t_entry *)table_base)->comefrom @@ -602,12 +598,17 @@ mark_source_chains(struct xt_table_info *newinfo, static int cleanup_match(struct ip6t_entry_match *m, unsigned int *i) { + struct xt_mtdtor_param par; + if (i && (*i)-- == 0) return 1; - if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data); - module_put(m->u.kernel.match->me); + par.match = m->u.kernel.match; + par.matchinfo = m->data; + par.family = NFPROTO_IPV6; + if (par.match->destroy != NULL) + par.match->destroy(&par); + module_put(par.match->me); return 0; } @@ -632,34 +633,28 @@ check_entry(struct ip6t_entry *e, const char *name) return 0; } -static int check_match(struct ip6t_entry_match *m, const char *name, - const struct ip6t_ip6 *ipv6, - unsigned int hookmask, unsigned int *i) +static int check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par, + unsigned int *i) { - struct xt_match *match; + const struct ip6t_ip6 *ipv6 = par->entryinfo; int ret; - match = m->u.kernel.match; - ret = xt_check_match(match, AF_INET6, m->u.match_size - sizeof(*m), - name, hookmask, ipv6->proto, - ipv6->invflags & IP6T_INV_PROTO); - if (!ret && m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ipv6, match, m->data, - hookmask)) { + par->match = m->u.kernel.match; + par->matchinfo = m->data; + + ret = xt_check_match(par, m->u.match_size - sizeof(*m), + ipv6->proto, ipv6->invflags & IP6T_INV_PROTO); + if (ret < 0) { duprintf("ip_tables: check failed for `%s'.\n", - m->u.kernel.match->name); - ret = -EINVAL; + par.match->name); + return ret; } - if (!ret) - (*i)++; - return ret; + ++*i; + return 0; } static int -find_check_match(struct ip6t_entry_match *m, - const char *name, - const struct ip6t_ip6 *ipv6, - unsigned int hookmask, +find_check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par, unsigned int *i) { struct xt_match *match; @@ -674,7 +669,7 @@ find_check_match(struct ip6t_entry_match *m, } m->u.kernel.match = match; - ret = check_match(m, name, ipv6, hookmask, i); + ret = check_match(m, par, i); if (ret) goto err; @@ -686,23 +681,26 @@ err: static int check_target(struct ip6t_entry *e, const char *name) { - struct ip6t_entry_target *t; - struct xt_target *target; + struct ip6t_entry_target *t = ip6t_get_target(e); + struct xt_tgchk_param par = { + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_IPV6, + }; int ret; t = ip6t_get_target(e); - target = t->u.kernel.target; - ret = xt_check_target(target, AF_INET6, t->u.target_size - sizeof(*t), - name, e->comefrom, e->ipv6.proto, - e->ipv6.invflags & IP6T_INV_PROTO); - if (!ret && t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, target, t->data, - e->comefrom)) { + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), + e->ipv6.proto, e->ipv6.invflags & IP6T_INV_PROTO); + if (ret < 0) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); - ret = -EINVAL; + return ret; } - return ret; + return 0; } static int @@ -713,14 +711,18 @@ find_check_entry(struct ip6t_entry *e, const char *name, unsigned int size, struct xt_target *target; int ret; unsigned int j; + struct xt_mtchk_param mtpar; ret = check_entry(e, name); if (ret) return ret; j = 0; - ret = IP6T_MATCH_ITERATE(e, find_check_match, name, &e->ipv6, - e->comefrom, &j); + mtpar.table = name; + mtpar.entryinfo = &e->ipv6; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV6; + ret = IP6T_MATCH_ITERATE(e, find_check_match, &mtpar, &j); if (ret != 0) goto cleanup_matches; @@ -795,6 +797,7 @@ check_entry_size_and_hooks(struct ip6t_entry *e, static int cleanup_entry(struct ip6t_entry *e, unsigned int *i) { + struct xt_tgdtor_param par; struct ip6t_entry_target *t; if (i && (*i)-- == 0) @@ -803,9 +806,13 @@ cleanup_entry(struct ip6t_entry *e, unsigned int *i) /* Cleanup all matches */ IP6T_MATCH_ITERATE(e, cleanup_match, NULL); t = ip6t_get_target(e); - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); - module_put(t->u.kernel.target->me); + + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_IPV6; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); return 0; } @@ -1677,10 +1684,14 @@ static int compat_check_entry(struct ip6t_entry *e, const char *name, { unsigned int j; int ret; + struct xt_mtchk_param mtpar; j = 0; - ret = IP6T_MATCH_ITERATE(e, check_match, name, &e->ipv6, - e->comefrom, &j); + mtpar.table = name; + mtpar.entryinfo = &e->ipv6; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV6; + ret = IP6T_MATCH_ITERATE(e, check_match, &mtpar, &j); if (ret) goto cleanup_matches; @@ -2146,30 +2157,23 @@ icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, } static bool -icmp6_match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +icmp6_match(const struct sk_buff *skb, const struct xt_match_param *par) { const struct icmp6hdr *ic; struct icmp6hdr _icmph; - const struct ip6t_icmp *icmpinfo = matchinfo; + const struct ip6t_icmp *icmpinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph); + ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); if (ic == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("Dropping evil ICMP tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -2181,14 +2185,9 @@ icmp6_match(const struct sk_buff *skb, } /* Called when user tries to insert an entry of this type. */ -static bool -icmp6_checkentry(const char *tablename, - const void *entry, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool icmp6_checkentry(const struct xt_mtchk_param *par) { - const struct ip6t_icmp *icmpinfo = matchinfo; + const struct ip6t_icmp *icmpinfo = par->matchinfo; /* Must specify no unknown invflags */ return !(icmpinfo->invflags & ~IP6T_ICMP_INV); diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c index d5f8fd5f29d3..27b5adf670a2 100644 --- a/net/ipv6/netfilter/ip6t_HL.c +++ b/net/ipv6/netfilter/ip6t_HL.c @@ -19,12 +19,10 @@ MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field modification target"); MODULE_LICENSE("GPL"); static unsigned int -hl_tg6(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +hl_tg6(struct sk_buff *skb, const struct xt_target_param *par) { struct ipv6hdr *ip6h; - const struct ip6t_HL_info *info = targinfo; + const struct ip6t_HL_info *info = par->targinfo; int new_hl; if (!skb_make_writable(skb, skb->len)) @@ -56,12 +54,9 @@ hl_tg6(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -hl_tg6_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool hl_tg6_check(const struct xt_tgchk_param *par) { - const struct ip6t_HL_info *info = targinfo; + const struct ip6t_HL_info *info = par->targinfo; if (info->mode > IP6T_HL_MAXMODE) { printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", @@ -78,7 +73,7 @@ hl_tg6_check(const char *tablename, const void *entry, static struct xt_target hl_tg6_reg __read_mostly = { .name = "HL", - .family = AF_INET6, + .family = NFPROTO_IPV6, .target = hl_tg6, .targetsize = sizeof(struct ip6t_HL_info), .table = "mangle", diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 3a2316974f83..caa441d09567 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -385,7 +385,7 @@ static struct nf_loginfo default_loginfo = { }; static void -ip6t_log_packet(unsigned int pf, +ip6t_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -438,28 +438,24 @@ ip6t_log_packet(unsigned int pf, } static unsigned int -log_tg6(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +log_tg6(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ip6t_log_info *loginfo = targinfo; + const struct ip6t_log_info *loginfo = par->targinfo; struct nf_loginfo li; li.type = NF_LOG_TYPE_LOG; li.u.log.level = loginfo->level; li.u.log.logflags = loginfo->logflags; - ip6t_log_packet(PF_INET6, hooknum, skb, in, out, &li, loginfo->prefix); + ip6t_log_packet(NFPROTO_IPV6, par->hooknum, skb, par->in, par->out, + &li, loginfo->prefix); return XT_CONTINUE; } -static bool -log_tg6_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool log_tg6_check(const struct xt_tgchk_param *par) { - const struct ip6t_log_info *loginfo = targinfo; + const struct ip6t_log_info *loginfo = par->targinfo; if (loginfo->level >= 8) { pr_debug("LOG: level %u >= 8\n", loginfo->level); @@ -475,7 +471,7 @@ log_tg6_check(const char *tablename, const void *entry, static struct xt_target log_tg6_reg __read_mostly = { .name = "LOG", - .family = AF_INET6, + .family = NFPROTO_IPV6, .target = log_tg6, .targetsize = sizeof(struct ip6t_log_info), .checkentry = log_tg6_check, @@ -495,7 +491,7 @@ static int __init log_tg6_init(void) ret = xt_register_target(&log_tg6_reg); if (ret < 0) return ret; - nf_log_register(PF_INET6, &ip6t_logger); + nf_log_register(NFPROTO_IPV6, &ip6t_logger); return 0; } diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 44c8d65a2431..0981b4ccb8b1 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -35,7 +35,7 @@ MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6"); MODULE_LICENSE("GPL"); /* Send RST reply */ -static void send_reset(struct sk_buff *oldskb) +static void send_reset(struct net *net, struct sk_buff *oldskb) { struct sk_buff *nskb; struct tcphdr otcph, *tcph; @@ -94,7 +94,7 @@ static void send_reset(struct sk_buff *oldskb) fl.fl_ip_sport = otcph.dest; fl.fl_ip_dport = otcph.source; security_skb_classify_flow(oldskb, &fl); - dst = ip6_route_output(&init_net, NULL, &fl); + dst = ip6_route_output(net, NULL, &fl); if (dst == NULL) return; if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0)) @@ -163,20 +163,20 @@ static void send_reset(struct sk_buff *oldskb) } static inline void -send_unreach(struct sk_buff *skb_in, unsigned char code, unsigned int hooknum) +send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code, + unsigned int hooknum) { if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) - skb_in->dev = init_net.loopback_dev; + skb_in->dev = net->loopback_dev; icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); } static unsigned int -reject_tg6(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +reject_tg6(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ip6t_reject_info *reject = targinfo; + const struct ip6t_reject_info *reject = par->targinfo; + struct net *net = dev_net((par->in != NULL) ? par->in : par->out); pr_debug("%s: medium point\n", __func__); /* WARNING: This code causes reentry within ip6tables. @@ -184,25 +184,25 @@ reject_tg6(struct sk_buff *skb, const struct net_device *in, must return an absolute verdict. --RR */ switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: - send_unreach(skb, ICMPV6_NOROUTE, hooknum); + send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum); break; case IP6T_ICMP6_ADM_PROHIBITED: - send_unreach(skb, ICMPV6_ADM_PROHIBITED, hooknum); + send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); break; case IP6T_ICMP6_NOT_NEIGHBOUR: - send_unreach(skb, ICMPV6_NOT_NEIGHBOUR, hooknum); + send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); break; case IP6T_ICMP6_ADDR_UNREACH: - send_unreach(skb, ICMPV6_ADDR_UNREACH, hooknum); + send_unreach(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); break; case IP6T_ICMP6_PORT_UNREACH: - send_unreach(skb, ICMPV6_PORT_UNREACH, hooknum); + send_unreach(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: - send_reset(skb); + send_reset(net, skb); break; default: if (net_ratelimit()) @@ -213,13 +213,10 @@ reject_tg6(struct sk_buff *skb, const struct net_device *in, return NF_DROP; } -static bool -reject_tg6_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool reject_tg6_check(const struct xt_tgchk_param *par) { - const struct ip6t_reject_info *rejinfo = targinfo; - const struct ip6t_entry *e = entry; + const struct ip6t_reject_info *rejinfo = par->targinfo; + const struct ip6t_entry *e = par->entryinfo; if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { printk("ip6t_REJECT: ECHOREPLY is not supported.\n"); @@ -237,7 +234,7 @@ reject_tg6_check(const char *tablename, const void *entry, static struct xt_target reject_tg6_reg __read_mostly = { .name = "REJECT", - .family = AF_INET6, + .family = NFPROTO_IPV6, .target = reject_tg6, .targetsize = sizeof(struct ip6t_reject_info), .table = "filter", diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c index 429629fd63b6..3a82f24746b9 100644 --- a/net/ipv6/netfilter/ip6t_ah.c +++ b/net/ipv6/netfilter/ip6t_ah.c @@ -36,14 +36,11 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) return r; } -static bool -ah_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { struct ip_auth_hdr _ah; const struct ip_auth_hdr *ah; - const struct ip6t_ah *ahinfo = matchinfo; + const struct ip6t_ah *ahinfo = par->matchinfo; unsigned int ptr; unsigned int hdrlen = 0; int err; @@ -51,13 +48,13 @@ ah_mt6(const struct sk_buff *skb, const struct net_device *in, err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = true; + *par->hotdrop = true; return false; } ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah); if (ah == NULL) { - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -93,13 +90,9 @@ ah_mt6(const struct sk_buff *skb, const struct net_device *in, !(ahinfo->hdrres && ah->reserved); } -/* Called when user tries to insert an entry of this type. */ -static bool -ah_mt6_check(const char *tablename, const void *entry, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool ah_mt6_check(const struct xt_mtchk_param *par) { - const struct ip6t_ah *ahinfo = matchinfo; + const struct ip6t_ah *ahinfo = par->matchinfo; if (ahinfo->invflags & ~IP6T_AH_INV_MASK) { pr_debug("ip6t_ah: unknown flags %X\n", ahinfo->invflags); @@ -110,7 +103,7 @@ ah_mt6_check(const char *tablename, const void *entry, static struct xt_match ah_mt6_reg __read_mostly = { .name = "ah", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = ah_mt6, .matchsize = sizeof(struct ip6t_ah), .checkentry = ah_mt6_check, diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index 8f331f12b2ec..db610bacbcce 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -20,18 +20,15 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); static bool -eui64_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +eui64_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { unsigned char eui64[8]; int i = 0; if (!(skb_mac_header(skb) >= skb->head && skb_mac_header(skb) + ETH_HLEN <= skb->data) && - offset != 0) { - *hotdrop = true; + par->fragoff != 0) { + *par->hotdrop = true; return false; } @@ -60,7 +57,7 @@ eui64_mt6(const struct sk_buff *skb, const struct net_device *in, static struct xt_match eui64_mt6_reg __read_mostly = { .name = "eui64", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = eui64_mt6, .matchsize = sizeof(int), .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c index e2bbc63dba5b..673aa0a5084e 100644 --- a/net/ipv6/netfilter/ip6t_frag.c +++ b/net/ipv6/netfilter/ip6t_frag.c @@ -35,27 +35,24 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) } static bool -frag_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { struct frag_hdr _frag; const struct frag_hdr *fh; - const struct ip6t_frag *fraginfo = matchinfo; + const struct ip6t_frag *fraginfo = par->matchinfo; unsigned int ptr; int err; err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = true; + *par->hotdrop = true; return false; } fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag); if (fh == NULL) { - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -110,13 +107,9 @@ frag_mt6(const struct sk_buff *skb, const struct net_device *in, && (ntohs(fh->frag_off) & IP6_MF)); } -/* Called when user tries to insert an entry of this type. */ -static bool -frag_mt6_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool frag_mt6_check(const struct xt_mtchk_param *par) { - const struct ip6t_frag *fraginfo = matchinfo; + const struct ip6t_frag *fraginfo = par->matchinfo; if (fraginfo->invflags & ~IP6T_FRAG_INV_MASK) { pr_debug("ip6t_frag: unknown flags %X\n", fraginfo->invflags); @@ -127,7 +120,7 @@ frag_mt6_check(const char *tablename, const void *ip, static struct xt_match frag_mt6_reg __read_mostly = { .name = "frag", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = frag_mt6, .matchsize = sizeof(struct ip6t_frag), .checkentry = frag_mt6_check, diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c index 26654b26d7fa..cbe8dec9744b 100644 --- a/net/ipv6/netfilter/ip6t_hbh.c +++ b/net/ipv6/netfilter/ip6t_hbh.c @@ -42,14 +42,11 @@ MODULE_ALIAS("ip6t_dst"); */ static bool -hbh_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { struct ipv6_opt_hdr _optsh; const struct ipv6_opt_hdr *oh; - const struct ip6t_opts *optinfo = matchinfo; + const struct ip6t_opts *optinfo = par->matchinfo; unsigned int temp; unsigned int ptr; unsigned int hdrlen = 0; @@ -61,16 +58,16 @@ hbh_mt6(const struct sk_buff *skb, const struct net_device *in, unsigned int optlen; int err; - err = ipv6_find_hdr(skb, &ptr, match->data, NULL); + err = ipv6_find_hdr(skb, &ptr, par->match->data, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = true; + *par->hotdrop = true; return false; } oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); if (oh == NULL) { - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -163,13 +160,9 @@ hbh_mt6(const struct sk_buff *skb, const struct net_device *in, return false; } -/* Called when user tries to insert an entry of this type. */ -static bool -hbh_mt6_check(const char *tablename, const void *entry, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool hbh_mt6_check(const struct xt_mtchk_param *par) { - const struct ip6t_opts *optsinfo = matchinfo; + const struct ip6t_opts *optsinfo = par->matchinfo; if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { pr_debug("ip6t_opts: unknown flags %X\n", optsinfo->invflags); @@ -187,7 +180,7 @@ hbh_mt6_check(const char *tablename, const void *entry, static struct xt_match hbh_mt6_reg[] __read_mostly = { { .name = "hbh", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = hbh_mt6, .matchsize = sizeof(struct ip6t_opts), .checkentry = hbh_mt6_check, @@ -196,7 +189,7 @@ static struct xt_match hbh_mt6_reg[] __read_mostly = { }, { .name = "dst", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = hbh_mt6, .matchsize = sizeof(struct ip6t_opts), .checkentry = hbh_mt6_check, diff --git a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c index 345671673845..c964dca1132d 100644 --- a/net/ipv6/netfilter/ip6t_hl.c +++ b/net/ipv6/netfilter/ip6t_hl.c @@ -19,12 +19,9 @@ MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field match"); MODULE_LICENSE("GPL"); -static bool -hl_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ip6t_hl_info *info = matchinfo; + const struct ip6t_hl_info *info = par->matchinfo; const struct ipv6hdr *ip6h = ipv6_hdr(skb); switch (info->mode) { @@ -51,7 +48,7 @@ hl_mt6(const struct sk_buff *skb, const struct net_device *in, static struct xt_match hl_mt6_reg __read_mostly = { .name = "hl", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = hl_mt6, .matchsize = sizeof(struct ip6t_hl_info), .me = THIS_MODULE, diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c index 317a8960a757..14e6724d5672 100644 --- a/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/net/ipv6/netfilter/ip6t_ipv6header.c @@ -27,12 +27,9 @@ MODULE_DESCRIPTION("Xtables: IPv6 header types match"); MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>"); static bool -ipv6header_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ip6t_ipv6header_info *info = matchinfo; + const struct ip6t_ipv6header_info *info = par->matchinfo; unsigned int temp; int len; u8 nexthdr; @@ -121,12 +118,9 @@ ipv6header_mt6(const struct sk_buff *skb, const struct net_device *in, } } -static bool -ipv6header_mt6_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool ipv6header_mt6_check(const struct xt_mtchk_param *par) { - const struct ip6t_ipv6header_info *info = matchinfo; + const struct ip6t_ipv6header_info *info = par->matchinfo; /* invflags is 0 or 0xff in hard mode */ if ((!info->modeflag) && info->invflags != 0x00 && @@ -138,7 +132,7 @@ ipv6header_mt6_check(const char *tablename, const void *ip, static struct xt_match ipv6header_mt6_reg __read_mostly = { .name = "ipv6header", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = ipv6header_mt6, .matchsize = sizeof(struct ip6t_ipv6header_info), .checkentry = ipv6header_mt6_check, diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c index e06678d07ec8..aafe4e66577b 100644 --- a/net/ipv6/netfilter/ip6t_mh.c +++ b/net/ipv6/netfilter/ip6t_mh.c @@ -37,32 +37,29 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert) return (type >= min && type <= max) ^ invert; } -static bool -mh_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { struct ip6_mh _mh; const struct ip6_mh *mh; - const struct ip6t_mh *mhinfo = matchinfo; + const struct ip6t_mh *mhinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - mh = skb_header_pointer(skb, protoff, sizeof(_mh), &_mh); + mh = skb_header_pointer(skb, par->thoff, sizeof(_mh), &_mh); if (mh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil MH tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } if (mh->ip6mh_proto != IPPROTO_NONE) { duprintf("Dropping invalid MH Payload Proto: %u\n", mh->ip6mh_proto); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -70,13 +67,9 @@ mh_mt6(const struct sk_buff *skb, const struct net_device *in, !!(mhinfo->invflags & IP6T_MH_INV_TYPE)); } -/* Called when user tries to insert an entry of this type. */ -static bool -mh_mt6_check(const char *tablename, const void *entry, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool mh_mt6_check(const struct xt_mtchk_param *par) { - const struct ip6t_mh *mhinfo = matchinfo; + const struct ip6t_mh *mhinfo = par->matchinfo; /* Must specify no unknown invflags */ return !(mhinfo->invflags & ~IP6T_MH_INV_MASK); @@ -84,7 +77,7 @@ mh_mt6_check(const char *tablename, const void *entry, static struct xt_match mh_mt6_reg __read_mostly = { .name = "mh", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = mh_mt6_check, .match = mh_mt6, .matchsize = sizeof(struct ip6t_mh), diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c index 81aaf7aaaabf..356b8d6f6baa 100644 --- a/net/ipv6/netfilter/ip6t_rt.c +++ b/net/ipv6/netfilter/ip6t_rt.c @@ -36,14 +36,11 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert) return r; } -static bool -rt_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { struct ipv6_rt_hdr _route; const struct ipv6_rt_hdr *rh; - const struct ip6t_rt *rtinfo = matchinfo; + const struct ip6t_rt *rtinfo = par->matchinfo; unsigned int temp; unsigned int ptr; unsigned int hdrlen = 0; @@ -55,13 +52,13 @@ rt_mt6(const struct sk_buff *skb, const struct net_device *in, err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL); if (err < 0) { if (err != -ENOENT) - *hotdrop = true; + *par->hotdrop = true; return false; } rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route); if (rh == NULL) { - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -189,13 +186,9 @@ rt_mt6(const struct sk_buff *skb, const struct net_device *in, return false; } -/* Called when user tries to insert an entry of this type. */ -static bool -rt_mt6_check(const char *tablename, const void *entry, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool rt_mt6_check(const struct xt_mtchk_param *par) { - const struct ip6t_rt *rtinfo = matchinfo; + const struct ip6t_rt *rtinfo = par->matchinfo; if (rtinfo->invflags & ~IP6T_RT_INV_MASK) { pr_debug("ip6t_rt: unknown flags %X\n", rtinfo->invflags); @@ -214,7 +207,7 @@ rt_mt6_check(const char *tablename, const void *entry, static struct xt_match rt_mt6_reg __read_mostly = { .name = "rt", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = rt_mt6, .matchsize = sizeof(struct ip6t_rt), .checkentry = rt_mt6_check, diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 55a2c290bad4..b110a8a85a14 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -68,7 +68,7 @@ ip6t_local_in_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ip6t_do_table(skb, hook, in, out, - nf_local_in_net(in, out)->ipv6.ip6table_filter); + dev_net(in)->ipv6.ip6table_filter); } static unsigned int @@ -79,7 +79,7 @@ ip6t_forward_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ip6t_do_table(skb, hook, in, out, - nf_forward_net(in, out)->ipv6.ip6table_filter); + dev_net(in)->ipv6.ip6table_filter); } static unsigned int @@ -100,7 +100,7 @@ ip6t_local_out_hook(unsigned int hook, #endif return ip6t_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv6.ip6table_filter); + dev_net(out)->ipv6.ip6table_filter); } static struct nf_hook_ops ip6t_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index f405cea21a8b..d0b31b259d4d 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -67,17 +67,29 @@ static struct xt_table packet_mangler = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6t_route_hook(unsigned int hook, +ip6t_in_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_mangle); + return ip6t_do_table(skb, hook, in, out, + dev_net(in)->ipv6.ip6table_mangle); } static unsigned int -ip6t_local_hook(unsigned int hook, +ip6t_post_routing_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip6t_do_table(skb, hook, in, out, + dev_net(out)->ipv6.ip6table_mangle); +} + +static unsigned int +ip6t_local_out_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -108,7 +120,8 @@ ip6t_local_hook(unsigned int hook, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_mangle); + ret = ip6t_do_table(skb, hook, in, out, + dev_net(out)->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) @@ -122,35 +135,35 @@ ip6t_local_hook(unsigned int hook, static struct nf_hook_ops ip6t_ops[] __read_mostly = { { - .hook = ip6t_route_hook, + .hook = ip6t_in_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_route_hook, + .hook = ip6t_in_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_IN, .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_route_hook, + .hook = ip6t_in_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_FORWARD, .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_local_hook, + .hook = ip6t_local_out_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_MANGLE, }, { - .hook = ip6t_route_hook, + .hook = ip6t_post_routing_hook, .owner = THIS_MODULE, .pf = PF_INET6, .hooknum = NF_INET_POST_ROUTING, diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 92b91077ac29..109fab6f831a 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -45,25 +45,37 @@ static struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int -ip6t_hook(unsigned int hook, +ip6t_pre_routing_hook(unsigned int hook, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(skb, hook, in, out, init_net.ipv6.ip6table_raw); + return ip6t_do_table(skb, hook, in, out, + dev_net(in)->ipv6.ip6table_raw); +} + +static unsigned int +ip6t_local_out_hook(unsigned int hook, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip6t_do_table(skb, hook, in, out, + dev_net(out)->ipv6.ip6table_raw); } static struct nf_hook_ops ip6t_ops[] __read_mostly = { { - .hook = ip6t_hook, + .hook = ip6t_pre_routing_hook, .pf = PF_INET6, .hooknum = NF_INET_PRE_ROUTING, .priority = NF_IP6_PRI_FIRST, .owner = THIS_MODULE, }, { - .hook = ip6t_hook, + .hook = ip6t_local_out_hook, .pf = PF_INET6, .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_FIRST, diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 6e7131036bc6..20bc52f13e43 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -72,7 +72,7 @@ ip6t_local_in_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ip6t_do_table(skb, hook, in, out, - nf_local_in_net(in, out)->ipv6.ip6table_security); + dev_net(in)->ipv6.ip6table_security); } static unsigned int @@ -83,7 +83,7 @@ ip6t_forward_hook(unsigned int hook, int (*okfn)(struct sk_buff *)) { return ip6t_do_table(skb, hook, in, out, - nf_forward_net(in, out)->ipv6.ip6table_security); + dev_net(in)->ipv6.ip6table_security); } static unsigned int @@ -95,7 +95,7 @@ ip6t_local_out_hook(unsigned int hook, { /* TBD: handle short packets via raw socket */ return ip6t_do_table(skb, hook, in, out, - nf_local_out_net(in, out)->ipv6.ip6table_security); + dev_net(out)->ipv6.ip6table_security); } static struct nf_hook_ops ip6t_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 85050c072abd..e91db16611d9 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -211,11 +211,10 @@ static unsigned int ipv6_defrag(unsigned int hooknum, return NF_STOLEN; } -static unsigned int ipv6_conntrack_in(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int __ipv6_conntrack_in(struct net *net, + unsigned int hooknum, + struct sk_buff *skb, + int (*okfn)(struct sk_buff *)) { struct sk_buff *reasm = skb->nfct_reasm; @@ -225,7 +224,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, if (!reasm->nfct) { unsigned int ret; - ret = nf_conntrack_in(PF_INET6, hooknum, reasm); + ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm); if (ret != NF_ACCEPT) return ret; } @@ -235,7 +234,16 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, return NF_ACCEPT; } - return nf_conntrack_in(PF_INET6, hooknum, skb); + return nf_conntrack_in(net, PF_INET6, hooknum, skb); +} + +static unsigned int ipv6_conntrack_in(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn); } static unsigned int ipv6_conntrack_local(unsigned int hooknum, @@ -250,7 +258,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, printk("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return ipv6_conntrack_in(hooknum, skb, in, out, okfn); + return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 14d47d833545..05726177903f 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -81,7 +81,7 @@ static int icmpv6_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* Try to delete connection immediately after all replies: @@ -93,7 +93,7 @@ static int icmpv6_packet(struct nf_conn *ct, nf_ct_kill_acct(ct, ctinfo, skb); } else { atomic_inc(&ct->proto.icmp.count); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout); } @@ -122,7 +122,8 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, } static int -icmpv6_error_message(struct sk_buff *skb, +icmpv6_error_message(struct net *net, + struct sk_buff *skb, unsigned int icmp6off, enum ip_conntrack_info *ctinfo, unsigned int hooknum) @@ -156,7 +157,7 @@ icmpv6_error_message(struct sk_buff *skb, *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&intuple); + h = nf_conntrack_find_get(net, &intuple); if (!h) { pr_debug("icmpv6_error: no match\n"); return -NF_ACCEPT; @@ -172,21 +173,21 @@ icmpv6_error_message(struct sk_buff *skb, } static int -icmpv6_error(struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) +icmpv6_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { const struct icmp6hdr *icmp6h; struct icmp6hdr _ih; icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { - if (LOG_INVALID(IPPROTO_ICMPV6)) + if (LOG_INVALID(net, IPPROTO_ICMPV6)) nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: short packet "); return -NF_ACCEPT; } - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, "nf_ct_icmpv6: ICMPv6 checksum failed\n"); @@ -197,7 +198,7 @@ icmpv6_error(struct sk_buff *skb, unsigned int dataoff, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(skb, dataoff, ctinfo, hooknum); + return icmpv6_error_message(net, skb, dataoff, ctinfo, hooknum); } #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 52d06dd4b817..9967ac7a01a8 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -27,7 +27,6 @@ #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> -#include <linux/jhash.h> #include <net/sock.h> #include <net/snmp.h> @@ -103,39 +102,12 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { }; #endif -static unsigned int ip6qhashfn(__be32 id, const struct in6_addr *saddr, - const struct in6_addr *daddr) -{ - u32 a, b, c; - - a = (__force u32)saddr->s6_addr32[0]; - b = (__force u32)saddr->s6_addr32[1]; - c = (__force u32)saddr->s6_addr32[2]; - - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += nf_frags.rnd; - __jhash_mix(a, b, c); - - a += (__force u32)saddr->s6_addr32[3]; - b += (__force u32)daddr->s6_addr32[0]; - c += (__force u32)daddr->s6_addr32[1]; - __jhash_mix(a, b, c); - - a += (__force u32)daddr->s6_addr32[2]; - b += (__force u32)daddr->s6_addr32[3]; - c += (__force u32)id; - __jhash_mix(a, b, c); - - return c & (INETFRAGS_HASHSZ - 1); -} - static unsigned int nf_hashfn(struct inet_frag_queue *q) { const struct nf_ct_frag6_queue *nq; nq = container_of(q, struct nf_ct_frag6_queue, q); - return ip6qhashfn(nq->id, &nq->saddr, &nq->daddr); + return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd); } static void nf_skb_free(struct sk_buff *skb) @@ -209,7 +181,7 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) arg.dst = dst; read_lock_bh(&nf_frags.lock); - hash = ip6qhashfn(id, src, dst); + hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); local_bh_enable(); diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 0179b66864f1..97c17fdd6f75 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -29,8 +29,6 @@ #include <net/transp_v6.h> #include <net/ipv6.h> -static struct proc_dir_entry *proc_net_devsnmp6; - static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; @@ -48,6 +46,19 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) return 0; } +static int sockstat6_seq_open(struct inode *inode, struct file *file) +{ + return single_open_net(inode, file, sockstat6_seq_show); +} + +static const struct file_operations sockstat6_seq_fops = { + .owner = THIS_MODULE, + .open = sockstat6_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release_net, +}; + static struct snmp_mib snmp6_ipstats_list[] = { /* ipv6 mib according to RFC 2465 */ SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INRECEIVES), @@ -121,7 +132,7 @@ static struct snmp_mib snmp6_udplite6_list[] = { static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) { - static char name[32]; + char name[32]; int i; /* print by name -- deprecated items */ @@ -133,7 +144,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) p = icmp6type2name[icmptype]; if (!p) /* don't print un-named types here */ continue; - (void) snprintf(name, sizeof(name)-1, "Icmp6%s%s", + snprintf(name, sizeof(name), "Icmp6%s%s", i & 0x100 ? "Out" : "In", p); seq_printf(seq, "%-32s\t%lu\n", name, snmp_fold_field(mib, i)); @@ -146,7 +157,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) val = snmp_fold_field(mib, i); if (!val) continue; - (void) snprintf(name, sizeof(name)-1, "Icmp6%sType%u", + snprintf(name, sizeof(name), "Icmp6%sType%u", i & 0x100 ? "Out" : "In", i & 0xff); seq_printf(seq, "%-32s\t%lu\n", name, val); } @@ -164,44 +175,52 @@ snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist) static int snmp6_seq_show(struct seq_file *seq, void *v) { - struct inet6_dev *idev = (struct inet6_dev *)seq->private; - - if (idev) { - seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); - snmp6_seq_show_item(seq, (void **)idev->stats.ipv6, snmp6_ipstats_list); - snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list); - snmp6_seq_show_icmpv6msg(seq, (void **)idev->stats.icmpv6msg); - } else { - snmp6_seq_show_item(seq, (void **)ipv6_statistics, snmp6_ipstats_list); - snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list); - snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics); - snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list); - snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list); - } + struct net *net = (struct net *)seq->private; + + snmp6_seq_show_item(seq, (void **)net->mib.ipv6_statistics, + snmp6_ipstats_list); + snmp6_seq_show_item(seq, (void **)net->mib.icmpv6_statistics, + snmp6_icmp6_list); + snmp6_seq_show_icmpv6msg(seq, (void **)net->mib.icmpv6msg_statistics); + snmp6_seq_show_item(seq, (void **)net->mib.udp_stats_in6, + snmp6_udp6_list); + snmp6_seq_show_item(seq, (void **)net->mib.udplite_stats_in6, + snmp6_udplite6_list); return 0; } -static int sockstat6_seq_open(struct inode *inode, struct file *file) +static int snmp6_seq_open(struct inode *inode, struct file *file) { - return single_open_net(inode, file, sockstat6_seq_show); + return single_open_net(inode, file, snmp6_seq_show); } -static const struct file_operations sockstat6_seq_fops = { +static const struct file_operations snmp6_seq_fops = { .owner = THIS_MODULE, - .open = sockstat6_seq_open, + .open = snmp6_seq_open, .read = seq_read, .llseek = seq_lseek, .release = single_release_net, }; -static int snmp6_seq_open(struct inode *inode, struct file *file) +static int snmp6_dev_seq_show(struct seq_file *seq, void *v) { - return single_open(file, snmp6_seq_show, PDE(inode)->data); + struct inet6_dev *idev = (struct inet6_dev *)seq->private; + + seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); + snmp6_seq_show_item(seq, (void **)idev->stats.ipv6, snmp6_ipstats_list); + snmp6_seq_show_item(seq, (void **)idev->stats.icmpv6, snmp6_icmp6_list); + snmp6_seq_show_icmpv6msg(seq, (void **)idev->stats.icmpv6msg); + return 0; } -static const struct file_operations snmp6_seq_fops = { +static int snmp6_dev_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, snmp6_dev_seq_show, PDE(inode)->data); +} + +static const struct file_operations snmp6_dev_seq_fops = { .owner = THIS_MODULE, - .open = snmp6_seq_open, + .open = snmp6_dev_seq_open, .read = seq_read, .llseek = seq_lseek, .release = single_release, @@ -210,18 +229,18 @@ static const struct file_operations snmp6_seq_fops = { int snmp6_register_dev(struct inet6_dev *idev) { struct proc_dir_entry *p; + struct net *net; if (!idev || !idev->dev) return -EINVAL; - if (!net_eq(dev_net(idev->dev), &init_net)) - return 0; - - if (!proc_net_devsnmp6) + net = dev_net(idev->dev); + if (!net->mib.proc_net_devsnmp6) return -ENOENT; p = proc_create_data(idev->dev->name, S_IRUGO, - proc_net_devsnmp6, &snmp6_seq_fops, idev); + net->mib.proc_net_devsnmp6, + &snmp6_dev_seq_fops, idev); if (!p) return -ENOMEM; @@ -231,12 +250,13 @@ int snmp6_register_dev(struct inet6_dev *idev) int snmp6_unregister_dev(struct inet6_dev *idev) { - if (!proc_net_devsnmp6) + struct net *net = dev_net(idev->dev); + if (!net->mib.proc_net_devsnmp6) return -ENOENT; if (!idev || !idev->stats.proc_dir_entry) return -EINVAL; remove_proc_entry(idev->stats.proc_dir_entry->name, - proc_net_devsnmp6); + net->mib.proc_net_devsnmp6); idev->stats.proc_dir_entry = NULL; return 0; } @@ -246,12 +266,27 @@ static int ipv6_proc_init_net(struct net *net) if (!proc_net_fops_create(net, "sockstat6", S_IRUGO, &sockstat6_seq_fops)) return -ENOMEM; + + if (!proc_net_fops_create(net, "snmp6", S_IRUGO, &snmp6_seq_fops)) + goto proc_snmp6_fail; + + net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net); + if (!net->mib.proc_net_devsnmp6) + goto proc_dev_snmp6_fail; return 0; + +proc_snmp6_fail: + proc_net_remove(net, "sockstat6"); +proc_dev_snmp6_fail: + proc_net_remove(net, "dev_snmp6"); + return -ENOMEM; } static void ipv6_proc_exit_net(struct net *net) { proc_net_remove(net, "sockstat6"); + proc_net_remove(net, "dev_snmp6"); + proc_net_remove(net, "snmp6"); } static struct pernet_operations ipv6_proc_ops = { @@ -261,33 +296,11 @@ static struct pernet_operations ipv6_proc_ops = { int __init ipv6_misc_proc_init(void) { - int rc = 0; - - if (register_pernet_subsys(&ipv6_proc_ops)) - goto proc_net_fail; - - if (!proc_net_fops_create(&init_net, "snmp6", S_IRUGO, &snmp6_seq_fops)) - goto proc_snmp6_fail; - - proc_net_devsnmp6 = proc_mkdir("dev_snmp6", init_net.proc_net); - if (!proc_net_devsnmp6) - goto proc_dev_snmp6_fail; -out: - return rc; - -proc_dev_snmp6_fail: - proc_net_remove(&init_net, "snmp6"); -proc_snmp6_fail: - unregister_pernet_subsys(&ipv6_proc_ops); -proc_net_fail: - rc = -ENOMEM; - goto out; + return register_pernet_subsys(&ipv6_proc_ops); } void ipv6_misc_proc_exit(void) { - proc_net_remove(&init_net, "dev_snmp6"); - proc_net_remove(&init_net, "snmp6"); unregister_pernet_subsys(&ipv6_proc_ops); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e53e493606c5..2ba04d41dc25 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -638,7 +638,7 @@ static int rawv6_send_hdrinc(struct sock *sk, void *from, int length, if (err) goto error_fault; - IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); + IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS); err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output); if (err > 0) @@ -652,7 +652,7 @@ error_fault: err = -EFAULT; kfree_skb(skb); error: - IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 89184b576e23..af12de071f4c 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -99,8 +99,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, * callers should be careful not to use the hash value outside the ipfrag_lock * as doing so could race with ipfrag_hash_rnd being recalculated. */ -static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, - struct in6_addr *daddr) +unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, + const struct in6_addr *daddr, u32 rnd) { u32 a, b, c; @@ -110,7 +110,7 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; - c += ip6_frags.rnd; + c += rnd; __jhash_mix(a, b, c); a += (__force u32)saddr->s6_addr32[3]; @@ -125,13 +125,14 @@ static unsigned int ip6qhashfn(__be32 id, struct in6_addr *saddr, return c & (INETFRAGS_HASHSZ - 1); } +EXPORT_SYMBOL_GPL(inet6_hash_frag); static unsigned int ip6_hashfn(struct inet_frag_queue *q) { struct frag_queue *fq; fq = container_of(q, struct frag_queue, q); - return ip6qhashfn(fq->id, &fq->saddr, &fq->daddr); + return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd); } int ip6_frag_match(struct inet_frag_queue *q, void *a) @@ -188,7 +189,7 @@ static void ip6_evictor(struct net *net, struct inet6_dev *idev) evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags); if (evicted) - IP6_ADD_STATS_BH(idev, IPSTATS_MIB_REASMFAILS, evicted); + IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted); } static void ip6_frag_expire(unsigned long data) @@ -212,8 +213,8 @@ static void ip6_frag_expire(unsigned long data) goto out; rcu_read_lock(); - IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); - IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); rcu_read_unlock(); /* Don't send error if the first segment did not arrive. */ @@ -247,7 +248,7 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, arg.dst = dst; read_lock(&ip6_frags.lock); - hash = ip6qhashfn(id, src, dst); + hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); if (q == NULL) @@ -256,7 +257,7 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, return container_of(q, struct frag_queue, q); oom: - IP6_INC_STATS_BH(idev, IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS); return NULL; } @@ -266,6 +267,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct sk_buff *prev, *next; struct net_device *dev; int offset, end; + struct net *net = dev_net(skb->dst->dev); if (fq->q.last_in & INET_FRAG_COMPLETE) goto err; @@ -275,7 +277,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, ((u8 *)&fhdr->frag_off - @@ -308,7 +310,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* RFC2460 says always send parameter problem in * this case. -DaveM */ - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len)); @@ -432,7 +434,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return -1; err: - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), + IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; } @@ -548,7 +551,8 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->csum); rcu_read_lock(); - IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMOKS); + IP6_INC_STATS_BH(dev_net(dev), + __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); fq->q.fragments = NULL; return 1; @@ -562,7 +566,8 @@ out_oom: printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n"); out_fail: rcu_read_lock(); - IP6_INC_STATS_BH(__in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(dev_net(dev), + __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); rcu_read_unlock(); return -1; } @@ -572,24 +577,17 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_hdr *fhdr; struct frag_queue *fq; struct ipv6hdr *hdr = ipv6_hdr(skb); - struct net *net; + struct net *net = dev_net(skb->dst->dev); - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ - if (hdr->payload_len==0) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - skb_network_header_len(skb)); - return -1; - } + if (hdr->payload_len==0) + goto fail_hdr; + if (!pskb_may_pull(skb, (skb_transport_offset(skb) + - sizeof(struct frag_hdr)))) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, - skb_network_header_len(skb)); - return -1; - } + sizeof(struct frag_hdr)))) + goto fail_hdr; hdr = ipv6_hdr(skb); fhdr = (struct frag_hdr *)skb_transport_header(skb); @@ -597,13 +595,13 @@ static int ipv6_frag_rcv(struct sk_buff *skb) if (!(fhdr->frag_off & htons(0xFFF9))) { /* It is not a fragmented frame */ skb->transport_header += sizeof(struct frag_hdr); - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS); + IP6_INC_STATS_BH(net, + ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMOKS); IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); return 1; } - net = dev_net(skb->dev); if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) ip6_evictor(net, ip6_dst_idev(skb->dst)); @@ -620,9 +618,14 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return ret; } - IP6_INC_STATS_BH(ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; + +fail_hdr: + IP6_INC_STATS(net, ip6_dst_idev(skb->dst), IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); + return -1; } static struct inet6_protocol frag_protocol = diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9af6115f0f50..89dc69924340 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1003,6 +1003,25 @@ int icmp6_dst_gc(void) return more; } +static void icmp6_clean_all(int (*func)(struct rt6_info *rt, void *arg), + void *arg) +{ + struct dst_entry *dst, **pprev; + + spin_lock_bh(&icmp6_dst_lock); + pprev = &icmp6_dst_gc_list; + while ((dst = *pprev) != NULL) { + struct rt6_info *rt = (struct rt6_info *) dst; + if (func(rt, arg)) { + *pprev = dst->next; + dst_free(dst); + } else { + pprev = &dst->next; + } + } + spin_unlock_bh(&icmp6_dst_lock); +} + static int ip6_dst_gc(struct dst_ops *ops) { unsigned long now = jiffies; @@ -1814,16 +1833,19 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg) static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes) { int type; + struct dst_entry *dst = skb->dst; switch (ipstats_mib_noroutes) { case IPSTATS_MIB_INNOROUTES: type = ipv6_addr_type(&ipv6_hdr(skb)->daddr); if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) { - IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS); + IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + IPSTATS_MIB_INADDRERRORS); break; } /* FALLTHROUGH */ case IPSTATS_MIB_OUTNOROUTES: - IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes); + IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), + ipstats_mib_noroutes); break; } icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev); @@ -1930,6 +1952,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev) }; fib6_clean_all(net, fib6_ifdown, 0, &adn); + icmp6_clean_all(fib6_ifdown, &adn); } struct rt6_mtu_change_arg @@ -2611,10 +2634,8 @@ static int ip6_route_net_init(struct net *net) net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, sizeof(*net->ipv6.ip6_prohibit_entry), GFP_KERNEL); - if (!net->ipv6.ip6_prohibit_entry) { - kfree(net->ipv6.ip6_null_entry); - goto out; - } + if (!net->ipv6.ip6_prohibit_entry) + goto out_ip6_null_entry; net->ipv6.ip6_prohibit_entry->u.dst.path = (struct dst_entry *)net->ipv6.ip6_prohibit_entry; net->ipv6.ip6_prohibit_entry->u.dst.ops = net->ipv6.ip6_dst_ops; @@ -2622,16 +2643,22 @@ static int ip6_route_net_init(struct net *net) net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template, sizeof(*net->ipv6.ip6_blk_hole_entry), GFP_KERNEL); - if (!net->ipv6.ip6_blk_hole_entry) { - kfree(net->ipv6.ip6_null_entry); - kfree(net->ipv6.ip6_prohibit_entry); - goto out; - } + if (!net->ipv6.ip6_blk_hole_entry) + goto out_ip6_prohibit_entry; net->ipv6.ip6_blk_hole_entry->u.dst.path = (struct dst_entry *)net->ipv6.ip6_blk_hole_entry; net->ipv6.ip6_blk_hole_entry->u.dst.ops = net->ipv6.ip6_dst_ops; #endif + net->ipv6.sysctl.flush_delay = 0; + net->ipv6.sysctl.ip6_rt_max_size = 4096; + net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2; + net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ; + net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ; + net->ipv6.sysctl.ip6_rt_gc_elasticity = 9; + net->ipv6.sysctl.ip6_rt_mtu_expires = 10*60*HZ; + net->ipv6.sysctl.ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40; + #ifdef CONFIG_PROC_FS proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops); proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops); @@ -2642,6 +2669,12 @@ static int ip6_route_net_init(struct net *net) out: return ret; +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +out_ip6_prohibit_entry: + kfree(net->ipv6.ip6_prohibit_entry); +out_ip6_null_entry: + kfree(net->ipv6.ip6_null_entry); +#endif out_ip6_dst_ops: release_net(net->ipv6.ip6_dst_ops->dst_net); kfree(net->ipv6.ip6_dst_ops); @@ -2688,6 +2721,8 @@ int __init ip6_route_init(void) if (ret) goto out_kmem_cache; + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; + /* Registering of the loopback is done before this portion of code, * the loopback reference in rt6_info will not be taken, do it * manually for init_net */ diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index ec394cf5a19b..676c80b5b14b 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -204,6 +204,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) req->mss = mss; ireq->rmt_port = th->source; + ireq->loc_port = th->dest; ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr); ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr); if (ipv6_opt_accepted(sk, skb) || diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index b585c850a89a..b6b356b7912a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -330,7 +330,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, th->dest, &hdr->saddr, th->source, skb->dev->ifindex); if (sk == NULL) { - ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), + ICMP6_MIB_INERRORS); return; } @@ -475,7 +476,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req) fl.fl6_flowlabel = 0; fl.oif = treq->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_sk(sk)->sport; + fl.fl_ip_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); opt = np->opt; @@ -941,117 +942,14 @@ static int tcp_v6_gso_send_check(struct sk_buff *skb) return 0; } -static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) +static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, + u32 ts, struct tcp_md5sig_key *key, int rst) { struct tcphdr *th = tcp_hdr(skb), *t1; struct sk_buff *buff; struct flowi fl; struct net *net = dev_net(skb->dst->dev); struct sock *ctl_sk = net->ipv6.tcp_sk; - unsigned int tot_len = sizeof(*th); -#ifdef CONFIG_TCP_MD5SIG - struct tcp_md5sig_key *key; -#endif - - if (th->rst) - return; - - if (!ipv6_unicast_destination(skb)) - return; - -#ifdef CONFIG_TCP_MD5SIG - if (sk) - key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr); - else - key = NULL; - - if (key) - tot_len += TCPOLEN_MD5SIG_ALIGNED; -#endif - - /* - * We need to grab some memory, and put together an RST, - * and then put it into the queue to be sent. - */ - - buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, - GFP_ATOMIC); - if (buff == NULL) - return; - - skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); - - t1 = (struct tcphdr *) skb_push(buff, tot_len); - - /* Swap the send and the receive. */ - memset(t1, 0, sizeof(*t1)); - t1->dest = th->source; - t1->source = th->dest; - t1->doff = tot_len / 4; - t1->rst = 1; - - if(th->ack) { - t1->seq = th->ack_seq; - } else { - t1->ack = 1; - t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin - + skb->len - (th->doff<<2)); - } - -#ifdef CONFIG_TCP_MD5SIG - if (key) { - __be32 *opt = (__be32*)(t1 + 1); - opt[0] = htonl((TCPOPT_NOP << 24) | - (TCPOPT_NOP << 16) | - (TCPOPT_MD5SIG << 8) | - TCPOLEN_MD5SIG); - tcp_v6_md5_hash_hdr((__u8 *)&opt[1], key, - &ipv6_hdr(skb)->daddr, - &ipv6_hdr(skb)->saddr, t1); - } -#endif - - buff->csum = csum_partial((char *)t1, sizeof(*t1), 0); - - memset(&fl, 0, sizeof(fl)); - ipv6_addr_copy(&fl.fl6_dst, &ipv6_hdr(skb)->saddr); - ipv6_addr_copy(&fl.fl6_src, &ipv6_hdr(skb)->daddr); - - t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst, - sizeof(*t1), IPPROTO_TCP, - buff->csum); - - fl.proto = IPPROTO_TCP; - fl.oif = inet6_iif(skb); - fl.fl_ip_dport = t1->dest; - fl.fl_ip_sport = t1->source; - security_skb_classify_flow(skb, &fl); - - /* Pass a socket to ip6_dst_lookup either it is for RST - * Underlying function will use this to retrieve the network - * namespace - */ - if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { - - if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { - ip6_xmit(ctl_sk, buff, &fl, NULL, 0); - TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); - TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); - return; - } - } - - kfree_skb(buff); -} - -static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, - struct tcp_md5sig_key *key) -{ - struct tcphdr *th = tcp_hdr(skb), *t1; - struct sk_buff *buff; - struct flowi fl; - struct net *net = dev_net(skb->dev); - struct sock *ctl_sk = net->ipv6.tcp_sk; unsigned int tot_len = sizeof(struct tcphdr); __be32 *topt; @@ -1069,16 +967,17 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); - t1 = (struct tcphdr *) skb_push(buff,tot_len); + t1 = (struct tcphdr *) skb_push(buff, tot_len); /* Swap the send and the receive. */ memset(t1, 0, sizeof(*t1)); t1->dest = th->source; t1->source = th->dest; - t1->doff = tot_len/4; + t1->doff = tot_len / 4; t1->seq = htonl(seq); t1->ack_seq = htonl(ack); - t1->ack = 1; + t1->ack = !rst || !th->ack; + t1->rst = rst; t1->window = htons(win); topt = (__be32 *)(t1 + 1); @@ -1087,7 +986,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP); *topt++ = htonl(tcp_time_stamp); - *topt = htonl(ts); + *topt++ = htonl(ts); } #ifdef CONFIG_TCP_MD5SIG @@ -1116,10 +1015,16 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 fl.fl_ip_sport = t1->source; security_skb_classify_flow(skb, &fl); + /* Pass a socket to ip6_dst_lookup either it is for RST + * Underlying function will use this to retrieve the network + * namespace + */ if (!ip6_dst_lookup(ctl_sk, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { ip6_xmit(ctl_sk, buff, &fl, NULL, 0); TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS); + if (rst) + TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS); return; } } @@ -1127,6 +1032,38 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 kfree_skb(buff); } +static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) +{ + struct tcphdr *th = tcp_hdr(skb); + u32 seq = 0, ack_seq = 0; + struct tcp_md5sig_key *key = NULL; + + if (th->rst) + return; + + if (!ipv6_unicast_destination(skb)) + return; + +#ifdef CONFIG_TCP_MD5SIG + if (sk) + key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr); +#endif + + if (th->ack) + seq = ntohl(th->ack_seq); + else + ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - + (th->doff << 2); + + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1); +} + +static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts, + struct tcp_md5sig_key *key) +{ + tcp_v6_send_response(skb, seq, ack, win, ts, key, 0); +} + static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) { struct inet_timewait_sock *tw = inet_twsk(sk); @@ -1286,7 +1223,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst) { - struct inet6_request_sock *treq = inet6_rsk(req); + struct inet6_request_sock *treq; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); struct tcp6_sock *newtcp6sk; struct inet_sock *newinet; @@ -1350,6 +1287,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, return newsk; } + treq = inet6_rsk(req); opt = np->opt; if (sk_acceptq_is_full(sk)) @@ -1371,7 +1309,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr); fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; - fl.fl_ip_sport = inet_sk(sk)->sport; + fl.fl_ip_sport = inet_rsk(req)->loc_port; security_req_classify_flow(req, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) @@ -1680,11 +1618,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb)); TCP_SKB_CB(skb)->sacked = 0; - sk = __inet6_lookup(net, &tcp_hashinfo, - &ipv6_hdr(skb)->saddr, th->source, - &ipv6_hdr(skb)->daddr, ntohs(th->dest), - inet6_iif(skb)); - + sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest); if (!sk) goto no_tcp_socket; @@ -1931,7 +1865,7 @@ static void get_openreq6(struct seq_file *seq, i, src->s6_addr32[0], src->s6_addr32[1], src->s6_addr32[2], src->s6_addr32[3], - ntohs(inet_sk(sk)->sport), + ntohs(inet_rsk(req)->loc_port), dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], ntohs(inet_rsk(req)->rmt_port), diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a6aecf76a71b..8b48512ebf6a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -107,6 +107,21 @@ static struct sock *__udp6_lib_lookup(struct net *net, return result; } +static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, + __be16 sport, __be16 dport, + struct hlist_head udptable[]) +{ + struct sock *sk; + struct ipv6hdr *iph = ipv6_hdr(skb); + + if (unlikely(sk = skb_steal_sock(skb))) + return sk; + else + return __udp6_lib_lookup(dev_net(skb->dst->dev), &iph->saddr, sport, + &iph->daddr, dport, inet6_iif(skb), + udptable); +} + /* * This should be easy, if there is something there we * return it, otherwise we block. @@ -123,6 +138,7 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, int peeked; int err; int is_udplite = IS_UDPLITE(sk); + int is_udp4; if (addr_len) *addr_len=sizeof(struct sockaddr_in6); @@ -143,6 +159,8 @@ try_again: else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; + is_udp4 = (skb->protocol == htons(ETH_P_IP)); + /* * If checksum is needed at all, try to do it while copying the * data. If the data is truncated, or if we only want a partial @@ -165,9 +183,14 @@ try_again: if (err) goto out_free; - if (!peeked) - UDP6_INC_STATS_USER(sock_net(sk), - UDP_MIB_INDATAGRAMS, is_udplite); + if (!peeked) { + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INDATAGRAMS, is_udplite); + } sock_recv_timestamp(msg, sk, skb); @@ -181,7 +204,7 @@ try_again: sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = 0; - if (skb->protocol == htons(ETH_P_IP)) + if (is_udp4) ipv6_addr_set(&sin6->sin6_addr, 0, 0, htonl(0xffff), ip_hdr(skb)->saddr); else { @@ -192,7 +215,7 @@ try_again: } } - if (skb->protocol == htons(ETH_P_IP)) { + if (is_udp4) { if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); } else { @@ -213,8 +236,14 @@ out: csum_copy_err: lock_sock(sk); - if (!skb_kill_datagram(sk, skb, flags)) - UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + if (!skb_kill_datagram(sk, skb, flags)) { + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + } release_sock(sk); if (flags & MSG_DONTWAIT) @@ -313,7 +342,7 @@ drop: return -1; } -static struct sock *udp_v6_mcast_next(struct sock *sk, +static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, __be16 loc_port, struct in6_addr *loc_addr, __be16 rmt_port, struct in6_addr *rmt_addr, int dif) @@ -325,7 +354,7 @@ static struct sock *udp_v6_mcast_next(struct sock *sk, sk_for_each_from(s, node) { struct inet_sock *inet = inet_sk(s); - if (sock_net(s) != sock_net(sk)) + if (!net_eq(sock_net(s), net)) continue; if (s->sk_hash == num && s->sk_family == PF_INET6) { @@ -368,14 +397,14 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, read_lock(&udp_hash_lock); sk = sk_head(&udptable[udp_hashfn(net, ntohs(uh->dest))]); dif = inet6_iif(skb); - sk = udp_v6_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); if (!sk) { kfree_skb(skb); goto out; } sk2 = sk; - while ((sk2 = udp_v6_mcast_next(sk_next(sk2), uh->dest, daddr, + while ((sk2 = udp_v6_mcast_next(net, sk_next(sk2), uh->dest, daddr, uh->source, saddr, dif))) { struct sk_buff *buff = skb_clone(skb, GFP_ATOMIC); if (buff) { @@ -488,8 +517,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], * check socket cache ... must talk to Alan about his plans * for sock caches... i'll skip this for now. */ - sk = __udp6_lib_lookup(net, saddr, uh->source, - daddr, uh->dest, inet6_iif(skb), udptable); + sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk == NULL) { if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index f6cdcb348e05..3cd1a1ac3d6c 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -13,8 +13,6 @@ */ #include "udp_impl.h" -DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6) __read_mostly; - static int udplitev6_rcv(struct sk_buff *skb) { return __udp6_lib_rcv(skb, udplite_hash, IPPROTO_UDPLITE); diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 89884a4f23aa..60c78cfc2737 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -34,6 +34,7 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->sel.dport_mask = htons(0xffff); x->sel.sport = xfrm_flowi_sport(fl); x->sel.sport_mask = htons(0xffff); + x->sel.family = AF_INET6; x->sel.prefixlen_d = 128; x->sel.prefixlen_s = 128; x->sel.proto = fl->proto; diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 705959b31e24..d7b54b5bfa69 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -524,7 +524,6 @@ static int iucv_enable(void) get_online_cpus(); for_each_online_cpu(cpu) smp_call_function_single(cpu, iucv_declare_cpu, NULL, 1); - preempt_enable(); if (cpus_empty(iucv_buffer_cpumask)) /* No cpu could declare an iucv buffer. */ goto out_path; @@ -547,7 +546,9 @@ out: */ static void iucv_disable(void) { + get_online_cpus(); on_each_cpu(iucv_retrieve_cpu, NULL, 1); + put_online_cpus(); kfree(iucv_path_table); } diff --git a/net/key/af_key.c b/net/key/af_key.c index d628df97e02e..5b22e011653b 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -58,6 +58,7 @@ struct pfkey_sock { struct xfrm_policy_walk policy; struct xfrm_state_walk state; } u; + struct sk_buff *skb; } dump; }; @@ -73,22 +74,22 @@ static int pfkey_can_dump(struct sock *sk) return 0; } -static int pfkey_do_dump(struct pfkey_sock *pfk) +static void pfkey_terminate_dump(struct pfkey_sock *pfk) { - int rc; - - rc = pfk->dump.dump(pfk); - if (rc == -ENOBUFS) - return 0; - - pfk->dump.done(pfk); - pfk->dump.dump = NULL; - pfk->dump.done = NULL; - return rc; + if (pfk->dump.dump) { + if (pfk->dump.skb) { + kfree_skb(pfk->dump.skb); + pfk->dump.skb = NULL; + } + pfk->dump.done(pfk); + pfk->dump.dump = NULL; + pfk->dump.done = NULL; + } } static void pfkey_sock_destruct(struct sock *sk) { + pfkey_terminate_dump(pfkey_sk(sk)); skb_queue_purge(&sk->sk_receive_queue); if (!sock_flag(sk, SOCK_DEAD)) { @@ -310,6 +311,31 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, return err; } +static int pfkey_do_dump(struct pfkey_sock *pfk) +{ + struct sadb_msg *hdr; + int rc; + + rc = pfk->dump.dump(pfk); + if (rc == -ENOBUFS) + return 0; + + if (pfk->dump.skb) { + if (!pfkey_can_dump(&pfk->sk)) + return 0; + + hdr = (struct sadb_msg *) pfk->dump.skb->data; + hdr->sadb_msg_seq = 0; + hdr->sadb_msg_errno = rc; + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk); + pfk->dump.skb = NULL; + } + + pfkey_terminate_dump(pfk); + return rc; +} + static inline void pfkey_hdr_dup(struct sadb_msg *new, struct sadb_msg *orig) { *new = *orig; @@ -372,6 +398,7 @@ static u8 sadb_ext_min_len[] = { [SADB_X_EXT_NAT_T_DPORT] = (u8) sizeof(struct sadb_x_nat_t_port), [SADB_X_EXT_NAT_T_OA] = (u8) sizeof(struct sadb_address), [SADB_X_EXT_SEC_CTX] = (u8) sizeof(struct sadb_x_sec_ctx), + [SADB_X_EXT_KMADDRESS] = (u8) sizeof(struct sadb_x_kmaddress), }; /* Verify sadb_address_{len,prefixlen} against sa_family. */ @@ -1736,9 +1763,14 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; - out_hdr->sadb_msg_seq = count; + out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk); + + if (pfk->dump.skb) + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk); + pfk->dump.skb = out_skb; + return 0; } @@ -2043,7 +2075,6 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, in req_size += socklen * 2; } else { size -= 2*socklen; - socklen = 0; } rq = (void*)skb_put(skb, req_size); pol->sadb_x_policy_len += req_size/8; @@ -2237,7 +2268,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return 0; out: - xp->dead = 1; + xp->walk.dead = 1; xfrm_policy_destroy(xp); return err; } @@ -2309,6 +2340,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; + c.data.byid = 0; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); @@ -2353,24 +2385,21 @@ static int pfkey_sockaddr_pair_size(sa_family_t family) return PFKEY_ALIGN8(pfkey_sockaddr_len(family) * 2); } -static int parse_sockaddr_pair(struct sadb_x_ipsecrequest *rq, +static int parse_sockaddr_pair(struct sockaddr *sa, int ext_len, xfrm_address_t *saddr, xfrm_address_t *daddr, u16 *family) { - u8 *sa = (u8 *) (rq + 1); int af, socklen; - if (rq->sadb_x_ipsecrequest_len < - pfkey_sockaddr_pair_size(((struct sockaddr *)sa)->sa_family)) + if (ext_len < pfkey_sockaddr_pair_size(sa->sa_family)) return -EINVAL; - af = pfkey_sockaddr_extract((struct sockaddr *) sa, - saddr); + af = pfkey_sockaddr_extract(sa, saddr); if (!af) return -EINVAL; socklen = pfkey_sockaddr_len(af); - if (pfkey_sockaddr_extract((struct sockaddr *) (sa + socklen), + if (pfkey_sockaddr_extract((struct sockaddr *) (((u8 *)sa) + socklen), daddr) != af) return -EINVAL; @@ -2390,7 +2419,9 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, return -EINVAL; /* old endoints */ - err = parse_sockaddr_pair(rq1, &m->old_saddr, &m->old_daddr, + err = parse_sockaddr_pair((struct sockaddr *)(rq1 + 1), + rq1->sadb_x_ipsecrequest_len, + &m->old_saddr, &m->old_daddr, &m->old_family); if (err) return err; @@ -2403,7 +2434,9 @@ static int ipsecrequests_to_migrate(struct sadb_x_ipsecrequest *rq1, int len, return -EINVAL; /* new endpoints */ - err = parse_sockaddr_pair(rq2, &m->new_saddr, &m->new_daddr, + err = parse_sockaddr_pair((struct sockaddr *)(rq2 + 1), + rq2->sadb_x_ipsecrequest_len, + &m->new_saddr, &m->new_daddr, &m->new_family); if (err) return err; @@ -2429,29 +2462,40 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, int i, len, ret, err = -EINVAL; u8 dir; struct sadb_address *sa; + struct sadb_x_kmaddress *kma; struct sadb_x_policy *pol; struct sadb_x_ipsecrequest *rq; struct xfrm_selector sel; struct xfrm_migrate m[XFRM_MAX_DEPTH]; + struct xfrm_kmaddress k; if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC - 1], - ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || + ext_hdrs[SADB_EXT_ADDRESS_DST - 1]) || !ext_hdrs[SADB_X_EXT_POLICY - 1]) { err = -EINVAL; goto out; } + kma = ext_hdrs[SADB_X_EXT_KMADDRESS - 1]; pol = ext_hdrs[SADB_X_EXT_POLICY - 1]; - if (!pol) { - err = -EINVAL; - goto out; - } if (pol->sadb_x_policy_dir >= IPSEC_DIR_MAX) { err = -EINVAL; goto out; } + if (kma) { + /* convert sadb_x_kmaddress to xfrm_kmaddress */ + k.reserved = kma->sadb_x_kmaddress_reserved; + ret = parse_sockaddr_pair((struct sockaddr *)(kma + 1), + 8*(kma->sadb_x_kmaddress_len) - sizeof(*kma), + &k.local, &k.remote, &k.family); + if (ret < 0) { + err = ret; + goto out; + } + } + dir = pol->sadb_x_policy_dir - 1; memset(&sel, 0, sizeof(sel)); @@ -2496,7 +2540,8 @@ static int pfkey_migrate(struct sock *sk, struct sk_buff *skb, goto out; } - return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i); + return xfrm_migrate(&sel, dir, XFRM_POLICY_TYPE_MAIN, m, i, + kma ? &k : NULL); out: return err; @@ -2575,9 +2620,14 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) out_hdr->sadb_msg_type = SADB_X_SPDDUMP; out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; out_hdr->sadb_msg_errno = 0; - out_hdr->sadb_msg_seq = count; + out_hdr->sadb_msg_seq = count + 1; out_hdr->sadb_msg_pid = pfk->dump.msg_pid; - pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk); + + if (pfk->dump.skb) + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, + &pfk->sk); + pfk->dump.skb = out_skb; + return 0; } @@ -3138,6 +3188,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, return xp; out: + xp->walk.dead = 1; xfrm_policy_destroy(xp); return NULL; } @@ -3283,6 +3334,32 @@ static int set_sadb_address(struct sk_buff *skb, int sasize, int type, return 0; } + +static int set_sadb_kmaddress(struct sk_buff *skb, struct xfrm_kmaddress *k) +{ + struct sadb_x_kmaddress *kma; + u8 *sa; + int family = k->family; + int socklen = pfkey_sockaddr_len(family); + int size_req; + + size_req = (sizeof(struct sadb_x_kmaddress) + + pfkey_sockaddr_pair_size(family)); + + kma = (struct sadb_x_kmaddress *)skb_put(skb, size_req); + memset(kma, 0, size_req); + kma->sadb_x_kmaddress_len = size_req / 8; + kma->sadb_x_kmaddress_exttype = SADB_X_EXT_KMADDRESS; + kma->sadb_x_kmaddress_reserved = k->reserved; + + sa = (u8 *)(kma + 1); + if (!pfkey_sockaddr_fill(&k->local, 0, (struct sockaddr *)sa, family) || + !pfkey_sockaddr_fill(&k->remote, 0, (struct sockaddr *)(sa+socklen), family)) + return -EINVAL; + + return 0; +} + static int set_ipsecrequest(struct sk_buff *skb, uint8_t proto, uint8_t mode, int level, uint32_t reqid, uint8_t family, @@ -3315,7 +3392,8 @@ static int set_ipsecrequest(struct sk_buff *skb, #ifdef CONFIG_NET_KEY_MIGRATE static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles) + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k) { int i; int sasize_sel; @@ -3332,6 +3410,12 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, if (num_bundles <= 0 || num_bundles > XFRM_MAX_DEPTH) return -EINVAL; + if (k != NULL) { + /* addresses for KM */ + size += PFKEY_ALIGN8(sizeof(struct sadb_x_kmaddress) + + pfkey_sockaddr_pair_size(k->family)); + } + /* selector */ sasize_sel = pfkey_sockaddr_size(sel->family); if (!sasize_sel) @@ -3368,6 +3452,10 @@ static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, hdr->sadb_msg_seq = 0; hdr->sadb_msg_pid = 0; + /* Addresses to be used by KM for negotiation, if ext is available */ + if (k != NULL && (set_sadb_kmaddress(skb, k) < 0)) + return -EINVAL; + /* selector src */ set_sadb_address(skb, sasize_sel, SADB_EXT_ADDRESS_SRC, sel); @@ -3413,7 +3501,8 @@ err: } #else static int pfkey_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_bundles) + struct xfrm_migrate *m, int num_bundles, + struct xfrm_kmaddress *k) { return -ENOPROTOOPT; } diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 80d693392b0f..7f710a27e91c 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -22,6 +22,11 @@ config MAC80211_RC_PID mac80211 that uses a PID controller to select the TX rate. +config MAC80211_RC_MINSTREL + bool "Minstrel" + ---help--- + This option enables the 'minstrel' TX rate control algorithm + choice prompt "Default rate control algorithm" default MAC80211_RC_DEFAULT_PID @@ -39,11 +44,19 @@ config MAC80211_RC_DEFAULT_PID default rate control algorithm. You should choose this unless you know what you are doing. +config MAC80211_RC_DEFAULT_MINSTREL + bool "Minstrel" + depends on MAC80211_RC_MINSTREL + ---help--- + Select Minstrel as the default rate control algorithm. + + endchoice config MAC80211_RC_DEFAULT string default "pid" if MAC80211_RC_DEFAULT_PID + default "minstrel" if MAC80211_RC_DEFAULT_MINSTREL default "" endmenu @@ -179,19 +192,6 @@ config MAC80211_VERBOSE_MPL_DEBUG Do not select this option. -config MAC80211_LOWTX_FRAME_DUMP - bool "Debug frame dumping" - depends on MAC80211_DEBUG_MENU - ---help--- - Selecting this option will cause the stack to - print a message for each frame that is handed - to the lowlevel driver for transmission. This - message includes all MAC addresses and the - frame control field. - - If unsure, say N and insert the debugging code - you require into the driver you are debugging. - config MAC80211_DEBUG_COUNTERS bool "Extra statistics for TX/RX debugging" depends on MAC80211_DEBUG_MENU diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index a169b0201d61..31cfd1f89a72 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -7,6 +7,8 @@ mac80211-y := \ sta_info.o \ wep.o \ wpa.o \ + scan.o \ + ht.o \ mlme.o \ iface.o \ rate.o \ @@ -15,6 +17,7 @@ mac80211-y := \ aes_ccm.o \ cfg.o \ rx.o \ + spectmgmt.o \ tx.o \ key.o \ util.o \ @@ -38,4 +41,8 @@ mac80211-$(CONFIG_MAC80211_MESH) += \ rc80211_pid-y := rc80211_pid_algo.o rc80211_pid-$(CONFIG_MAC80211_DEBUGFS) += rc80211_pid_debugfs.o +rc80211_minstrel-y := rc80211_minstrel.o +rc80211_minstrel-$(CONFIG_MAC80211_DEBUGFS) += rc80211_minstrel_debugfs.o + mac80211-$(CONFIG_MAC80211_RC_PID) += $(rc80211_pid-y) +mac80211-$(CONFIG_MAC80211_RC_MINSTREL) += $(rc80211_minstrel-y) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 297c257864c7..855126a3039d 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -17,26 +17,26 @@ #include "rate.h" #include "mesh.h" -static enum ieee80211_if_types -nl80211_type_to_mac80211_type(enum nl80211_iftype type) +struct ieee80211_hw *wiphy_to_hw(struct wiphy *wiphy) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + return &local->hw; +} +EXPORT_SYMBOL(wiphy_to_hw); + +static bool nl80211_type_check(enum nl80211_iftype type) { switch (type) { - case NL80211_IFTYPE_UNSPECIFIED: - return IEEE80211_IF_TYPE_STA; case NL80211_IFTYPE_ADHOC: - return IEEE80211_IF_TYPE_IBSS; case NL80211_IFTYPE_STATION: - return IEEE80211_IF_TYPE_STA; case NL80211_IFTYPE_MONITOR: - return IEEE80211_IF_TYPE_MNTR; #ifdef CONFIG_MAC80211_MESH case NL80211_IFTYPE_MESH_POINT: - return IEEE80211_IF_TYPE_MESH_POINT; #endif case NL80211_IFTYPE_WDS: - return IEEE80211_IF_TYPE_WDS; + return true; default: - return IEEE80211_IF_TYPE_INVALID; + return false; } } @@ -45,17 +45,15 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name, struct vif_params *params) { struct ieee80211_local *local = wiphy_priv(wiphy); - enum ieee80211_if_types itype; struct net_device *dev; struct ieee80211_sub_if_data *sdata; int err; - itype = nl80211_type_to_mac80211_type(type); - if (itype == IEEE80211_IF_TYPE_INVALID) + if (!nl80211_type_check(type)) return -EINVAL; - err = ieee80211_if_add(local, name, &dev, itype, params); - if (err || itype != IEEE80211_IF_TYPE_MNTR || !flags) + err = ieee80211_if_add(local, name, &dev, type, params); + if (err || type != NL80211_IFTYPE_MONITOR || !flags) return err; sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -66,13 +64,16 @@ static int ieee80211_add_iface(struct wiphy *wiphy, char *name, static int ieee80211_del_iface(struct wiphy *wiphy, int ifindex) { struct net_device *dev; + struct ieee80211_sub_if_data *sdata; /* we're under RTNL */ dev = __dev_get_by_index(&init_net, ifindex); if (!dev) return -ENODEV; - ieee80211_if_remove(dev); + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + ieee80211_if_remove(sdata); return 0; } @@ -81,9 +82,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, enum nl80211_iftype type, u32 *flags, struct vif_params *params) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct net_device *dev; - enum ieee80211_if_types itype; struct ieee80211_sub_if_data *sdata; int ret; @@ -92,25 +91,24 @@ static int ieee80211_change_iface(struct wiphy *wiphy, int ifindex, if (!dev) return -ENODEV; - itype = nl80211_type_to_mac80211_type(type); - if (itype == IEEE80211_IF_TYPE_INVALID) + if (!nl80211_type_check(type)) return -EINVAL; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - ret = ieee80211_if_change_type(sdata, itype); + ret = ieee80211_if_change_type(sdata, type); if (ret) return ret; + if (netif_running(sdata->dev)) + return -EBUSY; + if (ieee80211_vif_is_mesh(&sdata->vif) && params->mesh_id_len) - ieee80211_if_sta_set_mesh_id(&sdata->u.sta, - params->mesh_id_len, - params->mesh_id); + ieee80211_sdata_set_mesh_id(sdata, + params->mesh_id_len, + params->mesh_id); - if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR || !flags) + if (sdata->vif.type != NL80211_IFTYPE_MONITOR || !flags) return 0; sdata->u.mntr_flags = *flags; @@ -121,16 +119,12 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx, u8 *mac_addr, struct key_params *params) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct sta_info *sta = NULL; enum ieee80211_key_alg alg; struct ieee80211_key *key; int err; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); switch (params->cipher) { @@ -175,14 +169,10 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx, u8 *mac_addr) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct sta_info *sta; int ret; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); @@ -223,7 +213,6 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, void (*callback)(void *cookie, struct key_params *params)) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct sta_info *sta = NULL; u8 seq[6] = {0}; @@ -233,9 +222,6 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, u16 iv16; int err = -ENOENT; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); @@ -311,12 +297,8 @@ static int ieee80211_config_default_key(struct wiphy *wiphy, struct net_device *dev, u8 key_idx) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; - if (dev == local->mdev) - return -EOPNOTSUPP; - rcu_read_lock(); sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -365,7 +347,7 @@ static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, sta = sta_info_get_by_idx(local, idx, dev); if (sta) { ret = 0; - memcpy(mac, sta->addr, ETH_ALEN); + memcpy(mac, sta->sta.addr, ETH_ALEN); sta_set_sinfo(sta, sinfo); } @@ -497,16 +479,12 @@ static int ieee80211_config_beacon(struct ieee80211_sub_if_data *sdata, static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, struct beacon_parameters *params) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct beacon_data *old; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; old = sdata->u.ap.beacon; @@ -520,16 +498,12 @@ static int ieee80211_add_beacon(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev, struct beacon_parameters *params) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct beacon_data *old; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; old = sdata->u.ap.beacon; @@ -542,16 +516,12 @@ static int ieee80211_set_beacon(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_del_beacon(struct wiphy *wiphy, struct net_device *dev) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct beacon_data *old; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; old = sdata->u.ap.beacon; @@ -594,7 +564,7 @@ static void ieee80211_send_layer2_update(struct sta_info *sta) * Update response frame; IEEE Std 802.2-1998, 5.4.1.2.1 */ memset(msg->da, 0xff, ETH_ALEN); - memcpy(msg->sa, sta->addr, ETH_ALEN); + memcpy(msg->sa, sta->sta.addr, ETH_ALEN); msg->len = htons(6); msg->dsap = 0; msg->ssap = 0x01; /* NULL LSAP, CR Bit: Response */ @@ -649,9 +619,9 @@ static void sta_apply_parameters(struct ieee80211_local *local, */ if (params->aid) { - sta->aid = params->aid; - if (sta->aid > IEEE80211_MAX_AID) - sta->aid = 0; /* XXX: should this be an error? */ + sta->sta.aid = params->aid; + if (sta->sta.aid > IEEE80211_MAX_AID) + sta->sta.aid = 0; /* XXX: should this be an error? */ } if (params->listen_interval >= 0) @@ -668,7 +638,12 @@ static void sta_apply_parameters(struct ieee80211_local *local, rates |= BIT(j); } } - sta->supp_rates[local->oper_channel->band] = rates; + sta->sta.supp_rates[local->oper_channel->band] = rates; + } + + if (params->ht_capa) { + ieee80211_ht_cap_ie_to_ht_info(params->ht_capa, + &sta->sta.ht_info); } if (ieee80211_vif_is_mesh(&sdata->vif) && params->plink_action) { @@ -691,9 +666,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata; int err; - if (dev == local->mdev || params->vlan == local->mdev) - return -EOPNOTSUPP; - /* Prevent a race with changing the rate control algorithm */ if (!netif_running(dev)) return -ENETDOWN; @@ -701,8 +673,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (params->vlan) { sdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (sdata->vif.type != IEEE80211_IF_TYPE_VLAN && - sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; } else sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -721,7 +693,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, sta_apply_parameters(local, sta, params); - rate_control_rate_init(sta, local); + rate_control_rate_init(sta); rcu_read_lock(); @@ -732,8 +704,8 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, return err; } - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN || - sdata->vif.type == IEEE80211_IF_TYPE_AP) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_AP) ieee80211_send_layer2_update(sta); rcu_read_unlock(); @@ -748,9 +720,6 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata; struct sta_info *sta; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (mac) { @@ -782,9 +751,6 @@ static int ieee80211_change_station(struct wiphy *wiphy, struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; - if (dev == local->mdev || params->vlan == local->mdev) - return -EOPNOTSUPP; - rcu_read_lock(); /* XXX: get sta belonging to dev */ @@ -797,8 +763,8 @@ static int ieee80211_change_station(struct wiphy *wiphy, if (params->vlan && params->vlan != sta->sdata->dev) { vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); - if (vlansdata->vif.type != IEEE80211_IF_TYPE_VLAN && - vlansdata->vif.type != IEEE80211_IF_TYPE_AP) { + if (vlansdata->vif.type != NL80211_IFTYPE_AP_VLAN && + vlansdata->vif.type != NL80211_IFTYPE_AP) { rcu_read_unlock(); return -EINVAL; } @@ -824,15 +790,12 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, struct sta_info *sta; int err; - if (dev == local->mdev) - return -EOPNOTSUPP; - if (!netif_running(dev)) return -ENETDOWN; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -ENOTSUPP; rcu_read_lock(); @@ -842,13 +805,13 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, return -ENOENT; } - err = mesh_path_add(dst, dev); + err = mesh_path_add(dst, sdata); if (err) { rcu_read_unlock(); return err; } - mpath = mesh_path_lookup(dst, dev); + mpath = mesh_path_lookup(dst, sdata); if (!mpath) { rcu_read_unlock(); return -ENXIO; @@ -862,10 +825,12 @@ static int ieee80211_add_mpath(struct wiphy *wiphy, struct net_device *dev, static int ieee80211_del_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst) { + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + if (dst) - return mesh_path_del(dst, dev); + return mesh_path_del(dst, sdata); - mesh_path_flush(dev); + mesh_path_flush(sdata); return 0; } @@ -878,15 +843,12 @@ static int ieee80211_change_mpath(struct wiphy *wiphy, struct mesh_path *mpath; struct sta_info *sta; - if (dev == local->mdev) - return -EOPNOTSUPP; - if (!netif_running(dev)) return -ENETDOWN; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -ENOTSUPP; rcu_read_lock(); @@ -897,7 +859,7 @@ static int ieee80211_change_mpath(struct wiphy *wiphy, return -ENOENT; } - mpath = mesh_path_lookup(dst, dev); + mpath = mesh_path_lookup(dst, sdata); if (!mpath) { rcu_read_unlock(); return -ENOENT; @@ -913,7 +875,7 @@ static void mpath_set_pinfo(struct mesh_path *mpath, u8 *next_hop, struct mpath_info *pinfo) { if (mpath->next_hop) - memcpy(next_hop, mpath->next_hop->addr, ETH_ALEN); + memcpy(next_hop, mpath->next_hop->sta.addr, ETH_ALEN); else memset(next_hop, 0, ETH_ALEN); @@ -952,20 +914,16 @@ static int ieee80211_get_mpath(struct wiphy *wiphy, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -ENOTSUPP; rcu_read_lock(); - mpath = mesh_path_lookup(dst, dev); + mpath = mesh_path_lookup(dst, sdata); if (!mpath) { rcu_read_unlock(); return -ENOENT; @@ -980,20 +938,16 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, int idx, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) { - struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; struct mesh_path *mpath; - if (dev == local->mdev) - return -EOPNOTSUPP; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT) + if (sdata->vif.type != NL80211_IFTYPE_MESH_POINT) return -ENOTSUPP; rcu_read_lock(); - mpath = mesh_path_lookup_by_idx(idx, dev); + mpath = mesh_path_lookup_by_idx(idx, sdata); if (!mpath) { rcu_read_unlock(); return -ENOENT; @@ -1005,6 +959,38 @@ static int ieee80211_dump_mpath(struct wiphy *wiphy, struct net_device *dev, } #endif +static int ieee80211_change_bss(struct wiphy *wiphy, + struct net_device *dev, + struct bss_parameters *params) +{ + struct ieee80211_sub_if_data *sdata; + u32 changed = 0; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + if (sdata->vif.type != NL80211_IFTYPE_AP) + return -EINVAL; + + if (params->use_cts_prot >= 0) { + sdata->bss_conf.use_cts_prot = params->use_cts_prot; + changed |= BSS_CHANGED_ERP_CTS_PROT; + } + if (params->use_short_preamble >= 0) { + sdata->bss_conf.use_short_preamble = + params->use_short_preamble; + changed |= BSS_CHANGED_ERP_PREAMBLE; + } + if (params->use_short_slot_time >= 0) { + sdata->bss_conf.use_short_slot = + params->use_short_slot_time; + changed |= BSS_CHANGED_ERP_SLOT; + } + + ieee80211_bss_info_change_notify(sdata, changed); + + return 0; +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1028,4 +1014,5 @@ struct cfg80211_ops mac80211_config_ops = { .get_mpath = ieee80211_get_mpath, .dump_mpath = ieee80211_dump_mpath, #endif + .change_bss = ieee80211_change_bss, }; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index ee509f1109e2..24ce54463310 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -51,8 +51,6 @@ DEBUGFS_READONLY_FILE(antenna_sel_tx, 20, "%d", local->hw.conf.antenna_sel_tx); DEBUGFS_READONLY_FILE(antenna_sel_rx, 20, "%d", local->hw.conf.antenna_sel_rx); -DEBUGFS_READONLY_FILE(bridge_packets, 20, "%d", - local->bridge_packets); DEBUGFS_READONLY_FILE(rts_threshold, 20, "%d", local->rts_threshold); DEBUGFS_READONLY_FILE(fragmentation_threshold, 20, "%d", @@ -206,7 +204,6 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(frequency); DEBUGFS_ADD(antenna_sel_tx); DEBUGFS_ADD(antenna_sel_rx); - DEBUGFS_ADD(bridge_packets); DEBUGFS_ADD(rts_threshold); DEBUGFS_ADD(fragmentation_threshold); DEBUGFS_ADD(short_retry_limit); @@ -263,7 +260,6 @@ void debugfs_hw_del(struct ieee80211_local *local) DEBUGFS_DEL(frequency); DEBUGFS_DEL(antenna_sel_tx); DEBUGFS_DEL(antenna_sel_rx); - DEBUGFS_DEL(bridge_packets); DEBUGFS_DEL(rts_threshold); DEBUGFS_DEL(fragmentation_threshold); DEBUGFS_DEL(short_retry_limit); diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index cf82acec913a..a3294d109322 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -206,7 +206,8 @@ void ieee80211_debugfs_key_add(struct ieee80211_key *key) rcu_read_lock(); sta = rcu_dereference(key->sta); if (sta) - sprintf(buf, "../../stations/%s", print_mac(mac, sta->addr)); + sprintf(buf, "../../stations/%s", + print_mac(mac, sta->sta.addr)); rcu_read_unlock(); /* using sta as a boolean is fine outside RCU lock */ diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 8165df578c92..2ad504fc3414 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -173,7 +173,6 @@ IEEE80211_IF_FILE(assoc_tries, u.sta.assoc_tries, DEC); IEEE80211_IF_FILE(auth_algs, u.sta.auth_algs, HEX); IEEE80211_IF_FILE(auth_alg, u.sta.auth_alg, DEC); IEEE80211_IF_FILE(auth_transaction, u.sta.auth_transaction, DEC); -IEEE80211_IF_FILE(num_beacons_sta, u.sta.num_beacons, DEC); static ssize_t ieee80211_if_fmt_flags( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) @@ -192,7 +191,6 @@ __IEEE80211_IF_FILE(flags); /* AP attributes */ IEEE80211_IF_FILE(num_sta_ps, u.ap.num_sta_ps, ATOMIC); IEEE80211_IF_FILE(dtim_count, u.ap.dtim_count, DEC); -IEEE80211_IF_FILE(num_beacons, u.ap.num_beacons, DEC); static ssize_t ieee80211_if_fmt_num_buffered_multicast( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) @@ -207,37 +205,37 @@ IEEE80211_IF_FILE(peer, u.wds.remote_addr, MAC); #ifdef CONFIG_MAC80211_MESH /* Mesh stats attributes */ -IEEE80211_IF_FILE(fwded_frames, u.sta.mshstats.fwded_frames, DEC); -IEEE80211_IF_FILE(dropped_frames_ttl, u.sta.mshstats.dropped_frames_ttl, DEC); +IEEE80211_IF_FILE(fwded_frames, u.mesh.mshstats.fwded_frames, DEC); +IEEE80211_IF_FILE(dropped_frames_ttl, u.mesh.mshstats.dropped_frames_ttl, DEC); IEEE80211_IF_FILE(dropped_frames_no_route, - u.sta.mshstats.dropped_frames_no_route, DEC); -IEEE80211_IF_FILE(estab_plinks, u.sta.mshstats.estab_plinks, ATOMIC); + u.mesh.mshstats.dropped_frames_no_route, DEC); +IEEE80211_IF_FILE(estab_plinks, u.mesh.mshstats.estab_plinks, ATOMIC); /* Mesh parameters */ IEEE80211_IF_WFILE(dot11MeshMaxRetries, - u.sta.mshcfg.dot11MeshMaxRetries, DEC, u8); + u.mesh.mshcfg.dot11MeshMaxRetries, DEC, u8); IEEE80211_IF_WFILE(dot11MeshRetryTimeout, - u.sta.mshcfg.dot11MeshRetryTimeout, DEC, u16); + u.mesh.mshcfg.dot11MeshRetryTimeout, DEC, u16); IEEE80211_IF_WFILE(dot11MeshConfirmTimeout, - u.sta.mshcfg.dot11MeshConfirmTimeout, DEC, u16); + u.mesh.mshcfg.dot11MeshConfirmTimeout, DEC, u16); IEEE80211_IF_WFILE(dot11MeshHoldingTimeout, - u.sta.mshcfg.dot11MeshHoldingTimeout, DEC, u16); -IEEE80211_IF_WFILE(dot11MeshTTL, u.sta.mshcfg.dot11MeshTTL, DEC, u8); -IEEE80211_IF_WFILE(auto_open_plinks, u.sta.mshcfg.auto_open_plinks, DEC, u8); + u.mesh.mshcfg.dot11MeshHoldingTimeout, DEC, u16); +IEEE80211_IF_WFILE(dot11MeshTTL, u.mesh.mshcfg.dot11MeshTTL, DEC, u8); +IEEE80211_IF_WFILE(auto_open_plinks, u.mesh.mshcfg.auto_open_plinks, DEC, u8); IEEE80211_IF_WFILE(dot11MeshMaxPeerLinks, - u.sta.mshcfg.dot11MeshMaxPeerLinks, DEC, u16); + u.mesh.mshcfg.dot11MeshMaxPeerLinks, DEC, u16); IEEE80211_IF_WFILE(dot11MeshHWMPactivePathTimeout, - u.sta.mshcfg.dot11MeshHWMPactivePathTimeout, DEC, u32); + u.mesh.mshcfg.dot11MeshHWMPactivePathTimeout, DEC, u32); IEEE80211_IF_WFILE(dot11MeshHWMPpreqMinInterval, - u.sta.mshcfg.dot11MeshHWMPpreqMinInterval, DEC, u16); + u.mesh.mshcfg.dot11MeshHWMPpreqMinInterval, DEC, u16); IEEE80211_IF_WFILE(dot11MeshHWMPnetDiameterTraversalTime, - u.sta.mshcfg.dot11MeshHWMPnetDiameterTraversalTime, DEC, u16); + u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime, DEC, u16); IEEE80211_IF_WFILE(dot11MeshHWMPmaxPREQretries, - u.sta.mshcfg.dot11MeshHWMPmaxPREQretries, DEC, u8); + u.mesh.mshcfg.dot11MeshHWMPmaxPREQretries, DEC, u8); IEEE80211_IF_WFILE(path_refresh_time, - u.sta.mshcfg.path_refresh_time, DEC, u32); + u.mesh.mshcfg.path_refresh_time, DEC, u32); IEEE80211_IF_WFILE(min_discovery_timeout, - u.sta.mshcfg.min_discovery_timeout, DEC, u16); + u.mesh.mshcfg.min_discovery_timeout, DEC, u16); #endif @@ -265,7 +263,6 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(auth_alg, sta); DEBUGFS_ADD(auth_transaction, sta); DEBUGFS_ADD(flags, sta); - DEBUGFS_ADD(num_beacons_sta, sta); } static void add_ap_files(struct ieee80211_sub_if_data *sdata) @@ -276,7 +273,6 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(num_sta_ps, ap); DEBUGFS_ADD(dtim_count, ap); - DEBUGFS_ADD(num_beacons, ap); DEBUGFS_ADD(num_buffered_multicast, ap); } @@ -345,26 +341,26 @@ static void add_files(struct ieee80211_sub_if_data *sdata) return; switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_MESH_POINT: #ifdef CONFIG_MAC80211_MESH add_mesh_stats(sdata); add_mesh_config(sdata); #endif - /* fall through */ - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: add_sta_files(sdata); break; - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: add_ap_files(sdata); break; - case IEEE80211_IF_TYPE_WDS: + case NL80211_IFTYPE_WDS: add_wds_files(sdata); break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: add_monitor_files(sdata); break; - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP_VLAN: add_vlan_files(sdata); break; default: @@ -398,7 +394,6 @@ static void del_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_DEL(auth_alg, sta); DEBUGFS_DEL(auth_transaction, sta); DEBUGFS_DEL(flags, sta); - DEBUGFS_DEL(num_beacons_sta, sta); } static void del_ap_files(struct ieee80211_sub_if_data *sdata) @@ -409,7 +404,6 @@ static void del_ap_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_DEL(num_sta_ps, ap); DEBUGFS_DEL(dtim_count, ap); - DEBUGFS_DEL(num_beacons, ap); DEBUGFS_DEL(num_buffered_multicast, ap); } @@ -482,26 +476,26 @@ static void del_files(struct ieee80211_sub_if_data *sdata) return; switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_MESH_POINT: #ifdef CONFIG_MAC80211_MESH del_mesh_stats(sdata); del_mesh_config(sdata); #endif - /* fall through */ - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: del_sta_files(sdata); break; - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: del_ap_files(sdata); break; - case IEEE80211_IF_TYPE_WDS: + case NL80211_IFTYPE_WDS: del_wds_files(sdata); break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: del_monitor_files(sdata); break; - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP_VLAN: del_vlan_files(sdata); break; default: @@ -551,8 +545,12 @@ static int netdev_notify(struct notifier_block *nb, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - sprintf(buf, "netdev:%s", dev->name); dir = sdata->debugfsdir; + + if (!dir) + return 0; + + sprintf(buf, "netdev:%s", dev->name); if (!debugfs_rename(dir->d_parent, dir, dir->d_parent, buf)) printk(KERN_ERR "mac80211: debugfs: failed to rename debugfs " "dir to %s\n", buf); diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index 79a062782d52..b85c4f27b361 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -50,7 +50,7 @@ static const struct file_operations sta_ ##name## _ops = { \ STA_READ_##format(name, field) \ STA_OPS(name) -STA_FILE(aid, aid, D); +STA_FILE(aid, sta.aid, D); STA_FILE(dev, sdata->dev->name, S); STA_FILE(rx_packets, rx_packets, LU); STA_FILE(tx_packets, tx_packets, LU); @@ -173,10 +173,9 @@ static ssize_t sta_agg_status_write(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { struct sta_info *sta = file->private_data; - struct net_device *dev = sta->sdata->dev; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sta->sdata->local; struct ieee80211_hw *hw = &local->hw; - u8 *da = sta->addr; + u8 *da = sta->sta.addr; static int tid_static_tx[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; static int tid_static_rx[16] = {1, 1, 1, 1, 1, 1, 1, 1, @@ -200,8 +199,8 @@ static ssize_t sta_agg_status_write(struct file *file, /* toggle Rx aggregation command */ tid_num = tid_num - 100; if (tid_static_rx[tid_num] == 1) { - strcpy(state, "off "); - ieee80211_sta_stop_rx_ba_session(dev, da, tid_num, 0, + strcpy(state, "off"); + ieee80211_sta_stop_rx_ba_session(sta->sdata, da, tid_num, 0, WLAN_REASON_QSTA_REQUIRE_SETUP); sta->ampdu_mlme.tid_state_rx[tid_num] |= HT_AGG_STATE_DEBUGFS_CTL; @@ -250,11 +249,22 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DECLARE_MAC_BUF(mbuf); u8 *mac; + sta->debugfs.add_has_run = true; + if (!stations_dir) return; - mac = print_mac(mbuf, sta->addr); - + mac = print_mac(mbuf, sta->sta.addr); + + /* + * This might fail due to a race condition: + * When mac80211 unlinks a station, the debugfs entries + * remain, but it is already possible to link a new + * station with the same address which triggers adding + * it to debugfs; therefore, if the old station isn't + * destroyed quickly enough the old station's debugfs + * dir might still be around. + */ sta->debugfs.dir = debugfs_create_dir(mac, stations_dir); if (!sta->debugfs.dir) return; diff --git a/net/mac80211/event.c b/net/mac80211/event.c index 2280f40b4560..8de60de70bc9 100644 --- a/net/mac80211/event.c +++ b/net/mac80211/event.c @@ -8,7 +8,6 @@ * mac80211 - events */ -#include <linux/netdevice.h> #include <net/iw_handler.h> #include "ieee80211_i.h" @@ -17,7 +16,7 @@ * (in the variable hdr) must be long enough to extract the TKIP * fields like TSC */ -void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx, +void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx, struct ieee80211_hdr *hdr) { union iwreq_data wrqu; @@ -32,7 +31,7 @@ void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx, print_mac(mac, hdr->addr2)); memset(&wrqu, 0, sizeof(wrqu)); wrqu.data.length = strlen(buf); - wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf); + wireless_send_event(sdata->dev, IWEVCUSTOM, &wrqu, buf); kfree(buf); } diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c new file mode 100644 index 000000000000..dc7d9a3d70d5 --- /dev/null +++ b/net/mac80211/ht.c @@ -0,0 +1,992 @@ +/* + * HT handling + * + * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi> + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007, Michael Wu <flamingice@sourmilk.net> + * Copyright 2007-2008, Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ieee80211.h> +#include <net/wireless.h> +#include <net/mac80211.h> +#include "ieee80211_i.h" +#include "sta_info.h" +#include "wme.h" + +int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie, + struct ieee80211_ht_info *ht_info) +{ + + if (ht_info == NULL) + return -EINVAL; + + memset(ht_info, 0, sizeof(*ht_info)); + + if (ht_cap_ie) { + u8 ampdu_info = ht_cap_ie->ampdu_params_info; + + ht_info->ht_supported = 1; + ht_info->cap = le16_to_cpu(ht_cap_ie->cap_info); + ht_info->ampdu_factor = + ampdu_info & IEEE80211_HT_CAP_AMPDU_FACTOR; + ht_info->ampdu_density = + (ampdu_info & IEEE80211_HT_CAP_AMPDU_DENSITY) >> 2; + memcpy(ht_info->supp_mcs_set, ht_cap_ie->supp_mcs_set, 16); + } else + ht_info->ht_supported = 0; + + return 0; +} + +int ieee80211_ht_addt_info_ie_to_ht_bss_info( + struct ieee80211_ht_addt_info *ht_add_info_ie, + struct ieee80211_ht_bss_info *bss_info) +{ + if (bss_info == NULL) + return -EINVAL; + + memset(bss_info, 0, sizeof(*bss_info)); + + if (ht_add_info_ie) { + u16 op_mode; + op_mode = le16_to_cpu(ht_add_info_ie->operation_mode); + + bss_info->primary_channel = ht_add_info_ie->control_chan; + bss_info->bss_cap = ht_add_info_ie->ht_param; + bss_info->bss_op_mode = (u8)(op_mode & 0xff); + } + + return 0; +} + +static void ieee80211_send_addba_request(struct ieee80211_sub_if_data *sdata, + const u8 *da, u16 tid, + u8 dialog_token, u16 start_seq_num, + u16 agg_size, u16 timeout) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + u16 capab; + + skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + + if (!skb) { + printk(KERN_ERR "%s: failed to allocate buffer " + "for addba request frame\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, da, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + if (sdata->vif.type == NL80211_IFTYPE_AP) + memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); + else + memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req)); + + mgmt->u.action.category = WLAN_CATEGORY_BACK; + mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ; + + mgmt->u.action.u.addba_req.dialog_token = dialog_token; + capab = (u16)(1 << 1); /* bit 1 aggregation policy */ + capab |= (u16)(tid << 2); /* bit 5:2 TID number */ + capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggergation */ + + mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab); + + mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout); + mgmt->u.action.u.addba_req.start_seq_num = + cpu_to_le16(start_seq_num << 4); + + ieee80211_tx_skb(sdata, skb, 0); +} + +static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *da, u16 tid, + u8 dialog_token, u16 status, u16 policy, + u16 buf_size, u16 timeout) +{ + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + u16 capab; + + skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer " + "for addba resp frame\n", sdata->dev->name); + return; + } + + skb_reserve(skb, local->hw.extra_tx_headroom); + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, da, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + if (sdata->vif.type == NL80211_IFTYPE_AP) + memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); + else + memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp)); + mgmt->u.action.category = WLAN_CATEGORY_BACK; + mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP; + mgmt->u.action.u.addba_resp.dialog_token = dialog_token; + + capab = (u16)(policy << 1); /* bit 1 aggregation policy */ + capab |= (u16)(tid << 2); /* bit 5:2 TID number */ + capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */ + + mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab); + mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); + mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); + + ieee80211_tx_skb(sdata, skb, 0); +} + +static void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, + const u8 *da, u16 tid, + u16 initiator, u16 reason_code) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + u16 params; + + skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + + if (!skb) { + printk(KERN_ERR "%s: failed to allocate buffer " + "for delba frame\n", sdata->dev->name); + return; + } + + skb_reserve(skb, local->hw.extra_tx_headroom); + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); + memset(mgmt, 0, 24); + memcpy(mgmt->da, da, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + if (sdata->vif.type == NL80211_IFTYPE_AP) + memcpy(mgmt->bssid, sdata->dev->dev_addr, ETH_ALEN); + else + memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba)); + + mgmt->u.action.category = WLAN_CATEGORY_BACK; + mgmt->u.action.u.delba.action_code = WLAN_ACTION_DELBA; + params = (u16)(initiator << 11); /* bit 11 initiator */ + params |= (u16)(tid << 12); /* bit 15:12 TID number */ + + mgmt->u.action.u.delba.params = cpu_to_le16(params); + mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code); + + ieee80211_tx_skb(sdata, skb, 0); +} + +void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn) +{ + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_bar *bar; + u16 bar_control = 0; + + skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom); + if (!skb) { + printk(KERN_ERR "%s: failed to allocate buffer for " + "bar frame\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar)); + memset(bar, 0, sizeof(*bar)); + bar->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL | + IEEE80211_STYPE_BACK_REQ); + memcpy(bar->ra, ra, ETH_ALEN); + memcpy(bar->ta, sdata->dev->dev_addr, ETH_ALEN); + bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL; + bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA; + bar_control |= (u16)(tid << 12); + bar->control = cpu_to_le16(bar_control); + bar->start_seq_num = cpu_to_le16(ssn); + + ieee80211_tx_skb(sdata, skb, 0); +} + +void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, + u16 initiator, u16 reason) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_hw *hw = &local->hw; + struct sta_info *sta; + int ret, i; + DECLARE_MAC_BUF(mac); + + rcu_read_lock(); + + sta = sta_info_get(local, ra); + if (!sta) { + rcu_read_unlock(); + return; + } + + /* check if TID is in operational state */ + spin_lock_bh(&sta->lock); + if (sta->ampdu_mlme.tid_state_rx[tid] + != HT_AGG_STATE_OPERATIONAL) { + spin_unlock_bh(&sta->lock); + rcu_read_unlock(); + return; + } + sta->ampdu_mlme.tid_state_rx[tid] = + HT_AGG_STATE_REQ_STOP_BA_MSK | + (initiator << HT_AGG_STATE_INITIATOR_SHIFT); + spin_unlock_bh(&sta->lock); + + /* stop HW Rx aggregation. ampdu_action existence + * already verified in session init so we add the BUG_ON */ + BUG_ON(!local->ops->ampdu_action); + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Rx BA session stop requested for %s tid %u\n", + print_mac(mac, ra), tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP, + &sta->sta, tid, NULL); + if (ret) + printk(KERN_DEBUG "HW problem - can not stop rx " + "aggregation for tid %d\n", tid); + + /* shutdown timer has not expired */ + if (initiator != WLAN_BACK_TIMER) + del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer); + + /* check if this is a self generated aggregation halt */ + if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER) + ieee80211_send_delba(sdata, ra, tid, 0, reason); + + /* free the reordering buffer */ + for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) { + if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) { + /* release the reordered frames */ + dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]); + sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--; + sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL; + } + } + /* free resources */ + kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf); + kfree(sta->ampdu_mlme.tid_rx[tid]); + sta->ampdu_mlme.tid_rx[tid] = NULL; + sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE; + + rcu_read_unlock(); +} + + +/* + * After sending add Block Ack request we activated a timer until + * add Block Ack response will arrive from the recipient. + * If this timer expires sta_addba_resp_timer_expired will be executed. + */ +static void sta_addba_resp_timer_expired(unsigned long data) +{ + /* not an elegant detour, but there is no choice as the timer passes + * only one argument, and both sta_info and TID are needed, so init + * flow in sta_info_create gives the TID as data, while the timer_to_id + * array gives the sta through container_of */ + u16 tid = *(u8 *)data; + struct sta_info *temp_sta = container_of((void *)data, + struct sta_info, timer_to_tid[tid]); + + struct ieee80211_local *local = temp_sta->local; + struct ieee80211_hw *hw = &local->hw; + struct sta_info *sta; + u8 *state; + + rcu_read_lock(); + + sta = sta_info_get(local, temp_sta->sta.addr); + if (!sta) { + rcu_read_unlock(); + return; + } + + state = &sta->ampdu_mlme.tid_state_tx[tid]; + /* check if the TID waits for addBA response */ + spin_lock_bh(&sta->lock); + if (!(*state & HT_ADDBA_REQUESTED_MSK)) { + spin_unlock_bh(&sta->lock); + *state = HT_AGG_STATE_IDLE; +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "timer expired on tid %d but we are not " + "expecting addBA response there", tid); +#endif + goto timer_expired_exit; + } + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid); +#endif + + /* go through the state check in stop_BA_session */ + *state = HT_AGG_STATE_OPERATIONAL; + spin_unlock_bh(&sta->lock); + ieee80211_stop_tx_ba_session(hw, temp_sta->sta.addr, tid, + WLAN_BACK_INITIATOR); + +timer_expired_exit: + rcu_read_unlock(); +} + +void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr) +{ + struct ieee80211_local *local = sdata->local; + int i; + + for (i = 0; i < STA_TID_NUM; i++) { + ieee80211_stop_tx_ba_session(&local->hw, addr, i, + WLAN_BACK_INITIATOR); + ieee80211_sta_stop_rx_ba_session(sdata, addr, i, + WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_LEAVE_QBSS); + } +} + +int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sta_info *sta; + struct ieee80211_sub_if_data *sdata; + u16 start_seq_num; + u8 *state; + int ret; + DECLARE_MAC_BUF(mac); + + if (tid >= STA_TID_NUM) + return -EINVAL; + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Open BA session requested for %s tid %u\n", + print_mac(mac, ra), tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + rcu_read_lock(); + + sta = sta_info_get(local, ra); + if (!sta) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Could not find the station\n"); +#endif + ret = -ENOENT; + goto exit; + } + + spin_lock_bh(&sta->lock); + + /* we have tried too many times, receiver does not want A-MPDU */ + if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { + ret = -EBUSY; + goto err_unlock_sta; + } + + state = &sta->ampdu_mlme.tid_state_tx[tid]; + /* check if the TID is not in aggregation flow already */ + if (*state != HT_AGG_STATE_IDLE) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "BA request denied - session is not " + "idle on tid %u\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + ret = -EAGAIN; + goto err_unlock_sta; + } + + /* prepare A-MPDU MLME for Tx aggregation */ + sta->ampdu_mlme.tid_tx[tid] = + kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); + if (!sta->ampdu_mlme.tid_tx[tid]) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_ERR "allocate tx mlme to tid %d failed\n", + tid); +#endif + ret = -ENOMEM; + goto err_unlock_sta; + } + /* Tx timer */ + sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function = + sta_addba_resp_timer_expired; + sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data = + (unsigned long)&sta->timer_to_tid[tid]; + init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); + + /* create a new queue for this aggregation */ + ret = ieee80211_ht_agg_queue_add(local, sta, tid); + + /* case no queue is available to aggregation + * don't switch to aggregation */ + if (ret) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "BA request denied - queue unavailable for" + " tid %d\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + goto err_unlock_queue; + } + sdata = sta->sdata; + + /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the + * call back right away, it must see that the flow has begun */ + *state |= HT_ADDBA_REQUESTED_MSK; + + /* This is slightly racy because the queue isn't stopped */ + start_seq_num = sta->tid_seq[tid]; + + if (local->ops->ampdu_action) + ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START, + &sta->sta, tid, &start_seq_num); + + if (ret) { + /* No need to requeue the packets in the agg queue, since we + * held the tx lock: no packet could be enqueued to the newly + * allocated queue */ + ieee80211_ht_agg_queue_remove(local, sta, tid, 0); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "BA request denied - HW unavailable for" + " tid %d\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + *state = HT_AGG_STATE_IDLE; + goto err_unlock_queue; + } + + /* Will put all the packets in the new SW queue */ + ieee80211_requeue(local, ieee802_1d_to_ac[tid]); + spin_unlock_bh(&sta->lock); + + /* send an addBA request */ + sta->ampdu_mlme.dialog_token_allocator++; + sta->ampdu_mlme.tid_tx[tid]->dialog_token = + sta->ampdu_mlme.dialog_token_allocator; + sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; + + + ieee80211_send_addba_request(sta->sdata, ra, tid, + sta->ampdu_mlme.tid_tx[tid]->dialog_token, + sta->ampdu_mlme.tid_tx[tid]->ssn, + 0x40, 5000); + /* activate the timer for the recipient's addBA response */ + sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires = + jiffies + ADDBA_RESP_INTERVAL; + add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); +#endif + goto exit; + +err_unlock_queue: + kfree(sta->ampdu_mlme.tid_tx[tid]); + sta->ampdu_mlme.tid_tx[tid] = NULL; + ret = -EBUSY; +err_unlock_sta: + spin_unlock_bh(&sta->lock); +exit: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL(ieee80211_start_tx_ba_session); + +int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, + u8 *ra, u16 tid, + enum ieee80211_back_parties initiator) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sta_info *sta; + u8 *state; + int ret = 0; + DECLARE_MAC_BUF(mac); + + if (tid >= STA_TID_NUM) + return -EINVAL; + + rcu_read_lock(); + sta = sta_info_get(local, ra); + if (!sta) { + rcu_read_unlock(); + return -ENOENT; + } + + /* check if the TID is in aggregation */ + state = &sta->ampdu_mlme.tid_state_tx[tid]; + spin_lock_bh(&sta->lock); + + if (*state != HT_AGG_STATE_OPERATIONAL) { + ret = -ENOENT; + goto stop_BA_exit; + } + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Tx BA session stop requested for %s tid %u\n", + print_mac(mac, ra), tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]); + + *state = HT_AGG_STATE_REQ_STOP_BA_MSK | + (initiator << HT_AGG_STATE_INITIATOR_SHIFT); + + if (local->ops->ampdu_action) + ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_STOP, + &sta->sta, tid, NULL); + + /* case HW denied going back to legacy */ + if (ret) { + WARN_ON(ret != -EBUSY); + *state = HT_AGG_STATE_OPERATIONAL; + ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + goto stop_BA_exit; + } + +stop_BA_exit: + spin_unlock_bh(&sta->lock); + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); + +void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sta_info *sta; + u8 *state; + DECLARE_MAC_BUF(mac); + + if (tid >= STA_TID_NUM) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", + tid, STA_TID_NUM); +#endif + return; + } + + rcu_read_lock(); + sta = sta_info_get(local, ra); + if (!sta) { + rcu_read_unlock(); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Could not find station: %s\n", + print_mac(mac, ra)); +#endif + return; + } + + state = &sta->ampdu_mlme.tid_state_tx[tid]; + spin_lock_bh(&sta->lock); + + if (!(*state & HT_ADDBA_REQUESTED_MSK)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "addBA was not requested yet, state is %d\n", + *state); +#endif + spin_unlock_bh(&sta->lock); + rcu_read_unlock(); + return; + } + + WARN_ON_ONCE(*state & HT_ADDBA_DRV_READY_MSK); + + *state |= HT_ADDBA_DRV_READY_MSK; + + if (*state == HT_AGG_STATE_OPERATIONAL) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); +#endif + ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + } + spin_unlock_bh(&sta->lock); + rcu_read_unlock(); +} +EXPORT_SYMBOL(ieee80211_start_tx_ba_cb); + +void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct sta_info *sta; + u8 *state; + int agg_queue; + DECLARE_MAC_BUF(mac); + + if (tid >= STA_TID_NUM) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", + tid, STA_TID_NUM); +#endif + return; + } + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Stopping Tx BA session for %s tid %d\n", + print_mac(mac, ra), tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + rcu_read_lock(); + sta = sta_info_get(local, ra); + if (!sta) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Could not find station: %s\n", + print_mac(mac, ra)); +#endif + rcu_read_unlock(); + return; + } + state = &sta->ampdu_mlme.tid_state_tx[tid]; + + /* NOTE: no need to use sta->lock in this state check, as + * ieee80211_stop_tx_ba_session will let only one stop call to + * pass through per sta/tid + */ + if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) { +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n"); +#endif + rcu_read_unlock(); + return; + } + + if (*state & HT_AGG_STATE_INITIATOR_MSK) + ieee80211_send_delba(sta->sdata, ra, tid, + WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); + + agg_queue = sta->tid_to_tx_q[tid]; + + ieee80211_ht_agg_queue_remove(local, sta, tid, 1); + + /* We just requeued the all the frames that were in the + * removed queue, and since we might miss a softirq we do + * netif_schedule_queue. ieee80211_wake_queue is not used + * here as this queue is not necessarily stopped + */ + netif_schedule_queue(netdev_get_tx_queue(local->mdev, agg_queue)); + spin_lock_bh(&sta->lock); + *state = HT_AGG_STATE_IDLE; + sta->ampdu_mlme.addba_req_num[tid] = 0; + kfree(sta->ampdu_mlme.tid_tx[tid]); + sta->ampdu_mlme.tid_tx[tid] = NULL; + spin_unlock_bh(&sta->lock); + + rcu_read_unlock(); +} +EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb); + +void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, + const u8 *ra, u16 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_ra_tid *ra_tid; + struct sk_buff *skb = dev_alloc_skb(0); + + if (unlikely(!skb)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_WARNING "%s: Not enough memory, " + "dropping start BA session", skb->dev->name); +#endif + return; + } + ra_tid = (struct ieee80211_ra_tid *) &skb->cb; + memcpy(&ra_tid->ra, ra, ETH_ALEN); + ra_tid->tid = tid; + + skb->pkt_type = IEEE80211_ADDBA_MSG; + skb_queue_tail(&local->skb_queue, skb); + tasklet_schedule(&local->tasklet); +} +EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); + +void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, + const u8 *ra, u16 tid) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_ra_tid *ra_tid; + struct sk_buff *skb = dev_alloc_skb(0); + + if (unlikely(!skb)) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_WARNING "%s: Not enough memory, " + "dropping stop BA session", skb->dev->name); +#endif + return; + } + ra_tid = (struct ieee80211_ra_tid *) &skb->cb; + memcpy(&ra_tid->ra, ra, ETH_ALEN); + ra_tid->tid = tid; + + skb->pkt_type = IEEE80211_DELBA_MSG; + skb_queue_tail(&local->skb_queue, skb); + tasklet_schedule(&local->tasklet); +} +EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe); + +/* + * After accepting the AddBA Request we activated a timer, + * resetting it after each frame that arrives from the originator. + * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed. + */ +static void sta_rx_agg_session_timer_expired(unsigned long data) +{ + /* not an elegant detour, but there is no choice as the timer passes + * only one argument, and various sta_info are needed here, so init + * flow in sta_info_create gives the TID as data, while the timer_to_id + * array gives the sta through container_of */ + u8 *ptid = (u8 *)data; + u8 *timer_to_id = ptid - *ptid; + struct sta_info *sta = container_of(timer_to_id, struct sta_info, + timer_to_tid[0]); + +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); +#endif + ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, + (u16)*ptid, WLAN_BACK_TIMER, + WLAN_REASON_QSTA_TIMEOUT); +} + +void ieee80211_process_addba_request(struct ieee80211_local *local, + struct sta_info *sta, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + struct ieee80211_hw *hw = &local->hw; + struct ieee80211_conf *conf = &hw->conf; + struct tid_ampdu_rx *tid_agg_rx; + u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status; + u8 dialog_token; + int ret = -EOPNOTSUPP; + DECLARE_MAC_BUF(mac); + + /* extract session parameters from addba request frame */ + dialog_token = mgmt->u.action.u.addba_req.dialog_token; + timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout); + start_seq_num = + le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4; + + capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); + ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1; + tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; + buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; + + status = WLAN_STATUS_REQUEST_DECLINED; + + /* sanity check for incoming parameters: + * check if configuration can support the BA policy + * and if buffer size does not exceeds max value */ + if (((ba_policy != 1) + && (!(conf->ht_conf.cap & IEEE80211_HT_CAP_DELAY_BA))) + || (buf_size > IEEE80211_MAX_AMPDU_BUF)) { + status = WLAN_STATUS_INVALID_QOS_PARAM; +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_DEBUG "AddBA Req with bad params from " + "%s on tid %u. policy %d, buffer size %d\n", + print_mac(mac, mgmt->sa), tid, ba_policy, + buf_size); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + goto end_no_lock; + } + /* determine default buffer size */ + if (buf_size == 0) { + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[conf->channel->band]; + buf_size = IEEE80211_MIN_AMPDU_BUF; + buf_size = buf_size << sband->ht_info.ampdu_factor; + } + + + /* examine state machine */ + spin_lock_bh(&sta->lock); + + if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_DEBUG "unexpected AddBA Req from " + "%s on tid %u\n", + print_mac(mac, mgmt->sa), tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + goto end; + } + + /* prepare A-MPDU MLME for Rx aggregation */ + sta->ampdu_mlme.tid_rx[tid] = + kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC); + if (!sta->ampdu_mlme.tid_rx[tid]) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_ERR "allocate rx mlme to tid %d failed\n", + tid); +#endif + goto end; + } + /* rx timer */ + sta->ampdu_mlme.tid_rx[tid]->session_timer.function = + sta_rx_agg_session_timer_expired; + sta->ampdu_mlme.tid_rx[tid]->session_timer.data = + (unsigned long)&sta->timer_to_tid[tid]; + init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer); + + tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; + + /* prepare reordering buffer */ + tid_agg_rx->reorder_buf = + kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC); + if (!tid_agg_rx->reorder_buf) { +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_ERR "can not allocate reordering buffer " + "to tid %d\n", tid); +#endif + kfree(sta->ampdu_mlme.tid_rx[tid]); + goto end; + } + memset(tid_agg_rx->reorder_buf, 0, + buf_size * sizeof(struct sk_buff *)); + + if (local->ops->ampdu_action) + ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START, + &sta->sta, tid, &start_seq_num); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + if (ret) { + kfree(tid_agg_rx->reorder_buf); + kfree(tid_agg_rx); + sta->ampdu_mlme.tid_rx[tid] = NULL; + goto end; + } + + /* change state and send addba resp */ + sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL; + tid_agg_rx->dialog_token = dialog_token; + tid_agg_rx->ssn = start_seq_num; + tid_agg_rx->head_seq_num = start_seq_num; + tid_agg_rx->buf_size = buf_size; + tid_agg_rx->timeout = timeout; + tid_agg_rx->stored_mpdu_num = 0; + status = WLAN_STATUS_SUCCESS; +end: + spin_unlock_bh(&sta->lock); + +end_no_lock: + ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid, + dialog_token, status, 1, buf_size, timeout); +} + +void ieee80211_process_addba_resp(struct ieee80211_local *local, + struct sta_info *sta, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + struct ieee80211_hw *hw = &local->hw; + u16 capab; + u16 tid; + u8 *state; + + capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); + tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; + + state = &sta->ampdu_mlme.tid_state_tx[tid]; + + spin_lock_bh(&sta->lock); + + if (!(*state & HT_ADDBA_REQUESTED_MSK)) { + spin_unlock_bh(&sta->lock); + return; + } + + if (mgmt->u.action.u.addba_resp.dialog_token != + sta->ampdu_mlme.tid_tx[tid]->dialog_token) { + spin_unlock_bh(&sta->lock); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + return; + } + + del_timer_sync(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); +#ifdef CONFIG_MAC80211_HT_DEBUG + printk(KERN_DEBUG "switched off addBA timer for tid %d \n", tid); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) + == WLAN_STATUS_SUCCESS) { + *state |= HT_ADDBA_RECEIVED_MSK; + sta->ampdu_mlme.addba_req_num[tid] = 0; + + if (*state == HT_AGG_STATE_OPERATIONAL) + ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + + spin_unlock_bh(&sta->lock); + } else { + sta->ampdu_mlme.addba_req_num[tid]++; + /* this will allow the state check in stop_BA_session */ + *state = HT_AGG_STATE_OPERATIONAL; + spin_unlock_bh(&sta->lock); + ieee80211_stop_tx_ba_session(hw, sta->sta.addr, tid, + WLAN_BACK_INITIATOR); + } +} + +void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_mgmt *mgmt, size_t len) +{ + struct ieee80211_local *local = sdata->local; + u16 tid, params; + u16 initiator; + DECLARE_MAC_BUF(mac); + + params = le16_to_cpu(mgmt->u.action.u.delba.params); + tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12; + initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11; + +#ifdef CONFIG_MAC80211_HT_DEBUG + if (net_ratelimit()) + printk(KERN_DEBUG "delba from %s (%s) tid %d reason code %d\n", + print_mac(mac, mgmt->sa), + initiator ? "initiator" : "recipient", tid, + mgmt->u.action.u.delba.reason_code); +#endif /* CONFIG_MAC80211_HT_DEBUG */ + + if (initiator == WLAN_BACK_INITIATOR) + ieee80211_sta_stop_rx_ba_session(sdata, sta->sta.addr, tid, + WLAN_BACK_INITIATOR, 0); + else { /* WLAN_BACK_RECIPIENT */ + spin_lock_bh(&sta->lock); + sta->ampdu_mlme.tid_state_tx[tid] = + HT_AGG_STATE_OPERATIONAL; + spin_unlock_bh(&sta->lock); + ieee80211_stop_tx_ba_session(&local->hw, sta->sta.addr, tid, + WLAN_BACK_RECIPIENT); + } +} diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 4498d8713652..156e42a003ae 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -29,17 +29,6 @@ #include "key.h" #include "sta_info.h" -/* ieee80211.o internal definitions, etc. These are not included into - * low-level drivers. */ - -#ifndef ETH_P_PAE -#define ETH_P_PAE 0x888E /* Port Access Entity (IEEE 802.1X) */ -#endif /* ETH_P_PAE */ - -#define WLAN_FC_DATA_PRESENT(fc) (((fc) & 0x4c) == 0x08) - -#define IEEE80211_FC(type, subtype) cpu_to_le16(type | subtype) - struct ieee80211_local; /* Maximum number of broadcast/multicast frames to buffer when some of the @@ -61,6 +50,12 @@ struct ieee80211_local; * increased memory use (about 2 kB of RAM per entry). */ #define IEEE80211_FRAGMENT_MAX 4 +/* + * Time after which we ignore scan results and no longer report/use + * them in any way. + */ +#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ) + struct ieee80211_fragment_entry { unsigned long first_frag_time; unsigned int seq; @@ -73,9 +68,9 @@ struct ieee80211_fragment_entry { }; -struct ieee80211_sta_bss { +struct ieee80211_bss { struct list_head list; - struct ieee80211_sta_bss *hnext; + struct ieee80211_bss *hnext; size_t ssid_len; atomic_t users; @@ -87,16 +82,11 @@ struct ieee80211_sta_bss { enum ieee80211_band band; int freq; int signal, noise, qual; - u8 *wpa_ie; - size_t wpa_ie_len; - u8 *rsn_ie; - size_t rsn_ie_len; - u8 *wmm_ie; - size_t wmm_ie_len; - u8 *ht_ie; - size_t ht_ie_len; - u8 *ht_add_ie; - size_t ht_add_ie_len; + u8 *ies; /* all information elements from the last Beacon or Probe + * Response frames; note Beacon frame is not allowed to + * override values from Probe Response */ + size_t ies_len; + bool wmm_used; #ifdef CONFIG_MAC80211_MESH u8 *mesh_id; size_t mesh_id_len; @@ -108,7 +98,7 @@ struct ieee80211_sta_bss { u64 timestamp; int beacon_int; - bool probe_resp; + unsigned long last_probe_resp; unsigned long last_update; /* during assocation, we save an ERP value from a probe response so @@ -119,7 +109,7 @@ struct ieee80211_sta_bss { u8 erp_value; }; -static inline u8 *bss_mesh_cfg(struct ieee80211_sta_bss *bss) +static inline u8 *bss_mesh_cfg(struct ieee80211_bss *bss) { #ifdef CONFIG_MAC80211_MESH return bss->mesh_cfg; @@ -127,7 +117,7 @@ static inline u8 *bss_mesh_cfg(struct ieee80211_sta_bss *bss) return NULL; } -static inline u8 *bss_mesh_id(struct ieee80211_sta_bss *bss) +static inline u8 *bss_mesh_id(struct ieee80211_bss *bss) { #ifdef CONFIG_MAC80211_MESH return bss->mesh_id; @@ -135,7 +125,7 @@ static inline u8 *bss_mesh_id(struct ieee80211_sta_bss *bss) return NULL; } -static inline u8 bss_mesh_id_len(struct ieee80211_sta_bss *bss) +static inline u8 bss_mesh_id_len(struct ieee80211_bss *bss) { #ifdef CONFIG_MAC80211_MESH return bss->mesh_id_len; @@ -174,7 +164,7 @@ struct ieee80211_tx_data { struct sk_buff **extra_frag; int num_extra_frag; - u16 fc, ethertype; + u16 ethertype; unsigned int flags; }; @@ -202,7 +192,7 @@ struct ieee80211_rx_data { struct ieee80211_rx_status *status; struct ieee80211_rate *rate; - u16 fc, ethertype; + u16 ethertype; unsigned int flags; int sent_ps_buffered; int queue; @@ -239,7 +229,6 @@ struct ieee80211_if_ap { struct sk_buff_head ps_bc_buf; atomic_t num_sta_ps; /* number of stations in PS mode */ int dtim_count; - int num_beacons; /* number of TXed beacon frames for this BSS */ }; struct ieee80211_if_wds { @@ -300,48 +289,37 @@ struct mesh_config { #define IEEE80211_STA_AUTO_BSSID_SEL BIT(11) #define IEEE80211_STA_AUTO_CHANNEL_SEL BIT(12) #define IEEE80211_STA_PRIVACY_INVOKED BIT(13) +/* flags for MLME request */ +#define IEEE80211_STA_REQ_SCAN 0 +#define IEEE80211_STA_REQ_DIRECT_PROBE 1 +#define IEEE80211_STA_REQ_AUTH 2 +#define IEEE80211_STA_REQ_RUN 3 + +/* STA/IBSS MLME states */ +enum ieee80211_sta_mlme_state { + IEEE80211_STA_MLME_DISABLED, + IEEE80211_STA_MLME_DIRECT_PROBE, + IEEE80211_STA_MLME_AUTHENTICATE, + IEEE80211_STA_MLME_ASSOCIATE, + IEEE80211_STA_MLME_ASSOCIATED, + IEEE80211_STA_MLME_IBSS_SEARCH, + IEEE80211_STA_MLME_IBSS_JOINED, +}; + +/* bitfield of allowed auth algs */ +#define IEEE80211_AUTH_ALG_OPEN BIT(0) +#define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1) +#define IEEE80211_AUTH_ALG_LEAP BIT(2) + struct ieee80211_if_sta { struct timer_list timer; struct work_struct work; u8 bssid[ETH_ALEN], prev_bssid[ETH_ALEN]; u8 ssid[IEEE80211_MAX_SSID_LEN]; - enum { - IEEE80211_DISABLED, IEEE80211_AUTHENTICATE, - IEEE80211_ASSOCIATE, IEEE80211_ASSOCIATED, - IEEE80211_IBSS_SEARCH, IEEE80211_IBSS_JOINED, - IEEE80211_MESH_UP - } state; + enum ieee80211_sta_mlme_state state; size_t ssid_len; u8 scan_ssid[IEEE80211_MAX_SSID_LEN]; size_t scan_ssid_len; -#ifdef CONFIG_MAC80211_MESH - struct timer_list mesh_path_timer; - u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; - size_t mesh_id_len; - /* Active Path Selection Protocol Identifier */ - u8 mesh_pp_id[4]; - /* Active Path Selection Metric Identifier */ - u8 mesh_pm_id[4]; - /* Congestion Control Mode Identifier */ - u8 mesh_cc_id[4]; - /* Local mesh Destination Sequence Number */ - u32 dsn; - /* Last used PREQ ID */ - u32 preq_id; - atomic_t mpaths; - /* Timestamp of last DSN update */ - unsigned long last_dsn_update; - /* Timestamp of last DSN sent */ - unsigned long last_preq; - struct mesh_rmc *rmc; - spinlock_t mesh_preq_queue_lock; - struct mesh_preq_queue preq_queue; - int preq_queue_len; - struct mesh_stats mshstats; - struct mesh_config mshcfg; - u32 mesh_seqnum; - bool accepting_plinks; -#endif u16 aid; u16 ap_capab, capab; u8 *extra_ie; /* to be added to the end of AssocReq */ @@ -353,20 +331,17 @@ struct ieee80211_if_sta { struct sk_buff_head skb_queue; - int auth_tries, assoc_tries; + int assoc_scan_tries; /* number of scans done pre-association */ + int direct_probe_tries; /* retries for direct probes */ + int auth_tries; /* retries for auth req */ + int assoc_tries; /* retries for assoc req */ unsigned long request; unsigned long last_probe; unsigned int flags; -#define IEEE80211_STA_REQ_SCAN 0 -#define IEEE80211_STA_REQ_AUTH 1 -#define IEEE80211_STA_REQ_RUN 2 -#define IEEE80211_AUTH_ALG_OPEN BIT(0) -#define IEEE80211_AUTH_ALG_SHARED_KEY BIT(1) -#define IEEE80211_AUTH_ALG_LEAP BIT(2) unsigned int auth_algs; /* bitfield of allowed auth algs */ int auth_alg; /* currently used IEEE 802.11 authentication algorithm */ int auth_transaction; @@ -376,31 +351,70 @@ struct ieee80211_if_sta { u32 supp_rates_bits[IEEE80211_NUM_BANDS]; int wmm_last_param_set; - int num_beacons; /* number of TXed beacon frames by this STA */ }; -static inline void ieee80211_if_sta_set_mesh_id(struct ieee80211_if_sta *ifsta, - u8 mesh_id_len, u8 *mesh_id) -{ -#ifdef CONFIG_MAC80211_MESH - ifsta->mesh_id_len = mesh_id_len; - memcpy(ifsta->mesh_id, mesh_id, mesh_id_len); -#endif -} +struct ieee80211_if_mesh { + struct work_struct work; + struct timer_list housekeeping_timer; + struct timer_list mesh_path_timer; + struct sk_buff_head skb_queue; + + bool housekeeping; + + u8 mesh_id[IEEE80211_MAX_MESH_ID_LEN]; + size_t mesh_id_len; + /* Active Path Selection Protocol Identifier */ + u8 mesh_pp_id[4]; + /* Active Path Selection Metric Identifier */ + u8 mesh_pm_id[4]; + /* Congestion Control Mode Identifier */ + u8 mesh_cc_id[4]; + /* Local mesh Destination Sequence Number */ + u32 dsn; + /* Last used PREQ ID */ + u32 preq_id; + atomic_t mpaths; + /* Timestamp of last DSN update */ + unsigned long last_dsn_update; + /* Timestamp of last DSN sent */ + unsigned long last_preq; + struct mesh_rmc *rmc; + spinlock_t mesh_preq_queue_lock; + struct mesh_preq_queue preq_queue; + int preq_queue_len; + struct mesh_stats mshstats; + struct mesh_config mshcfg; + u32 mesh_seqnum; + bool accepting_plinks; +}; #ifdef CONFIG_MAC80211_MESH -#define IEEE80211_IFSTA_MESH_CTR_INC(sta, name) \ - do { (sta)->mshstats.name++; } while (0) +#define IEEE80211_IFSTA_MESH_CTR_INC(msh, name) \ + do { (msh)->mshstats.name++; } while (0) #else -#define IEEE80211_IFSTA_MESH_CTR_INC(sta, name) \ +#define IEEE80211_IFSTA_MESH_CTR_INC(msh, name) \ do { } while (0) #endif -/* flags used in struct ieee80211_sub_if_data.flags */ -#define IEEE80211_SDATA_ALLMULTI BIT(0) -#define IEEE80211_SDATA_PROMISC BIT(1) -#define IEEE80211_SDATA_USERSPACE_MLME BIT(2) -#define IEEE80211_SDATA_OPERATING_GMODE BIT(3) +/** + * enum ieee80211_sub_if_data_flags - virtual interface flags + * + * @IEEE80211_SDATA_ALLMULTI: interface wants all multicast packets + * @IEEE80211_SDATA_PROMISC: interface is promisc + * @IEEE80211_SDATA_USERSPACE_MLME: userspace MLME is active + * @IEEE80211_SDATA_OPERATING_GMODE: operating in G-only mode + * @IEEE80211_SDATA_DONT_BRIDGE_PACKETS: bridge packets between + * associated stations and deliver multicast frames both + * back to wireless media and to the local net stack. + */ +enum ieee80211_sub_if_data_flags { + IEEE80211_SDATA_ALLMULTI = BIT(0), + IEEE80211_SDATA_PROMISC = BIT(1), + IEEE80211_SDATA_USERSPACE_MLME = BIT(2), + IEEE80211_SDATA_OPERATING_GMODE = BIT(3), + IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(4), +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -416,11 +430,6 @@ struct ieee80211_sub_if_data { int drop_unencrypted; - /* - * basic rates of this AP or the AP we're associated to - */ - u64 basic_rates; - /* Fragment table for host-based reassembly */ struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; unsigned int fragment_next; @@ -447,6 +456,9 @@ struct ieee80211_sub_if_data { struct ieee80211_if_wds wds; struct ieee80211_if_vlan vlan; struct ieee80211_if_sta sta; +#ifdef CONFIG_MAC80211_MESH + struct ieee80211_if_mesh mesh; +#endif u32 mntr_flags; } u; @@ -469,7 +481,6 @@ struct ieee80211_sub_if_data { struct dentry *auth_alg; struct dentry *auth_transaction; struct dentry *flags; - struct dentry *num_beacons_sta; struct dentry *force_unicast_rateidx; struct dentry *max_ratectrl_rateidx; } sta; @@ -477,7 +488,6 @@ struct ieee80211_sub_if_data { struct dentry *drop_unencrypted; struct dentry *num_sta_ps; struct dentry *dtim_count; - struct dentry *num_beacons; struct dentry *force_unicast_rateidx; struct dentry *max_ratectrl_rateidx; struct dentry *num_buffered_multicast; @@ -540,6 +550,19 @@ struct ieee80211_sub_if_data *vif_to_sdata(struct ieee80211_vif *p) return container_of(p, struct ieee80211_sub_if_data, vif); } +static inline void +ieee80211_sdata_set_mesh_id(struct ieee80211_sub_if_data *sdata, + u8 mesh_id_len, u8 *mesh_id) +{ +#ifdef CONFIG_MAC80211_MESH + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + ifmsh->mesh_id_len = mesh_id_len; + memcpy(ifmsh->mesh_id, mesh_id, mesh_id_len); +#else + WARN_ON(1); +#endif +} + enum { IEEE80211_RX_MSG = 1, IEEE80211_TX_STATUS_MSG = 2, @@ -550,6 +573,10 @@ enum { /* maximum number of hardware queues we support. */ #define QD_MAX_QUEUES (IEEE80211_MAX_AMPDU_QUEUES + IEEE80211_MAX_QUEUES) +struct ieee80211_master_priv { + struct ieee80211_local *local; +}; + struct ieee80211_local { /* embed the driver visible part. * don't cast (use the static inlines below), but we keep @@ -613,10 +640,6 @@ struct ieee80211_local { struct crypto_blkcipher *wep_rx_tfm; u32 wep_iv; - int bridge_packets; /* bridge packets between associated stations and - * deliver multicast frames both back to wireless - * media and to the local net stack */ - struct list_head interfaces; /* @@ -626,21 +649,21 @@ struct ieee80211_local { spinlock_t key_lock; - bool sta_sw_scanning; - bool sta_hw_scanning; + /* Scanning and BSS list */ + bool sw_scanning, hw_scanning; int scan_channel_idx; enum ieee80211_band scan_band; enum { SCAN_SET_CHANNEL, SCAN_SEND_PROBE } scan_state; unsigned long last_scan_completed; struct delayed_work scan_work; - struct net_device *scan_dev; + struct ieee80211_sub_if_data *scan_sdata; struct ieee80211_channel *oper_channel, *scan_channel; u8 scan_ssid[IEEE80211_MAX_SSID_LEN]; size_t scan_ssid_len; - struct list_head sta_bss_list; - struct ieee80211_sta_bss *sta_bss_hash[STA_HASH_SIZE]; - spinlock_t sta_bss_lock; + struct list_head bss_list; + struct ieee80211_bss *bss_hash[STA_HASH_SIZE]; + spinlock_t bss_lock; /* SNMP counters */ /* dot11CountersTable */ @@ -701,10 +724,11 @@ struct ieee80211_local { #ifdef CONFIG_MAC80211_DEBUGFS struct local_debugfsdentries { + struct dentry *rcdir; + struct dentry *rcname; struct dentry *frequency; struct dentry *antenna_sel_tx; struct dentry *antenna_sel_rx; - struct dentry *bridge_packets; struct dentry *rts_threshold; struct dentry *fragmentation_threshold; struct dentry *short_retry_limit; @@ -774,6 +798,9 @@ struct ieee80211_ra_tid { /* Parsed Information Elements */ struct ieee802_11_elems { + u8 *ie_start; + size_t total_len; + /* pointers to IEs */ u8 *ssid; u8 *supp_rates; @@ -789,8 +816,8 @@ struct ieee802_11_elems { u8 *ext_supp_rates; u8 *wmm_info; u8 *wmm_param; - u8 *ht_cap_elem; - u8 *ht_info_elem; + struct ieee80211_ht_cap *ht_cap_elem; + struct ieee80211_ht_addt_info *ht_info_elem; u8 *mesh_config; u8 *mesh_id; u8 *peer_link; @@ -817,8 +844,6 @@ struct ieee802_11_elems { u8 ext_supp_rates_len; u8 wmm_info_len; u8 wmm_param_len; - u8 ht_cap_elem_len; - u8 ht_info_elem_len; u8 mesh_config_len; u8 mesh_id_len; u8 peer_link_len; @@ -857,86 +882,82 @@ static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) } -/* ieee80211.c */ int ieee80211_hw_config(struct ieee80211_local *local); int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed); void ieee80211_tx_set_protected(struct ieee80211_tx_data *tx); u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht, struct ieee80211_ht_info *req_ht_cap, struct ieee80211_ht_bss_info *req_bss_cap); +void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, + u32 changed); +void ieee80211_configure_filter(struct ieee80211_local *local); -/* ieee80211_ioctl.c */ +/* wireless extensions */ extern const struct iw_handler_def ieee80211_iw_handler_def; -int ieee80211_set_freq(struct net_device *dev, int freq); -/* ieee80211_sta.c */ -void ieee80211_sta_timer(unsigned long data); -void ieee80211_sta_work(struct work_struct *work); -void ieee80211_sta_scan_work(struct work_struct *work); -void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb, +/* STA/IBSS code */ +void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata); +void ieee80211_scan_work(struct work_struct *work); +void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct ieee80211_rx_status *rx_status); -int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len); -int ieee80211_sta_get_ssid(struct net_device *dev, char *ssid, size_t *len); -int ieee80211_sta_set_bssid(struct net_device *dev, u8 *bssid); -int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len); -void ieee80211_sta_req_auth(struct net_device *dev, +int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len); +int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len); +int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid); +void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta); -int ieee80211_sta_scan_results(struct net_device *dev, - struct iw_request_info *info, - char *buf, size_t len); -ieee80211_rx_result ieee80211_sta_rx_scan( - struct net_device *dev, struct sk_buff *skb, - struct ieee80211_rx_status *rx_status); -void ieee80211_rx_bss_list_init(struct ieee80211_local *local); -void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local); -int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len); -struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev, +struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, u8 *bssid, u8 *addr, u64 supp_rates); -int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason); -int ieee80211_sta_disassociate(struct net_device *dev, u16 reason); -void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, - u32 changed); -u32 ieee80211_reset_erp_info(struct net_device *dev); -int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie, - struct ieee80211_ht_info *ht_info); -int ieee80211_ht_addt_info_ie_to_ht_bss_info( - struct ieee80211_ht_addt_info *ht_add_info_ie, - struct ieee80211_ht_bss_info *bss_info); -void ieee80211_send_addba_request(struct net_device *dev, const u8 *da, - u16 tid, u8 dialog_token, u16 start_seq_num, - u16 agg_size, u16 timeout); -void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid, - u16 initiator, u16 reason_code); -void ieee80211_send_bar(struct net_device *dev, u8 *ra, u16 tid, u16 ssn); - -void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *da, - u16 tid, u16 initiator, u16 reason); -void sta_addba_resp_timer_expired(unsigned long data); -void ieee80211_sta_tear_down_BA_sessions(struct net_device *dev, u8 *addr); +int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason); +int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason); +u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata); u64 ieee80211_sta_get_rates(struct ieee80211_local *local, struct ieee802_11_elems *elems, enum ieee80211_band band); -void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb, - int encrypt); -void ieee802_11_parse_elems(u8 *start, size_t len, - struct ieee802_11_elems *elems); - -#ifdef CONFIG_MAC80211_MESH -void ieee80211_start_mesh(struct net_device *dev); -#else -static inline void ieee80211_start_mesh(struct net_device *dev) -{} -#endif +void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, + u8 *ssid, size_t ssid_len); + +/* scan/BSS handling */ +int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, + u8 *ssid, size_t ssid_len); +int ieee80211_scan_results(struct ieee80211_local *local, + struct iw_request_info *info, + char *buf, size_t len); +ieee80211_rx_result +ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, + struct ieee80211_rx_status *rx_status); +void ieee80211_rx_bss_list_init(struct ieee80211_local *local); +void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local); +int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, + char *ie, size_t len); + +void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local); +int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, + u8 *ssid, size_t ssid_len); +struct ieee80211_bss * +ieee80211_bss_info_update(struct ieee80211_local *local, + struct ieee80211_rx_status *rx_status, + struct ieee80211_mgmt *mgmt, + size_t len, + struct ieee802_11_elems *elems, + int freq, bool beacon); +struct ieee80211_bss * +ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq, + u8 *ssid, u8 ssid_len); +struct ieee80211_bss * +ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq, + u8 *ssid, u8 ssid_len); +void ieee80211_rx_bss_put(struct ieee80211_local *local, + struct ieee80211_bss *bss); /* interface handling */ -void ieee80211_if_setup(struct net_device *dev); int ieee80211_if_add(struct ieee80211_local *local, const char *name, - struct net_device **new_dev, enum ieee80211_if_types type, + struct net_device **new_dev, enum nl80211_iftype type, struct vif_params *params); int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, - enum ieee80211_if_types type); -void ieee80211_if_remove(struct net_device *dev); + enum nl80211_iftype type); +void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata); void ieee80211_remove_interfaces(struct ieee80211_local *local); /* tx handling */ @@ -946,16 +967,52 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev); int ieee80211_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev); int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev); +/* HT */ +int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie, + struct ieee80211_ht_info *ht_info); +int ieee80211_ht_addt_info_ie_to_ht_bss_info( + struct ieee80211_ht_addt_info *ht_add_info_ie, + struct ieee80211_ht_bss_info *bss_info); +void ieee80211_send_bar(struct ieee80211_sub_if_data *sdata, u8 *ra, u16 tid, u16 ssn); + +void ieee80211_sta_stop_rx_ba_session(struct ieee80211_sub_if_data *sdata, u8 *da, + u16 tid, u16 initiator, u16 reason); +void ieee80211_sta_tear_down_BA_sessions(struct ieee80211_sub_if_data *sdata, u8 *addr); +void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct ieee80211_mgmt *mgmt, size_t len); +void ieee80211_process_addba_resp(struct ieee80211_local *local, + struct sta_info *sta, + struct ieee80211_mgmt *mgmt, + size_t len); +void ieee80211_process_addba_request(struct ieee80211_local *local, + struct sta_info *sta, + struct ieee80211_mgmt *mgmt, + size_t len); + +/* Spectrum management */ +void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len); + /* utility functions/constants */ extern void *mac80211_wiphy_privid; /* for wiphy privid */ extern const unsigned char rfc1042_header[6]; extern const unsigned char bridge_tunnel_header[6]; u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, - enum ieee80211_if_types type); + enum nl80211_iftype type); int ieee80211_frame_duration(struct ieee80211_local *local, size_t len, int rate, int erp, int short_preamble); -void mac80211_ev_michael_mic_failure(struct net_device *dev, int keyidx, +void mac80211_ev_michael_mic_failure(struct ieee80211_sub_if_data *sdata, int keyidx, struct ieee80211_hdr *hdr); +void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata); +void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, + int encrypt); +void ieee802_11_parse_elems(u8 *start, size_t len, + struct ieee802_11_elems *elems); +int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freq); +u64 ieee80211_mandatory_rates(struct ieee80211_local *local, + enum ieee80211_band band); #ifdef CONFIG_MAC80211_NOINLINE #define debug_noinline noinline diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 610ed1d9893a..8336fee68d3e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1,4 +1,6 @@ /* + * Interface handling (except master interface) + * * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright (c) 2006 Jiri Benc <jbenc@suse.cz> @@ -17,7 +19,539 @@ #include "sta_info.h" #include "debugfs_netdev.h" #include "mesh.h" +#include "led.h" + +static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) +{ + int meshhdrlen; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + meshhdrlen = (sdata->vif.type == NL80211_IFTYPE_MESH_POINT) ? 5 : 0; + + /* FIX: what would be proper limits for MTU? + * This interface uses 802.3 frames. */ + if (new_mtu < 256 || + new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6 - meshhdrlen) { + return -EINVAL; + } + +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu); +#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ + dev->mtu = new_mtu; + return 0; +} + +static inline int identical_mac_addr_allowed(int type1, int type2) +{ + return type1 == NL80211_IFTYPE_MONITOR || + type2 == NL80211_IFTYPE_MONITOR || + (type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_WDS) || + (type1 == NL80211_IFTYPE_WDS && + (type2 == NL80211_IFTYPE_WDS || + type2 == NL80211_IFTYPE_AP)) || + (type1 == NL80211_IFTYPE_AP && type2 == NL80211_IFTYPE_AP_VLAN) || + (type1 == NL80211_IFTYPE_AP_VLAN && + (type2 == NL80211_IFTYPE_AP || + type2 == NL80211_IFTYPE_AP_VLAN)); +} + +static int ieee80211_open(struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *nsdata; + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + struct ieee80211_if_init_conf conf; + u32 changed = 0; + int res; + bool need_hw_reconfig = 0; + u8 null_addr[ETH_ALEN] = {0}; + + /* fail early if user set an invalid address */ + if (compare_ether_addr(dev->dev_addr, null_addr) && + !is_valid_ether_addr(dev->dev_addr)) + return -EADDRNOTAVAIL; + + /* we hold the RTNL here so can safely walk the list */ + list_for_each_entry(nsdata, &local->interfaces, list) { + struct net_device *ndev = nsdata->dev; + + if (ndev != dev && netif_running(ndev)) { + /* + * Allow only a single IBSS interface to be up at any + * time. This is restricted because beacon distribution + * cannot work properly if both are in the same IBSS. + * + * To remove this restriction we'd have to disallow them + * from setting the same SSID on different IBSS interfaces + * belonging to the same hardware. Then, however, we're + * faced with having to adopt two different TSF timers... + */ + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && + nsdata->vif.type == NL80211_IFTYPE_ADHOC) + return -EBUSY; + + /* + * The remaining checks are only performed for interfaces + * with the same MAC address. + */ + if (compare_ether_addr(dev->dev_addr, ndev->dev_addr)) + continue; + + /* + * check whether it may have the same address + */ + if (!identical_mac_addr_allowed(sdata->vif.type, + nsdata->vif.type)) + return -ENOTUNIQ; + + /* + * can only add VLANs to enabled APs + */ + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN && + nsdata->vif.type == NL80211_IFTYPE_AP) + sdata->bss = &nsdata->u.ap; + } + } + + switch (sdata->vif.type) { + case NL80211_IFTYPE_WDS: + if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) + return -ENOLINK; + break; + case NL80211_IFTYPE_AP_VLAN: + if (!sdata->bss) + return -ENOLINK; + list_add(&sdata->u.vlan.list, &sdata->bss->vlans); + break; + case NL80211_IFTYPE_AP: + sdata->bss = &sdata->u.ap; + break; + case NL80211_IFTYPE_MESH_POINT: + if (!ieee80211_vif_is_mesh(&sdata->vif)) + break; + /* mesh ifaces must set allmulti to forward mcast traffic */ + atomic_inc(&local->iff_allmultis); + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_ADHOC: + /* no special treatment */ + break; + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: + /* cannot happen */ + WARN_ON(1); + break; + } + + if (local->open_count == 0) { + res = 0; + if (local->ops->start) + res = local->ops->start(local_to_hw(local)); + if (res) + goto err_del_bss; + need_hw_reconfig = 1; + ieee80211_led_radio(local, local->hw.conf.radio_enabled); + } + + /* + * Check all interfaces and copy the hopefully now-present + * MAC address to those that have the special null one. + */ + list_for_each_entry(nsdata, &local->interfaces, list) { + struct net_device *ndev = nsdata->dev; + + /* + * No need to check netif_running since we do not allow + * it to start up with this invalid address. + */ + if (compare_ether_addr(null_addr, ndev->dev_addr) == 0) + memcpy(ndev->dev_addr, + local->hw.wiphy->perm_addr, + ETH_ALEN); + } + + if (compare_ether_addr(null_addr, local->mdev->dev_addr) == 0) + memcpy(local->mdev->dev_addr, local->hw.wiphy->perm_addr, + ETH_ALEN); + + /* + * Validate the MAC address for this device. + */ + if (!is_valid_ether_addr(dev->dev_addr)) { + if (!local->open_count && local->ops->stop) + local->ops->stop(local_to_hw(local)); + return -EADDRNOTAVAIL; + } + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + /* no need to tell driver */ + break; + case NL80211_IFTYPE_MONITOR: + if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { + local->cooked_mntrs++; + break; + } + + /* must be before the call to ieee80211_configure_filter */ + local->monitors++; + if (local->monitors == 1) + local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; + + if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) + local->fif_fcsfail++; + if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL) + local->fif_plcpfail++; + if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) + local->fif_control++; + if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS) + local->fif_other_bss++; + + netif_addr_lock_bh(local->mdev); + ieee80211_configure_filter(local); + netif_addr_unlock_bh(local->mdev); + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET; + /* fall through */ + default: + conf.vif = &sdata->vif; + conf.type = sdata->vif.type; + conf.mac_addr = dev->dev_addr; + res = local->ops->add_interface(local_to_hw(local), &conf); + if (res) + goto err_stop; + + if (ieee80211_vif_is_mesh(&sdata->vif)) + ieee80211_start_mesh(sdata); + changed |= ieee80211_reset_erp_info(sdata); + ieee80211_bss_info_change_notify(sdata, changed); + ieee80211_enable_keys(sdata); + + if (sdata->vif.type == NL80211_IFTYPE_STATION && + !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) + netif_carrier_off(dev); + else + netif_carrier_on(dev); + } + + if (sdata->vif.type == NL80211_IFTYPE_WDS) { + /* Create STA entry for the WDS peer */ + sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr, + GFP_KERNEL); + if (!sta) { + res = -ENOMEM; + goto err_del_interface; + } + + /* no locking required since STA is not live yet */ + sta->flags |= WLAN_STA_AUTHORIZED; + + res = sta_info_insert(sta); + if (res) { + /* STA has been freed */ + goto err_del_interface; + } + } + + if (local->open_count == 0) { + res = dev_open(local->mdev); + WARN_ON(res); + if (res) + goto err_del_interface; + tasklet_enable(&local->tx_pending_tasklet); + tasklet_enable(&local->tasklet); + } + + /* + * set_multicast_list will be invoked by the networking core + * which will check whether any increments here were done in + * error and sync them down to the hardware as filter flags. + */ + if (sdata->flags & IEEE80211_SDATA_ALLMULTI) + atomic_inc(&local->iff_allmultis); + + if (sdata->flags & IEEE80211_SDATA_PROMISC) + atomic_inc(&local->iff_promiscs); + + local->open_count++; + if (need_hw_reconfig) { + ieee80211_hw_config(local); + /* + * set default queue parameters so drivers don't + * need to initialise the hardware if the hardware + * doesn't start up with sane defaults + */ + ieee80211_set_wmm_default(sdata); + } + + /* + * ieee80211_sta_work is disabled while network interface + * is down. Therefore, some configuration changes may not + * yet be effective. Trigger execution of ieee80211_sta_work + * to fix this. + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + queue_work(local->hw.workqueue, &ifsta->work); + } + + netif_tx_start_all_queues(dev); + + return 0; + err_del_interface: + local->ops->remove_interface(local_to_hw(local), &conf); + err_stop: + if (!local->open_count && local->ops->stop) + local->ops->stop(local_to_hw(local)); + err_del_bss: + sdata->bss = NULL; + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + list_del(&sdata->u.vlan.list); + return res; +} + +static int ieee80211_stop(struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_init_conf conf; + struct sta_info *sta; + + /* + * Stop TX on this interface first. + */ + netif_tx_stop_all_queues(dev); + + /* + * Now delete all active aggregation sessions. + */ + rcu_read_lock(); + + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sta->sdata == sdata) + ieee80211_sta_tear_down_BA_sessions(sdata, + sta->sta.addr); + } + + rcu_read_unlock(); + + /* + * Remove all stations associated with this interface. + * + * This must be done before calling ops->remove_interface() + * because otherwise we can later invoke ops->sta_notify() + * whenever the STAs are removed, and that invalidates driver + * assumptions about always getting a vif pointer that is valid + * (because if we remove a STA after ops->remove_interface() + * the driver will have removed the vif info already!) + * + * We could relax this and only unlink the stations from the + * hash table and list but keep them on a per-sdata list that + * will be inserted back again when the interface is brought + * up again, but I don't currently see a use case for that, + * except with WDS which gets a STA entry created when it is + * brought up. + */ + sta_info_flush(local, sdata); + + /* + * Don't count this interface for promisc/allmulti while it + * is down. dev_mc_unsync() will invoke set_multicast_list + * on the master interface which will sync these down to the + * hardware as filter flags. + */ + if (sdata->flags & IEEE80211_SDATA_ALLMULTI) + atomic_dec(&local->iff_allmultis); + + if (sdata->flags & IEEE80211_SDATA_PROMISC) + atomic_dec(&local->iff_promiscs); + + dev_mc_unsync(local->mdev, dev); + + /* APs need special treatment */ + if (sdata->vif.type == NL80211_IFTYPE_AP) { + struct ieee80211_sub_if_data *vlan, *tmp; + struct beacon_data *old_beacon = sdata->u.ap.beacon; + + /* remove beacon */ + rcu_assign_pointer(sdata->u.ap.beacon, NULL); + synchronize_rcu(); + kfree(old_beacon); + + /* down all dependent devices, that is VLANs */ + list_for_each_entry_safe(vlan, tmp, &sdata->u.ap.vlans, + u.vlan.list) + dev_close(vlan->dev); + WARN_ON(!list_empty(&sdata->u.ap.vlans)); + } + + local->open_count--; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP_VLAN: + list_del(&sdata->u.vlan.list); + /* no need to tell driver */ + break; + case NL80211_IFTYPE_MONITOR: + if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { + local->cooked_mntrs--; + break; + } + + local->monitors--; + if (local->monitors == 0) + local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; + + if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) + local->fif_fcsfail--; + if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL) + local->fif_plcpfail--; + if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) + local->fif_control--; + if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS) + local->fif_other_bss--; + + netif_addr_lock_bh(local->mdev); + ieee80211_configure_filter(local); + netif_addr_unlock_bh(local->mdev); + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + sdata->u.sta.state = IEEE80211_STA_MLME_DISABLED; + memset(sdata->u.sta.bssid, 0, ETH_ALEN); + del_timer_sync(&sdata->u.sta.timer); + /* + * If the timer fired while we waited for it, it will have + * requeued the work. Now the work will be running again + * but will not rearm the timer again because it checks + * whether the interface is running, which, at this point, + * it no longer is. + */ + cancel_work_sync(&sdata->u.sta.work); + /* + * When we get here, the interface is marked down. + * Call synchronize_rcu() to wait for the RX path + * should it be using the interface and enqueuing + * frames at this very time on another CPU. + */ + synchronize_rcu(); + skb_queue_purge(&sdata->u.sta.skb_queue); + + sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; + kfree(sdata->u.sta.extra_ie); + sdata->u.sta.extra_ie = NULL; + sdata->u.sta.extra_ie_len = 0; + /* fall through */ + case NL80211_IFTYPE_MESH_POINT: + if (ieee80211_vif_is_mesh(&sdata->vif)) { + /* allmulti is always set on mesh ifaces */ + atomic_dec(&local->iff_allmultis); + ieee80211_stop_mesh(sdata); + } + /* fall through */ + default: + if (local->scan_sdata == sdata) { + if (!local->ops->hw_scan) + cancel_delayed_work_sync(&local->scan_work); + /* + * The software scan can no longer run now, so we can + * clear out the scan_sdata reference. However, the + * hardware scan may still be running. The complete + * function must be prepared to handle a NULL value. + */ + local->scan_sdata = NULL; + /* + * The memory barrier guarantees that another CPU + * that is hardware-scanning will now see the fact + * that this interface is gone. + */ + smp_mb(); + /* + * If software scanning, complete the scan but since + * the scan_sdata is NULL already don't send out a + * scan event to userspace -- the scan is incomplete. + */ + if (local->sw_scanning) + ieee80211_scan_completed(&local->hw); + } + + conf.vif = &sdata->vif; + conf.type = sdata->vif.type; + conf.mac_addr = dev->dev_addr; + /* disable all keys for as long as this netdev is down */ + ieee80211_disable_keys(sdata); + local->ops->remove_interface(local_to_hw(local), &conf); + } + + sdata->bss = NULL; + + if (local->open_count == 0) { + if (netif_running(local->mdev)) + dev_close(local->mdev); + + if (local->ops->stop) + local->ops->stop(local_to_hw(local)); + + ieee80211_led_radio(local, 0); + + flush_workqueue(local->hw.workqueue); + + tasklet_disable(&local->tx_pending_tasklet); + tasklet_disable(&local->tasklet); + } + + return 0; +} + +static void ieee80211_set_multicast_list(struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + int allmulti, promisc, sdata_allmulti, sdata_promisc; + + allmulti = !!(dev->flags & IFF_ALLMULTI); + promisc = !!(dev->flags & IFF_PROMISC); + sdata_allmulti = !!(sdata->flags & IEEE80211_SDATA_ALLMULTI); + sdata_promisc = !!(sdata->flags & IEEE80211_SDATA_PROMISC); + + if (allmulti != sdata_allmulti) { + if (dev->flags & IFF_ALLMULTI) + atomic_inc(&local->iff_allmultis); + else + atomic_dec(&local->iff_allmultis); + sdata->flags ^= IEEE80211_SDATA_ALLMULTI; + } + + if (promisc != sdata_promisc) { + if (dev->flags & IFF_PROMISC) + atomic_inc(&local->iff_promiscs); + else + atomic_dec(&local->iff_promiscs); + sdata->flags ^= IEEE80211_SDATA_PROMISC; + } + + dev_mc_sync(local->mdev, dev); +} +static void ieee80211_if_setup(struct net_device *dev) +{ + ether_setup(dev); + dev->hard_start_xmit = ieee80211_subif_start_xmit; + dev->wireless_handlers = &ieee80211_iw_handler_def; + dev->set_multicast_list = ieee80211_set_multicast_list; + dev->change_mtu = ieee80211_change_mtu; + dev->open = ieee80211_open; + dev->stop = ieee80211_stop; + dev->destructor = free_netdev; + /* we will validate the address ourselves in ->open */ + dev->validate_addr = NULL; +} /* * Called when the netdev is removed or, by the code below, before * the interface type changes. @@ -31,17 +565,17 @@ static void ieee80211_teardown_sdata(struct net_device *dev) int flushed; int i; - ieee80211_debugfs_remove_netdev(sdata); - /* free extra data */ ieee80211_free_keys(sdata); + ieee80211_debugfs_remove_netdev(sdata); + for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) __skb_queue_purge(&sdata->fragments[i].skb_list); sdata->fragment_next = 0; switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: beacon = sdata->u.ap.beacon; rcu_assign_pointer(sdata->u.ap.beacon, NULL); synchronize_rcu(); @@ -53,23 +587,23 @@ static void ieee80211_teardown_sdata(struct net_device *dev) } break; - case IEEE80211_IF_TYPE_MESH_POINT: - /* Allow compiler to elide mesh_rmc_free call. */ + case NL80211_IFTYPE_MESH_POINT: if (ieee80211_vif_is_mesh(&sdata->vif)) - mesh_rmc_free(dev); - /* fall through */ - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: + mesh_rmc_free(sdata); + break; + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: kfree(sdata->u.sta.extra_ie); kfree(sdata->u.sta.assocreq_ies); kfree(sdata->u.sta.assocresp_ies); kfree_skb(sdata->u.sta.probe_resp); break; - case IEEE80211_IF_TYPE_WDS: - case IEEE80211_IF_TYPE_VLAN: - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_MONITOR: break; - case IEEE80211_IF_TYPE_INVALID: + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: BUG(); break; } @@ -82,55 +616,43 @@ static void ieee80211_teardown_sdata(struct net_device *dev) * Helper function to initialise an interface to a specific type. */ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, - enum ieee80211_if_types type) + enum nl80211_iftype type) { - struct ieee80211_if_sta *ifsta; - /* clear type-dependent union */ memset(&sdata->u, 0, sizeof(sdata->u)); /* and set some type-dependent values */ sdata->vif.type = type; + sdata->dev->hard_start_xmit = ieee80211_subif_start_xmit; + sdata->wdev.iftype = type; /* only monitor differs */ sdata->dev->type = ARPHRD_ETHER; switch (type) { - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: skb_queue_head_init(&sdata->u.ap.ps_bc_buf); INIT_LIST_HEAD(&sdata->u.ap.vlans); break; - case IEEE80211_IF_TYPE_MESH_POINT: - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - ifsta = &sdata->u.sta; - INIT_WORK(&ifsta->work, ieee80211_sta_work); - setup_timer(&ifsta->timer, ieee80211_sta_timer, - (unsigned long) sdata); - skb_queue_head_init(&ifsta->skb_queue); - - ifsta->capab = WLAN_CAPABILITY_ESS; - ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN | - IEEE80211_AUTH_ALG_SHARED_KEY; - ifsta->flags |= IEEE80211_STA_CREATE_IBSS | - IEEE80211_STA_AUTO_BSSID_SEL | - IEEE80211_STA_AUTO_CHANNEL_SEL; - if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4) - ifsta->flags |= IEEE80211_STA_WMM_ENABLED; - + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + ieee80211_sta_setup_sdata(sdata); + break; + case NL80211_IFTYPE_MESH_POINT: if (ieee80211_vif_is_mesh(&sdata->vif)) ieee80211_mesh_init_sdata(sdata); break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP; sdata->dev->hard_start_xmit = ieee80211_monitor_start_xmit; sdata->u.mntr_flags = MONITOR_FLAG_CONTROL | MONITOR_FLAG_OTHER_BSS; break; - case IEEE80211_IF_TYPE_WDS: - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_AP_VLAN: break; - case IEEE80211_IF_TYPE_INVALID: + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: BUG(); break; } @@ -139,7 +661,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, } int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, - enum ieee80211_if_types type) + enum nl80211_iftype type) { ASSERT_RTNL(); @@ -160,14 +682,16 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, ieee80211_setup_sdata(sdata, type); /* reset some values that shouldn't be kept across type changes */ - sdata->basic_rates = 0; + sdata->bss_conf.basic_rates = + ieee80211_mandatory_rates(sdata->local, + sdata->local->hw.conf.channel->band); sdata->drop_unencrypted = 0; return 0; } int ieee80211_if_add(struct ieee80211_local *local, const char *name, - struct net_device **new_dev, enum ieee80211_if_types type, + struct net_device **new_dev, enum nl80211_iftype type, struct vif_params *params) { struct net_device *ndev; @@ -225,9 +749,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, if (ieee80211_vif_is_mesh(&sdata->vif) && params && params->mesh_id_len) - ieee80211_if_sta_set_mesh_id(&sdata->u.sta, - params->mesh_id_len, - params->mesh_id); + ieee80211_sdata_set_mesh_id(sdata, + params->mesh_id_len, + params->mesh_id); list_add_tail_rcu(&sdata->list, &local->interfaces); @@ -241,15 +765,13 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, return ret; } -void ieee80211_if_remove(struct net_device *dev) +void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - ASSERT_RTNL(); list_del_rcu(&sdata->list); synchronize_rcu(); - unregister_netdevice(dev); + unregister_netdevice(sdata->dev); } /* diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 6597c779e35a..a5b06fe71980 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -118,12 +118,12 @@ static const u8 *get_mac_for_key(struct ieee80211_key *key) * address to indicate a transmit-only key. */ if (key->conf.alg != ALG_WEP && - (key->sdata->vif.type == IEEE80211_IF_TYPE_AP || - key->sdata->vif.type == IEEE80211_IF_TYPE_VLAN)) + (key->sdata->vif.type == NL80211_IFTYPE_AP || + key->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) addr = zero_addr; if (key->sta) - addr = key->sta->addr; + addr = key->sta->sta.addr; return addr; } @@ -281,6 +281,20 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, key->conf.alg = alg; key->conf.keyidx = idx; key->conf.keylen = key_len; + switch (alg) { + case ALG_WEP: + key->conf.iv_len = WEP_IV_LEN; + key->conf.icv_len = WEP_ICV_LEN; + break; + case ALG_TKIP: + key->conf.iv_len = TKIP_IV_LEN; + key->conf.icv_len = TKIP_ICV_LEN; + break; + case ALG_CCMP: + key->conf.iv_len = CCMP_HDR_LEN; + key->conf.icv_len = CCMP_MIC_LEN; + break; + } memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); INIT_LIST_HEAD(&key->todo); @@ -331,7 +345,7 @@ void ieee80211_key_link(struct ieee80211_key *key, */ key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE; } else { - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) { struct sta_info *ap; /* diff --git a/net/mac80211/main.c b/net/mac80211/main.c index aa5a191598c9..ae62ad40ad63 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -45,16 +45,9 @@ struct ieee80211_tx_status_rtap_hdr { u8 data_retries; } __attribute__ ((packed)); -/* common interface routines */ - -static int header_parse_80211(const struct sk_buff *skb, unsigned char *haddr) -{ - memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */ - return ETH_ALEN; -} /* must be called under mdev tx lock */ -static void ieee80211_configure_filter(struct ieee80211_local *local) +void ieee80211_configure_filter(struct ieee80211_local *local) { unsigned int changed_flags; unsigned int new_flags = 0; @@ -97,9 +90,24 @@ static void ieee80211_configure_filter(struct ieee80211_local *local) /* master interface */ +static int header_parse_80211(const struct sk_buff *skb, unsigned char *haddr) +{ + memcpy(haddr, skb_mac_header(skb) + 10, ETH_ALEN); /* addr2 */ + return ETH_ALEN; +} + +static const struct header_ops ieee80211_header_ops = { + .create = eth_header, + .parse = header_parse_80211, + .rebuild = eth_rebuild_header, + .cache = eth_header_cache, + .cache_update = eth_header_cache_update, +}; + static int ieee80211_master_open(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_master_priv *mpriv = netdev_priv(dev); + struct ieee80211_local *local = mpriv->local; struct ieee80211_sub_if_data *sdata; int res = -EOPNOTSUPP; @@ -121,7 +129,8 @@ static int ieee80211_master_open(struct net_device *dev) static int ieee80211_master_stop(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_master_priv *mpriv = netdev_priv(dev); + struct ieee80211_local *local = mpriv->local; struct ieee80211_sub_if_data *sdata; /* we hold the RTNL here so can safely walk the list */ @@ -134,849 +143,12 @@ static int ieee80211_master_stop(struct net_device *dev) static void ieee80211_master_set_multicast_list(struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_master_priv *mpriv = netdev_priv(dev); + struct ieee80211_local *local = mpriv->local; ieee80211_configure_filter(local); } -/* regular interfaces */ - -static int ieee80211_change_mtu(struct net_device *dev, int new_mtu) -{ - int meshhdrlen; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - meshhdrlen = (sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT) ? 5 : 0; - - /* FIX: what would be proper limits for MTU? - * This interface uses 802.3 frames. */ - if (new_mtu < 256 || - new_mtu > IEEE80211_MAX_DATA_LEN - 24 - 6 - meshhdrlen) { - return -EINVAL; - } - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: setting MTU %d\n", dev->name, new_mtu); -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ - dev->mtu = new_mtu; - return 0; -} - -static inline int identical_mac_addr_allowed(int type1, int type2) -{ - return (type1 == IEEE80211_IF_TYPE_MNTR || - type2 == IEEE80211_IF_TYPE_MNTR || - (type1 == IEEE80211_IF_TYPE_AP && - type2 == IEEE80211_IF_TYPE_WDS) || - (type1 == IEEE80211_IF_TYPE_WDS && - (type2 == IEEE80211_IF_TYPE_WDS || - type2 == IEEE80211_IF_TYPE_AP)) || - (type1 == IEEE80211_IF_TYPE_AP && - type2 == IEEE80211_IF_TYPE_VLAN) || - (type1 == IEEE80211_IF_TYPE_VLAN && - (type2 == IEEE80211_IF_TYPE_AP || - type2 == IEEE80211_IF_TYPE_VLAN))); -} - -static int ieee80211_open(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata, *nsdata; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sta_info *sta; - struct ieee80211_if_init_conf conf; - u32 changed = 0; - int res; - bool need_hw_reconfig = 0; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - /* we hold the RTNL here so can safely walk the list */ - list_for_each_entry(nsdata, &local->interfaces, list) { - struct net_device *ndev = nsdata->dev; - - if (ndev != dev && netif_running(ndev)) { - /* - * Allow only a single IBSS interface to be up at any - * time. This is restricted because beacon distribution - * cannot work properly if both are in the same IBSS. - * - * To remove this restriction we'd have to disallow them - * from setting the same SSID on different IBSS interfaces - * belonging to the same hardware. Then, however, we're - * faced with having to adopt two different TSF timers... - */ - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && - nsdata->vif.type == IEEE80211_IF_TYPE_IBSS) - return -EBUSY; - - /* - * The remaining checks are only performed for interfaces - * with the same MAC address. - */ - if (compare_ether_addr(dev->dev_addr, ndev->dev_addr)) - continue; - - /* - * check whether it may have the same address - */ - if (!identical_mac_addr_allowed(sdata->vif.type, - nsdata->vif.type)) - return -ENOTUNIQ; - - /* - * can only add VLANs to enabled APs - */ - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN && - nsdata->vif.type == IEEE80211_IF_TYPE_AP) - sdata->bss = &nsdata->u.ap; - } - } - - switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_WDS: - if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) - return -ENOLINK; - break; - case IEEE80211_IF_TYPE_VLAN: - if (!sdata->bss) - return -ENOLINK; - list_add(&sdata->u.vlan.list, &sdata->bss->vlans); - break; - case IEEE80211_IF_TYPE_AP: - sdata->bss = &sdata->u.ap; - break; - case IEEE80211_IF_TYPE_MESH_POINT: - /* mesh ifaces must set allmulti to forward mcast traffic */ - atomic_inc(&local->iff_allmultis); - break; - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_MNTR: - case IEEE80211_IF_TYPE_IBSS: - /* no special treatment */ - break; - case IEEE80211_IF_TYPE_INVALID: - /* cannot happen */ - WARN_ON(1); - break; - } - - if (local->open_count == 0) { - res = 0; - if (local->ops->start) - res = local->ops->start(local_to_hw(local)); - if (res) - goto err_del_bss; - need_hw_reconfig = 1; - ieee80211_led_radio(local, local->hw.conf.radio_enabled); - } - - switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_VLAN: - /* no need to tell driver */ - break; - case IEEE80211_IF_TYPE_MNTR: - if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { - local->cooked_mntrs++; - break; - } - - /* must be before the call to ieee80211_configure_filter */ - local->monitors++; - if (local->monitors == 1) - local->hw.conf.flags |= IEEE80211_CONF_RADIOTAP; - - if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) - local->fif_fcsfail++; - if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL) - local->fif_plcpfail++; - if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) - local->fif_control++; - if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS) - local->fif_other_bss++; - - netif_addr_lock_bh(local->mdev); - ieee80211_configure_filter(local); - netif_addr_unlock_bh(local->mdev); - break; - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - sdata->u.sta.flags &= ~IEEE80211_STA_PREV_BSSID_SET; - /* fall through */ - default: - conf.vif = &sdata->vif; - conf.type = sdata->vif.type; - conf.mac_addr = dev->dev_addr; - res = local->ops->add_interface(local_to_hw(local), &conf); - if (res) - goto err_stop; - - if (ieee80211_vif_is_mesh(&sdata->vif)) - ieee80211_start_mesh(sdata->dev); - changed |= ieee80211_reset_erp_info(dev); - ieee80211_bss_info_change_notify(sdata, changed); - ieee80211_enable_keys(sdata); - - if (sdata->vif.type == IEEE80211_IF_TYPE_STA && - !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) - netif_carrier_off(dev); - else - netif_carrier_on(dev); - } - - if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) { - /* Create STA entry for the WDS peer */ - sta = sta_info_alloc(sdata, sdata->u.wds.remote_addr, - GFP_KERNEL); - if (!sta) { - res = -ENOMEM; - goto err_del_interface; - } - - /* no locking required since STA is not live yet */ - sta->flags |= WLAN_STA_AUTHORIZED; - - res = sta_info_insert(sta); - if (res) { - /* STA has been freed */ - goto err_del_interface; - } - } - - if (local->open_count == 0) { - res = dev_open(local->mdev); - WARN_ON(res); - if (res) - goto err_del_interface; - tasklet_enable(&local->tx_pending_tasklet); - tasklet_enable(&local->tasklet); - } - - /* - * set_multicast_list will be invoked by the networking core - * which will check whether any increments here were done in - * error and sync them down to the hardware as filter flags. - */ - if (sdata->flags & IEEE80211_SDATA_ALLMULTI) - atomic_inc(&local->iff_allmultis); - - if (sdata->flags & IEEE80211_SDATA_PROMISC) - atomic_inc(&local->iff_promiscs); - - local->open_count++; - if (need_hw_reconfig) - ieee80211_hw_config(local); - - /* - * ieee80211_sta_work is disabled while network interface - * is down. Therefore, some configuration changes may not - * yet be effective. Trigger execution of ieee80211_sta_work - * to fix this. - */ - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - queue_work(local->hw.workqueue, &ifsta->work); - } - - netif_tx_start_all_queues(dev); - - return 0; - err_del_interface: - local->ops->remove_interface(local_to_hw(local), &conf); - err_stop: - if (!local->open_count && local->ops->stop) - local->ops->stop(local_to_hw(local)); - err_del_bss: - sdata->bss = NULL; - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) - list_del(&sdata->u.vlan.list); - return res; -} - -static int ieee80211_stop(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; - struct ieee80211_if_init_conf conf; - struct sta_info *sta; - - /* - * Stop TX on this interface first. - */ - netif_tx_stop_all_queues(dev); - - /* - * Now delete all active aggregation sessions. - */ - rcu_read_lock(); - - list_for_each_entry_rcu(sta, &local->sta_list, list) { - if (sta->sdata == sdata) - ieee80211_sta_tear_down_BA_sessions(dev, sta->addr); - } - - rcu_read_unlock(); - - /* - * Remove all stations associated with this interface. - * - * This must be done before calling ops->remove_interface() - * because otherwise we can later invoke ops->sta_notify() - * whenever the STAs are removed, and that invalidates driver - * assumptions about always getting a vif pointer that is valid - * (because if we remove a STA after ops->remove_interface() - * the driver will have removed the vif info already!) - * - * We could relax this and only unlink the stations from the - * hash table and list but keep them on a per-sdata list that - * will be inserted back again when the interface is brought - * up again, but I don't currently see a use case for that, - * except with WDS which gets a STA entry created when it is - * brought up. - */ - sta_info_flush(local, sdata); - - /* - * Don't count this interface for promisc/allmulti while it - * is down. dev_mc_unsync() will invoke set_multicast_list - * on the master interface which will sync these down to the - * hardware as filter flags. - */ - if (sdata->flags & IEEE80211_SDATA_ALLMULTI) - atomic_dec(&local->iff_allmultis); - - if (sdata->flags & IEEE80211_SDATA_PROMISC) - atomic_dec(&local->iff_promiscs); - - dev_mc_unsync(local->mdev, dev); - - /* APs need special treatment */ - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { - struct ieee80211_sub_if_data *vlan, *tmp; - struct beacon_data *old_beacon = sdata->u.ap.beacon; - - /* remove beacon */ - rcu_assign_pointer(sdata->u.ap.beacon, NULL); - synchronize_rcu(); - kfree(old_beacon); - - /* down all dependent devices, that is VLANs */ - list_for_each_entry_safe(vlan, tmp, &sdata->u.ap.vlans, - u.vlan.list) - dev_close(vlan->dev); - WARN_ON(!list_empty(&sdata->u.ap.vlans)); - } - - local->open_count--; - - switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_VLAN: - list_del(&sdata->u.vlan.list); - /* no need to tell driver */ - break; - case IEEE80211_IF_TYPE_MNTR: - if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { - local->cooked_mntrs--; - break; - } - - local->monitors--; - if (local->monitors == 0) - local->hw.conf.flags &= ~IEEE80211_CONF_RADIOTAP; - - if (sdata->u.mntr_flags & MONITOR_FLAG_FCSFAIL) - local->fif_fcsfail--; - if (sdata->u.mntr_flags & MONITOR_FLAG_PLCPFAIL) - local->fif_plcpfail--; - if (sdata->u.mntr_flags & MONITOR_FLAG_CONTROL) - local->fif_control--; - if (sdata->u.mntr_flags & MONITOR_FLAG_OTHER_BSS) - local->fif_other_bss--; - - netif_addr_lock_bh(local->mdev); - ieee80211_configure_filter(local); - netif_addr_unlock_bh(local->mdev); - break; - case IEEE80211_IF_TYPE_MESH_POINT: - /* allmulti is always set on mesh ifaces */ - atomic_dec(&local->iff_allmultis); - /* fall through */ - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - sdata->u.sta.state = IEEE80211_DISABLED; - memset(sdata->u.sta.bssid, 0, ETH_ALEN); - del_timer_sync(&sdata->u.sta.timer); - /* - * When we get here, the interface is marked down. - * Call synchronize_rcu() to wait for the RX path - * should it be using the interface and enqueuing - * frames at this very time on another CPU. - */ - synchronize_rcu(); - skb_queue_purge(&sdata->u.sta.skb_queue); - - if (local->scan_dev == sdata->dev) { - if (!local->ops->hw_scan) { - local->sta_sw_scanning = 0; - cancel_delayed_work(&local->scan_work); - } else - local->sta_hw_scanning = 0; - } - - sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; - kfree(sdata->u.sta.extra_ie); - sdata->u.sta.extra_ie = NULL; - sdata->u.sta.extra_ie_len = 0; - /* fall through */ - default: - conf.vif = &sdata->vif; - conf.type = sdata->vif.type; - conf.mac_addr = dev->dev_addr; - /* disable all keys for as long as this netdev is down */ - ieee80211_disable_keys(sdata); - local->ops->remove_interface(local_to_hw(local), &conf); - } - - sdata->bss = NULL; - - if (local->open_count == 0) { - if (netif_running(local->mdev)) - dev_close(local->mdev); - - if (local->ops->stop) - local->ops->stop(local_to_hw(local)); - - ieee80211_led_radio(local, 0); - - flush_workqueue(local->hw.workqueue); - - tasklet_disable(&local->tx_pending_tasklet); - tasklet_disable(&local->tasklet); - } - - return 0; -} - -int ieee80211_start_tx_ba_session(struct ieee80211_hw *hw, u8 *ra, u16 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - struct ieee80211_sub_if_data *sdata; - u16 start_seq_num = 0; - u8 *state; - int ret; - DECLARE_MAC_BUF(mac); - - if (tid >= STA_TID_NUM) - return -EINVAL; - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Open BA session requested for %s tid %u\n", - print_mac(mac, ra), tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - rcu_read_lock(); - - sta = sta_info_get(local, ra); - if (!sta) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Could not find the station\n"); -#endif - ret = -ENOENT; - goto exit; - } - - spin_lock_bh(&sta->lock); - - /* we have tried too many times, receiver does not want A-MPDU */ - if (sta->ampdu_mlme.addba_req_num[tid] > HT_AGG_MAX_RETRIES) { - ret = -EBUSY; - goto err_unlock_sta; - } - - state = &sta->ampdu_mlme.tid_state_tx[tid]; - /* check if the TID is not in aggregation flow already */ - if (*state != HT_AGG_STATE_IDLE) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - session is not " - "idle on tid %u\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - ret = -EAGAIN; - goto err_unlock_sta; - } - - /* prepare A-MPDU MLME for Tx aggregation */ - sta->ampdu_mlme.tid_tx[tid] = - kmalloc(sizeof(struct tid_ampdu_tx), GFP_ATOMIC); - if (!sta->ampdu_mlme.tid_tx[tid]) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_ERR "allocate tx mlme to tid %d failed\n", - tid); -#endif - ret = -ENOMEM; - goto err_unlock_sta; - } - /* Tx timer */ - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.function = - sta_addba_resp_timer_expired; - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.data = - (unsigned long)&sta->timer_to_tid[tid]; - init_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); - - /* create a new queue for this aggregation */ - ret = ieee80211_ht_agg_queue_add(local, sta, tid); - - /* case no queue is available to aggregation - * don't switch to aggregation */ - if (ret) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - queue unavailable for" - " tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - goto err_unlock_queue; - } - sdata = sta->sdata; - - /* Ok, the Addba frame hasn't been sent yet, but if the driver calls the - * call back right away, it must see that the flow has begun */ - *state |= HT_ADDBA_REQUESTED_MSK; - - if (local->ops->ampdu_action) - ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_START, - ra, tid, &start_seq_num); - - if (ret) { - /* No need to requeue the packets in the agg queue, since we - * held the tx lock: no packet could be enqueued to the newly - * allocated queue */ - ieee80211_ht_agg_queue_remove(local, sta, tid, 0); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "BA request denied - HW unavailable for" - " tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - *state = HT_AGG_STATE_IDLE; - goto err_unlock_queue; - } - - /* Will put all the packets in the new SW queue */ - ieee80211_requeue(local, ieee802_1d_to_ac[tid]); - spin_unlock_bh(&sta->lock); - - /* send an addBA request */ - sta->ampdu_mlme.dialog_token_allocator++; - sta->ampdu_mlme.tid_tx[tid]->dialog_token = - sta->ampdu_mlme.dialog_token_allocator; - sta->ampdu_mlme.tid_tx[tid]->ssn = start_seq_num; - - - ieee80211_send_addba_request(sta->sdata->dev, ra, tid, - sta->ampdu_mlme.tid_tx[tid]->dialog_token, - sta->ampdu_mlme.tid_tx[tid]->ssn, - 0x40, 5000); - /* activate the timer for the recipient's addBA response */ - sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer.expires = - jiffies + ADDBA_RESP_INTERVAL; - add_timer(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "activated addBA response timer on tid %d\n", tid); -#endif - goto exit; - -err_unlock_queue: - kfree(sta->ampdu_mlme.tid_tx[tid]); - sta->ampdu_mlme.tid_tx[tid] = NULL; - ret = -EBUSY; -err_unlock_sta: - spin_unlock_bh(&sta->lock); -exit: - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL(ieee80211_start_tx_ba_session); - -int ieee80211_stop_tx_ba_session(struct ieee80211_hw *hw, - u8 *ra, u16 tid, - enum ieee80211_back_parties initiator) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - u8 *state; - int ret = 0; - DECLARE_MAC_BUF(mac); - - if (tid >= STA_TID_NUM) - return -EINVAL; - - rcu_read_lock(); - sta = sta_info_get(local, ra); - if (!sta) { - rcu_read_unlock(); - return -ENOENT; - } - - /* check if the TID is in aggregation */ - state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->lock); - - if (*state != HT_AGG_STATE_OPERATIONAL) { - ret = -ENOENT; - goto stop_BA_exit; - } - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Tx BA session stop requested for %s tid %u\n", - print_mac(mac, ra), tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - ieee80211_stop_queue(hw, sta->tid_to_tx_q[tid]); - - *state = HT_AGG_STATE_REQ_STOP_BA_MSK | - (initiator << HT_AGG_STATE_INITIATOR_SHIFT); - - if (local->ops->ampdu_action) - ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_TX_STOP, - ra, tid, NULL); - - /* case HW denied going back to legacy */ - if (ret) { - WARN_ON(ret != -EBUSY); - *state = HT_AGG_STATE_OPERATIONAL; - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); - goto stop_BA_exit; - } - -stop_BA_exit: - spin_unlock_bh(&sta->lock); - rcu_read_unlock(); - return ret; -} -EXPORT_SYMBOL(ieee80211_stop_tx_ba_session); - -void ieee80211_start_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u16 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - u8 *state; - DECLARE_MAC_BUF(mac); - - if (tid >= STA_TID_NUM) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", - tid, STA_TID_NUM); -#endif - return; - } - - rcu_read_lock(); - sta = sta_info_get(local, ra); - if (!sta) { - rcu_read_unlock(); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Could not find station: %s\n", - print_mac(mac, ra)); -#endif - return; - } - - state = &sta->ampdu_mlme.tid_state_tx[tid]; - spin_lock_bh(&sta->lock); - - if (!(*state & HT_ADDBA_REQUESTED_MSK)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "addBA was not requested yet, state is %d\n", - *state); -#endif - spin_unlock_bh(&sta->lock); - rcu_read_unlock(); - return; - } - - WARN_ON_ONCE(*state & HT_ADDBA_DRV_READY_MSK); - - *state |= HT_ADDBA_DRV_READY_MSK; - - if (*state == HT_AGG_STATE_OPERATIONAL) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Aggregation is on for tid %d \n", tid); -#endif - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); - } - spin_unlock_bh(&sta->lock); - rcu_read_unlock(); -} -EXPORT_SYMBOL(ieee80211_start_tx_ba_cb); - -void ieee80211_stop_tx_ba_cb(struct ieee80211_hw *hw, u8 *ra, u8 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct sta_info *sta; - u8 *state; - int agg_queue; - DECLARE_MAC_BUF(mac); - - if (tid >= STA_TID_NUM) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Bad TID value: tid = %d (>= %d)\n", - tid, STA_TID_NUM); -#endif - return; - } - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Stopping Tx BA session for %s tid %d\n", - print_mac(mac, ra), tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - rcu_read_lock(); - sta = sta_info_get(local, ra); - if (!sta) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Could not find station: %s\n", - print_mac(mac, ra)); -#endif - rcu_read_unlock(); - return; - } - state = &sta->ampdu_mlme.tid_state_tx[tid]; - - /* NOTE: no need to use sta->lock in this state check, as - * ieee80211_stop_tx_ba_session will let only one stop call to - * pass through per sta/tid - */ - if ((*state & HT_AGG_STATE_REQ_STOP_BA_MSK) == 0) { -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "unexpected callback to A-MPDU stop\n"); -#endif - rcu_read_unlock(); - return; - } - - if (*state & HT_AGG_STATE_INITIATOR_MSK) - ieee80211_send_delba(sta->sdata->dev, ra, tid, - WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); - - agg_queue = sta->tid_to_tx_q[tid]; - - ieee80211_ht_agg_queue_remove(local, sta, tid, 1); - - /* We just requeued the all the frames that were in the - * removed queue, and since we might miss a softirq we do - * netif_schedule_queue. ieee80211_wake_queue is not used - * here as this queue is not necessarily stopped - */ - netif_schedule_queue(netdev_get_tx_queue(local->mdev, agg_queue)); - spin_lock_bh(&sta->lock); - *state = HT_AGG_STATE_IDLE; - sta->ampdu_mlme.addba_req_num[tid] = 0; - kfree(sta->ampdu_mlme.tid_tx[tid]); - sta->ampdu_mlme.tid_tx[tid] = NULL; - spin_unlock_bh(&sta->lock); - - rcu_read_unlock(); -} -EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb); - -void ieee80211_start_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, - const u8 *ra, u16 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_ra_tid *ra_tid; - struct sk_buff *skb = dev_alloc_skb(0); - - if (unlikely(!skb)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_WARNING "%s: Not enough memory, " - "dropping start BA session", skb->dev->name); -#endif - return; - } - ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - memcpy(&ra_tid->ra, ra, ETH_ALEN); - ra_tid->tid = tid; - - skb->pkt_type = IEEE80211_ADDBA_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); -} -EXPORT_SYMBOL(ieee80211_start_tx_ba_cb_irqsafe); - -void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_hw *hw, - const u8 *ra, u16 tid) -{ - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_ra_tid *ra_tid; - struct sk_buff *skb = dev_alloc_skb(0); - - if (unlikely(!skb)) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_WARNING "%s: Not enough memory, " - "dropping stop BA session", skb->dev->name); -#endif - return; - } - ra_tid = (struct ieee80211_ra_tid *) &skb->cb; - memcpy(&ra_tid->ra, ra, ETH_ALEN); - ra_tid->tid = tid; - - skb->pkt_type = IEEE80211_DELBA_MSG; - skb_queue_tail(&local->skb_queue, skb); - tasklet_schedule(&local->tasklet); -} -EXPORT_SYMBOL(ieee80211_stop_tx_ba_cb_irqsafe); - -static void ieee80211_set_multicast_list(struct net_device *dev) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - int allmulti, promisc, sdata_allmulti, sdata_promisc; - - allmulti = !!(dev->flags & IFF_ALLMULTI); - promisc = !!(dev->flags & IFF_PROMISC); - sdata_allmulti = !!(sdata->flags & IEEE80211_SDATA_ALLMULTI); - sdata_promisc = !!(sdata->flags & IEEE80211_SDATA_PROMISC); - - if (allmulti != sdata_allmulti) { - if (dev->flags & IFF_ALLMULTI) - atomic_inc(&local->iff_allmultis); - else - atomic_dec(&local->iff_allmultis); - sdata->flags ^= IEEE80211_SDATA_ALLMULTI; - } - - if (promisc != sdata_promisc) { - if (dev->flags & IFF_PROMISC) - atomic_inc(&local->iff_promiscs); - else - atomic_dec(&local->iff_promiscs); - sdata->flags ^= IEEE80211_SDATA_PROMISC; - } - - dev_mc_sync(local->mdev, dev); -} - -static const struct header_ops ieee80211_header_ops = { - .create = eth_header, - .parse = header_parse_80211, - .rebuild = eth_rebuild_header, - .cache = eth_header_cache, - .cache_update = eth_header_cache_update, -}; - -void ieee80211_if_setup(struct net_device *dev) -{ - ether_setup(dev); - dev->hard_start_xmit = ieee80211_subif_start_xmit; - dev->wireless_handlers = &ieee80211_iw_handler_def; - dev->set_multicast_list = ieee80211_set_multicast_list; - dev->change_mtu = ieee80211_change_mtu; - dev->open = ieee80211_open; - dev->stop = ieee80211_stop; - dev->destructor = free_netdev; -} - /* everything else */ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) @@ -987,18 +159,21 @@ int ieee80211_if_config(struct ieee80211_sub_if_data *sdata, u32 changed) if (WARN_ON(!netif_running(sdata->dev))) return 0; + if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) + return -EINVAL; + if (!local->ops->config_interface) return 0; memset(&conf, 0, sizeof(conf)); conf.changed = changed; - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { conf.bssid = sdata->u.sta.bssid; conf.ssid = sdata->u.sta.ssid; conf.ssid_len = sdata->u.sta.ssid_len; - } else if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { + } else if (sdata->vif.type == NL80211_IFTYPE_AP) { conf.bssid = sdata->dev->dev_addr; conf.ssid = sdata->u.ap.ssid; conf.ssid_len = sdata->u.ap.ssid_len; @@ -1027,7 +202,7 @@ int ieee80211_hw_config(struct ieee80211_local *local) struct ieee80211_channel *chan; int ret = 0; - if (local->sta_sw_scanning) + if (local->sw_scanning) chan = local->scan_channel; else chan = local->oper_channel; @@ -1099,8 +274,8 @@ u32 ieee80211_handle_ht(struct ieee80211_local *local, int enable_ht, ht_conf.ht_supported = 1; ht_conf.cap = req_ht_cap->cap & sband->ht_info.cap; - ht_conf.cap &= ~(IEEE80211_HT_CAP_MIMO_PS); - ht_conf.cap |= sband->ht_info.cap & IEEE80211_HT_CAP_MIMO_PS; + ht_conf.cap &= ~(IEEE80211_HT_CAP_SM_PS); + ht_conf.cap |= sband->ht_info.cap & IEEE80211_HT_CAP_SM_PS; ht_bss_conf.primary_channel = req_bss_cap->primary_channel; ht_bss_conf.bss_cap = req_bss_cap->bss_cap; ht_bss_conf.bss_op_mode = req_bss_cap->bss_op_mode; @@ -1152,6 +327,9 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; + if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) + return; + if (!changed) return; @@ -1162,10 +340,8 @@ void ieee80211_bss_info_change_notify(struct ieee80211_sub_if_data *sdata, changed); } -u32 ieee80211_reset_erp_info(struct net_device *dev) +u32 ieee80211_reset_erp_info(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - sdata->bss_conf.use_cts_prot = 0; sdata->bss_conf.use_short_preamble = 0; return BSS_CHANGED_ERP_CTS_PROT | BSS_CHANGED_ERP_PREAMBLE; @@ -1244,9 +420,10 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local, struct ieee80211_key *key, struct sk_buff *skb) { - int hdrlen, iv_len, mic_len; + unsigned int hdrlen, iv_len, mic_len; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - hdrlen = ieee80211_get_hdrlen_from_skb(skb); + hdrlen = ieee80211_hdrlen(hdr->frame_control); if (!key) goto no_key; @@ -1268,24 +445,20 @@ static void ieee80211_remove_tx_extra(struct ieee80211_local *local, goto no_key; } - if (skb->len >= mic_len && + if (skb->len >= hdrlen + mic_len && !(key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) skb_trim(skb, skb->len - mic_len); - if (skb->len >= iv_len && skb->len > hdrlen) { + if (skb->len >= hdrlen + iv_len) { memmove(skb->data + iv_len, skb->data, hdrlen); - skb_pull(skb, iv_len); + hdr = (struct ieee80211_hdr *)skb_pull(skb, iv_len); } no_key: - { - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - u16 fc = le16_to_cpu(hdr->frame_control); - if ((fc & 0x8C) == 0x88) /* QoS Control Field */ { - fc &= ~IEEE80211_STYPE_QOS_DATA; - hdr->frame_control = cpu_to_le16(fc); - memmove(skb->data + 2, skb->data, hdrlen - 2); - skb_pull(skb, 2); - } + if (ieee80211_is_data_qos(hdr->frame_control)) { + hdr->frame_control &= ~cpu_to_le16(IEEE80211_STYPE_QOS_DATA); + memmove(skb->data + IEEE80211_QOS_CTL_LEN, skb->data, + hdrlen - IEEE80211_QOS_CTL_LEN); + skb_pull(skb, IEEE80211_QOS_CTL_LEN); } } @@ -1369,6 +542,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); u16 frag, type; __le16 fc; + struct ieee80211_supported_band *sband; struct ieee80211_tx_status_rtap_hdr *rthdr; struct ieee80211_sub_if_data *sdata; struct net_device *prev_dev = NULL; @@ -1376,47 +550,48 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) rcu_read_lock(); - if (info->status.excessive_retries) { - sta = sta_info_get(local, hdr->addr1); - if (sta) { - if (test_sta_flags(sta, WLAN_STA_PS)) { - /* - * The STA is in power save mode, so assume - * that this TX packet failed because of that. - */ - ieee80211_handle_filtered_frame(local, sta, skb); - rcu_read_unlock(); - return; - } + sta = sta_info_get(local, hdr->addr1); + + if (sta) { + if (info->status.excessive_retries && + test_sta_flags(sta, WLAN_STA_PS)) { + /* + * The STA is in power save mode, so assume + * that this TX packet failed because of that. + */ + ieee80211_handle_filtered_frame(local, sta, skb); + rcu_read_unlock(); + return; } - } - fc = hdr->frame_control; + fc = hdr->frame_control; + + if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && + (ieee80211_is_data_qos(fc))) { + u16 tid, ssn; + u8 *qc; - if ((info->flags & IEEE80211_TX_STAT_AMPDU_NO_BACK) && - (ieee80211_is_data_qos(fc))) { - u16 tid, ssn; - u8 *qc; - sta = sta_info_get(local, hdr->addr1); - if (sta) { qc = ieee80211_get_qos_ctl(hdr); tid = qc[0] & 0xf; ssn = ((le16_to_cpu(hdr->seq_ctrl) + 0x10) & IEEE80211_SCTL_SEQ); - ieee80211_send_bar(sta->sdata->dev, hdr->addr1, + ieee80211_send_bar(sta->sdata, hdr->addr1, tid, ssn); } - } - if (info->flags & IEEE80211_TX_STAT_TX_FILTERED) { - sta = sta_info_get(local, hdr->addr1); - if (sta) { + if (info->flags & IEEE80211_TX_STAT_TX_FILTERED) { ieee80211_handle_filtered_frame(local, sta, skb); rcu_read_unlock(); return; + } else { + if (info->status.excessive_retries) + sta->tx_retry_failed++; + sta->tx_retry_count += info->status.retry_count; } - } else - rate_control_tx_status(local->mdev, skb); + + sband = local->hw.wiphy->bands[info->band]; + rate_control_tx_status(local, sband, sta, skb); + } rcu_read_unlock(); @@ -1504,7 +679,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) rcu_read_lock(); list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (sdata->vif.type == IEEE80211_IF_TYPE_MNTR) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { if (!netif_running(sdata->dev)) continue; @@ -1580,8 +755,6 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, local->hw.queues = 1; /* default */ - local->bridge_packets = 1; - local->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; local->fragmentation_threshold = IEEE80211_MAX_FRAG_THRESHOLD; local->short_retry_limit = 7; @@ -1592,7 +765,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, spin_lock_init(&local->key_lock); - INIT_DELAYED_WORK(&local->scan_work, ieee80211_sta_scan_work); + INIT_DELAYED_WORK(&local->scan_work, ieee80211_scan_work); sta_info_init(local); @@ -1619,7 +792,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) int result; enum ieee80211_band band; struct net_device *mdev; - struct wireless_dev *mwdev; + struct ieee80211_master_priv *mpriv; /* * generic code guarantees at least one band, @@ -1639,6 +812,13 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } } + /* if low-level driver supports AP, we also support VLAN */ + if (local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_AP)) + local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_AP_VLAN); + + /* mac80211 always supports monitor */ + local->hw.wiphy->interface_modes |= BIT(NL80211_IFTYPE_MONITOR); + result = wiphy_register(local->hw.wiphy); if (result < 0) return result; @@ -1654,16 +834,14 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (hw->queues < 4) hw->ampdu_queues = 0; - mdev = alloc_netdev_mq(sizeof(struct wireless_dev), + mdev = alloc_netdev_mq(sizeof(struct ieee80211_master_priv), "wmaster%d", ether_setup, ieee80211_num_queues(hw)); if (!mdev) goto fail_mdev_alloc; - mwdev = netdev_priv(mdev); - mdev->ieee80211_ptr = mwdev; - mwdev->wiphy = local->hw.wiphy; - + mpriv = netdev_priv(mdev); + mpriv->local = local; local->mdev = mdev; ieee80211_rx_bss_list_init(local); @@ -1745,7 +923,7 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) /* add one default STA interface */ result = ieee80211_if_add(local, "wlan%d", NULL, - IEEE80211_IF_TYPE_STA, NULL); + NL80211_IFTYPE_STATION, NULL); if (result) printk(KERN_WARNING "%s: Failed to add default virtual iface\n", wiphy_name(local->hw.wiphy)); @@ -1837,6 +1015,10 @@ static int __init ieee80211_init(void) BUILD_BUG_ON(offsetof(struct ieee80211_tx_info, driver_data) + IEEE80211_TX_INFO_DRIVER_DATA_SIZE > sizeof(skb->cb)); + ret = rc80211_minstrel_init(); + if (ret) + return ret; + ret = rc80211_pid_init(); if (ret) return ret; @@ -1849,6 +1031,7 @@ static int __init ieee80211_init(void) static void __exit ieee80211_exit(void) { rc80211_pid_exit(); + rc80211_minstrel_exit(); /* * For key todo, it'll be empty by now but the work diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 35f2f95f2fa7..8013277924f2 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -12,6 +12,9 @@ #include "ieee80211_i.h" #include "mesh.h" +#define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) +#define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) + #define PP_OFFSET 1 /* Path Selection Protocol */ #define PM_OFFSET 5 /* Path Selection Metric */ #define CC_OFFSET 9 /* Congestion Control Mode */ @@ -35,19 +38,28 @@ void ieee80211s_stop(void) kmem_cache_destroy(rm_cache); } +static void ieee80211_mesh_housekeeping_timer(unsigned long data) +{ + struct ieee80211_sub_if_data *sdata = (void *) data; + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + + ifmsh->housekeeping = true; + queue_work(local->hw.workqueue, &ifmsh->work); +} + /** * mesh_matches_local - check if the config of a mesh point matches ours * * @ie: information elements of a management frame from the mesh peer - * @dev: local mesh interface + * @sdata: local mesh subif * * This function checks if the mesh configuration of a mesh point matches the * local mesh configuration, i.e. if both nodes belong to the same mesh network. */ -bool mesh_matches_local(struct ieee802_11_elems *ie, struct net_device *dev) +bool mesh_matches_local(struct ieee802_11_elems *ie, struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *sta = &sdata->u.sta; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; /* * As support for each feature is added, check for matching @@ -59,11 +71,11 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct net_device *dev) * - MDA enabled * - Power management control on fc */ - if (sta->mesh_id_len == ie->mesh_id_len && - memcmp(sta->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 && - memcmp(sta->mesh_pp_id, ie->mesh_config + PP_OFFSET, 4) == 0 && - memcmp(sta->mesh_pm_id, ie->mesh_config + PM_OFFSET, 4) == 0 && - memcmp(sta->mesh_cc_id, ie->mesh_config + CC_OFFSET, 4) == 0) + if (ifmsh->mesh_id_len == ie->mesh_id_len && + memcmp(ifmsh->mesh_id, ie->mesh_id, ie->mesh_id_len) == 0 && + memcmp(ifmsh->mesh_pp_id, ie->mesh_config + PP_OFFSET, 4) == 0 && + memcmp(ifmsh->mesh_pm_id, ie->mesh_config + PM_OFFSET, 4) == 0 && + memcmp(ifmsh->mesh_cc_id, ie->mesh_config + CC_OFFSET, 4) == 0) return true; return false; @@ -73,10 +85,8 @@ bool mesh_matches_local(struct ieee802_11_elems *ie, struct net_device *dev) * mesh_peer_accepts_plinks - check if an mp is willing to establish peer links * * @ie: information elements of a management frame from the mesh peer - * @dev: local mesh interface */ -bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie, - struct net_device *dev) +bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie) { return (*(ie->mesh_config + CAPAB_OFFSET) & ACCEPT_PLINKS) != 0; } @@ -98,11 +108,11 @@ void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) */ free_plinks = mesh_plink_availables(sdata); - if (free_plinks != sdata->u.sta.accepting_plinks) - ieee80211_sta_timer((unsigned long) sdata); + if (free_plinks != sdata->u.mesh.accepting_plinks) + ieee80211_mesh_housekeeping_timer((unsigned long) sdata); } -void mesh_ids_set_default(struct ieee80211_if_sta *sta) +void mesh_ids_set_default(struct ieee80211_if_mesh *sta) { u8 def_id[4] = {0x00, 0x0F, 0xAC, 0xff}; @@ -111,28 +121,26 @@ void mesh_ids_set_default(struct ieee80211_if_sta *sta) memcpy(sta->mesh_cc_id, def_id, 4); } -int mesh_rmc_init(struct net_device *dev) +int mesh_rmc_init(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); int i; - sdata->u.sta.rmc = kmalloc(sizeof(struct mesh_rmc), GFP_KERNEL); - if (!sdata->u.sta.rmc) + sdata->u.mesh.rmc = kmalloc(sizeof(struct mesh_rmc), GFP_KERNEL); + if (!sdata->u.mesh.rmc) return -ENOMEM; - sdata->u.sta.rmc->idx_mask = RMC_BUCKETS - 1; + sdata->u.mesh.rmc->idx_mask = RMC_BUCKETS - 1; for (i = 0; i < RMC_BUCKETS; i++) - INIT_LIST_HEAD(&sdata->u.sta.rmc->bucket[i].list); + INIT_LIST_HEAD(&sdata->u.mesh.rmc->bucket[i].list); return 0; } -void mesh_rmc_free(struct net_device *dev) +void mesh_rmc_free(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct mesh_rmc *rmc = sdata->u.sta.rmc; + struct mesh_rmc *rmc = sdata->u.mesh.rmc; struct rmc_entry *p, *n; int i; - if (!sdata->u.sta.rmc) + if (!sdata->u.mesh.rmc) return; for (i = 0; i < RMC_BUCKETS; i++) @@ -142,7 +150,7 @@ void mesh_rmc_free(struct net_device *dev) } kfree(rmc); - sdata->u.sta.rmc = NULL; + sdata->u.mesh.rmc = NULL; } /** @@ -158,10 +166,9 @@ void mesh_rmc_free(struct net_device *dev) * it. */ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, - struct net_device *dev) + struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct mesh_rmc *rmc = sdata->u.sta.rmc; + struct mesh_rmc *rmc = sdata->u.mesh.rmc; u32 seqnum = 0; int entries = 0; u8 idx; @@ -194,10 +201,9 @@ int mesh_rmc_check(u8 *sa, struct ieee80211s_hdr *mesh_hdr, return 0; } -void mesh_mgmt_ies_add(struct sk_buff *skb, struct net_device *dev) +void mesh_mgmt_ies_add(struct sk_buff *skb, struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; struct ieee80211_supported_band *sband; u8 *pos; int len, i, rate; @@ -224,11 +230,11 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct net_device *dev) } } - pos = skb_put(skb, 2 + sdata->u.sta.mesh_id_len); + pos = skb_put(skb, 2 + sdata->u.mesh.mesh_id_len); *pos++ = WLAN_EID_MESH_ID; - *pos++ = sdata->u.sta.mesh_id_len; - if (sdata->u.sta.mesh_id_len) - memcpy(pos, sdata->u.sta.mesh_id, sdata->u.sta.mesh_id_len); + *pos++ = sdata->u.mesh.mesh_id_len; + if (sdata->u.mesh.mesh_id_len) + memcpy(pos, sdata->u.mesh.mesh_id, sdata->u.mesh.mesh_id_len); pos = skb_put(skb, 21); *pos++ = WLAN_EID_MESH_CONFIG; @@ -237,15 +243,15 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct net_device *dev) *pos++ = 1; /* Active path selection protocol ID */ - memcpy(pos, sdata->u.sta.mesh_pp_id, 4); + memcpy(pos, sdata->u.mesh.mesh_pp_id, 4); pos += 4; /* Active path selection metric ID */ - memcpy(pos, sdata->u.sta.mesh_pm_id, 4); + memcpy(pos, sdata->u.mesh.mesh_pm_id, 4); pos += 4; /* Congestion control mode identifier */ - memcpy(pos, sdata->u.sta.mesh_cc_id, 4); + memcpy(pos, sdata->u.mesh.mesh_cc_id, 4); pos += 4; /* Channel precedence: @@ -255,17 +261,17 @@ void mesh_mgmt_ies_add(struct sk_buff *skb, struct net_device *dev) pos += 4; /* Mesh capability */ - sdata->u.sta.accepting_plinks = mesh_plink_availables(sdata); - *pos++ = sdata->u.sta.accepting_plinks ? ACCEPT_PLINKS : 0x00; + sdata->u.mesh.accepting_plinks = mesh_plink_availables(sdata); + *pos++ = sdata->u.mesh.accepting_plinks ? ACCEPT_PLINKS : 0x00; *pos++ = 0x00; return; } -u32 mesh_table_hash(u8 *addr, struct net_device *dev, struct mesh_table *tbl) +u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, struct mesh_table *tbl) { /* Use last four bytes of hw addr and interface index as hash index */ - return jhash_2words(*(u32 *)(addr+2), dev->ifindex, tbl->hash_rnd) + return jhash_2words(*(u32 *)(addr+2), sdata->dev->ifindex, tbl->hash_rnd) & tbl->hash_mask; } @@ -344,10 +350,10 @@ static void ieee80211_mesh_path_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - struct ieee80211_local *local = wdev_priv(&sdata->wdev); + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct ieee80211_local *local = sdata->local; - queue_work(local->hw.workqueue, &ifsta->work); + queue_work(local->hw.workqueue, &ifmsh->work); } struct mesh_table *mesh_table_grow(struct mesh_table *tbl) @@ -399,50 +405,264 @@ int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, struct ieee80211_sub_if_data *sdata) { meshhdr->flags = 0; - meshhdr->ttl = sdata->u.sta.mshcfg.dot11MeshTTL; - put_unaligned(cpu_to_le32(sdata->u.sta.mesh_seqnum), &meshhdr->seqnum); - sdata->u.sta.mesh_seqnum++; + meshhdr->ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; + put_unaligned(cpu_to_le32(sdata->u.mesh.mesh_seqnum), &meshhdr->seqnum); + sdata->u.mesh.mesh_seqnum++; return 6; } +static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_mesh *ifmsh) +{ + bool free_plinks; + +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: running mesh housekeeping\n", + sdata->dev->name); +#endif + + ieee80211_sta_expire(sdata, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); + mesh_path_expire(sdata); + + free_plinks = mesh_plink_availables(sdata); + if (free_plinks != sdata->u.mesh.accepting_plinks) + ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); + + ifmsh->housekeeping = false; + mod_timer(&ifmsh->housekeeping_timer, + round_jiffies(jiffies + IEEE80211_MESH_HOUSEKEEPING_INTERVAL)); +} + + +void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct ieee80211_local *local = sdata->local; + + ifmsh->housekeeping = true; + queue_work(local->hw.workqueue, &ifmsh->work); + ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); +} + +void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata) +{ + del_timer_sync(&sdata->u.mesh.housekeeping_timer); + /* + * If the timer fired while we waited for it, it will have + * requeued the work. Now the work will be running again + * but will not rearm the timer again because it checks + * whether the interface is running, which, at this point, + * it no longer is. + */ + cancel_work_sync(&sdata->u.mesh.work); + + /* + * When we get here, the interface is marked down. + * Call synchronize_rcu() to wait for the RX path + * should it be using the interface and enqueuing + * frames at this very time on another CPU. + */ + synchronize_rcu(); + skb_queue_purge(&sdata->u.mesh.skb_queue); +} + +static void ieee80211_mesh_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, + u16 stype, + struct ieee80211_mgmt *mgmt, + size_t len, + struct ieee80211_rx_status *rx_status) +{ + struct ieee80211_local *local= sdata->local; + struct ieee802_11_elems elems; + struct ieee80211_channel *channel; + u64 supp_rates = 0; + size_t baselen; + int freq; + enum ieee80211_band band = rx_status->band; + + /* ignore ProbeResp to foreign address */ + if (stype == IEEE80211_STYPE_PROBE_RESP && + compare_ether_addr(mgmt->da, sdata->dev->dev_addr)) + return; + + baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; + if (baselen > len) + return; + + ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, + &elems); + + if (elems.ds_params && elems.ds_params_len == 1) + freq = ieee80211_channel_to_frequency(elems.ds_params[0]); + else + freq = rx_status->freq; + + channel = ieee80211_get_channel(local->hw.wiphy, freq); + + if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) + return; + + if (elems.mesh_id && elems.mesh_config && + mesh_matches_local(&elems, sdata)) { + supp_rates = ieee80211_sta_get_rates(local, &elems, band); + + mesh_neighbour_update(mgmt->sa, supp_rates, sdata, + mesh_peer_accepts_plinks(&elems)); + } +} + +static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len, + struct ieee80211_rx_status *rx_status) +{ + switch (mgmt->u.action.category) { + case PLINK_CATEGORY: + mesh_rx_plink_frame(sdata, mgmt, len, rx_status); + break; + case MESH_PATH_SEL_CATEGORY: + mesh_rx_path_sel_frame(sdata, mgmt, len); + break; + } +} + +static void ieee80211_mesh_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb) +{ + struct ieee80211_rx_status *rx_status; + struct ieee80211_if_mesh *ifmsh; + struct ieee80211_mgmt *mgmt; + u16 stype; + + ifmsh = &sdata->u.mesh; + + rx_status = (struct ieee80211_rx_status *) skb->cb; + mgmt = (struct ieee80211_mgmt *) skb->data; + stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE; + + switch (stype) { + case IEEE80211_STYPE_PROBE_RESP: + case IEEE80211_STYPE_BEACON: + ieee80211_mesh_rx_bcn_presp(sdata, stype, mgmt, skb->len, + rx_status); + break; + case IEEE80211_STYPE_ACTION: + ieee80211_mesh_rx_mgmt_action(sdata, mgmt, skb->len, rx_status); + break; + } + + kfree_skb(skb); +} + +static void ieee80211_mesh_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, u.mesh.work); + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct sk_buff *skb; + + if (!netif_running(sdata->dev)) + return; + + if (local->sw_scanning || local->hw_scanning) + return; + + while ((skb = skb_dequeue(&ifmsh->skb_queue))) + ieee80211_mesh_rx_queued_mgmt(sdata, skb); + + if (ifmsh->preq_queue_len && + time_after(jiffies, + ifmsh->last_preq + msecs_to_jiffies(ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval))) + mesh_path_start_discovery(sdata); + + if (ifmsh->housekeeping) + ieee80211_mesh_housekeeping(sdata, ifmsh); +} + +void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) + if (ieee80211_vif_is_mesh(&sdata->vif)) + queue_work(local->hw.workqueue, &sdata->u.mesh.work); + rcu_read_unlock(); +} + void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - - ifsta->mshcfg.dot11MeshRetryTimeout = MESH_RET_T; - ifsta->mshcfg.dot11MeshConfirmTimeout = MESH_CONF_T; - ifsta->mshcfg.dot11MeshHoldingTimeout = MESH_HOLD_T; - ifsta->mshcfg.dot11MeshMaxRetries = MESH_MAX_RETR; - ifsta->mshcfg.dot11MeshTTL = MESH_TTL; - ifsta->mshcfg.auto_open_plinks = true; - ifsta->mshcfg.dot11MeshMaxPeerLinks = + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + + INIT_WORK(&ifmsh->work, ieee80211_mesh_work); + setup_timer(&ifmsh->housekeeping_timer, + ieee80211_mesh_housekeeping_timer, + (unsigned long) sdata); + skb_queue_head_init(&sdata->u.mesh.skb_queue); + + ifmsh->mshcfg.dot11MeshRetryTimeout = MESH_RET_T; + ifmsh->mshcfg.dot11MeshConfirmTimeout = MESH_CONF_T; + ifmsh->mshcfg.dot11MeshHoldingTimeout = MESH_HOLD_T; + ifmsh->mshcfg.dot11MeshMaxRetries = MESH_MAX_RETR; + ifmsh->mshcfg.dot11MeshTTL = MESH_TTL; + ifmsh->mshcfg.auto_open_plinks = true; + ifmsh->mshcfg.dot11MeshMaxPeerLinks = MESH_MAX_ESTAB_PLINKS; - ifsta->mshcfg.dot11MeshHWMPactivePathTimeout = + ifmsh->mshcfg.dot11MeshHWMPactivePathTimeout = MESH_PATH_TIMEOUT; - ifsta->mshcfg.dot11MeshHWMPpreqMinInterval = + ifmsh->mshcfg.dot11MeshHWMPpreqMinInterval = MESH_PREQ_MIN_INT; - ifsta->mshcfg.dot11MeshHWMPnetDiameterTraversalTime = + ifmsh->mshcfg.dot11MeshHWMPnetDiameterTraversalTime = MESH_DIAM_TRAVERSAL_TIME; - ifsta->mshcfg.dot11MeshHWMPmaxPREQretries = + ifmsh->mshcfg.dot11MeshHWMPmaxPREQretries = MESH_MAX_PREQ_RETRIES; - ifsta->mshcfg.path_refresh_time = + ifmsh->mshcfg.path_refresh_time = MESH_PATH_REFRESH_TIME; - ifsta->mshcfg.min_discovery_timeout = + ifmsh->mshcfg.min_discovery_timeout = MESH_MIN_DISCOVERY_TIMEOUT; - ifsta->accepting_plinks = true; - ifsta->preq_id = 0; - ifsta->dsn = 0; - atomic_set(&ifsta->mpaths, 0); - mesh_rmc_init(sdata->dev); - ifsta->last_preq = jiffies; + ifmsh->accepting_plinks = true; + ifmsh->preq_id = 0; + ifmsh->dsn = 0; + atomic_set(&ifmsh->mpaths, 0); + mesh_rmc_init(sdata); + ifmsh->last_preq = jiffies; /* Allocate all mesh structures when creating the first mesh interface. */ if (!mesh_allocated) ieee80211s_init(); - mesh_ids_set_default(ifsta); - setup_timer(&ifsta->mesh_path_timer, + mesh_ids_set_default(ifmsh); + setup_timer(&ifmsh->mesh_path_timer, ieee80211_mesh_path_timer, (unsigned long) sdata); - INIT_LIST_HEAD(&ifsta->preq_queue.list); - spin_lock_init(&ifsta->mesh_preq_queue_lock); + INIT_LIST_HEAD(&ifmsh->preq_queue.list); + spin_lock_init(&ifmsh->mesh_preq_queue_lock); +} + +ieee80211_rx_result +ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, + struct ieee80211_rx_status *rx_status) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; + struct ieee80211_mgmt *mgmt; + u16 fc; + + if (skb->len < 24) + return RX_DROP_MONITOR; + + mgmt = (struct ieee80211_mgmt *) skb->data; + fc = le16_to_cpu(mgmt->frame_control); + + switch (fc & IEEE80211_FCTL_STYPE) { + case IEEE80211_STYPE_PROBE_RESP: + case IEEE80211_STYPE_BEACON: + case IEEE80211_STYPE_ACTION: + memcpy(skb->cb, rx_status, sizeof(*rx_status)); + skb_queue_tail(&ifmsh->skb_queue, skb); + queue_work(local->hw.workqueue, &ifmsh->work); + return RX_QUEUED; + } + + return RX_CONTINUE; } diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 7495fbb0d211..e10471c6ba42 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -47,7 +47,7 @@ enum mesh_path_flags { * struct mesh_path - mac80211 mesh path structure * * @dst: mesh path destination mac address - * @dev: mesh path device + * @sdata: mesh subif * @next_hop: mesh neighbor to which frames for this destination will be * forwarded * @timer: mesh path discovery timer @@ -64,14 +64,15 @@ enum mesh_path_flags { * @state_lock: mesh pat state lock * * - * The combination of dst and dev is unique in the mesh path table. Since the + * The combination of dst and sdata is unique in the mesh path table. Since the * next_hop STA is only protected by RCU as well, deleting the STA must also * remove/substitute the mesh_path structure and wait until that is no longer * reachable before destroying the STA completely. */ struct mesh_path { u8 dst[ETH_ALEN]; - struct net_device *dev; + u8 mpp[ETH_ALEN]; /* used for MPP or MAP */ + struct ieee80211_sub_if_data *sdata; struct sta_info *next_hop; struct timer_list timer; struct sk_buff_head frame_queue; @@ -203,67 +204,82 @@ int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); int ieee80211_new_mesh_header(struct ieee80211s_hdr *meshhdr, struct ieee80211_sub_if_data *sdata); int mesh_rmc_check(u8 *addr, struct ieee80211s_hdr *mesh_hdr, - struct net_device *dev); -bool mesh_matches_local(struct ieee802_11_elems *ie, struct net_device *dev); -void mesh_ids_set_default(struct ieee80211_if_sta *sta); -void mesh_mgmt_ies_add(struct sk_buff *skb, struct net_device *dev); -void mesh_rmc_free(struct net_device *dev); -int mesh_rmc_init(struct net_device *dev); + struct ieee80211_sub_if_data *sdata); +bool mesh_matches_local(struct ieee802_11_elems *ie, + struct ieee80211_sub_if_data *sdata); +void mesh_ids_set_default(struct ieee80211_if_mesh *mesh); +void mesh_mgmt_ies_add(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); +void mesh_rmc_free(struct ieee80211_sub_if_data *sdata); +int mesh_rmc_init(struct ieee80211_sub_if_data *sdata); void ieee80211s_init(void); void ieee80211s_stop(void); void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata); +ieee80211_rx_result +ieee80211_mesh_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, + struct ieee80211_rx_status *rx_status); +void ieee80211_start_mesh(struct ieee80211_sub_if_data *sdata); +void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata); /* Mesh paths */ -int mesh_nexthop_lookup(struct sk_buff *skb, struct net_device *dev); -void mesh_path_start_discovery(struct net_device *dev); -struct mesh_path *mesh_path_lookup(u8 *dst, struct net_device *dev); -struct mesh_path *mesh_path_lookup_by_idx(int idx, struct net_device *dev); +int mesh_nexthop_lookup(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); +void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata); +struct mesh_path *mesh_path_lookup(u8 *dst, + struct ieee80211_sub_if_data *sdata); +struct mesh_path *mpp_path_lookup(u8 *dst, + struct ieee80211_sub_if_data *sdata); +int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata); +struct mesh_path *mesh_path_lookup_by_idx(int idx, + struct ieee80211_sub_if_data *sdata); void mesh_path_fix_nexthop(struct mesh_path *mpath, struct sta_info *next_hop); -void mesh_path_expire(struct net_device *dev); -void mesh_path_flush(struct net_device *dev); -void mesh_rx_path_sel_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, - size_t len); -int mesh_path_add(u8 *dst, struct net_device *dev); +void mesh_path_expire(struct ieee80211_sub_if_data *sdata); +void mesh_path_flush(struct ieee80211_sub_if_data *sdata); +void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len); +int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata); /* Mesh plinks */ -void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct net_device *dev, - bool add); -bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie, - struct net_device *dev); +void mesh_neighbour_update(u8 *hw_addr, u64 rates, + struct ieee80211_sub_if_data *sdata, bool add); +bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); void mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); void mesh_plink_broken(struct sta_info *sta); void mesh_plink_deactivate(struct sta_info *sta); int mesh_plink_open(struct sta_info *sta); int mesh_plink_close(struct sta_info *sta); void mesh_plink_block(struct sta_info *sta); -void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, - size_t len, struct ieee80211_rx_status *rx_status); +void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee80211_rx_status *rx_status); /* Private interfaces */ /* Mesh tables */ struct mesh_table *mesh_table_alloc(int size_order); void mesh_table_free(struct mesh_table *tbl, bool free_leafs); struct mesh_table *mesh_table_grow(struct mesh_table *tbl); -u32 mesh_table_hash(u8 *addr, struct net_device *dev, struct mesh_table *tbl); +u32 mesh_table_hash(u8 *addr, struct ieee80211_sub_if_data *sdata, + struct mesh_table *tbl); /* Mesh paths */ int mesh_path_error_tx(u8 *dest, __le32 dest_dsn, u8 *ra, - struct net_device *dev); + struct ieee80211_sub_if_data *sdata); void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta); void mesh_path_flush_pending(struct mesh_path *mpath); void mesh_path_tx_pending(struct mesh_path *mpath); int mesh_pathtbl_init(void); void mesh_pathtbl_unregister(void); -int mesh_path_del(u8 *addr, struct net_device *dev); +int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata); void mesh_path_timer(unsigned long data); void mesh_path_flush_by_nexthop(struct sta_info *sta); -void mesh_path_discard_frame(struct sk_buff *skb, struct net_device *dev); +void mesh_path_discard_frame(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata); #ifdef CONFIG_MAC80211_MESH extern int mesh_allocated; static inline int mesh_plink_free_count(struct ieee80211_sub_if_data *sdata) { - return sdata->u.sta.mshcfg.dot11MeshMaxPeerLinks - - atomic_read(&sdata->u.sta.mshstats.estab_plinks); + return sdata->u.mesh.mshcfg.dot11MeshMaxPeerLinks - + atomic_read(&sdata->u.mesh.mshstats.estab_plinks); } static inline bool mesh_plink_availables(struct ieee80211_sub_if_data *sdata) @@ -281,8 +297,12 @@ static inline void mesh_path_activate(struct mesh_path *mpath) for (i = 0; i <= x->hash_mask; i++) \ hlist_for_each_entry_rcu(node, p, &x->hash_buckets[i], list) +void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local); + #else #define mesh_allocated 0 +static inline void +ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {} #endif #endif /* IEEE80211S_H */ diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 08aca446ca01..501c7831adb4 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -64,14 +64,14 @@ static inline u32 u32_field_get(u8 *preq_elem, int offset, bool ae) #define DSN_LT(x, y) ((long) (x) - (long) (y) < 0) #define net_traversal_jiffies(s) \ - msecs_to_jiffies(s->u.sta.mshcfg.dot11MeshHWMPnetDiameterTraversalTime) + msecs_to_jiffies(s->u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime) #define default_lifetime(s) \ - MSEC_TO_TU(s->u.sta.mshcfg.dot11MeshHWMPactivePathTimeout) + MSEC_TO_TU(s->u.mesh.mshcfg.dot11MeshHWMPactivePathTimeout) #define min_preq_int_jiff(s) \ - (msecs_to_jiffies(s->u.sta.mshcfg.dot11MeshHWMPpreqMinInterval)) -#define max_preq_retries(s) (s->u.sta.mshcfg.dot11MeshHWMPmaxPREQretries) + (msecs_to_jiffies(s->u.mesh.mshcfg.dot11MeshHWMPpreqMinInterval)) +#define max_preq_retries(s) (s->u.mesh.mshcfg.dot11MeshHWMPmaxPREQretries) #define disc_timeout_jiff(s) \ - msecs_to_jiffies(sdata->u.sta.mshcfg.min_discovery_timeout) + msecs_to_jiffies(sdata->u.mesh.mshcfg.min_discovery_timeout) enum mpath_frame_type { MPATH_PREQ = 0, @@ -82,9 +82,9 @@ enum mpath_frame_type { static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, u8 *orig_addr, __le32 orig_dsn, u8 dst_flags, u8 *dst, __le32 dst_dsn, u8 *da, u8 hop_count, u8 ttl, __le32 lifetime, - __le32 metric, __le32 preq_id, struct net_device *dev) + __le32 metric, __le32 preq_id, struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); struct ieee80211_mgmt *mgmt; u8 *pos; @@ -99,11 +99,11 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 25 + sizeof(mgmt->u.action.u.mesh_action)); memset(mgmt, 0, 25 + sizeof(mgmt->u.action.u.mesh_action)); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_ACTION); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ mgmt->u.action.category = MESH_PATH_SEL_CATEGORY; mgmt->u.action.u.mesh_action.action_code = action; @@ -149,7 +149,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, pos += ETH_ALEN; memcpy(pos, &dst_dsn, 4); - ieee80211_sta_tx(dev, skb, 0); + ieee80211_tx_skb(sdata, skb, 0); return 0; } @@ -161,9 +161,9 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, * @ra: node this frame is addressed to */ int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra, - struct net_device *dev) + struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); struct ieee80211_mgmt *mgmt; u8 *pos; @@ -178,11 +178,11 @@ int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 25 + sizeof(mgmt->u.action.u.mesh_action)); memset(mgmt, 0, 25 + sizeof(mgmt->u.action.u.mesh_action)); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_ACTION); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); memcpy(mgmt->da, ra, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ mgmt->u.action.category = MESH_PATH_SEL_CATEGORY; mgmt->u.action.u.mesh_action.action_code = MPATH_PERR; @@ -198,7 +198,7 @@ int mesh_path_error_tx(u8 *dst, __le32 dst_dsn, u8 *ra, pos += ETH_ALEN; memcpy(pos, &dst_dsn, 4); - ieee80211_sta_tx(dev, skb, 0); + ieee80211_tx_skb(sdata, skb, 0); return 0; } @@ -223,7 +223,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, /* bitrate is in units of 100 Kbps, while we need rate in units of * 1Mbps. This will be corrected on tx_time computation. */ - rate = sband->bitrates[sta->txrate_idx].bitrate; + rate = sband->bitrates[sta->last_txrate_idx].bitrate; tx_time = (device_constant + 10 * test_frame_len / rate); estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err)); result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ; @@ -233,7 +233,7 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, /** * hwmp_route_info_get - Update routing info to originator and transmitter * - * @dev: local mesh interface + * @sdata: local mesh subif * @mgmt: mesh management frame * @hwmp_ie: hwmp information element (PREP or PREQ) * @@ -246,11 +246,11 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, * Notes: this function is the only place (besides user-provided info) where * path routing information is updated. */ -static u32 hwmp_route_info_get(struct net_device *dev, +static u32 hwmp_route_info_get(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, u8 *hwmp_ie) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct mesh_path *mpath; struct sta_info *sta; bool fresh_info; @@ -301,14 +301,14 @@ static u32 hwmp_route_info_get(struct net_device *dev, new_metric = MAX_METRIC; exp_time = TU_TO_EXP_TIME(orig_lifetime); - if (memcmp(orig_addr, dev->dev_addr, ETH_ALEN) == 0) { + if (memcmp(orig_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) { /* This MP is the originator, we are not interested in this * frame, except for updating transmitter's path info. */ process = false; fresh_info = false; } else { - mpath = mesh_path_lookup(orig_addr, dev); + mpath = mesh_path_lookup(orig_addr, sdata); if (mpath) { spin_lock_bh(&mpath->state_lock); if (mpath->flags & MESH_PATH_FIXED) @@ -324,8 +324,8 @@ static u32 hwmp_route_info_get(struct net_device *dev, } } } else { - mesh_path_add(orig_addr, dev); - mpath = mesh_path_lookup(orig_addr, dev); + mesh_path_add(orig_addr, sdata); + mpath = mesh_path_lookup(orig_addr, sdata); if (!mpath) { rcu_read_unlock(); return 0; @@ -357,7 +357,7 @@ static u32 hwmp_route_info_get(struct net_device *dev, else { fresh_info = true; - mpath = mesh_path_lookup(ta, dev); + mpath = mesh_path_lookup(ta, sdata); if (mpath) { spin_lock_bh(&mpath->state_lock); if ((mpath->flags & MESH_PATH_FIXED) || @@ -365,8 +365,8 @@ static u32 hwmp_route_info_get(struct net_device *dev, (last_hop_metric > mpath->metric))) fresh_info = false; } else { - mesh_path_add(ta, dev); - mpath = mesh_path_lookup(ta, dev); + mesh_path_add(ta, sdata); + mpath = mesh_path_lookup(ta, sdata); if (!mpath) { rcu_read_unlock(); return 0; @@ -392,11 +392,10 @@ static u32 hwmp_route_info_get(struct net_device *dev, return process ? new_metric : 0; } -static void hwmp_preq_frame_process(struct net_device *dev, +static void hwmp_preq_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, u8 *preq_elem, u32 metric) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_path *mpath; u8 *dst_addr, *orig_addr; u8 dst_flags, ttl; @@ -411,19 +410,19 @@ static void hwmp_preq_frame_process(struct net_device *dev, orig_dsn = PREQ_IE_ORIG_DSN(preq_elem); dst_flags = PREQ_IE_DST_F(preq_elem); - if (memcmp(dst_addr, dev->dev_addr, ETH_ALEN) == 0) { + if (memcmp(dst_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) { forward = false; reply = true; metric = 0; - if (time_after(jiffies, ifsta->last_dsn_update + + if (time_after(jiffies, ifmsh->last_dsn_update + net_traversal_jiffies(sdata)) || - time_before(jiffies, ifsta->last_dsn_update)) { - dst_dsn = ++ifsta->dsn; - ifsta->last_dsn_update = jiffies; + time_before(jiffies, ifmsh->last_dsn_update)) { + dst_dsn = ++ifmsh->dsn; + ifmsh->last_dsn_update = jiffies; } } else { rcu_read_lock(); - mpath = mesh_path_lookup(dst_addr, dev); + mpath = mesh_path_lookup(dst_addr, sdata); if (mpath) { if ((!(mpath->flags & MESH_PATH_DSN_VALID)) || DSN_LT(mpath->dsn, dst_dsn)) { @@ -445,15 +444,15 @@ static void hwmp_preq_frame_process(struct net_device *dev, if (reply) { lifetime = PREQ_IE_LIFETIME(preq_elem); - ttl = ifsta->mshcfg.dot11MeshTTL; + ttl = ifmsh->mshcfg.dot11MeshTTL; if (ttl != 0) mesh_path_sel_frame_tx(MPATH_PREP, 0, dst_addr, cpu_to_le32(dst_dsn), 0, orig_addr, cpu_to_le32(orig_dsn), mgmt->sa, 0, ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), - 0, dev); + 0, sdata); else - ifsta->mshstats.dropped_frames_ttl++; + ifmsh->mshstats.dropped_frames_ttl++; } if (forward) { @@ -463,7 +462,7 @@ static void hwmp_preq_frame_process(struct net_device *dev, ttl = PREQ_IE_TTL(preq_elem); lifetime = PREQ_IE_LIFETIME(preq_elem); if (ttl <= 1) { - ifsta->mshstats.dropped_frames_ttl++; + ifmsh->mshstats.dropped_frames_ttl++; return; } --ttl; @@ -472,20 +471,19 @@ static void hwmp_preq_frame_process(struct net_device *dev, hopcount = PREQ_IE_HOPCOUNT(preq_elem) + 1; mesh_path_sel_frame_tx(MPATH_PREQ, flags, orig_addr, cpu_to_le32(orig_dsn), dst_flags, dst_addr, - cpu_to_le32(dst_dsn), dev->broadcast, + cpu_to_le32(dst_dsn), sdata->dev->broadcast, hopcount, ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), cpu_to_le32(preq_id), - dev); - ifsta->mshstats.fwded_frames++; + sdata); + ifmsh->mshstats.fwded_frames++; } } -static void hwmp_prep_frame_process(struct net_device *dev, +static void hwmp_prep_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, u8 *prep_elem, u32 metric) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct mesh_path *mpath; u8 *dst_addr, *orig_addr; u8 ttl, hopcount, flags; @@ -499,18 +497,18 @@ static void hwmp_prep_frame_process(struct net_device *dev, * replies */ dst_addr = PREP_IE_DST_ADDR(prep_elem); - if (memcmp(dst_addr, dev->dev_addr, ETH_ALEN) == 0) + if (memcmp(dst_addr, sdata->dev->dev_addr, ETH_ALEN) == 0) /* destination, no forwarding required */ return; ttl = PREP_IE_TTL(prep_elem); if (ttl <= 1) { - sdata->u.sta.mshstats.dropped_frames_ttl++; + sdata->u.mesh.mshstats.dropped_frames_ttl++; return; } rcu_read_lock(); - mpath = mesh_path_lookup(dst_addr, dev); + mpath = mesh_path_lookup(dst_addr, sdata); if (mpath) spin_lock_bh(&mpath->state_lock); else @@ -519,7 +517,7 @@ static void hwmp_prep_frame_process(struct net_device *dev, spin_unlock_bh(&mpath->state_lock); goto fail; } - memcpy(next_hop, mpath->next_hop->addr, ETH_ALEN); + memcpy(next_hop, mpath->next_hop->sta.addr, ETH_ALEN); spin_unlock_bh(&mpath->state_lock); --ttl; flags = PREP_IE_FLAGS(prep_elem); @@ -531,20 +529,20 @@ static void hwmp_prep_frame_process(struct net_device *dev, mesh_path_sel_frame_tx(MPATH_PREP, flags, orig_addr, cpu_to_le32(orig_dsn), 0, dst_addr, - cpu_to_le32(dst_dsn), mpath->next_hop->addr, hopcount, ttl, + cpu_to_le32(dst_dsn), mpath->next_hop->sta.addr, hopcount, ttl, cpu_to_le32(lifetime), cpu_to_le32(metric), - 0, dev); + 0, sdata); rcu_read_unlock(); - sdata->u.sta.mshstats.fwded_frames++; + sdata->u.mesh.mshstats.fwded_frames++; return; fail: rcu_read_unlock(); - sdata->u.sta.mshstats.dropped_frames_no_route++; + sdata->u.mesh.mshstats.dropped_frames_no_route++; return; } -static void hwmp_perr_frame_process(struct net_device *dev, +static void hwmp_perr_frame_process(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, u8 *perr_elem) { struct mesh_path *mpath; @@ -555,18 +553,18 @@ static void hwmp_perr_frame_process(struct net_device *dev, dst_addr = PERR_IE_DST_ADDR(perr_elem); dst_dsn = PERR_IE_DST_DSN(perr_elem); rcu_read_lock(); - mpath = mesh_path_lookup(dst_addr, dev); + mpath = mesh_path_lookup(dst_addr, sdata); if (mpath) { spin_lock_bh(&mpath->state_lock); if (mpath->flags & MESH_PATH_ACTIVE && - memcmp(ta, mpath->next_hop->addr, ETH_ALEN) == 0 && + memcmp(ta, mpath->next_hop->sta.addr, ETH_ALEN) == 0 && (!(mpath->flags & MESH_PATH_DSN_VALID) || DSN_GT(dst_dsn, mpath->dsn))) { mpath->flags &= ~MESH_PATH_ACTIVE; mpath->dsn = dst_dsn; spin_unlock_bh(&mpath->state_lock); mesh_path_error_tx(dst_addr, cpu_to_le32(dst_dsn), - dev->broadcast, dev); + sdata->dev->broadcast, sdata); } else spin_unlock_bh(&mpath->state_lock); } @@ -575,7 +573,7 @@ static void hwmp_perr_frame_process(struct net_device *dev, -void mesh_rx_path_sel_frame(struct net_device *dev, +void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { @@ -583,6 +581,10 @@ void mesh_rx_path_sel_frame(struct net_device *dev, size_t baselen; u32 last_hop_metric; + /* need action_code */ + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + return; + baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt; ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable, len - baselen, &elems); @@ -592,25 +594,25 @@ void mesh_rx_path_sel_frame(struct net_device *dev, if (!elems.preq || elems.preq_len != 37) /* Right now we support just 1 destination and no AE */ return; - last_hop_metric = hwmp_route_info_get(dev, mgmt, elems.preq); + last_hop_metric = hwmp_route_info_get(sdata, mgmt, elems.preq); if (!last_hop_metric) return; - hwmp_preq_frame_process(dev, mgmt, elems.preq, last_hop_metric); + hwmp_preq_frame_process(sdata, mgmt, elems.preq, last_hop_metric); break; case MPATH_PREP: if (!elems.prep || elems.prep_len != 31) /* Right now we support no AE */ return; - last_hop_metric = hwmp_route_info_get(dev, mgmt, elems.prep); + last_hop_metric = hwmp_route_info_get(sdata, mgmt, elems.prep); if (!last_hop_metric) return; - hwmp_prep_frame_process(dev, mgmt, elems.prep, last_hop_metric); + hwmp_prep_frame_process(sdata, mgmt, elems.prep, last_hop_metric); break; case MPATH_PERR: if (!elems.perr || elems.perr_len != 12) /* Right now we support only one destination per PERR */ return; - hwmp_perr_frame_process(dev, mgmt, elems.perr); + hwmp_perr_frame_process(sdata, mgmt, elems.perr); default: return; } @@ -628,9 +630,8 @@ void mesh_rx_path_sel_frame(struct net_device *dev, */ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) { - struct ieee80211_sub_if_data *sdata = - IEEE80211_DEV_TO_SUB_IF(mpath->dev); - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_sub_if_data *sdata = mpath->sdata; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_preq_queue *preq_node; preq_node = kmalloc(sizeof(struct mesh_preq_queue), GFP_KERNEL); @@ -639,9 +640,9 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) return; } - spin_lock(&ifsta->mesh_preq_queue_lock); - if (ifsta->preq_queue_len == MAX_PREQ_QUEUE_LEN) { - spin_unlock(&ifsta->mesh_preq_queue_lock); + spin_lock(&ifmsh->mesh_preq_queue_lock); + if (ifmsh->preq_queue_len == MAX_PREQ_QUEUE_LEN) { + spin_unlock(&ifmsh->mesh_preq_queue_lock); kfree(preq_node); if (printk_ratelimit()) printk(KERN_DEBUG "Mesh HWMP: PREQ node queue full\n"); @@ -651,55 +652,53 @@ static void mesh_queue_preq(struct mesh_path *mpath, u8 flags) memcpy(preq_node->dst, mpath->dst, ETH_ALEN); preq_node->flags = flags; - list_add_tail(&preq_node->list, &ifsta->preq_queue.list); - ++ifsta->preq_queue_len; - spin_unlock(&ifsta->mesh_preq_queue_lock); + list_add_tail(&preq_node->list, &ifmsh->preq_queue.list); + ++ifmsh->preq_queue_len; + spin_unlock(&ifmsh->mesh_preq_queue_lock); - if (time_after(jiffies, ifsta->last_preq + min_preq_int_jiff(sdata))) - queue_work(sdata->local->hw.workqueue, &ifsta->work); + if (time_after(jiffies, ifmsh->last_preq + min_preq_int_jiff(sdata))) + queue_work(sdata->local->hw.workqueue, &ifmsh->work); - else if (time_before(jiffies, ifsta->last_preq)) { + else if (time_before(jiffies, ifmsh->last_preq)) { /* avoid long wait if did not send preqs for a long time * and jiffies wrapped around */ - ifsta->last_preq = jiffies - min_preq_int_jiff(sdata) - 1; - queue_work(sdata->local->hw.workqueue, &ifsta->work); + ifmsh->last_preq = jiffies - min_preq_int_jiff(sdata) - 1; + queue_work(sdata->local->hw.workqueue, &ifmsh->work); } else - mod_timer(&ifsta->mesh_path_timer, ifsta->last_preq + + mod_timer(&ifmsh->mesh_path_timer, ifmsh->last_preq + min_preq_int_jiff(sdata)); } /** * mesh_path_start_discovery - launch a path discovery from the PREQ queue * - * @dev: local mesh interface + * @sdata: local mesh subif */ -void mesh_path_start_discovery(struct net_device *dev) +void mesh_path_start_discovery(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = - IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *ifsta = &sdata->u.sta; + struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct mesh_preq_queue *preq_node; struct mesh_path *mpath; u8 ttl, dst_flags; u32 lifetime; - spin_lock(&ifsta->mesh_preq_queue_lock); - if (!ifsta->preq_queue_len || - time_before(jiffies, ifsta->last_preq + + spin_lock(&ifmsh->mesh_preq_queue_lock); + if (!ifmsh->preq_queue_len || + time_before(jiffies, ifmsh->last_preq + min_preq_int_jiff(sdata))) { - spin_unlock(&ifsta->mesh_preq_queue_lock); + spin_unlock(&ifmsh->mesh_preq_queue_lock); return; } - preq_node = list_first_entry(&ifsta->preq_queue.list, + preq_node = list_first_entry(&ifmsh->preq_queue.list, struct mesh_preq_queue, list); list_del(&preq_node->list); - --ifsta->preq_queue_len; - spin_unlock(&ifsta->mesh_preq_queue_lock); + --ifmsh->preq_queue_len; + spin_unlock(&ifmsh->mesh_preq_queue_lock); rcu_read_lock(); - mpath = mesh_path_lookup(preq_node->dst, dev); + mpath = mesh_path_lookup(preq_node->dst, sdata); if (!mpath) goto enddiscovery; @@ -721,18 +720,18 @@ void mesh_path_start_discovery(struct net_device *dev) goto enddiscovery; } - ifsta->last_preq = jiffies; + ifmsh->last_preq = jiffies; - if (time_after(jiffies, ifsta->last_dsn_update + + if (time_after(jiffies, ifmsh->last_dsn_update + net_traversal_jiffies(sdata)) || - time_before(jiffies, ifsta->last_dsn_update)) { - ++ifsta->dsn; - sdata->u.sta.last_dsn_update = jiffies; + time_before(jiffies, ifmsh->last_dsn_update)) { + ++ifmsh->dsn; + sdata->u.mesh.last_dsn_update = jiffies; } lifetime = default_lifetime(sdata); - ttl = sdata->u.sta.mshcfg.dot11MeshTTL; + ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; if (ttl == 0) { - sdata->u.sta.mshstats.dropped_frames_ttl++; + sdata->u.mesh.mshstats.dropped_frames_ttl++; spin_unlock_bh(&mpath->state_lock); goto enddiscovery; } @@ -743,11 +742,11 @@ void mesh_path_start_discovery(struct net_device *dev) dst_flags = MP_F_RF; spin_unlock_bh(&mpath->state_lock); - mesh_path_sel_frame_tx(MPATH_PREQ, 0, dev->dev_addr, - cpu_to_le32(ifsta->dsn), dst_flags, mpath->dst, - cpu_to_le32(mpath->dsn), dev->broadcast, 0, + mesh_path_sel_frame_tx(MPATH_PREQ, 0, sdata->dev->dev_addr, + cpu_to_le32(ifmsh->dsn), dst_flags, mpath->dst, + cpu_to_le32(mpath->dsn), sdata->dev->broadcast, 0, ttl, cpu_to_le32(lifetime), 0, - cpu_to_le32(ifsta->preq_id++), dev); + cpu_to_le32(ifmsh->preq_id++), sdata); mod_timer(&mpath->timer, jiffies + mpath->discovery_timeout); enddiscovery: @@ -759,7 +758,7 @@ enddiscovery: * ieee80211s_lookup_nexthop - put the appropriate next hop on a mesh frame * * @skb: 802.11 frame to be sent - * @dev: network device the frame will be sent through + * @sdata: network subif the frame will be sent through * @fwd_frame: true if this frame was originally from a different host * * Returns: 0 if the next hop was found. Nonzero otherwise. If no next hop is @@ -767,9 +766,9 @@ enddiscovery: * sent when the path is resolved. This means the caller must not free the skb * in this case. */ -int mesh_nexthop_lookup(struct sk_buff *skb, struct net_device *dev) +int mesh_nexthop_lookup(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct sk_buff *skb_to_free = NULL; struct mesh_path *mpath; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; @@ -777,14 +776,14 @@ int mesh_nexthop_lookup(struct sk_buff *skb, struct net_device *dev) int err = 0; rcu_read_lock(); - mpath = mesh_path_lookup(dst_addr, dev); + mpath = mesh_path_lookup(dst_addr, sdata); if (!mpath) { - mesh_path_add(dst_addr, dev); - mpath = mesh_path_lookup(dst_addr, dev); + mesh_path_add(dst_addr, sdata); + mpath = mesh_path_lookup(dst_addr, sdata); if (!mpath) { dev_kfree_skb(skb); - sdata->u.sta.mshstats.dropped_frames_no_route++; + sdata->u.mesh.mshstats.dropped_frames_no_route++; err = -ENOSPC; goto endlookup; } @@ -792,14 +791,15 @@ int mesh_nexthop_lookup(struct sk_buff *skb, struct net_device *dev) if (mpath->flags & MESH_PATH_ACTIVE) { if (time_after(jiffies, mpath->exp_time - - msecs_to_jiffies(sdata->u.sta.mshcfg.path_refresh_time)) - && !memcmp(dev->dev_addr, hdr->addr4, ETH_ALEN) + msecs_to_jiffies(sdata->u.mesh.mshcfg.path_refresh_time)) + && !memcmp(sdata->dev->dev_addr, hdr->addr4, + ETH_ALEN) && !(mpath->flags & MESH_PATH_RESOLVING) && !(mpath->flags & MESH_PATH_FIXED)) { mesh_queue_preq(mpath, PREQ_Q_F_START | PREQ_Q_F_REFRESH); } - memcpy(hdr->addr1, mpath->next_hop->addr, + memcpy(hdr->addr1, mpath->next_hop->sta.addr, ETH_ALEN); } else { if (!(mpath->flags & MESH_PATH_RESOLVING)) { @@ -815,7 +815,7 @@ int mesh_nexthop_lookup(struct sk_buff *skb, struct net_device *dev) skb_queue_tail(&mpath->frame_queue, skb); if (skb_to_free) - mesh_path_discard_frame(skb_to_free, dev); + mesh_path_discard_frame(skb_to_free, sdata); err = -ENOENT; } @@ -835,7 +835,7 @@ void mesh_path_timer(unsigned long data) if (!mpath) goto endmpathtimer; spin_lock_bh(&mpath->state_lock); - sdata = IEEE80211_DEV_TO_SUB_IF(mpath->dev); + sdata = mpath->sdata; if (mpath->flags & MESH_PATH_RESOLVED || (!(mpath->flags & MESH_PATH_RESOLVING))) mpath->flags &= ~(MESH_PATH_RESOLVING | MESH_PATH_RESOLVED); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 838ee60492ad..3c72557df45a 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -9,7 +9,6 @@ #include <linux/etherdevice.h> #include <linux/list.h> -#include <linux/netdevice.h> #include <linux/random.h> #include <linux/spinlock.h> #include <linux/string.h> @@ -37,6 +36,7 @@ struct mpath_node { }; static struct mesh_table *mesh_paths; +static struct mesh_table *mpp_paths; /* Store paths for MPP&MAP */ /* This lock will have the grow table function as writer and add / delete nodes * as readers. When reading the table (i.e. doing lookups) we are well protected @@ -62,13 +62,13 @@ void mesh_path_assign_nexthop(struct mesh_path *mpath, struct sta_info *sta) /** * mesh_path_lookup - look up a path in the mesh path table * @dst: hardware address (ETH_ALEN length) of destination - * @dev: local interface + * @sdata: local subif * * Returns: pointer to the mesh path structure, or NULL if not found * * Locking: must be called within a read rcu section. */ -struct mesh_path *mesh_path_lookup(u8 *dst, struct net_device *dev) +struct mesh_path *mesh_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; struct hlist_node *n; @@ -78,10 +78,10 @@ struct mesh_path *mesh_path_lookup(u8 *dst, struct net_device *dev) tbl = rcu_dereference(mesh_paths); - bucket = &tbl->hash_buckets[mesh_table_hash(dst, dev, tbl)]; + bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)]; hlist_for_each_entry_rcu(node, n, bucket, list) { mpath = node->mpath; - if (mpath->dev == dev && + if (mpath->sdata == sdata && memcmp(dst, mpath->dst, ETH_ALEN) == 0) { if (MPATH_EXPIRED(mpath)) { spin_lock_bh(&mpath->state_lock); @@ -95,16 +95,44 @@ struct mesh_path *mesh_path_lookup(u8 *dst, struct net_device *dev) return NULL; } +struct mesh_path *mpp_path_lookup(u8 *dst, struct ieee80211_sub_if_data *sdata) +{ + struct mesh_path *mpath; + struct hlist_node *n; + struct hlist_head *bucket; + struct mesh_table *tbl; + struct mpath_node *node; + + tbl = rcu_dereference(mpp_paths); + + bucket = &tbl->hash_buckets[mesh_table_hash(dst, sdata, tbl)]; + hlist_for_each_entry_rcu(node, n, bucket, list) { + mpath = node->mpath; + if (mpath->sdata == sdata && + memcmp(dst, mpath->dst, ETH_ALEN) == 0) { + if (MPATH_EXPIRED(mpath)) { + spin_lock_bh(&mpath->state_lock); + if (MPATH_EXPIRED(mpath)) + mpath->flags &= ~MESH_PATH_ACTIVE; + spin_unlock_bh(&mpath->state_lock); + } + return mpath; + } + } + return NULL; +} + + /** * mesh_path_lookup_by_idx - look up a path in the mesh path table by its index * @idx: index - * @dev: local interface, or NULL for all entries + * @sdata: local subif, or NULL for all entries * * Returns: pointer to the mesh path structure, or NULL if not found. * * Locking: must be called within a read rcu section. */ -struct mesh_path *mesh_path_lookup_by_idx(int idx, struct net_device *dev) +struct mesh_path *mesh_path_lookup_by_idx(int idx, struct ieee80211_sub_if_data *sdata) { struct mpath_node *node; struct hlist_node *p; @@ -112,7 +140,7 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct net_device *dev) int j = 0; for_each_mesh_entry(mesh_paths, p, node, i) { - if (dev && node->mpath->dev != dev) + if (sdata && node->mpath->sdata != sdata) continue; if (j++ == idx) { if (MPATH_EXPIRED(node->mpath)) { @@ -131,15 +159,14 @@ struct mesh_path *mesh_path_lookup_by_idx(int idx, struct net_device *dev) /** * mesh_path_add - allocate and add a new path to the mesh path table * @addr: destination address of the path (ETH_ALEN length) - * @dev: local interface + * @sdata: local subif * * Returns: 0 on sucess * * State: the initial state of the new path is set to 0 */ -int mesh_path_add(u8 *dst, struct net_device *dev) +int mesh_path_add(u8 *dst, struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct mesh_path *mpath, *new_mpath; struct mpath_node *node, *new_node; struct hlist_head *bucket; @@ -148,14 +175,14 @@ int mesh_path_add(u8 *dst, struct net_device *dev) int err = 0; u32 hash_idx; - if (memcmp(dst, dev->dev_addr, ETH_ALEN) == 0) + if (memcmp(dst, sdata->dev->dev_addr, ETH_ALEN) == 0) /* never add ourselves as neighbours */ return -ENOTSUPP; if (is_multicast_ether_addr(dst)) return -ENOTSUPP; - if (atomic_add_unless(&sdata->u.sta.mpaths, 1, MESH_MAX_MPATHS) == 0) + if (atomic_add_unless(&sdata->u.mesh.mpaths, 1, MESH_MAX_MPATHS) == 0) return -ENOSPC; err = -ENOMEM; @@ -169,7 +196,7 @@ int mesh_path_add(u8 *dst, struct net_device *dev) read_lock(&pathtbl_resize_lock); memcpy(new_mpath->dst, dst, ETH_ALEN); - new_mpath->dev = dev; + new_mpath->sdata = sdata; new_mpath->flags = 0; skb_queue_head_init(&new_mpath->frame_queue); new_node->mpath = new_mpath; @@ -179,7 +206,7 @@ int mesh_path_add(u8 *dst, struct net_device *dev) spin_lock_init(&new_mpath->state_lock); init_timer(&new_mpath->timer); - hash_idx = mesh_table_hash(dst, dev, mesh_paths); + hash_idx = mesh_table_hash(dst, sdata, mesh_paths); bucket = &mesh_paths->hash_buckets[hash_idx]; spin_lock(&mesh_paths->hashwlock[hash_idx]); @@ -187,7 +214,7 @@ int mesh_path_add(u8 *dst, struct net_device *dev) err = -EEXIST; hlist_for_each_entry(node, n, bucket, list) { mpath = node->mpath; - if (mpath->dev == dev && memcmp(dst, mpath->dst, ETH_ALEN) == 0) + if (mpath->sdata == sdata && memcmp(dst, mpath->dst, ETH_ALEN) == 0) goto err_exists; } @@ -223,7 +250,92 @@ err_exists: err_node_alloc: kfree(new_mpath); err_path_alloc: - atomic_dec(&sdata->u.sta.mpaths); + atomic_dec(&sdata->u.mesh.mpaths); + return err; +} + + +int mpp_path_add(u8 *dst, u8 *mpp, struct ieee80211_sub_if_data *sdata) +{ + struct mesh_path *mpath, *new_mpath; + struct mpath_node *node, *new_node; + struct hlist_head *bucket; + struct hlist_node *n; + int grow = 0; + int err = 0; + u32 hash_idx; + + + if (memcmp(dst, sdata->dev->dev_addr, ETH_ALEN) == 0) + /* never add ourselves as neighbours */ + return -ENOTSUPP; + + if (is_multicast_ether_addr(dst)) + return -ENOTSUPP; + + err = -ENOMEM; + new_mpath = kzalloc(sizeof(struct mesh_path), GFP_KERNEL); + if (!new_mpath) + goto err_path_alloc; + + new_node = kmalloc(sizeof(struct mpath_node), GFP_KERNEL); + if (!new_node) + goto err_node_alloc; + + read_lock(&pathtbl_resize_lock); + memcpy(new_mpath->dst, dst, ETH_ALEN); + memcpy(new_mpath->mpp, mpp, ETH_ALEN); + new_mpath->sdata = sdata; + new_mpath->flags = 0; + skb_queue_head_init(&new_mpath->frame_queue); + new_node->mpath = new_mpath; + new_mpath->exp_time = jiffies; + spin_lock_init(&new_mpath->state_lock); + + hash_idx = mesh_table_hash(dst, sdata, mpp_paths); + bucket = &mpp_paths->hash_buckets[hash_idx]; + + spin_lock(&mpp_paths->hashwlock[hash_idx]); + + err = -EEXIST; + hlist_for_each_entry(node, n, bucket, list) { + mpath = node->mpath; + if (mpath->sdata == sdata && memcmp(dst, mpath->dst, ETH_ALEN) == 0) + goto err_exists; + } + + hlist_add_head_rcu(&new_node->list, bucket); + if (atomic_inc_return(&mpp_paths->entries) >= + mpp_paths->mean_chain_len * (mpp_paths->hash_mask + 1)) + grow = 1; + + spin_unlock(&mpp_paths->hashwlock[hash_idx]); + read_unlock(&pathtbl_resize_lock); + if (grow) { + struct mesh_table *oldtbl, *newtbl; + + write_lock(&pathtbl_resize_lock); + oldtbl = mpp_paths; + newtbl = mesh_table_grow(mpp_paths); + if (!newtbl) { + write_unlock(&pathtbl_resize_lock); + return 0; + } + rcu_assign_pointer(mpp_paths, newtbl); + write_unlock(&pathtbl_resize_lock); + + synchronize_rcu(); + mesh_table_free(oldtbl, false); + } + return 0; + +err_exists: + spin_unlock(&mpp_paths->hashwlock[hash_idx]); + read_unlock(&pathtbl_resize_lock); + kfree(new_node); +err_node_alloc: + kfree(new_mpath); +err_path_alloc: return err; } @@ -241,7 +353,7 @@ void mesh_plink_broken(struct sta_info *sta) struct mesh_path *mpath; struct mpath_node *node; struct hlist_node *p; - struct net_device *dev = sta->sdata->dev; + struct ieee80211_sub_if_data *sdata = sta->sdata; int i; rcu_read_lock(); @@ -256,7 +368,7 @@ void mesh_plink_broken(struct sta_info *sta) spin_unlock_bh(&mpath->state_lock); mesh_path_error_tx(mpath->dst, cpu_to_le32(mpath->dsn), - dev->broadcast, dev); + sdata->dev->broadcast, sdata); } else spin_unlock_bh(&mpath->state_lock); } @@ -284,11 +396,11 @@ void mesh_path_flush_by_nexthop(struct sta_info *sta) for_each_mesh_entry(mesh_paths, p, node, i) { mpath = node->mpath; if (mpath->next_hop == sta) - mesh_path_del(mpath->dst, mpath->dev); + mesh_path_del(mpath->dst, mpath->sdata); } } -void mesh_path_flush(struct net_device *dev) +void mesh_path_flush(struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; struct mpath_node *node; @@ -297,19 +409,18 @@ void mesh_path_flush(struct net_device *dev) for_each_mesh_entry(mesh_paths, p, node, i) { mpath = node->mpath; - if (mpath->dev == dev) - mesh_path_del(mpath->dst, mpath->dev); + if (mpath->sdata == sdata) + mesh_path_del(mpath->dst, mpath->sdata); } } static void mesh_path_node_reclaim(struct rcu_head *rp) { struct mpath_node *node = container_of(rp, struct mpath_node, rcu); - struct ieee80211_sub_if_data *sdata = - IEEE80211_DEV_TO_SUB_IF(node->mpath->dev); + struct ieee80211_sub_if_data *sdata = node->mpath->sdata; del_timer_sync(&node->mpath->timer); - atomic_dec(&sdata->u.sta.mpaths); + atomic_dec(&sdata->u.mesh.mpaths); kfree(node->mpath); kfree(node); } @@ -318,11 +429,11 @@ static void mesh_path_node_reclaim(struct rcu_head *rp) * mesh_path_del - delete a mesh path from the table * * @addr: dst address (ETH_ALEN length) - * @dev: local interface + * @sdata: local subif * * Returns: 0 if succesful */ -int mesh_path_del(u8 *addr, struct net_device *dev) +int mesh_path_del(u8 *addr, struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; struct mpath_node *node; @@ -332,13 +443,13 @@ int mesh_path_del(u8 *addr, struct net_device *dev) int err = 0; read_lock(&pathtbl_resize_lock); - hash_idx = mesh_table_hash(addr, dev, mesh_paths); + hash_idx = mesh_table_hash(addr, sdata, mesh_paths); bucket = &mesh_paths->hash_buckets[hash_idx]; spin_lock(&mesh_paths->hashwlock[hash_idx]); hlist_for_each_entry(node, n, bucket, list) { mpath = node->mpath; - if (mpath->dev == dev && + if (mpath->sdata == sdata && memcmp(addr, mpath->dst, ETH_ALEN) == 0) { spin_lock_bh(&mpath->state_lock); mpath->flags |= MESH_PATH_RESOLVING; @@ -378,33 +489,33 @@ void mesh_path_tx_pending(struct mesh_path *mpath) * mesh_path_discard_frame - discard a frame whose path could not be resolved * * @skb: frame to discard - * @dev: network device the frame was to be sent through + * @sdata: network subif the frame was to be sent through * * If the frame was beign forwarded from another MP, a PERR frame will be sent * to the precursor. * * Locking: the function must me called within a rcu_read_lock region */ -void mesh_path_discard_frame(struct sk_buff *skb, struct net_device *dev) +void mesh_path_discard_frame(struct sk_buff *skb, + struct ieee80211_sub_if_data *sdata) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct mesh_path *mpath; u32 dsn = 0; - if (memcmp(hdr->addr4, dev->dev_addr, ETH_ALEN) != 0) { + if (memcmp(hdr->addr4, sdata->dev->dev_addr, ETH_ALEN) != 0) { u8 *ra, *da; da = hdr->addr3; ra = hdr->addr2; - mpath = mesh_path_lookup(da, dev); + mpath = mesh_path_lookup(da, sdata); if (mpath) dsn = ++mpath->dsn; - mesh_path_error_tx(skb->data, cpu_to_le32(dsn), ra, dev); + mesh_path_error_tx(skb->data, cpu_to_le32(dsn), ra, sdata); } kfree_skb(skb); - sdata->u.sta.mshstats.dropped_frames_no_route++; + sdata->u.mesh.mshstats.dropped_frames_no_route++; } /** @@ -416,14 +527,11 @@ void mesh_path_discard_frame(struct sk_buff *skb, struct net_device *dev) */ void mesh_path_flush_pending(struct mesh_path *mpath) { - struct ieee80211_sub_if_data *sdata; struct sk_buff *skb; - sdata = IEEE80211_DEV_TO_SUB_IF(mpath->dev); - while ((skb = skb_dequeue(&mpath->frame_queue)) && (mpath->flags & MESH_PATH_ACTIVE)) - mesh_path_discard_frame(skb, mpath->dev); + mesh_path_discard_frame(skb, mpath->sdata); } /** @@ -472,7 +580,7 @@ static int mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl) node = hlist_entry(p, struct mpath_node, list); mpath = node->mpath; new_node->mpath = mpath; - hash_idx = mesh_table_hash(mpath->dst, mpath->dev, newtbl); + hash_idx = mesh_table_hash(mpath->dst, mpath->sdata, newtbl); hlist_add_head(&new_node->list, &newtbl->hash_buckets[hash_idx]); return 0; @@ -481,15 +589,25 @@ static int mesh_path_node_copy(struct hlist_node *p, struct mesh_table *newtbl) int mesh_pathtbl_init(void) { mesh_paths = mesh_table_alloc(INIT_PATHS_SIZE_ORDER); + if (!mesh_paths) + return -ENOMEM; mesh_paths->free_node = &mesh_path_node_free; mesh_paths->copy_node = &mesh_path_node_copy; mesh_paths->mean_chain_len = MEAN_CHAIN_LEN; - if (!mesh_paths) + + mpp_paths = mesh_table_alloc(INIT_PATHS_SIZE_ORDER); + if (!mpp_paths) { + mesh_table_free(mesh_paths, true); return -ENOMEM; + } + mpp_paths->free_node = &mesh_path_node_free; + mpp_paths->copy_node = &mesh_path_node_copy; + mpp_paths->mean_chain_len = MEAN_CHAIN_LEN; + return 0; } -void mesh_path_expire(struct net_device *dev) +void mesh_path_expire(struct ieee80211_sub_if_data *sdata) { struct mesh_path *mpath; struct mpath_node *node; @@ -498,7 +616,7 @@ void mesh_path_expire(struct net_device *dev) read_lock(&pathtbl_resize_lock); for_each_mesh_entry(mesh_paths, p, node, i) { - if (node->mpath->dev != dev) + if (node->mpath->sdata != sdata) continue; mpath = node->mpath; spin_lock_bh(&mpath->state_lock); @@ -507,7 +625,7 @@ void mesh_path_expire(struct net_device *dev) time_after(jiffies, mpath->exp_time + MESH_PATH_EXPIRE)) { spin_unlock_bh(&mpath->state_lock); - mesh_path_del(mpath->dst, mpath->dev); + mesh_path_del(mpath->dst, mpath->sdata); } else spin_unlock_bh(&mpath->state_lock); } @@ -517,4 +635,5 @@ void mesh_path_expire(struct net_device *dev) void mesh_pathtbl_unregister(void) { mesh_table_free(mesh_paths, true); + mesh_table_free(mpp_paths, true); } diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 9efeb1f07025..faac101c0f85 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -36,11 +36,11 @@ #define MESH_SECURITY_AUTHENTICATION_IMPOSSIBLE 9 #define MESH_SECURITY_FAILED_VERIFICATION 10 -#define dot11MeshMaxRetries(s) (s->u.sta.mshcfg.dot11MeshMaxRetries) -#define dot11MeshRetryTimeout(s) (s->u.sta.mshcfg.dot11MeshRetryTimeout) -#define dot11MeshConfirmTimeout(s) (s->u.sta.mshcfg.dot11MeshConfirmTimeout) -#define dot11MeshHoldingTimeout(s) (s->u.sta.mshcfg.dot11MeshHoldingTimeout) -#define dot11MeshMaxPeerLinks(s) (s->u.sta.mshcfg.dot11MeshMaxPeerLinks) +#define dot11MeshMaxRetries(s) (s->u.mesh.mshcfg.dot11MeshMaxRetries) +#define dot11MeshRetryTimeout(s) (s->u.mesh.mshcfg.dot11MeshRetryTimeout) +#define dot11MeshConfirmTimeout(s) (s->u.mesh.mshcfg.dot11MeshConfirmTimeout) +#define dot11MeshHoldingTimeout(s) (s->u.mesh.mshcfg.dot11MeshHoldingTimeout) +#define dot11MeshMaxPeerLinks(s) (s->u.mesh.mshcfg.dot11MeshMaxPeerLinks) enum plink_frame_type { PLINK_OPEN = 0, @@ -63,14 +63,14 @@ enum plink_event { static inline void mesh_plink_inc_estab_count(struct ieee80211_sub_if_data *sdata) { - atomic_inc(&sdata->u.sta.mshstats.estab_plinks); + atomic_inc(&sdata->u.mesh.mshstats.estab_plinks); mesh_accept_plinks_update(sdata); } static inline void mesh_plink_dec_estab_count(struct ieee80211_sub_if_data *sdata) { - atomic_dec(&sdata->u.sta.mshstats.estab_plinks); + atomic_dec(&sdata->u.mesh.mshstats.estab_plinks); mesh_accept_plinks_update(sdata); } @@ -106,7 +106,7 @@ static struct sta_info *mesh_plink_alloc(struct ieee80211_sub_if_data *sdata, return NULL; sta->flags = WLAN_STA_AUTHORIZED; - sta->supp_rates[local->hw.conf.channel->band] = rates; + sta->sta.supp_rates[local->hw.conf.channel->band] = rates; return sta; } @@ -144,10 +144,10 @@ void mesh_plink_deactivate(struct sta_info *sta) spin_unlock_bh(&sta->lock); } -static int mesh_plink_frame_tx(struct net_device *dev, +static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, enum plink_frame_type action, u8 *da, __le16 llid, __le16 plid, __le16 reason) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct sk_buff *skb = dev_alloc_skb(local->hw.extra_tx_headroom + 400); struct ieee80211_mgmt *mgmt; bool include_plid = false; @@ -163,10 +163,10 @@ static int mesh_plink_frame_tx(struct net_device *dev, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 25 + sizeof(mgmt->u.action.u.plink_action)); memset(mgmt, 0, 25 + sizeof(mgmt->u.action.u.plink_action)); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_ACTION); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); /* BSSID is left zeroed, wildcard value */ mgmt->u.action.category = PLINK_CATEGORY; mgmt->u.action.u.plink_action.action_code = action; @@ -180,7 +180,7 @@ static int mesh_plink_frame_tx(struct net_device *dev, /* two-byte status code followed by two-byte AID */ memset(pos, 0, 4); } - mesh_mgmt_ies_add(skb, dev); + mesh_mgmt_ies_add(skb, sdata); } /* Add Peer Link Management element */ @@ -217,15 +217,14 @@ static int mesh_plink_frame_tx(struct net_device *dev, memcpy(pos, &reason, 2); } - ieee80211_sta_tx(dev, skb, 0); + ieee80211_tx_skb(sdata, skb, 0); return 0; } -void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct net_device *dev, +void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct ieee80211_sub_if_data *sdata, bool peer_accepting_plinks) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct sta_info *sta; rcu_read_lock(); @@ -244,10 +243,10 @@ void mesh_neighbour_update(u8 *hw_addr, u64 rates, struct net_device *dev, } sta->last_rx = jiffies; - sta->supp_rates[local->hw.conf.channel->band] = rates; + sta->sta.supp_rates[local->hw.conf.channel->band] = rates; if (peer_accepting_plinks && sta->plink_state == PLINK_LISTEN && - sdata->u.sta.accepting_plinks && - sdata->u.sta.mshcfg.auto_open_plinks) + sdata->u.mesh.accepting_plinks && + sdata->u.mesh.mshcfg.auto_open_plinks) mesh_plink_open(sta); rcu_read_unlock(); @@ -257,7 +256,6 @@ static void mesh_plink_timer(unsigned long data) { struct sta_info *sta; __le16 llid, plid, reason; - struct net_device *dev = NULL; struct ieee80211_sub_if_data *sdata; #ifdef CONFIG_MAC80211_VERBOSE_MPL_DEBUG DECLARE_MAC_BUF(mac); @@ -277,12 +275,11 @@ static void mesh_plink_timer(unsigned long data) return; } mpl_dbg("Mesh plink timer for %s fired on state %d\n", - print_mac(mac, sta->addr), sta->plink_state); + print_mac(mac, sta->sta.addr), sta->plink_state); reason = 0; llid = sta->llid; plid = sta->plid; sdata = sta->sdata; - dev = sdata->dev; switch (sta->plink_state) { case PLINK_OPN_RCVD: @@ -291,7 +288,7 @@ static void mesh_plink_timer(unsigned long data) if (sta->plink_retries < dot11MeshMaxRetries(sdata)) { u32 rand; mpl_dbg("Mesh plink for %s (retry, timeout): %d %d\n", - print_mac(mac, sta->addr), + print_mac(mac, sta->sta.addr), sta->plink_retries, sta->plink_timeout); get_random_bytes(&rand, sizeof(u32)); sta->plink_timeout = sta->plink_timeout + @@ -299,7 +296,7 @@ static void mesh_plink_timer(unsigned long data) ++sta->plink_retries; mod_plink_timer(sta, sta->plink_timeout); spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(dev, PLINK_OPEN, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->sta.addr, llid, 0, 0); break; } @@ -312,7 +309,7 @@ static void mesh_plink_timer(unsigned long data) sta->plink_state = PLINK_HOLDING; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, plid, + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; case PLINK_HOLDING: @@ -355,10 +352,10 @@ int mesh_plink_open(struct sta_info *sta) mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); spin_unlock_bh(&sta->lock); mpl_dbg("Mesh plink: starting establishment with %s\n", - print_mac(mac, sta->addr)); + print_mac(mac, sta->sta.addr)); - return mesh_plink_frame_tx(sdata->dev, PLINK_OPEN, - sta->addr, llid, 0, 0); + return mesh_plink_frame_tx(sdata, PLINK_OPEN, + sta->sta.addr, llid, 0, 0); } void mesh_plink_block(struct sta_info *sta) @@ -382,7 +379,7 @@ int mesh_plink_close(struct sta_info *sta) #endif mpl_dbg("Mesh plink: closing link with %s\n", - print_mac(mac, sta->addr)); + print_mac(mac, sta->sta.addr)); spin_lock_bh(&sta->lock); sta->reason = cpu_to_le16(MESH_LINK_CANCELLED); reason = sta->reason; @@ -403,15 +400,14 @@ int mesh_plink_close(struct sta_info *sta) llid = sta->llid; plid = sta->plid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(sta->sdata->dev, PLINK_CLOSE, sta->addr, llid, + mesh_plink_frame_tx(sta->sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); return 0; } -void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, +void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; struct ieee802_11_elems elems; struct sta_info *sta; @@ -425,6 +421,10 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, DECLARE_MAC_BUF(mac); #endif + /* need action_code, aux */ + if (len < IEEE80211_MIN_ACTION_SIZE + 3) + return; + if (is_multicast_ether_addr(mgmt->da)) { mpl_dbg("Mesh plink: ignore frame from multicast address"); return; @@ -478,7 +478,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, /* Now we will figure out the appropriate event... */ event = PLINK_UNDEFINED; - if (ftype != PLINK_CLOSE && (!mesh_matches_local(&elems, dev))) { + if (ftype != PLINK_CLOSE && (!mesh_matches_local(&elems, sdata))) { switch (ftype) { case PLINK_OPEN: event = OPN_RJCT; @@ -577,9 +577,9 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, sta->llid = llid; mesh_plink_timer_set(sta, dot11MeshRetryTimeout(sdata)); spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(dev, PLINK_OPEN, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_OPEN, sta->sta.addr, llid, 0, 0); - mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, + mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; default: @@ -604,7 +604,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: @@ -613,7 +613,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, sta->plid = plid; llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; case CNF_ACPT: @@ -646,13 +646,13 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; case CNF_ACPT: @@ -661,7 +661,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, mesh_plink_inc_estab_count(sdata); spin_unlock_bh(&sta->lock); mpl_dbg("Mesh plink with %s ESTABLISHED\n", - print_mac(mac, sta->addr)); + print_mac(mac, sta->sta.addr)); break; default: spin_unlock_bh(&sta->lock); @@ -685,7 +685,7 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: @@ -694,8 +694,8 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, mesh_plink_inc_estab_count(sdata); spin_unlock_bh(&sta->lock); mpl_dbg("Mesh plink with %s ESTABLISHED\n", - print_mac(mac, sta->addr)); - mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid, + print_mac(mac, sta->sta.addr)); + mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; default: @@ -714,13 +714,13 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, llid = sta->llid; mod_plink_timer(sta, dot11MeshHoldingTimeout(sdata)); spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, llid, plid, reason); break; case OPN_ACPT: llid = sta->llid; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(dev, PLINK_CONFIRM, sta->addr, llid, + mesh_plink_frame_tx(sdata, PLINK_CONFIRM, sta->sta.addr, llid, plid, 0); break; default: @@ -743,8 +743,8 @@ void mesh_rx_plink_frame(struct net_device *dev, struct ieee80211_mgmt *mgmt, llid = sta->llid; reason = sta->reason; spin_unlock_bh(&sta->lock); - mesh_plink_frame_tx(dev, PLINK_CLOSE, sta->addr, llid, - plid, reason); + mesh_plink_frame_tx(sdata, PLINK_CLOSE, sta->sta.addr, + llid, plid, reason); break; default: spin_unlock_bh(&sta->lock); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 902cac1bd246..409bb7716236 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -11,11 +11,6 @@ * published by the Free Software Foundation. */ -/* TODO: - * order BSS list by RSSI(?) ("quality of AP") - * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE, - * SSID) - */ #include <linux/delay.h> #include <linux/if_ether.h> #include <linux/skbuff.h> @@ -26,607 +21,184 @@ #include <linux/etherdevice.h> #include <linux/rtnetlink.h> #include <net/iw_handler.h> -#include <asm/types.h> - #include <net/mac80211.h> +#include <asm/unaligned.h> + #include "ieee80211_i.h" #include "rate.h" #include "led.h" -#include "mesh.h" +#define IEEE80211_ASSOC_SCANS_MAX_TRIES 2 #define IEEE80211_AUTH_TIMEOUT (HZ / 5) #define IEEE80211_AUTH_MAX_TRIES 3 #define IEEE80211_ASSOC_TIMEOUT (HZ / 5) #define IEEE80211_ASSOC_MAX_TRIES 3 #define IEEE80211_MONITORING_INTERVAL (2 * HZ) -#define IEEE80211_MESH_HOUSEKEEPING_INTERVAL (60 * HZ) #define IEEE80211_PROBE_INTERVAL (60 * HZ) #define IEEE80211_RETRY_AUTH_INTERVAL (1 * HZ) #define IEEE80211_SCAN_INTERVAL (2 * HZ) #define IEEE80211_SCAN_INTERVAL_SLOW (15 * HZ) #define IEEE80211_IBSS_JOIN_TIMEOUT (7 * HZ) -#define IEEE80211_PROBE_DELAY (HZ / 33) -#define IEEE80211_CHANNEL_TIME (HZ / 33) -#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 5) -#define IEEE80211_SCAN_RESULT_EXPIRE (10 * HZ) #define IEEE80211_IBSS_MERGE_INTERVAL (30 * HZ) #define IEEE80211_IBSS_INACTIVITY_LIMIT (60 * HZ) -#define IEEE80211_MESH_PEER_INACTIVITY_LIMIT (1800 * HZ) #define IEEE80211_IBSS_MAX_STA_ENTRIES 128 -#define ERP_INFO_USE_PROTECTION BIT(1) - -/* mgmt header + 1 byte action code */ -#define IEEE80211_MIN_ACTION_SIZE (24 + 1) - -#define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002 -#define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C -#define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFA0 -#define IEEE80211_DELBA_PARAM_TID_MASK 0xF000 -#define IEEE80211_DELBA_PARAM_INITIATOR_MASK 0x0800 - -/* next values represent the buffer size for A-MPDU frame. - * According to IEEE802.11n spec size varies from 8K to 64K (in powers of 2) */ -#define IEEE80211_MIN_AMPDU_BUF 0x8 -#define IEEE80211_MAX_AMPDU_BUF 0x40 - -static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst, - u8 *ssid, size_t ssid_len); -static struct ieee80211_sta_bss * -ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int freq, - u8 *ssid, u8 ssid_len); -static void ieee80211_rx_bss_put(struct ieee80211_local *local, - struct ieee80211_sta_bss *bss); -static int ieee80211_sta_find_ibss(struct net_device *dev, - struct ieee80211_if_sta *ifsta); -static int ieee80211_sta_wep_configured(struct net_device *dev); -static int ieee80211_sta_start_scan(struct net_device *dev, - u8 *ssid, size_t ssid_len); -static int ieee80211_sta_config_auth(struct net_device *dev, - struct ieee80211_if_sta *ifsta); -static void sta_rx_agg_session_timer_expired(unsigned long data); - - -void ieee802_11_parse_elems(u8 *start, size_t len, - struct ieee802_11_elems *elems) -{ - size_t left = len; - u8 *pos = start; - - memset(elems, 0, sizeof(*elems)); - - while (left >= 2) { - u8 id, elen; - - id = *pos++; - elen = *pos++; - left -= 2; - - if (elen > left) - return; - - switch (id) { - case WLAN_EID_SSID: - elems->ssid = pos; - elems->ssid_len = elen; - break; - case WLAN_EID_SUPP_RATES: - elems->supp_rates = pos; - elems->supp_rates_len = elen; - break; - case WLAN_EID_FH_PARAMS: - elems->fh_params = pos; - elems->fh_params_len = elen; - break; - case WLAN_EID_DS_PARAMS: - elems->ds_params = pos; - elems->ds_params_len = elen; - break; - case WLAN_EID_CF_PARAMS: - elems->cf_params = pos; - elems->cf_params_len = elen; - break; - case WLAN_EID_TIM: - elems->tim = pos; - elems->tim_len = elen; - break; - case WLAN_EID_IBSS_PARAMS: - elems->ibss_params = pos; - elems->ibss_params_len = elen; - break; - case WLAN_EID_CHALLENGE: - elems->challenge = pos; - elems->challenge_len = elen; - break; - case WLAN_EID_WPA: - if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 && - pos[2] == 0xf2) { - /* Microsoft OUI (00:50:F2) */ - if (pos[3] == 1) { - /* OUI Type 1 - WPA IE */ - elems->wpa = pos; - elems->wpa_len = elen; - } else if (elen >= 5 && pos[3] == 2) { - if (pos[4] == 0) { - elems->wmm_info = pos; - elems->wmm_info_len = elen; - } else if (pos[4] == 1) { - elems->wmm_param = pos; - elems->wmm_param_len = elen; - } - } - } - break; - case WLAN_EID_RSN: - elems->rsn = pos; - elems->rsn_len = elen; - break; - case WLAN_EID_ERP_INFO: - elems->erp_info = pos; - elems->erp_info_len = elen; - break; - case WLAN_EID_EXT_SUPP_RATES: - elems->ext_supp_rates = pos; - elems->ext_supp_rates_len = elen; - break; - case WLAN_EID_HT_CAPABILITY: - elems->ht_cap_elem = pos; - elems->ht_cap_elem_len = elen; - break; - case WLAN_EID_HT_EXTRA_INFO: - elems->ht_info_elem = pos; - elems->ht_info_elem_len = elen; - break; - case WLAN_EID_MESH_ID: - elems->mesh_id = pos; - elems->mesh_id_len = elen; - break; - case WLAN_EID_MESH_CONFIG: - elems->mesh_config = pos; - elems->mesh_config_len = elen; - break; - case WLAN_EID_PEER_LINK: - elems->peer_link = pos; - elems->peer_link_len = elen; - break; - case WLAN_EID_PREQ: - elems->preq = pos; - elems->preq_len = elen; - break; - case WLAN_EID_PREP: - elems->prep = pos; - elems->prep_len = elen; - break; - case WLAN_EID_PERR: - elems->perr = pos; - elems->perr_len = elen; - break; - case WLAN_EID_CHANNEL_SWITCH: - elems->ch_switch_elem = pos; - elems->ch_switch_elem_len = elen; - break; - case WLAN_EID_QUIET: - if (!elems->quiet_elem) { - elems->quiet_elem = pos; - elems->quiet_elem_len = elen; - } - elems->num_of_quiet_elem++; - break; - case WLAN_EID_COUNTRY: - elems->country_elem = pos; - elems->country_elem_len = elen; - break; - case WLAN_EID_PWR_CONSTRAINT: - elems->pwr_constr_elem = pos; - elems->pwr_constr_elem_len = elen; - break; - default: - break; - } - - left -= elen; - pos += elen; - } -} - - +/* utils */ static int ecw2cw(int ecw) { return (1 << ecw) - 1; } - -static void ieee80211_sta_def_wmm_params(struct net_device *dev, - struct ieee80211_sta_bss *bss, - int ibss) +static u8 *ieee80211_bss_get_ie(struct ieee80211_bss *bss, u8 ie) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; - int i, have_higher_than_11mbit = 0; - - - /* cf. IEEE 802.11 9.2.12 */ - for (i = 0; i < bss->supp_rates_len; i++) - if ((bss->supp_rates[i] & 0x7f) * 5 > 110) - have_higher_than_11mbit = 1; + u8 *end, *pos; - if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && - have_higher_than_11mbit) - sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE; - else - sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; - - - if (local->ops->conf_tx) { - struct ieee80211_tx_queue_params qparam; - - memset(&qparam, 0, sizeof(qparam)); - - qparam.aifs = 2; - - if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && - !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)) - qparam.cw_min = 31; - else - qparam.cw_min = 15; - - qparam.cw_max = 1023; - qparam.txop = 0; - - for (i = 0; i < local_to_hw(local)->queues; i++) - local->ops->conf_tx(local_to_hw(local), i, &qparam); - } -} - -static void ieee80211_sta_wmm_params(struct net_device *dev, - struct ieee80211_if_sta *ifsta, - u8 *wmm_param, size_t wmm_param_len) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_tx_queue_params params; - size_t left; - int count; - u8 *pos; - - if (!(ifsta->flags & IEEE80211_STA_WMM_ENABLED)) - return; - - if (!wmm_param) - return; - - if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1) - return; - count = wmm_param[6] & 0x0f; - if (count == ifsta->wmm_last_param_set) - return; - ifsta->wmm_last_param_set = count; - - pos = wmm_param + 8; - left = wmm_param_len - 8; - - memset(¶ms, 0, sizeof(params)); - - if (!local->ops->conf_tx) - return; - - local->wmm_acm = 0; - for (; left >= 4; left -= 4, pos += 4) { - int aci = (pos[0] >> 5) & 0x03; - int acm = (pos[0] >> 4) & 0x01; - int queue; + pos = bss->ies; + if (pos == NULL) + return NULL; + end = pos + bss->ies_len; - switch (aci) { - case 1: - queue = 3; - if (acm) - local->wmm_acm |= BIT(0) | BIT(3); - break; - case 2: - queue = 1; - if (acm) - local->wmm_acm |= BIT(4) | BIT(5); - break; - case 3: - queue = 0; - if (acm) - local->wmm_acm |= BIT(6) | BIT(7); + while (pos + 1 < end) { + if (pos + 2 + pos[1] > end) break; - case 0: - default: - queue = 2; - if (acm) - local->wmm_acm |= BIT(1) | BIT(2); - break; - } - - params.aifs = pos[0] & 0x0f; - params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4); - params.cw_min = ecw2cw(pos[1] & 0x0f); - params.txop = get_unaligned_le16(pos + 2); -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d " - "cWmin=%d cWmax=%d txop=%d\n", - dev->name, queue, aci, acm, params.aifs, params.cw_min, - params.cw_max, params.txop); -#endif - /* TODO: handle ACM (block TX, fallback to next lowest allowed - * AC for now) */ - if (local->ops->conf_tx(local_to_hw(local), queue, ¶ms)) { - printk(KERN_DEBUG "%s: failed to set TX queue " - "parameters for queue %d\n", dev->name, queue); - } + if (pos[0] == ie) + return pos; + pos += 2 + pos[1]; } -} - -static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata, - bool use_protection, - bool use_short_preamble) -{ - struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf; -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - DECLARE_MAC_BUF(mac); -#endif - u32 changed = 0; - if (use_protection != bss_conf->use_cts_prot) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" - "%s)\n", - sdata->dev->name, - use_protection ? "enabled" : "disabled", - print_mac(mac, ifsta->bssid)); - } -#endif - bss_conf->use_cts_prot = use_protection; - changed |= BSS_CHANGED_ERP_CTS_PROT; - } - - if (use_short_preamble != bss_conf->use_short_preamble) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: switched to %s barker preamble" - " (BSSID=%s)\n", - sdata->dev->name, - use_short_preamble ? "short" : "long", - print_mac(mac, ifsta->bssid)); - } -#endif - bss_conf->use_short_preamble = use_short_preamble; - changed |= BSS_CHANGED_ERP_PREAMBLE; - } - - return changed; -} - -static u32 ieee80211_handle_erp_ie(struct ieee80211_sub_if_data *sdata, - u8 erp_value) -{ - bool use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; - bool use_short_preamble = (erp_value & WLAN_ERP_BARKER_PREAMBLE) == 0; - - return ieee80211_handle_protect_preamb(sdata, - use_protection, use_short_preamble); + return NULL; } -static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta_bss *bss) +static int ieee80211_compatible_rates(struct ieee80211_bss *bss, + struct ieee80211_supported_band *sband, + u64 *rates) { - u32 changed = 0; + int i, j, count; + *rates = 0; + count = 0; + for (i = 0; i < bss->supp_rates_len; i++) { + int rate = (bss->supp_rates[i] & 0x7F) * 5; - if (bss->has_erp_value) - changed |= ieee80211_handle_erp_ie(sdata, bss->erp_value); - else { - u16 capab = bss->capability; - changed |= ieee80211_handle_protect_preamb(sdata, false, - (capab & WLAN_CAPABILITY_SHORT_PREAMBLE) != 0); + for (j = 0; j < sband->n_bitrates; j++) + if (sband->bitrates[j].bitrate == rate) { + *rates |= BIT(j); + count++; + break; + } } - return changed; -} - -int ieee80211_ht_cap_ie_to_ht_info(struct ieee80211_ht_cap *ht_cap_ie, - struct ieee80211_ht_info *ht_info) -{ - - if (ht_info == NULL) - return -EINVAL; - - memset(ht_info, 0, sizeof(*ht_info)); - - if (ht_cap_ie) { - u8 ampdu_info = ht_cap_ie->ampdu_params_info; - - ht_info->ht_supported = 1; - ht_info->cap = le16_to_cpu(ht_cap_ie->cap_info); - ht_info->ampdu_factor = - ampdu_info & IEEE80211_HT_CAP_AMPDU_FACTOR; - ht_info->ampdu_density = - (ampdu_info & IEEE80211_HT_CAP_AMPDU_DENSITY) >> 2; - memcpy(ht_info->supp_mcs_set, ht_cap_ie->supp_mcs_set, 16); - } else - ht_info->ht_supported = 0; - - return 0; + return count; } -int ieee80211_ht_addt_info_ie_to_ht_bss_info( - struct ieee80211_ht_addt_info *ht_add_info_ie, - struct ieee80211_ht_bss_info *bss_info) +/* also used by mesh code */ +u64 ieee80211_sta_get_rates(struct ieee80211_local *local, + struct ieee802_11_elems *elems, + enum ieee80211_band band) { - if (bss_info == NULL) - return -EINVAL; - - memset(bss_info, 0, sizeof(*bss_info)); - - if (ht_add_info_ie) { - u16 op_mode; - op_mode = le16_to_cpu(ht_add_info_ie->operation_mode); + struct ieee80211_supported_band *sband; + struct ieee80211_rate *bitrates; + size_t num_rates; + u64 supp_rates; + int i, j; + sband = local->hw.wiphy->bands[band]; - bss_info->primary_channel = ht_add_info_ie->control_chan; - bss_info->bss_cap = ht_add_info_ie->ht_param; - bss_info->bss_op_mode = (u8)(op_mode & 0xff); + if (!sband) { + WARN_ON(1); + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; } - return 0; + bitrates = sband->bitrates; + num_rates = sband->n_bitrates; + supp_rates = 0; + for (i = 0; i < elems->supp_rates_len + + elems->ext_supp_rates_len; i++) { + u8 rate = 0; + int own_rate; + if (i < elems->supp_rates_len) + rate = elems->supp_rates[i]; + else if (elems->ext_supp_rates) + rate = elems->ext_supp_rates + [i - elems->supp_rates_len]; + own_rate = 5 * (rate & 0x7f); + for (j = 0; j < num_rates; j++) + if (bitrates[j].bitrate == own_rate) + supp_rates |= BIT(j); + } + return supp_rates; } -static void ieee80211_sta_send_associnfo(struct net_device *dev, - struct ieee80211_if_sta *ifsta) +/* frame sending functions */ + +/* also used by scanning code */ +void ieee80211_send_probe_req(struct ieee80211_sub_if_data *sdata, u8 *dst, + u8 *ssid, size_t ssid_len) { - char *buf; - size_t len; + struct ieee80211_local *local = sdata->local; + struct ieee80211_supported_band *sband; + struct sk_buff *skb; + struct ieee80211_mgmt *mgmt; + u8 *pos, *supp_rates, *esupp_rates = NULL; int i; - union iwreq_data wrqu; - if (!ifsta->assocreq_ies && !ifsta->assocresp_ies) - return; - - buf = kmalloc(50 + 2 * (ifsta->assocreq_ies_len + - ifsta->assocresp_ies_len), GFP_KERNEL); - if (!buf) + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for probe " + "request\n", sdata->dev->name); return; - - len = sprintf(buf, "ASSOCINFO("); - if (ifsta->assocreq_ies) { - len += sprintf(buf + len, "ReqIEs="); - for (i = 0; i < ifsta->assocreq_ies_len; i++) { - len += sprintf(buf + len, "%02x", - ifsta->assocreq_ies[i]); - } - } - if (ifsta->assocresp_ies) { - if (ifsta->assocreq_ies) - len += sprintf(buf + len, " "); - len += sprintf(buf + len, "RespIEs="); - for (i = 0; i < ifsta->assocresp_ies_len; i++) { - len += sprintf(buf + len, "%02x", - ifsta->assocresp_ies[i]); - } } - len += sprintf(buf + len, ")"); + skb_reserve(skb, local->hw.extra_tx_headroom); - if (len > IW_CUSTOM_MAX) { - len = sprintf(buf, "ASSOCRESPIE="); - for (i = 0; i < ifsta->assocresp_ies_len; i++) { - len += sprintf(buf + len, "%02x", - ifsta->assocresp_ies[i]); - } + mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); + memset(mgmt, 0, 24); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_PROBE_REQ); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); + if (dst) { + memcpy(mgmt->da, dst, ETH_ALEN); + memcpy(mgmt->bssid, dst, ETH_ALEN); + } else { + memset(mgmt->da, 0xff, ETH_ALEN); + memset(mgmt->bssid, 0xff, ETH_ALEN); } + pos = skb_put(skb, 2 + ssid_len); + *pos++ = WLAN_EID_SSID; + *pos++ = ssid_len; + memcpy(pos, ssid, ssid_len); - memset(&wrqu, 0, sizeof(wrqu)); - wrqu.data.length = len; - wireless_send_event(dev, IWEVCUSTOM, &wrqu, buf); - - kfree(buf); -} - - -static void ieee80211_set_associated(struct net_device *dev, - struct ieee80211_if_sta *ifsta, - bool assoc) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = sdata->local; - struct ieee80211_conf *conf = &local_to_hw(local)->conf; - union iwreq_data wrqu; - u32 changed = BSS_CHANGED_ASSOC; - - if (assoc) { - struct ieee80211_sta_bss *bss; - - ifsta->flags |= IEEE80211_STA_ASSOCIATED; - - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) - return; - - bss = ieee80211_rx_bss_get(dev, ifsta->bssid, - conf->channel->center_freq, - ifsta->ssid, ifsta->ssid_len); - if (bss) { - /* set timing information */ - sdata->bss_conf.beacon_int = bss->beacon_int; - sdata->bss_conf.timestamp = bss->timestamp; - sdata->bss_conf.dtim_period = bss->dtim_period; - - changed |= ieee80211_handle_bss_capability(sdata, bss); - - ieee80211_rx_bss_put(local, bss); - } + supp_rates = skb_put(skb, 2); + supp_rates[0] = WLAN_EID_SUPP_RATES; + supp_rates[1] = 0; + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) { - changed |= BSS_CHANGED_HT; - sdata->bss_conf.assoc_ht = 1; - sdata->bss_conf.ht_conf = &conf->ht_conf; - sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf; + for (i = 0; i < sband->n_bitrates; i++) { + struct ieee80211_rate *rate = &sband->bitrates[i]; + if (esupp_rates) { + pos = skb_put(skb, 1); + esupp_rates[1]++; + } else if (supp_rates[1] == 8) { + esupp_rates = skb_put(skb, 3); + esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES; + esupp_rates[1] = 1; + pos = &esupp_rates[2]; + } else { + pos = skb_put(skb, 1); + supp_rates[1]++; } - - ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; - memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); - memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); - ieee80211_sta_send_associnfo(dev, ifsta); - } else { - netif_carrier_off(dev); - ieee80211_sta_tear_down_BA_sessions(dev, ifsta->bssid); - ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; - changed |= ieee80211_reset_erp_info(dev); - - sdata->bss_conf.assoc_ht = 0; - sdata->bss_conf.ht_conf = NULL; - sdata->bss_conf.ht_bss_conf = NULL; - - memset(wrqu.ap_addr.sa_data, 0, ETH_ALEN); + *pos = rate->bitrate / 5; } - ifsta->last_probe = jiffies; - ieee80211_led_assoc(local, assoc); - - sdata->bss_conf.assoc = assoc; - ieee80211_bss_info_change_notify(sdata, changed); - if (assoc) - netif_carrier_on(dev); - - wrqu.ap_addr.sa_family = ARPHRD_ETHER; - wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); + ieee80211_tx_skb(sdata, skb, 0); } -static void ieee80211_set_disassoc(struct net_device *dev, - struct ieee80211_if_sta *ifsta, int deauth) -{ - if (deauth) - ifsta->auth_tries = 0; - ifsta->assoc_tries = 0; - ieee80211_set_associated(dev, ifsta, 0); -} - -void ieee80211_sta_tx(struct net_device *dev, struct sk_buff *skb, - int encrypt) -{ - struct ieee80211_sub_if_data *sdata; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - skb->dev = sdata->local->mdev; - skb_set_mac_header(skb, 0); - skb_set_network_header(skb, 0); - skb_set_transport_header(skb, 0); - - skb->iif = sdata->dev->ifindex; - skb->do_not_encrypt = !encrypt; - - dev_queue_xmit(skb); -} - - -static void ieee80211_send_auth(struct net_device *dev, +static void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, int transaction, u8 *extra, size_t extra_len, int encrypt) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; @@ -634,19 +206,19 @@ static void ieee80211_send_auth(struct net_device *dev, sizeof(*mgmt) + 6 + extra_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for auth " - "frame\n", dev->name); + "frame\n", sdata->dev->name); return; } skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + 6); memset(mgmt, 0, 24 + 6); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_AUTH); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_AUTH); if (encrypt) mgmt->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); mgmt->u.auth.auth_alg = cpu_to_le16(ifsta->auth_alg); mgmt->u.auth.auth_transaction = cpu_to_le16(transaction); @@ -655,64 +227,19 @@ static void ieee80211_send_auth(struct net_device *dev, if (extra) memcpy(skb_put(skb, extra_len), extra, extra_len); - ieee80211_sta_tx(dev, skb, encrypt); -} - - -static void ieee80211_authenticate(struct net_device *dev, - struct ieee80211_if_sta *ifsta) -{ - DECLARE_MAC_BUF(mac); - - ifsta->auth_tries++; - if (ifsta->auth_tries > IEEE80211_AUTH_MAX_TRIES) { - printk(KERN_DEBUG "%s: authentication with AP %s" - " timed out\n", - dev->name, print_mac(mac, ifsta->bssid)); - ifsta->state = IEEE80211_DISABLED; - return; - } - - ifsta->state = IEEE80211_AUTHENTICATE; - printk(KERN_DEBUG "%s: authenticate with AP %s\n", - dev->name, print_mac(mac, ifsta->bssid)); - - ieee80211_send_auth(dev, ifsta, 1, NULL, 0, 0); - - mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); -} - -static int ieee80211_compatible_rates(struct ieee80211_sta_bss *bss, - struct ieee80211_supported_band *sband, - u64 *rates) -{ - int i, j, count; - *rates = 0; - count = 0; - for (i = 0; i < bss->supp_rates_len; i++) { - int rate = (bss->supp_rates[i] & 0x7F) * 5; - - for (j = 0; j < sband->n_bitrates; j++) - if (sband->bitrates[j].bitrate == rate) { - *rates |= BIT(j); - count++; - break; - } - } - - return count; + ieee80211_tx_skb(sdata, skb, encrypt); } -static void ieee80211_send_assoc(struct net_device *dev, +static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - u8 *pos, *ies; + u8 *pos, *ies, *ht_add_ie; int i, len, count, rates_len, supp_rates_len; u16 capab; - struct ieee80211_sta_bss *bss; + struct ieee80211_bss *bss; int wmm = 0; struct ieee80211_supported_band *sband; u64 rates = 0; @@ -722,7 +249,7 @@ static void ieee80211_send_assoc(struct net_device *dev, ifsta->ssid_len); if (!skb) { printk(KERN_DEBUG "%s: failed to allocate buffer for assoc " - "frame\n", dev->name); + "frame\n", sdata->dev->name); return; } skb_reserve(skb, local->hw.extra_tx_headroom); @@ -738,13 +265,13 @@ static void ieee80211_send_assoc(struct net_device *dev, capab |= WLAN_CAPABILITY_SHORT_PREAMBLE; } - bss = ieee80211_rx_bss_get(dev, ifsta->bssid, + bss = ieee80211_rx_bss_get(local, ifsta->bssid, local->hw.conf.channel->center_freq, ifsta->ssid, ifsta->ssid_len); if (bss) { if (bss->capability & WLAN_CAPABILITY_PRIVACY) capab |= WLAN_CAPABILITY_PRIVACY; - if (bss->wmm_ie) + if (bss->wmm_used) wmm = 1; /* get all rates supported by the device and the AP as @@ -766,13 +293,13 @@ static void ieee80211_send_assoc(struct net_device *dev, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); memset(mgmt, 0, 24); memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); if (ifsta->flags & IEEE80211_STA_PREV_BSSID_SET) { skb_put(skb, 10); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_REASSOC_REQ); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_REASSOC_REQ); mgmt->u.reassoc_req.capab_info = cpu_to_le16(capab); mgmt->u.reassoc_req.listen_interval = cpu_to_le16(local->hw.conf.listen_interval); @@ -780,8 +307,8 @@ static void ieee80211_send_assoc(struct net_device *dev, ETH_ALEN); } else { skb_put(skb, 4); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_ASSOC_REQ); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ASSOC_REQ); mgmt->u.assoc_req.capab_info = cpu_to_le16(capab); mgmt->u.reassoc_req.listen_interval = cpu_to_le16(local->hw.conf.listen_interval); @@ -866,9 +393,10 @@ static void ieee80211_send_assoc(struct net_device *dev, /* wmm support is a must to HT */ if (wmm && (ifsta->flags & IEEE80211_STA_WMM_ENABLED) && - sband->ht_info.ht_supported && bss->ht_add_ie) { + sband->ht_info.ht_supported && + (ht_add_ie = ieee80211_bss_get_ie(bss, WLAN_EID_HT_EXTRA_INFO))) { struct ieee80211_ht_addt_info *ht_add_info = - (struct ieee80211_ht_addt_info *)bss->ht_add_ie; + (struct ieee80211_ht_addt_info *)ht_add_ie; u16 cap = sband->ht_info.cap; __le16 tmp; u32 flags = local->hw.conf.channel->flags; @@ -907,21 +435,22 @@ static void ieee80211_send_assoc(struct net_device *dev, if (ifsta->assocreq_ies) memcpy(ifsta->assocreq_ies, ies, ifsta->assocreq_ies_len); - ieee80211_sta_tx(dev, skb, 0); + ieee80211_tx_skb(sdata, skb, 0); } -static void ieee80211_send_deauth(struct net_device *dev, - struct ieee80211_if_sta *ifsta, u16 reason) +static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata, + u16 stype, u16 reason) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_sta *ifsta = &sdata->u.sta; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt)); if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for deauth " - "frame\n", dev->name); + printk(KERN_DEBUG "%s: failed to allocate buffer for " + "deauth/disassoc frame\n", sdata->dev->name); return; } skb_reserve(skb, local->hw.extra_tx_headroom); @@ -929,940 +458,594 @@ static void ieee80211_send_deauth(struct net_device *dev, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); memset(mgmt, 0, 24); memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_DEAUTH); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | stype); skb_put(skb, 2); + /* u.deauth.reason_code == u.disassoc.reason_code */ mgmt->u.deauth.reason_code = cpu_to_le16(reason); - ieee80211_sta_tx(dev, skb, 0); + ieee80211_tx_skb(sdata, skb, 0); } - -static void ieee80211_send_disassoc(struct net_device *dev, - struct ieee80211_if_sta *ifsta, u16 reason) +/* MLME */ +static void ieee80211_sta_def_wmm_params(struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss *bss) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; + struct ieee80211_local *local = sdata->local; + int i, have_higher_than_11mbit = 0; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt)); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for disassoc " - "frame\n", dev->name); - return; - } - skb_reserve(skb, local->hw.extra_tx_headroom); + /* cf. IEEE 802.11 9.2.12 */ + for (i = 0; i < bss->supp_rates_len; i++) + if ((bss->supp_rates[i] & 0x7f) * 5 > 110) + have_higher_than_11mbit = 1; - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, ifsta->bssid, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_DISASSOC); - skb_put(skb, 2); - mgmt->u.disassoc.reason_code = cpu_to_le16(reason); + if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && + have_higher_than_11mbit) + sdata->flags |= IEEE80211_SDATA_OPERATING_GMODE; + else + sdata->flags &= ~IEEE80211_SDATA_OPERATING_GMODE; - ieee80211_sta_tx(dev, skb, 0); + ieee80211_set_wmm_default(sdata); } - -static int ieee80211_privacy_mismatch(struct net_device *dev, - struct ieee80211_if_sta *ifsta) +static void ieee80211_sta_wmm_params(struct ieee80211_local *local, + struct ieee80211_if_sta *ifsta, + u8 *wmm_param, size_t wmm_param_len) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *bss; - int bss_privacy; - int wep_privacy; - int privacy_invoked; + struct ieee80211_tx_queue_params params; + size_t left; + int count; + u8 *pos; - if (!ifsta || (ifsta->flags & IEEE80211_STA_MIXED_CELL)) - return 0; + if (!(ifsta->flags & IEEE80211_STA_WMM_ENABLED)) + return; - bss = ieee80211_rx_bss_get(dev, ifsta->bssid, - local->hw.conf.channel->center_freq, - ifsta->ssid, ifsta->ssid_len); - if (!bss) - return 0; + if (!wmm_param) + return; - bss_privacy = !!(bss->capability & WLAN_CAPABILITY_PRIVACY); - wep_privacy = !!ieee80211_sta_wep_configured(dev); - privacy_invoked = !!(ifsta->flags & IEEE80211_STA_PRIVACY_INVOKED); + if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1) + return; + count = wmm_param[6] & 0x0f; + if (count == ifsta->wmm_last_param_set) + return; + ifsta->wmm_last_param_set = count; - ieee80211_rx_bss_put(local, bss); + pos = wmm_param + 8; + left = wmm_param_len - 8; - if ((bss_privacy == wep_privacy) || (bss_privacy == privacy_invoked)) - return 0; + memset(¶ms, 0, sizeof(params)); - return 1; -} + if (!local->ops->conf_tx) + return; + local->wmm_acm = 0; + for (; left >= 4; left -= 4, pos += 4) { + int aci = (pos[0] >> 5) & 0x03; + int acm = (pos[0] >> 4) & 0x01; + int queue; -static void ieee80211_associate(struct net_device *dev, - struct ieee80211_if_sta *ifsta) + switch (aci) { + case 1: + queue = 3; + if (acm) + local->wmm_acm |= BIT(0) | BIT(3); + break; + case 2: + queue = 1; + if (acm) + local->wmm_acm |= BIT(4) | BIT(5); + break; + case 3: + queue = 0; + if (acm) + local->wmm_acm |= BIT(6) | BIT(7); + break; + case 0: + default: + queue = 2; + if (acm) + local->wmm_acm |= BIT(1) | BIT(2); + break; + } + + params.aifs = pos[0] & 0x0f; + params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4); + params.cw_min = ecw2cw(pos[1] & 0x0f); + params.txop = get_unaligned_le16(pos + 2); +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d " + "cWmin=%d cWmax=%d txop=%d\n", + local->mdev->name, queue, aci, acm, params.aifs, params.cw_min, + params.cw_max, params.txop); +#endif + /* TODO: handle ACM (block TX, fallback to next lowest allowed + * AC for now) */ + if (local->ops->conf_tx(local_to_hw(local), queue, ¶ms)) { + printk(KERN_DEBUG "%s: failed to set TX queue " + "parameters for queue %d\n", local->mdev->name, queue); + } + } +} + +static u32 ieee80211_handle_protect_preamb(struct ieee80211_sub_if_data *sdata, + bool use_protection, + bool use_short_preamble) { + struct ieee80211_bss_conf *bss_conf = &sdata->bss_conf; +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + struct ieee80211_if_sta *ifsta = &sdata->u.sta; DECLARE_MAC_BUF(mac); +#endif + u32 changed = 0; - ifsta->assoc_tries++; - if (ifsta->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) { - printk(KERN_DEBUG "%s: association with AP %s" - " timed out\n", - dev->name, print_mac(mac, ifsta->bssid)); - ifsta->state = IEEE80211_DISABLED; - return; + if (use_protection != bss_conf->use_cts_prot) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: CTS protection %s (BSSID=" + "%s)\n", + sdata->dev->name, + use_protection ? "enabled" : "disabled", + print_mac(mac, ifsta->bssid)); + } +#endif + bss_conf->use_cts_prot = use_protection; + changed |= BSS_CHANGED_ERP_CTS_PROT; } - ifsta->state = IEEE80211_ASSOCIATE; - printk(KERN_DEBUG "%s: associate with AP %s\n", - dev->name, print_mac(mac, ifsta->bssid)); - if (ieee80211_privacy_mismatch(dev, ifsta)) { - printk(KERN_DEBUG "%s: mismatch in privacy configuration and " - "mixed-cell disabled - abort association\n", dev->name); - ifsta->state = IEEE80211_DISABLED; - return; + if (use_short_preamble != bss_conf->use_short_preamble) { +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: switched to %s barker preamble" + " (BSSID=%s)\n", + sdata->dev->name, + use_short_preamble ? "short" : "long", + print_mac(mac, ifsta->bssid)); + } +#endif + bss_conf->use_short_preamble = use_short_preamble; + changed |= BSS_CHANGED_ERP_PREAMBLE; } - ieee80211_send_assoc(dev, ifsta); - - mod_timer(&ifsta->timer, jiffies + IEEE80211_ASSOC_TIMEOUT); + return changed; } - -static void ieee80211_associated(struct net_device *dev, - struct ieee80211_if_sta *ifsta) +static u32 ieee80211_handle_erp_ie(struct ieee80211_sub_if_data *sdata, + u8 erp_value) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sta_info *sta; - int disassoc; - DECLARE_MAC_BUF(mac); - - /* TODO: start monitoring current AP signal quality and number of - * missed beacons. Scan other channels every now and then and search - * for better APs. */ - /* TODO: remove expired BSSes */ + bool use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; + bool use_short_preamble = (erp_value & WLAN_ERP_BARKER_PREAMBLE) == 0; - ifsta->state = IEEE80211_ASSOCIATED; + return ieee80211_handle_protect_preamb(sdata, + use_protection, use_short_preamble); +} - rcu_read_lock(); +static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss *bss) +{ + u32 changed = 0; - sta = sta_info_get(local, ifsta->bssid); - if (!sta) { - printk(KERN_DEBUG "%s: No STA entry for own AP %s\n", - dev->name, print_mac(mac, ifsta->bssid)); - disassoc = 1; - } else { - disassoc = 0; - if (time_after(jiffies, - sta->last_rx + IEEE80211_MONITORING_INTERVAL)) { - if (ifsta->flags & IEEE80211_STA_PROBEREQ_POLL) { - printk(KERN_DEBUG "%s: No ProbeResp from " - "current AP %s - assume out of " - "range\n", - dev->name, print_mac(mac, ifsta->bssid)); - disassoc = 1; - sta_info_unlink(&sta); - } else - ieee80211_send_probe_req(dev, ifsta->bssid, - local->scan_ssid, - local->scan_ssid_len); - ifsta->flags ^= IEEE80211_STA_PROBEREQ_POLL; - } else { - ifsta->flags &= ~IEEE80211_STA_PROBEREQ_POLL; - if (time_after(jiffies, ifsta->last_probe + - IEEE80211_PROBE_INTERVAL)) { - ifsta->last_probe = jiffies; - ieee80211_send_probe_req(dev, ifsta->bssid, - ifsta->ssid, - ifsta->ssid_len); - } - } + if (bss->has_erp_value) + changed |= ieee80211_handle_erp_ie(sdata, bss->erp_value); + else { + u16 capab = bss->capability; + changed |= ieee80211_handle_protect_preamb(sdata, false, + (capab & WLAN_CAPABILITY_SHORT_PREAMBLE) != 0); } - rcu_read_unlock(); - - if (disassoc && sta) - sta_info_destroy(sta); - - if (disassoc) { - ifsta->state = IEEE80211_DISABLED; - ieee80211_set_associated(dev, ifsta, 0); - } else { - mod_timer(&ifsta->timer, jiffies + - IEEE80211_MONITORING_INTERVAL); - } + return changed; } +static void ieee80211_sta_send_apinfo(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) +{ + union iwreq_data wrqu; + memset(&wrqu, 0, sizeof(wrqu)); + if (ifsta->flags & IEEE80211_STA_ASSOCIATED) + memcpy(wrqu.ap_addr.sa_data, sdata->u.sta.bssid, ETH_ALEN); + wrqu.ap_addr.sa_family = ARPHRD_ETHER; + wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); +} -static void ieee80211_send_probe_req(struct net_device *dev, u8 *dst, - u8 *ssid, size_t ssid_len) +static void ieee80211_sta_send_associnfo(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_supported_band *sband; - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - u8 *pos, *supp_rates, *esupp_rates = NULL; + char *buf; + size_t len; int i; + union iwreq_data wrqu; - skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*mgmt) + 200); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for probe " - "request\n", dev->name); + if (!ifsta->assocreq_ies && !ifsta->assocresp_ies) return; - } - skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_PROBE_REQ); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); - if (dst) { - memcpy(mgmt->da, dst, ETH_ALEN); - memcpy(mgmt->bssid, dst, ETH_ALEN); - } else { - memset(mgmt->da, 0xff, ETH_ALEN); - memset(mgmt->bssid, 0xff, ETH_ALEN); - } - pos = skb_put(skb, 2 + ssid_len); - *pos++ = WLAN_EID_SSID; - *pos++ = ssid_len; - memcpy(pos, ssid, ssid_len); - - supp_rates = skb_put(skb, 2); - supp_rates[0] = WLAN_EID_SUPP_RATES; - supp_rates[1] = 0; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + buf = kmalloc(50 + 2 * (ifsta->assocreq_ies_len + + ifsta->assocresp_ies_len), GFP_KERNEL); + if (!buf) + return; - for (i = 0; i < sband->n_bitrates; i++) { - struct ieee80211_rate *rate = &sband->bitrates[i]; - if (esupp_rates) { - pos = skb_put(skb, 1); - esupp_rates[1]++; - } else if (supp_rates[1] == 8) { - esupp_rates = skb_put(skb, 3); - esupp_rates[0] = WLAN_EID_EXT_SUPP_RATES; - esupp_rates[1] = 1; - pos = &esupp_rates[2]; - } else { - pos = skb_put(skb, 1); - supp_rates[1]++; + len = sprintf(buf, "ASSOCINFO("); + if (ifsta->assocreq_ies) { + len += sprintf(buf + len, "ReqIEs="); + for (i = 0; i < ifsta->assocreq_ies_len; i++) { + len += sprintf(buf + len, "%02x", + ifsta->assocreq_ies[i]); } - *pos = rate->bitrate / 5; } + if (ifsta->assocresp_ies) { + if (ifsta->assocreq_ies) + len += sprintf(buf + len, " "); + len += sprintf(buf + len, "RespIEs="); + for (i = 0; i < ifsta->assocresp_ies_len; i++) { + len += sprintf(buf + len, "%02x", + ifsta->assocresp_ies[i]); + } + } + len += sprintf(buf + len, ")"); - ieee80211_sta_tx(dev, skb, 0); -} + if (len > IW_CUSTOM_MAX) { + len = sprintf(buf, "ASSOCRESPIE="); + for (i = 0; i < ifsta->assocresp_ies_len; i++) { + len += sprintf(buf + len, "%02x", + ifsta->assocresp_ies[i]); + } + } + if (len <= IW_CUSTOM_MAX) { + memset(&wrqu, 0, sizeof(wrqu)); + wrqu.data.length = len; + wireless_send_event(sdata->dev, IWEVCUSTOM, &wrqu, buf); + } -static int ieee80211_sta_wep_configured(struct net_device *dev) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (!sdata || !sdata->default_key || - sdata->default_key->conf.alg != ALG_WEP) - return 0; - return 1; + kfree(buf); } -static void ieee80211_auth_completed(struct net_device *dev, +static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { - printk(KERN_DEBUG "%s: authenticated\n", dev->name); - ifsta->flags |= IEEE80211_STA_AUTHENTICATED; - ieee80211_associate(dev, ifsta); -} + struct ieee80211_local *local = sdata->local; + struct ieee80211_conf *conf = &local_to_hw(local)->conf; + u32 changed = BSS_CHANGED_ASSOC; + struct ieee80211_bss *bss; -static void ieee80211_auth_challenge(struct net_device *dev, - struct ieee80211_if_sta *ifsta, - struct ieee80211_mgmt *mgmt, - size_t len) -{ - u8 *pos; - struct ieee802_11_elems elems; + ifsta->flags |= IEEE80211_STA_ASSOCIATED; - pos = mgmt->u.auth.variable; - ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); - if (!elems.challenge) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return; - ieee80211_send_auth(dev, ifsta, 3, elems.challenge - 2, - elems.challenge_len + 2, 1); -} -static void ieee80211_send_addba_resp(struct net_device *dev, u8 *da, u16 tid, - u8 dialog_token, u16 status, u16 policy, - u16 buf_size, u16 timeout) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - u16 capab; + bss = ieee80211_rx_bss_get(local, ifsta->bssid, + conf->channel->center_freq, + ifsta->ssid, ifsta->ssid_len); + if (bss) { + /* set timing information */ + sdata->bss_conf.beacon_int = bss->beacon_int; + sdata->bss_conf.timestamp = bss->timestamp; + sdata->bss_conf.dtim_period = bss->dtim_period; - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + changed |= ieee80211_handle_bss_capability(sdata, bss); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer " - "for addba resp frame\n", dev->name); - return; + ieee80211_rx_bss_put(local, bss); } - skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) - memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN); - else - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_ACTION); + if (conf->flags & IEEE80211_CONF_SUPPORT_HT_MODE) { + changed |= BSS_CHANGED_HT; + sdata->bss_conf.assoc_ht = 1; + sdata->bss_conf.ht_conf = &conf->ht_conf; + sdata->bss_conf.ht_bss_conf = &conf->ht_bss_conf; + } - skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp)); - mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP; - mgmt->u.action.u.addba_resp.dialog_token = dialog_token; + ifsta->flags |= IEEE80211_STA_PREV_BSSID_SET; + memcpy(ifsta->prev_bssid, sdata->u.sta.bssid, ETH_ALEN); + ieee80211_sta_send_associnfo(sdata, ifsta); - capab = (u16)(policy << 1); /* bit 1 aggregation policy */ - capab |= (u16)(tid << 2); /* bit 5:2 TID number */ - capab |= (u16)(buf_size << 6); /* bit 15:6 max size of aggregation */ + ifsta->last_probe = jiffies; + ieee80211_led_assoc(local, 1); - mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab); - mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); - mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); + sdata->bss_conf.assoc = 1; + /* + * For now just always ask the driver to update the basic rateset + * when we have associated, we aren't checking whether it actually + * changed or not. + */ + changed |= BSS_CHANGED_BASIC_RATES; + ieee80211_bss_info_change_notify(sdata, changed); - ieee80211_sta_tx(dev, skb, 0); + netif_tx_start_all_queues(sdata->dev); + netif_carrier_on(sdata->dev); - return; + ieee80211_sta_send_apinfo(sdata, ifsta); } -void ieee80211_send_addba_request(struct net_device *dev, const u8 *da, - u16 tid, u8 dialog_token, u16 start_seq_num, - u16 agg_size, u16 timeout) +static void ieee80211_direct_probe(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - u16 capab; - - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + DECLARE_MAC_BUF(mac); - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer " - "for addba request frame\n", dev->name); + ifsta->direct_probe_tries++; + if (ifsta->direct_probe_tries > IEEE80211_AUTH_MAX_TRIES) { + printk(KERN_DEBUG "%s: direct probe to AP %s timed out\n", + sdata->dev->name, print_mac(mac, ifsta->bssid)); + ifsta->state = IEEE80211_STA_MLME_DISABLED; return; } - skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) - memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN); - else - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_ACTION); - - skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req)); - mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ; + printk(KERN_DEBUG "%s: direct probe to AP %s try %d\n", + sdata->dev->name, print_mac(mac, ifsta->bssid), + ifsta->direct_probe_tries); - mgmt->u.action.u.addba_req.dialog_token = dialog_token; - capab = (u16)(1 << 1); /* bit 1 aggregation policy */ - capab |= (u16)(tid << 2); /* bit 5:2 TID number */ - capab |= (u16)(agg_size << 6); /* bit 15:6 max size of aggergation */ + ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; - mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab); + set_bit(IEEE80211_STA_REQ_DIRECT_PROBE, &ifsta->request); - mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout); - mgmt->u.action.u.addba_req.start_seq_num = - cpu_to_le16(start_seq_num << 4); + /* Direct probe is sent to broadcast address as some APs + * will not answer to direct packet in unassociated state. + */ + ieee80211_send_probe_req(sdata, NULL, + ifsta->ssid, ifsta->ssid_len); - ieee80211_sta_tx(dev, skb, 0); + mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); } -static void ieee80211_sta_process_addba_request(struct net_device *dev, - struct ieee80211_mgmt *mgmt, - size_t len) + +static void ieee80211_authenticate(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - struct ieee80211_conf *conf = &hw->conf; - struct sta_info *sta; - struct tid_ampdu_rx *tid_agg_rx; - u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num, status; - u8 dialog_token; - int ret = -EOPNOTSUPP; DECLARE_MAC_BUF(mac); - rcu_read_lock(); - - sta = sta_info_get(local, mgmt->sa); - if (!sta) { - rcu_read_unlock(); + ifsta->auth_tries++; + if (ifsta->auth_tries > IEEE80211_AUTH_MAX_TRIES) { + printk(KERN_DEBUG "%s: authentication with AP %s" + " timed out\n", + sdata->dev->name, print_mac(mac, ifsta->bssid)); + ifsta->state = IEEE80211_STA_MLME_DISABLED; return; } - /* extract session parameters from addba request frame */ - dialog_token = mgmt->u.action.u.addba_req.dialog_token; - timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout); - start_seq_num = - le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4; - - capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); - ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1; - tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; - buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; - - status = WLAN_STATUS_REQUEST_DECLINED; - - /* sanity check for incoming parameters: - * check if configuration can support the BA policy - * and if buffer size does not exceeds max value */ - if (((ba_policy != 1) - && (!(conf->ht_conf.cap & IEEE80211_HT_CAP_DELAY_BA))) - || (buf_size > IEEE80211_MAX_AMPDU_BUF)) { - status = WLAN_STATUS_INVALID_QOS_PARAM; -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "AddBA Req with bad params from " - "%s on tid %u. policy %d, buffer size %d\n", - print_mac(mac, mgmt->sa), tid, ba_policy, - buf_size); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - goto end_no_lock; - } - /* determine default buffer size */ - if (buf_size == 0) { - struct ieee80211_supported_band *sband; - - sband = local->hw.wiphy->bands[conf->channel->band]; - buf_size = IEEE80211_MIN_AMPDU_BUF; - buf_size = buf_size << sband->ht_info.ampdu_factor; - } - - - /* examine state machine */ - spin_lock_bh(&sta->lock); - - if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "unexpected AddBA Req from " - "%s on tid %u\n", - print_mac(mac, mgmt->sa), tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - goto end; - } + ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; + printk(KERN_DEBUG "%s: authenticate with AP %s\n", + sdata->dev->name, print_mac(mac, ifsta->bssid)); - /* prepare A-MPDU MLME for Rx aggregation */ - sta->ampdu_mlme.tid_rx[tid] = - kmalloc(sizeof(struct tid_ampdu_rx), GFP_ATOMIC); - if (!sta->ampdu_mlme.tid_rx[tid]) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_ERR "allocate rx mlme to tid %d failed\n", - tid); -#endif - goto end; - } - /* rx timer */ - sta->ampdu_mlme.tid_rx[tid]->session_timer.function = - sta_rx_agg_session_timer_expired; - sta->ampdu_mlme.tid_rx[tid]->session_timer.data = - (unsigned long)&sta->timer_to_tid[tid]; - init_timer(&sta->ampdu_mlme.tid_rx[tid]->session_timer); - - tid_agg_rx = sta->ampdu_mlme.tid_rx[tid]; - - /* prepare reordering buffer */ - tid_agg_rx->reorder_buf = - kmalloc(buf_size * sizeof(struct sk_buff *), GFP_ATOMIC); - if (!tid_agg_rx->reorder_buf) { -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_ERR "can not allocate reordering buffer " - "to tid %d\n", tid); -#endif - kfree(sta->ampdu_mlme.tid_rx[tid]); - goto end; - } - memset(tid_agg_rx->reorder_buf, 0, - buf_size * sizeof(struct sk_buff *)); - - if (local->ops->ampdu_action) - ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_START, - sta->addr, tid, &start_seq_num); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Rx A-MPDU request on tid %d result %d\n", tid, ret); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - if (ret) { - kfree(tid_agg_rx->reorder_buf); - kfree(tid_agg_rx); - sta->ampdu_mlme.tid_rx[tid] = NULL; - goto end; - } + ieee80211_send_auth(sdata, ifsta, 1, NULL, 0, 0); - /* change state and send addba resp */ - sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL; - tid_agg_rx->dialog_token = dialog_token; - tid_agg_rx->ssn = start_seq_num; - tid_agg_rx->head_seq_num = start_seq_num; - tid_agg_rx->buf_size = buf_size; - tid_agg_rx->timeout = timeout; - tid_agg_rx->stored_mpdu_num = 0; - status = WLAN_STATUS_SUCCESS; -end: - spin_unlock_bh(&sta->lock); - -end_no_lock: - ieee80211_send_addba_resp(sta->sdata->dev, sta->addr, tid, - dialog_token, status, 1, buf_size, timeout); - rcu_read_unlock(); + mod_timer(&ifsta->timer, jiffies + IEEE80211_AUTH_TIMEOUT); } -static void ieee80211_sta_process_addba_resp(struct net_device *dev, - struct ieee80211_mgmt *mgmt, - size_t len) +static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta, bool deauth, + bool self_disconnected, u16 reason) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; + struct ieee80211_local *local = sdata->local; struct sta_info *sta; - u16 capab; - u16 tid; - u8 *state; + u32 changed = BSS_CHANGED_ASSOC; rcu_read_lock(); - sta = sta_info_get(local, mgmt->sa); + sta = sta_info_get(local, ifsta->bssid); if (!sta) { rcu_read_unlock(); return; } - capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); - tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; + if (deauth) { + ifsta->direct_probe_tries = 0; + ifsta->auth_tries = 0; + } + ifsta->assoc_scan_tries = 0; + ifsta->assoc_tries = 0; - state = &sta->ampdu_mlme.tid_state_tx[tid]; + netif_tx_stop_all_queues(sdata->dev); + netif_carrier_off(sdata->dev); - spin_lock_bh(&sta->lock); + ieee80211_sta_tear_down_BA_sessions(sdata, sta->sta.addr); - if (!(*state & HT_ADDBA_REQUESTED_MSK)) { - spin_unlock_bh(&sta->lock); - goto addba_resp_exit; + if (self_disconnected) { + if (deauth) + ieee80211_send_deauth_disassoc(sdata, + IEEE80211_STYPE_DEAUTH, reason); + else + ieee80211_send_deauth_disassoc(sdata, + IEEE80211_STYPE_DISASSOC, reason); } - if (mgmt->u.action.u.addba_resp.dialog_token != - sta->ampdu_mlme.tid_tx[tid]->dialog_token) { - spin_unlock_bh(&sta->lock); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "wrong addBA response token, tid %d\n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - goto addba_resp_exit; - } + ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; + changed |= ieee80211_reset_erp_info(sdata); - del_timer_sync(&sta->ampdu_mlme.tid_tx[tid]->addba_resp_timer); -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "switched off addBA timer for tid %d \n", tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) - == WLAN_STATUS_SUCCESS) { - *state |= HT_ADDBA_RECEIVED_MSK; - sta->ampdu_mlme.addba_req_num[tid] = 0; + if (sdata->bss_conf.assoc_ht) + changed |= BSS_CHANGED_HT; - if (*state == HT_AGG_STATE_OPERATIONAL) - ieee80211_wake_queue(hw, sta->tid_to_tx_q[tid]); + sdata->bss_conf.assoc_ht = 0; + sdata->bss_conf.ht_conf = NULL; + sdata->bss_conf.ht_bss_conf = NULL; - spin_unlock_bh(&sta->lock); - } else { - sta->ampdu_mlme.addba_req_num[tid]++; - /* this will allow the state check in stop_BA_session */ - *state = HT_AGG_STATE_OPERATIONAL; - spin_unlock_bh(&sta->lock); - ieee80211_stop_tx_ba_session(hw, sta->addr, tid, - WLAN_BACK_INITIATOR); - } + ieee80211_led_assoc(local, 0); + sdata->bss_conf.assoc = 0; + + ieee80211_sta_send_apinfo(sdata, ifsta); + + if (self_disconnected) + ifsta->state = IEEE80211_STA_MLME_DISABLED; + + sta_info_unlink(&sta); -addba_resp_exit: rcu_read_unlock(); + + sta_info_destroy(sta); } -void ieee80211_send_delba(struct net_device *dev, const u8 *da, u16 tid, - u16 initiator, u16 reason_code) +static int ieee80211_sta_wep_configured(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - struct sk_buff *skb; - struct ieee80211_mgmt *mgmt; - u16 params; - - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); - - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer " - "for delba frame\n", dev->name); - return; - } + if (!sdata || !sdata->default_key || + sdata->default_key->conf.alg != ALG_WEP) + return 0; + return 1; +} - skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24); - memset(mgmt, 0, 24); - memcpy(mgmt->da, da, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) - memcpy(mgmt->bssid, dev->dev_addr, ETH_ALEN); - else - memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_ACTION); +static int ieee80211_privacy_mismatch(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_bss *bss; + int bss_privacy; + int wep_privacy; + int privacy_invoked; - skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba)); + if (!ifsta || (ifsta->flags & IEEE80211_STA_MIXED_CELL)) + return 0; - mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.u.delba.action_code = WLAN_ACTION_DELBA; - params = (u16)(initiator << 11); /* bit 11 initiator */ - params |= (u16)(tid << 12); /* bit 15:12 TID number */ + bss = ieee80211_rx_bss_get(local, ifsta->bssid, + local->hw.conf.channel->center_freq, + ifsta->ssid, ifsta->ssid_len); + if (!bss) + return 0; - mgmt->u.action.u.delba.params = cpu_to_le16(params); - mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code); + bss_privacy = !!(bss->capability & WLAN_CAPABILITY_PRIVACY); + wep_privacy = !!ieee80211_sta_wep_configured(sdata); + privacy_invoked = !!(ifsta->flags & IEEE80211_STA_PRIVACY_INVOKED); - ieee80211_sta_tx(dev, skb, 0); -} + ieee80211_rx_bss_put(local, bss); -void ieee80211_send_bar(struct net_device *dev, u8 *ra, u16 tid, u16 ssn) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sk_buff *skb; - struct ieee80211_bar *bar; - u16 bar_control = 0; + if ((bss_privacy == wep_privacy) || (bss_privacy == privacy_invoked)) + return 0; - skb = dev_alloc_skb(sizeof(*bar) + local->hw.extra_tx_headroom); - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer for " - "bar frame\n", dev->name); - return; - } - skb_reserve(skb, local->hw.extra_tx_headroom); - bar = (struct ieee80211_bar *)skb_put(skb, sizeof(*bar)); - memset(bar, 0, sizeof(*bar)); - bar->frame_control = IEEE80211_FC(IEEE80211_FTYPE_CTL, - IEEE80211_STYPE_BACK_REQ); - memcpy(bar->ra, ra, ETH_ALEN); - memcpy(bar->ta, dev->dev_addr, ETH_ALEN); - bar_control |= (u16)IEEE80211_BAR_CTRL_ACK_POLICY_NORMAL; - bar_control |= (u16)IEEE80211_BAR_CTRL_CBMTID_COMPRESSED_BA; - bar_control |= (u16)(tid << 12); - bar->control = cpu_to_le16(bar_control); - bar->start_seq_num = cpu_to_le16(ssn); - - ieee80211_sta_tx(dev, skb, 0); + return 1; } -void ieee80211_sta_stop_rx_ba_session(struct net_device *dev, u8 *ra, u16 tid, - u16 initiator, u16 reason) +static void ieee80211_associate(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hw *hw = &local->hw; - struct sta_info *sta; - int ret, i; DECLARE_MAC_BUF(mac); - rcu_read_lock(); - - sta = sta_info_get(local, ra); - if (!sta) { - rcu_read_unlock(); + ifsta->assoc_tries++; + if (ifsta->assoc_tries > IEEE80211_ASSOC_MAX_TRIES) { + printk(KERN_DEBUG "%s: association with AP %s" + " timed out\n", + sdata->dev->name, print_mac(mac, ifsta->bssid)); + ifsta->state = IEEE80211_STA_MLME_DISABLED; return; } - /* check if TID is in operational state */ - spin_lock_bh(&sta->lock); - if (sta->ampdu_mlme.tid_state_rx[tid] - != HT_AGG_STATE_OPERATIONAL) { - spin_unlock_bh(&sta->lock); - rcu_read_unlock(); + ifsta->state = IEEE80211_STA_MLME_ASSOCIATE; + printk(KERN_DEBUG "%s: associate with AP %s\n", + sdata->dev->name, print_mac(mac, ifsta->bssid)); + if (ieee80211_privacy_mismatch(sdata, ifsta)) { + printk(KERN_DEBUG "%s: mismatch in privacy configuration and " + "mixed-cell disabled - abort association\n", sdata->dev->name); + ifsta->state = IEEE80211_STA_MLME_DISABLED; return; } - sta->ampdu_mlme.tid_state_rx[tid] = - HT_AGG_STATE_REQ_STOP_BA_MSK | - (initiator << HT_AGG_STATE_INITIATOR_SHIFT); - spin_unlock_bh(&sta->lock); - - /* stop HW Rx aggregation. ampdu_action existence - * already verified in session init so we add the BUG_ON */ - BUG_ON(!local->ops->ampdu_action); - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "Rx BA session stop requested for %s tid %u\n", - print_mac(mac, ra), tid); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - ret = local->ops->ampdu_action(hw, IEEE80211_AMPDU_RX_STOP, - ra, tid, NULL); - if (ret) - printk(KERN_DEBUG "HW problem - can not stop rx " - "aggregation for tid %d\n", tid); - - /* shutdown timer has not expired */ - if (initiator != WLAN_BACK_TIMER) - del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer); - - /* check if this is a self generated aggregation halt */ - if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER) - ieee80211_send_delba(dev, ra, tid, 0, reason); - - /* free the reordering buffer */ - for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) { - if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) { - /* release the reordered frames */ - dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]); - sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--; - sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL; - } - } - /* free resources */ - kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf); - kfree(sta->ampdu_mlme.tid_rx[tid]); - sta->ampdu_mlme.tid_rx[tid] = NULL; - sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE; - rcu_read_unlock(); + ieee80211_send_assoc(sdata, ifsta); + + mod_timer(&ifsta->timer, jiffies + IEEE80211_ASSOC_TIMEOUT); } -static void ieee80211_sta_process_delba(struct net_device *dev, - struct ieee80211_mgmt *mgmt, size_t len) +static void ieee80211_associated(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct sta_info *sta; - u16 tid, params; - u16 initiator; + int disassoc; DECLARE_MAC_BUF(mac); - rcu_read_lock(); - - sta = sta_info_get(local, mgmt->sa); - if (!sta) { - rcu_read_unlock(); - return; - } - - params = le16_to_cpu(mgmt->u.action.u.delba.params); - tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12; - initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11; - -#ifdef CONFIG_MAC80211_HT_DEBUG - if (net_ratelimit()) - printk(KERN_DEBUG "delba from %s (%s) tid %d reason code %d\n", - print_mac(mac, mgmt->sa), - initiator ? "initiator" : "recipient", tid, - mgmt->u.action.u.delba.reason_code); -#endif /* CONFIG_MAC80211_HT_DEBUG */ - - if (initiator == WLAN_BACK_INITIATOR) - ieee80211_sta_stop_rx_ba_session(dev, sta->addr, tid, - WLAN_BACK_INITIATOR, 0); - else { /* WLAN_BACK_RECIPIENT */ - spin_lock_bh(&sta->lock); - sta->ampdu_mlme.tid_state_tx[tid] = - HT_AGG_STATE_OPERATIONAL; - spin_unlock_bh(&sta->lock); - ieee80211_stop_tx_ba_session(&local->hw, sta->addr, tid, - WLAN_BACK_RECIPIENT); - } - rcu_read_unlock(); -} + /* TODO: start monitoring current AP signal quality and number of + * missed beacons. Scan other channels every now and then and search + * for better APs. */ + /* TODO: remove expired BSSes */ -/* - * After sending add Block Ack request we activated a timer until - * add Block Ack response will arrive from the recipient. - * If this timer expires sta_addba_resp_timer_expired will be executed. - */ -void sta_addba_resp_timer_expired(unsigned long data) -{ - /* not an elegant detour, but there is no choice as the timer passes - * only one argument, and both sta_info and TID are needed, so init - * flow in sta_info_create gives the TID as data, while the timer_to_id - * array gives the sta through container_of */ - u16 tid = *(u8 *)data; - struct sta_info *temp_sta = container_of((void *)data, - struct sta_info, timer_to_tid[tid]); - - struct ieee80211_local *local = temp_sta->local; - struct ieee80211_hw *hw = &local->hw; - struct sta_info *sta; - u8 *state; + ifsta->state = IEEE80211_STA_MLME_ASSOCIATED; rcu_read_lock(); - sta = sta_info_get(local, temp_sta->addr); + sta = sta_info_get(local, ifsta->bssid); if (!sta) { - rcu_read_unlock(); - return; - } - - state = &sta->ampdu_mlme.tid_state_tx[tid]; - /* check if the TID waits for addBA response */ - spin_lock_bh(&sta->lock); - if (!(*state & HT_ADDBA_REQUESTED_MSK)) { - spin_unlock_bh(&sta->lock); - *state = HT_AGG_STATE_IDLE; -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "timer expired on tid %d but we are not " - "expecting addBA response there", tid); -#endif - goto timer_expired_exit; + printk(KERN_DEBUG "%s: No STA entry for own AP %s\n", + sdata->dev->name, print_mac(mac, ifsta->bssid)); + disassoc = 1; + } else { + disassoc = 0; + if (time_after(jiffies, + sta->last_rx + IEEE80211_MONITORING_INTERVAL)) { + if (ifsta->flags & IEEE80211_STA_PROBEREQ_POLL) { + printk(KERN_DEBUG "%s: No ProbeResp from " + "current AP %s - assume out of " + "range\n", + sdata->dev->name, print_mac(mac, ifsta->bssid)); + disassoc = 1; + } else + ieee80211_send_probe_req(sdata, ifsta->bssid, + ifsta->ssid, + ifsta->ssid_len); + ifsta->flags ^= IEEE80211_STA_PROBEREQ_POLL; + } else { + ifsta->flags &= ~IEEE80211_STA_PROBEREQ_POLL; + if (time_after(jiffies, ifsta->last_probe + + IEEE80211_PROBE_INTERVAL)) { + ifsta->last_probe = jiffies; + ieee80211_send_probe_req(sdata, ifsta->bssid, + ifsta->ssid, + ifsta->ssid_len); + } + } } -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "addBA response timer expired on tid %d\n", tid); -#endif - - /* go through the state check in stop_BA_session */ - *state = HT_AGG_STATE_OPERATIONAL; - spin_unlock_bh(&sta->lock); - ieee80211_stop_tx_ba_session(hw, temp_sta->addr, tid, - WLAN_BACK_INITIATOR); - -timer_expired_exit: rcu_read_unlock(); -} -/* - * After accepting the AddBA Request we activated a timer, - * resetting it after each frame that arrives from the originator. - * if this timer expires ieee80211_sta_stop_rx_ba_session will be executed. - */ -static void sta_rx_agg_session_timer_expired(unsigned long data) -{ - /* not an elegant detour, but there is no choice as the timer passes - * only one argument, and various sta_info are needed here, so init - * flow in sta_info_create gives the TID as data, while the timer_to_id - * array gives the sta through container_of */ - u8 *ptid = (u8 *)data; - u8 *timer_to_id = ptid - *ptid; - struct sta_info *sta = container_of(timer_to_id, struct sta_info, - timer_to_tid[0]); - -#ifdef CONFIG_MAC80211_HT_DEBUG - printk(KERN_DEBUG "rx session timer expired on tid %d\n", (u16)*ptid); -#endif - ieee80211_sta_stop_rx_ba_session(sta->sdata->dev, sta->addr, - (u16)*ptid, WLAN_BACK_TIMER, - WLAN_REASON_QSTA_TIMEOUT); + if (disassoc) + ieee80211_set_disassoc(sdata, ifsta, true, true, + WLAN_REASON_PREV_AUTH_NOT_VALID); + else + mod_timer(&ifsta->timer, jiffies + + IEEE80211_MONITORING_INTERVAL); } -void ieee80211_sta_tear_down_BA_sessions(struct net_device *dev, u8 *addr) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - int i; - for (i = 0; i < STA_TID_NUM; i++) { - ieee80211_stop_tx_ba_session(&local->hw, addr, i, - WLAN_BACK_INITIATOR); - ieee80211_sta_stop_rx_ba_session(dev, addr, i, - WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_LEAVE_QBSS); - } +static void ieee80211_auth_completed(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) +{ + printk(KERN_DEBUG "%s: authenticated\n", sdata->dev->name); + ifsta->flags |= IEEE80211_STA_AUTHENTICATED; + ieee80211_associate(sdata, ifsta); } -static void ieee80211_send_refuse_measurement_request(struct net_device *dev, - struct ieee80211_msrment_ie *request_ie, - const u8 *da, const u8 *bssid, - u8 dialog_token) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sk_buff *skb; - struct ieee80211_mgmt *msr_report; - skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom + - sizeof(struct ieee80211_msrment_ie)); +static void ieee80211_auth_challenge(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + u8 *pos; + struct ieee802_11_elems elems; - if (!skb) { - printk(KERN_ERR "%s: failed to allocate buffer for " - "measurement report frame\n", dev->name); + pos = mgmt->u.auth.variable; + ieee802_11_parse_elems(pos, len - (pos - (u8 *) mgmt), &elems); + if (!elems.challenge) return; - } - - skb_reserve(skb, local->hw.extra_tx_headroom); - msr_report = (struct ieee80211_mgmt *)skb_put(skb, 24); - memset(msr_report, 0, 24); - memcpy(msr_report->da, da, ETH_ALEN); - memcpy(msr_report->sa, dev->dev_addr, ETH_ALEN); - memcpy(msr_report->bssid, bssid, ETH_ALEN); - msr_report->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_ACTION); - - skb_put(skb, 1 + sizeof(msr_report->u.action.u.measurement)); - msr_report->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT; - msr_report->u.action.u.measurement.action_code = - WLAN_ACTION_SPCT_MSR_RPRT; - msr_report->u.action.u.measurement.dialog_token = dialog_token; - - msr_report->u.action.u.measurement.element_id = WLAN_EID_MEASURE_REPORT; - msr_report->u.action.u.measurement.length = - sizeof(struct ieee80211_msrment_ie); - - memset(&msr_report->u.action.u.measurement.msr_elem, 0, - sizeof(struct ieee80211_msrment_ie)); - msr_report->u.action.u.measurement.msr_elem.token = request_ie->token; - msr_report->u.action.u.measurement.msr_elem.mode |= - IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED; - msr_report->u.action.u.measurement.msr_elem.type = request_ie->type; - - ieee80211_sta_tx(dev, skb, 0); -} - -static void ieee80211_sta_process_measurement_req(struct net_device *dev, - struct ieee80211_mgmt *mgmt, - size_t len) -{ - /* - * Ignoring measurement request is spec violation. - * Mandatory measurements must be reported optional - * measurements might be refused or reported incapable - * For now just refuse - * TODO: Answer basic measurement as unmeasured - */ - ieee80211_send_refuse_measurement_request(dev, - &mgmt->u.action.u.measurement.msr_elem, - mgmt->sa, mgmt->bssid, - mgmt->u.action.u.measurement.dialog_token); + ieee80211_send_auth(sdata, ifsta, 3, elems.challenge - 2, + elems.challenge_len + 2, 1); } - -static void ieee80211_rx_mgmt_auth(struct net_device *dev, +static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, size_t len) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); u16 auth_alg, auth_transaction, status_code; DECLARE_MAC_BUF(mac); - if (ifsta->state != IEEE80211_AUTHENTICATE && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS) + if (ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE && + sdata->vif.type != NL80211_IFTYPE_ADHOC) return; if (len < 24 + 6) return; - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS && + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && memcmp(ifsta->bssid, mgmt->sa, ETH_ALEN) != 0) return; - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS && + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) return; @@ -1870,7 +1053,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev, auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); status_code = le16_to_cpu(mgmt->u.auth.status_code); - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { /* * IEEE 802.11 standard does not require authentication in IBSS * networks and most implementations do not seem to use it. @@ -1879,7 +1062,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev, */ if (auth_alg != WLAN_AUTH_OPEN || auth_transaction != 1) return; - ieee80211_send_auth(dev, ifsta, 2, NULL, 0, 0); + ieee80211_send_auth(sdata, ifsta, 2, NULL, 0, 0); } if (auth_alg != ifsta->auth_alg || @@ -1912,7 +1095,7 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev, algs[pos] == 0xff) continue; if (algs[pos] == WLAN_AUTH_SHARED_KEY && - !ieee80211_sta_wep_configured(dev)) + !ieee80211_sta_wep_configured(sdata)) continue; ifsta->auth_alg = algs[pos]; break; @@ -1924,19 +1107,19 @@ static void ieee80211_rx_mgmt_auth(struct net_device *dev, switch (ifsta->auth_alg) { case WLAN_AUTH_OPEN: case WLAN_AUTH_LEAP: - ieee80211_auth_completed(dev, ifsta); + ieee80211_auth_completed(sdata, ifsta); break; case WLAN_AUTH_SHARED_KEY: if (ifsta->auth_transaction == 4) - ieee80211_auth_completed(dev, ifsta); + ieee80211_auth_completed(sdata, ifsta); else - ieee80211_auth_challenge(dev, ifsta, mgmt, len); + ieee80211_auth_challenge(sdata, ifsta, mgmt, len); break; } } -static void ieee80211_rx_mgmt_deauth(struct net_device *dev, +static void ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, size_t len) @@ -1953,22 +1136,22 @@ static void ieee80211_rx_mgmt_deauth(struct net_device *dev, reason_code = le16_to_cpu(mgmt->u.deauth.reason_code); if (ifsta->flags & IEEE80211_STA_AUTHENTICATED) - printk(KERN_DEBUG "%s: deauthenticated\n", dev->name); + printk(KERN_DEBUG "%s: deauthenticated\n", sdata->dev->name); - if (ifsta->state == IEEE80211_AUTHENTICATE || - ifsta->state == IEEE80211_ASSOCIATE || - ifsta->state == IEEE80211_ASSOCIATED) { - ifsta->state = IEEE80211_AUTHENTICATE; + if (ifsta->state == IEEE80211_STA_MLME_AUTHENTICATE || + ifsta->state == IEEE80211_STA_MLME_ASSOCIATE || + ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) { + ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; mod_timer(&ifsta->timer, jiffies + IEEE80211_RETRY_AUTH_INTERVAL); } - ieee80211_set_disassoc(dev, ifsta, 1); + ieee80211_set_disassoc(sdata, ifsta, true, false, 0); ifsta->flags &= ~IEEE80211_STA_AUTHENTICATED; } -static void ieee80211_rx_mgmt_disassoc(struct net_device *dev, +static void ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, size_t len) @@ -1985,15 +1168,15 @@ static void ieee80211_rx_mgmt_disassoc(struct net_device *dev, reason_code = le16_to_cpu(mgmt->u.disassoc.reason_code); if (ifsta->flags & IEEE80211_STA_ASSOCIATED) - printk(KERN_DEBUG "%s: disassociated\n", dev->name); + printk(KERN_DEBUG "%s: disassociated\n", sdata->dev->name); - if (ifsta->state == IEEE80211_ASSOCIATED) { - ifsta->state = IEEE80211_ASSOCIATE; + if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) { + ifsta->state = IEEE80211_STA_MLME_ASSOCIATE; mod_timer(&ifsta->timer, jiffies + IEEE80211_RETRY_AUTH_INTERVAL); } - ieee80211_set_disassoc(dev, ifsta, 0); + ieee80211_set_disassoc(sdata, ifsta, false, false, 0); } @@ -2004,7 +1187,6 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, int reassoc) { struct ieee80211_local *local = sdata->local; - struct net_device *dev = sdata->dev; struct ieee80211_supported_band *sband; struct sta_info *sta; u64 rates, basic_rates; @@ -2019,7 +1201,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, /* AssocResp and ReassocResp have identical structure, so process both * of them in this function. */ - if (ifsta->state != IEEE80211_ASSOCIATE) + if (ifsta->state != IEEE80211_STA_MLME_ASSOCIATE) return; if (len < 24 + 6) @@ -2034,12 +1216,12 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, printk(KERN_DEBUG "%s: RX %sssocResp from %s (capab=0x%x " "status=%d aid=%d)\n", - dev->name, reassoc ? "Rea" : "A", print_mac(mac, mgmt->sa), + sdata->dev->name, reassoc ? "Rea" : "A", print_mac(mac, mgmt->sa), capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); if (status_code != WLAN_STATUS_SUCCESS) { printk(KERN_DEBUG "%s: AP denied association (code=%d)\n", - dev->name, status_code); + sdata->dev->name, status_code); /* if this was a reassociation, ensure we try a "full" * association next time. This works around some broken APs * which do not correctly reject reassociation requests. */ @@ -2049,7 +1231,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if ((aid & (BIT(15) | BIT(14))) != (BIT(15) | BIT(14))) printk(KERN_DEBUG "%s: invalid aid value %d; bits 15:14 not " - "set\n", dev->name, aid); + "set\n", sdata->dev->name, aid); aid &= ~(BIT(15) | BIT(14)); pos = mgmt->u.assoc_resp.variable; @@ -2057,11 +1239,11 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (!elems.supp_rates) { printk(KERN_DEBUG "%s: no SuppRates element in AssocResp\n", - dev->name); + sdata->dev->name); return; } - printk(KERN_DEBUG "%s: associated\n", dev->name); + printk(KERN_DEBUG "%s: associated\n", sdata->dev->name); ifsta->aid = aid; ifsta->ap_capab = capab_info; @@ -2076,17 +1258,17 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, /* Add STA entry for the AP */ sta = sta_info_get(local, ifsta->bssid); if (!sta) { - struct ieee80211_sta_bss *bss; + struct ieee80211_bss *bss; int err; sta = sta_info_alloc(sdata, ifsta->bssid, GFP_ATOMIC); if (!sta) { printk(KERN_DEBUG "%s: failed to alloc STA entry for" - " the AP\n", dev->name); + " the AP\n", sdata->dev->name); rcu_read_unlock(); return; } - bss = ieee80211_rx_bss_get(dev, ifsta->bssid, + bss = ieee80211_rx_bss_get(local, ifsta->bssid, local->hw.conf.channel->center_freq, ifsta->ssid, ifsta->ssid_len); if (bss) { @@ -2099,7 +1281,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, err = sta_info_insert(sta); if (err) { printk(KERN_DEBUG "%s: failed to insert STA entry for" - " the AP (error %d)\n", dev->name, err); + " the AP (error %d)\n", sdata->dev->name, err); rcu_read_unlock(); return; } @@ -2152,8 +1334,8 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, } } - sta->supp_rates[local->hw.conf.channel->band] = rates; - sdata->basic_rates = basic_rates; + sta->sta.supp_rates[local->hw.conf.channel->band] = rates; + sdata->bss_conf.basic_rates = basic_rates; /* cf. IEEE 802.11 9.2.12 */ if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && @@ -2166,20 +1348,18 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, (ifsta->flags & IEEE80211_STA_WMM_ENABLED)) { struct ieee80211_ht_bss_info bss_info; ieee80211_ht_cap_ie_to_ht_info( - (struct ieee80211_ht_cap *) - elems.ht_cap_elem, &sta->ht_info); + elems.ht_cap_elem, &sta->sta.ht_info); ieee80211_ht_addt_info_ie_to_ht_bss_info( - (struct ieee80211_ht_addt_info *) elems.ht_info_elem, &bss_info); - ieee80211_handle_ht(local, 1, &sta->ht_info, &bss_info); + ieee80211_handle_ht(local, 1, &sta->sta.ht_info, &bss_info); } - rate_control_rate_init(sta, local); + rate_control_rate_init(sta); if (elems.wmm_param) { set_sta_flags(sta, WLAN_STA_WME); rcu_read_unlock(); - ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, + ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, elems.wmm_param_len); } else rcu_read_unlock(); @@ -2188,234 +1368,26 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, * ieee80211_set_associated() will tell the driver */ bss_conf->aid = aid; bss_conf->assoc_capability = capab_info; - ieee80211_set_associated(dev, ifsta, 1); + ieee80211_set_associated(sdata, ifsta); - ieee80211_associated(dev, ifsta); + ieee80211_associated(sdata, ifsta); } -/* Caller must hold local->sta_bss_lock */ -static void __ieee80211_rx_bss_hash_add(struct net_device *dev, - struct ieee80211_sta_bss *bss) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - u8 hash_idx; - - if (bss_mesh_cfg(bss)) - hash_idx = mesh_id_hash(bss_mesh_id(bss), - bss_mesh_id_len(bss)); - else - hash_idx = STA_HASH(bss->bssid); - - bss->hnext = local->sta_bss_hash[hash_idx]; - local->sta_bss_hash[hash_idx] = bss; -} - - -/* Caller must hold local->sta_bss_lock */ -static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local, - struct ieee80211_sta_bss *bss) -{ - struct ieee80211_sta_bss *b, *prev = NULL; - b = local->sta_bss_hash[STA_HASH(bss->bssid)]; - while (b) { - if (b == bss) { - if (!prev) - local->sta_bss_hash[STA_HASH(bss->bssid)] = - bss->hnext; - else - prev->hnext = bss->hnext; - break; - } - prev = b; - b = b->hnext; - } -} - - -static struct ieee80211_sta_bss * -ieee80211_rx_bss_add(struct net_device *dev, u8 *bssid, int freq, - u8 *ssid, u8 ssid_len) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *bss; - - bss = kzalloc(sizeof(*bss), GFP_ATOMIC); - if (!bss) - return NULL; - atomic_inc(&bss->users); - atomic_inc(&bss->users); - memcpy(bss->bssid, bssid, ETH_ALEN); - bss->freq = freq; - if (ssid && ssid_len <= IEEE80211_MAX_SSID_LEN) { - memcpy(bss->ssid, ssid, ssid_len); - bss->ssid_len = ssid_len; - } - - spin_lock_bh(&local->sta_bss_lock); - /* TODO: order by RSSI? */ - list_add_tail(&bss->list, &local->sta_bss_list); - __ieee80211_rx_bss_hash_add(dev, bss); - spin_unlock_bh(&local->sta_bss_lock); - return bss; -} - -static struct ieee80211_sta_bss * -ieee80211_rx_bss_get(struct net_device *dev, u8 *bssid, int freq, - u8 *ssid, u8 ssid_len) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *bss; - - spin_lock_bh(&local->sta_bss_lock); - bss = local->sta_bss_hash[STA_HASH(bssid)]; - while (bss) { - if (!bss_mesh_cfg(bss) && - !memcmp(bss->bssid, bssid, ETH_ALEN) && - bss->freq == freq && - bss->ssid_len == ssid_len && - (ssid_len == 0 || !memcmp(bss->ssid, ssid, ssid_len))) { - atomic_inc(&bss->users); - break; - } - bss = bss->hnext; - } - spin_unlock_bh(&local->sta_bss_lock); - return bss; -} - -#ifdef CONFIG_MAC80211_MESH -static struct ieee80211_sta_bss * -ieee80211_rx_mesh_bss_get(struct net_device *dev, u8 *mesh_id, int mesh_id_len, - u8 *mesh_cfg, int freq) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *bss; - - spin_lock_bh(&local->sta_bss_lock); - bss = local->sta_bss_hash[mesh_id_hash(mesh_id, mesh_id_len)]; - while (bss) { - if (bss_mesh_cfg(bss) && - !memcmp(bss_mesh_cfg(bss), mesh_cfg, MESH_CFG_CMP_LEN) && - bss->freq == freq && - mesh_id_len == bss->mesh_id_len && - (mesh_id_len == 0 || !memcmp(bss->mesh_id, mesh_id, - mesh_id_len))) { - atomic_inc(&bss->users); - break; - } - bss = bss->hnext; - } - spin_unlock_bh(&local->sta_bss_lock); - return bss; -} - -static struct ieee80211_sta_bss * -ieee80211_rx_mesh_bss_add(struct net_device *dev, u8 *mesh_id, int mesh_id_len, - u8 *mesh_cfg, int mesh_config_len, int freq) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *bss; - - if (mesh_config_len != MESH_CFG_LEN) - return NULL; - - bss = kzalloc(sizeof(*bss), GFP_ATOMIC); - if (!bss) - return NULL; - - bss->mesh_cfg = kmalloc(MESH_CFG_CMP_LEN, GFP_ATOMIC); - if (!bss->mesh_cfg) { - kfree(bss); - return NULL; - } - - if (mesh_id_len && mesh_id_len <= IEEE80211_MAX_MESH_ID_LEN) { - bss->mesh_id = kmalloc(mesh_id_len, GFP_ATOMIC); - if (!bss->mesh_id) { - kfree(bss->mesh_cfg); - kfree(bss); - return NULL; - } - memcpy(bss->mesh_id, mesh_id, mesh_id_len); - } - - atomic_inc(&bss->users); - atomic_inc(&bss->users); - memcpy(bss->mesh_cfg, mesh_cfg, MESH_CFG_CMP_LEN); - bss->mesh_id_len = mesh_id_len; - bss->freq = freq; - spin_lock_bh(&local->sta_bss_lock); - /* TODO: order by RSSI? */ - list_add_tail(&bss->list, &local->sta_bss_list); - __ieee80211_rx_bss_hash_add(dev, bss); - spin_unlock_bh(&local->sta_bss_lock); - return bss; -} -#endif - -static void ieee80211_rx_bss_free(struct ieee80211_sta_bss *bss) -{ - kfree(bss->wpa_ie); - kfree(bss->rsn_ie); - kfree(bss->wmm_ie); - kfree(bss->ht_ie); - kfree(bss->ht_add_ie); - kfree(bss_mesh_id(bss)); - kfree(bss_mesh_cfg(bss)); - kfree(bss); -} - - -static void ieee80211_rx_bss_put(struct ieee80211_local *local, - struct ieee80211_sta_bss *bss) -{ - local_bh_disable(); - if (!atomic_dec_and_lock(&bss->users, &local->sta_bss_lock)) { - local_bh_enable(); - return; - } - - __ieee80211_rx_bss_hash_del(local, bss); - list_del(&bss->list); - spin_unlock_bh(&local->sta_bss_lock); - ieee80211_rx_bss_free(bss); -} - - -void ieee80211_rx_bss_list_init(struct ieee80211_local *local) -{ - spin_lock_init(&local->sta_bss_lock); - INIT_LIST_HEAD(&local->sta_bss_list); -} - - -void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local) -{ - struct ieee80211_sta_bss *bss, *tmp; - - list_for_each_entry_safe(bss, tmp, &local->sta_bss_list, list) - ieee80211_rx_bss_put(local, bss); -} - - -static int ieee80211_sta_join_ibss(struct net_device *dev, +static int ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, - struct ieee80211_sta_bss *bss) + struct ieee80211_bss *bss) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; int res, rates, i, j; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u8 *pos; - struct ieee80211_sub_if_data *sdata; struct ieee80211_supported_band *sband; union iwreq_data wrqu; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - /* Remove possible STA entries from other IBSS networks. */ sta_info_flush_delayed(sdata); @@ -2433,7 +1405,7 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, sdata->drop_unencrypted = bss->capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; - res = ieee80211_set_freq(dev, bss->freq); + res = ieee80211_set_freq(sdata, bss->freq); if (res) return res; @@ -2446,10 +1418,10 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, mgmt = (struct ieee80211_mgmt *) skb_put(skb, 24 + sizeof(mgmt->u.beacon)); memset(mgmt, 0, 24 + sizeof(mgmt->u.beacon)); - mgmt->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_PROBE_RESP); + mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_PROBE_RESP); memset(mgmt->da, 0xff, ETH_ALEN); - memcpy(mgmt->sa, dev->dev_addr, ETH_ALEN); + memcpy(mgmt->sa, sdata->dev->dev_addr, ETH_ALEN); memcpy(mgmt->bssid, ifsta->bssid, ETH_ALEN); mgmt->u.beacon.beacon_int = cpu_to_le16(local->hw.conf.beacon_int); @@ -2506,108 +1478,38 @@ static int ieee80211_sta_join_ibss(struct net_device *dev, } ifsta->supp_rates_bits[local->hw.conf.channel->band] = rates; - ieee80211_sta_def_wmm_params(dev, bss, 1); + ieee80211_sta_def_wmm_params(sdata, bss); - ifsta->state = IEEE80211_IBSS_JOINED; + ifsta->state = IEEE80211_STA_MLME_IBSS_JOINED; mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); + ieee80211_led_assoc(local, true); + memset(&wrqu, 0, sizeof(wrqu)); memcpy(wrqu.ap_addr.sa_data, bss->bssid, ETH_ALEN); - wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); + wireless_send_event(sdata->dev, SIOCGIWAP, &wrqu, NULL); return res; } -u64 ieee80211_sta_get_rates(struct ieee80211_local *local, - struct ieee802_11_elems *elems, - enum ieee80211_band band) -{ - struct ieee80211_supported_band *sband; - struct ieee80211_rate *bitrates; - size_t num_rates; - u64 supp_rates; - int i, j; - sband = local->hw.wiphy->bands[band]; - - if (!sband) { - WARN_ON(1); - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - } - - bitrates = sband->bitrates; - num_rates = sband->n_bitrates; - supp_rates = 0; - for (i = 0; i < elems->supp_rates_len + - elems->ext_supp_rates_len; i++) { - u8 rate = 0; - int own_rate; - if (i < elems->supp_rates_len) - rate = elems->supp_rates[i]; - else if (elems->ext_supp_rates) - rate = elems->ext_supp_rates - [i - elems->supp_rates_len]; - own_rate = 5 * (rate & 0x7f); - for (j = 0; j < num_rates; j++) - if (bitrates[j].bitrate == own_rate) - supp_rates |= BIT(j); - } - return supp_rates; -} - - -static void ieee80211_rx_bss_info(struct net_device *dev, +static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status, struct ieee802_11_elems *elems, - int beacon) + bool beacon) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - int freq, clen; - struct ieee80211_sta_bss *bss; + struct ieee80211_local *local = sdata->local; + int freq; + struct ieee80211_bss *bss; struct sta_info *sta; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - u64 beacon_timestamp, rx_timestamp; struct ieee80211_channel *channel; + u64 beacon_timestamp, rx_timestamp; + u64 supp_rates = 0; + enum ieee80211_band band = rx_status->band; DECLARE_MAC_BUF(mac); DECLARE_MAC_BUF(mac2); - if (!beacon && memcmp(mgmt->da, dev->dev_addr, ETH_ALEN)) - return; /* ignore ProbeResp to foreign address */ - - beacon_timestamp = le64_to_cpu(mgmt->u.beacon.timestamp); - - if (ieee80211_vif_is_mesh(&sdata->vif) && elems->mesh_id && - elems->mesh_config && mesh_matches_local(elems, dev)) { - u64 rates = ieee80211_sta_get_rates(local, elems, - rx_status->band); - - mesh_neighbour_update(mgmt->sa, rates, dev, - mesh_peer_accepts_plinks(elems, dev)); - } - - rcu_read_lock(); - - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && elems->supp_rates && - memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0 && - (sta = sta_info_get(local, mgmt->sa))) { - u64 prev_rates; - u64 supp_rates = ieee80211_sta_get_rates(local, elems, - rx_status->band); - - prev_rates = sta->supp_rates[rx_status->band]; - sta->supp_rates[rx_status->band] &= supp_rates; - if (sta->supp_rates[rx_status->band] == 0) { - /* No matching rates - this should not really happen. - * Make sure that at least one rate is marked - * supported to avoid issues with TX rate ctrl. */ - sta->supp_rates[rx_status->band] = - sdata->u.sta.supp_rates_bits[rx_status->band]; - } - } - - rcu_read_unlock(); - if (elems->ds_params && elems->ds_params_len == 1) freq = ieee80211_channel_to_frequency(elems->ds_params[0]); else @@ -2618,215 +1520,60 @@ static void ieee80211_rx_bss_info(struct net_device *dev, if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) return; -#ifdef CONFIG_MAC80211_MESH - if (elems->mesh_config) - bss = ieee80211_rx_mesh_bss_get(dev, elems->mesh_id, - elems->mesh_id_len, elems->mesh_config, freq); - else -#endif - bss = ieee80211_rx_bss_get(dev, mgmt->bssid, freq, - elems->ssid, elems->ssid_len); - if (!bss) { -#ifdef CONFIG_MAC80211_MESH - if (elems->mesh_config) - bss = ieee80211_rx_mesh_bss_add(dev, elems->mesh_id, - elems->mesh_id_len, elems->mesh_config, - elems->mesh_config_len, freq); - else -#endif - bss = ieee80211_rx_bss_add(dev, mgmt->bssid, freq, - elems->ssid, elems->ssid_len); - if (!bss) - return; - } else { -#if 0 - /* TODO: order by RSSI? */ - spin_lock_bh(&local->sta_bss_lock); - list_move_tail(&bss->list, &local->sta_bss_list); - spin_unlock_bh(&local->sta_bss_lock); -#endif - } - - /* save the ERP value so that it is available at association time */ - if (elems->erp_info && elems->erp_info_len >= 1) { - bss->erp_value = elems->erp_info[0]; - bss->has_erp_value = 1; - } + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && elems->supp_rates && + memcmp(mgmt->bssid, sdata->u.sta.bssid, ETH_ALEN) == 0) { + supp_rates = ieee80211_sta_get_rates(local, elems, band); - if (elems->ht_cap_elem && - (!bss->ht_ie || bss->ht_ie_len != elems->ht_cap_elem_len || - memcmp(bss->ht_ie, elems->ht_cap_elem, elems->ht_cap_elem_len))) { - kfree(bss->ht_ie); - bss->ht_ie = kmalloc(elems->ht_cap_elem_len + 2, GFP_ATOMIC); - if (bss->ht_ie) { - memcpy(bss->ht_ie, elems->ht_cap_elem - 2, - elems->ht_cap_elem_len + 2); - bss->ht_ie_len = elems->ht_cap_elem_len + 2; - } else - bss->ht_ie_len = 0; - } else if (!elems->ht_cap_elem && bss->ht_ie) { - kfree(bss->ht_ie); - bss->ht_ie = NULL; - bss->ht_ie_len = 0; - } + rcu_read_lock(); - if (elems->ht_info_elem && - (!bss->ht_add_ie || - bss->ht_add_ie_len != elems->ht_info_elem_len || - memcmp(bss->ht_add_ie, elems->ht_info_elem, - elems->ht_info_elem_len))) { - kfree(bss->ht_add_ie); - bss->ht_add_ie = - kmalloc(elems->ht_info_elem_len + 2, GFP_ATOMIC); - if (bss->ht_add_ie) { - memcpy(bss->ht_add_ie, elems->ht_info_elem - 2, - elems->ht_info_elem_len + 2); - bss->ht_add_ie_len = elems->ht_info_elem_len + 2; - } else - bss->ht_add_ie_len = 0; - } else if (!elems->ht_info_elem && bss->ht_add_ie) { - kfree(bss->ht_add_ie); - bss->ht_add_ie = NULL; - bss->ht_add_ie_len = 0; - } + sta = sta_info_get(local, mgmt->sa); + if (sta) { + u64 prev_rates; - bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int); - bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info); + prev_rates = sta->sta.supp_rates[band]; + /* make sure mandatory rates are always added */ + sta->sta.supp_rates[band] = supp_rates | + ieee80211_mandatory_rates(local, band); - if (elems->tim) { - struct ieee80211_tim_ie *tim_ie = - (struct ieee80211_tim_ie *)elems->tim; - bss->dtim_period = tim_ie->dtim_period; - } +#ifdef CONFIG_MAC80211_IBSS_DEBUG + if (sta->sta.supp_rates[band] != prev_rates) + printk(KERN_DEBUG "%s: updated supp_rates set " + "for %s based on beacon info (0x%llx | " + "0x%llx -> 0x%llx)\n", + sdata->dev->name, + print_mac(mac, sta->sta.addr), + (unsigned long long) prev_rates, + (unsigned long long) supp_rates, + (unsigned long long) sta->sta.supp_rates[band]); +#endif + } else { + ieee80211_ibss_add_sta(sdata, NULL, mgmt->bssid, + mgmt->sa, supp_rates); + } - /* set default value for buggy APs */ - if (!elems->tim || bss->dtim_period == 0) - bss->dtim_period = 1; - - bss->supp_rates_len = 0; - if (elems->supp_rates) { - clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; - if (clen > elems->supp_rates_len) - clen = elems->supp_rates_len; - memcpy(&bss->supp_rates[bss->supp_rates_len], elems->supp_rates, - clen); - bss->supp_rates_len += clen; - } - if (elems->ext_supp_rates) { - clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; - if (clen > elems->ext_supp_rates_len) - clen = elems->ext_supp_rates_len; - memcpy(&bss->supp_rates[bss->supp_rates_len], - elems->ext_supp_rates, clen); - bss->supp_rates_len += clen; + rcu_read_unlock(); } - bss->band = rx_status->band; + bss = ieee80211_bss_info_update(local, rx_status, mgmt, len, elems, + freq, beacon); + if (!bss) + return; - bss->timestamp = beacon_timestamp; - bss->last_update = jiffies; - bss->signal = rx_status->signal; - bss->noise = rx_status->noise; - bss->qual = rx_status->qual; - if (!beacon && !bss->probe_resp) - bss->probe_resp = true; + /* was just updated in ieee80211_bss_info_update */ + beacon_timestamp = bss->timestamp; /* * In STA mode, the remaining parameters should not be overridden * by beacons because they're not necessarily accurate there. */ - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - bss->probe_resp && beacon) { + if (sdata->vif.type != NL80211_IFTYPE_ADHOC && + bss->last_probe_resp && beacon) { ieee80211_rx_bss_put(local, bss); return; } - if (elems->wpa && - (!bss->wpa_ie || bss->wpa_ie_len != elems->wpa_len || - memcmp(bss->wpa_ie, elems->wpa, elems->wpa_len))) { - kfree(bss->wpa_ie); - bss->wpa_ie = kmalloc(elems->wpa_len + 2, GFP_ATOMIC); - if (bss->wpa_ie) { - memcpy(bss->wpa_ie, elems->wpa - 2, elems->wpa_len + 2); - bss->wpa_ie_len = elems->wpa_len + 2; - } else - bss->wpa_ie_len = 0; - } else if (!elems->wpa && bss->wpa_ie) { - kfree(bss->wpa_ie); - bss->wpa_ie = NULL; - bss->wpa_ie_len = 0; - } - - if (elems->rsn && - (!bss->rsn_ie || bss->rsn_ie_len != elems->rsn_len || - memcmp(bss->rsn_ie, elems->rsn, elems->rsn_len))) { - kfree(bss->rsn_ie); - bss->rsn_ie = kmalloc(elems->rsn_len + 2, GFP_ATOMIC); - if (bss->rsn_ie) { - memcpy(bss->rsn_ie, elems->rsn - 2, elems->rsn_len + 2); - bss->rsn_ie_len = elems->rsn_len + 2; - } else - bss->rsn_ie_len = 0; - } else if (!elems->rsn && bss->rsn_ie) { - kfree(bss->rsn_ie); - bss->rsn_ie = NULL; - bss->rsn_ie_len = 0; - } - - /* - * Cf. - * http://www.wipo.int/pctdb/en/wo.jsp?wo=2007047181&IA=WO2007047181&DISPLAY=DESC - * - * quoting: - * - * In particular, "Wi-Fi CERTIFIED for WMM - Support for Multimedia - * Applications with Quality of Service in Wi-Fi Networks," Wi- Fi - * Alliance (September 1, 2004) is incorporated by reference herein. - * The inclusion of the WMM Parameters in probe responses and - * association responses is mandatory for WMM enabled networks. The - * inclusion of the WMM Parameters in beacons, however, is optional. - */ - - if (elems->wmm_param && - (!bss->wmm_ie || bss->wmm_ie_len != elems->wmm_param_len || - memcmp(bss->wmm_ie, elems->wmm_param, elems->wmm_param_len))) { - kfree(bss->wmm_ie); - bss->wmm_ie = kmalloc(elems->wmm_param_len + 2, GFP_ATOMIC); - if (bss->wmm_ie) { - memcpy(bss->wmm_ie, elems->wmm_param - 2, - elems->wmm_param_len + 2); - bss->wmm_ie_len = elems->wmm_param_len + 2; - } else - bss->wmm_ie_len = 0; - } else if (elems->wmm_info && - (!bss->wmm_ie || bss->wmm_ie_len != elems->wmm_info_len || - memcmp(bss->wmm_ie, elems->wmm_info, - elems->wmm_info_len))) { - /* As for certain AP's Fifth bit is not set in WMM IE in - * beacon frames.So while parsing the beacon frame the - * wmm_info structure is used instead of wmm_param. - * wmm_info structure was never used to set bss->wmm_ie. - * This code fixes this problem by copying the WME - * information from wmm_info to bss->wmm_ie and enabling - * n-band association. - */ - kfree(bss->wmm_ie); - bss->wmm_ie = kmalloc(elems->wmm_info_len + 2, GFP_ATOMIC); - if (bss->wmm_ie) { - memcpy(bss->wmm_ie, elems->wmm_info - 2, - elems->wmm_info_len + 2); - bss->wmm_ie_len = elems->wmm_info_len + 2; - } else - bss->wmm_ie_len = 0; - } else if (!elems->wmm_param && !elems->wmm_info && bss->wmm_ie) { - kfree(bss->wmm_ie); - bss->wmm_ie = NULL; - bss->wmm_ie_len = 0; - } - /* check if we need to merge IBSS */ - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && beacon && - !local->sta_sw_scanning && !local->sta_hw_scanning && + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && beacon && bss->capability & WLAN_CAPABILITY_IBSS && bss->freq == local->oper_channel->center_freq && elems->ssid_len == sdata->u.sta.ssid_len && @@ -2848,7 +1595,7 @@ static void ieee80211_rx_bss_info(struct net_device *dev, * e.g: at 1 MBit that means mactime is 192 usec earlier * (=24 bytes * 8 usecs/byte) than the beacon timestamp. */ - int rate = local->hw.wiphy->bands[rx_status->band]-> + int rate = local->hw.wiphy->bands[band]-> bitrates[rx_status->rate_idx].bitrate; rx_timestamp = rx_status->mactime + (24 * 8 * 10 / rate); } else if (local && local->ops && local->ops->get_tsf) @@ -2871,12 +1618,12 @@ static void ieee80211_rx_bss_info(struct net_device *dev, #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: beacon TSF higher than " "local TSF - IBSS merge with BSSID %s\n", - dev->name, print_mac(mac, mgmt->bssid)); + sdata->dev->name, print_mac(mac, mgmt->bssid)); #endif - ieee80211_sta_join_ibss(dev, &sdata->u.sta, bss); - ieee80211_ibss_add_sta(dev, NULL, + ieee80211_sta_join_ibss(sdata, &sdata->u.sta, bss); + ieee80211_ibss_add_sta(sdata, NULL, mgmt->bssid, mgmt->sa, - BIT(rx_status->rate_idx)); + supp_rates); } } @@ -2884,13 +1631,17 @@ static void ieee80211_rx_bss_info(struct net_device *dev, } -static void ieee80211_rx_mgmt_probe_resp(struct net_device *dev, +static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { size_t baselen; struct ieee802_11_elems elems; + struct ieee80211_if_sta *ifsta = &sdata->u.sta; + + if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN)) + return; /* ignore ProbeResp to foreign address */ baselen = (u8 *) mgmt->u.probe_resp.variable - (u8 *) mgmt; if (baselen > len) @@ -2899,20 +1650,27 @@ static void ieee80211_rx_mgmt_probe_resp(struct net_device *dev, ieee802_11_parse_elems(mgmt->u.probe_resp.variable, len - baselen, &elems); - ieee80211_rx_bss_info(dev, mgmt, len, rx_status, &elems, 0); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, false); + + /* direct probe may be part of the association flow */ + if (test_and_clear_bit(IEEE80211_STA_REQ_DIRECT_PROBE, + &ifsta->request)) { + printk(KERN_DEBUG "%s direct probe responded\n", + sdata->dev->name); + ieee80211_authenticate(sdata, ifsta); + } } -static void ieee80211_rx_mgmt_beacon(struct net_device *dev, +static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { - struct ieee80211_sub_if_data *sdata; struct ieee80211_if_sta *ifsta; size_t baselen; struct ieee802_11_elems elems; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct ieee80211_conf *conf = &local->hw.conf; u32 changed = 0; @@ -2923,10 +1681,9 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, ieee802_11_parse_elems(mgmt->u.beacon.variable, len - baselen, &elems); - ieee80211_rx_bss_info(dev, mgmt, len, rx_status, &elems, 1); + ieee80211_rx_bss_info(sdata, mgmt, len, rx_status, &elems, true); - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return; ifsta = &sdata->u.sta; @@ -2934,15 +1691,9 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, memcmp(ifsta->bssid, mgmt->bssid, ETH_ALEN) != 0) return; - ieee80211_sta_wmm_params(dev, ifsta, elems.wmm_param, + ieee80211_sta_wmm_params(local, ifsta, elems.wmm_param, elems.wmm_param_len); - /* Do not send changes to driver if we are scanning. This removes - * requirement that driver's bss_info_changed function needs to be - * atomic. */ - if (local->sta_sw_scanning || local->sta_hw_scanning) - return; - if (elems.erp_info && elems.erp_info_len >= 1) changed |= ieee80211_handle_erp_ie(sdata, elems.erp_info[0]); else { @@ -2956,7 +1707,6 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, struct ieee80211_ht_bss_info bss_info; ieee80211_ht_addt_info_ie_to_ht_bss_info( - (struct ieee80211_ht_addt_info *) elems.ht_info_elem, &bss_info); changed |= ieee80211_handle_ht(local, 1, &conf->ht_conf, &bss_info); @@ -2966,14 +1716,13 @@ static void ieee80211_rx_mgmt_beacon(struct net_device *dev, } -static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, +static void ieee80211_rx_mgmt_probe_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta, struct ieee80211_mgmt *mgmt, size_t len, struct ieee80211_rx_status *rx_status) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; int tx_last_beacon; struct sk_buff *skb; struct ieee80211_mgmt *resp; @@ -2984,8 +1733,8 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, DECLARE_MAC_BUF(mac3); #endif - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS || - ifsta->state != IEEE80211_IBSS_JOINED || + if (sdata->vif.type != NL80211_IFTYPE_ADHOC || + ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED || len < 24 + 2 || !ifsta->probe_resp) return; @@ -2997,7 +1746,7 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: RX ProbeReq SA=%s DA=%s BSSID=" "%s (tx_last_beacon=%d)\n", - dev->name, print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->da), + sdata->dev->name, print_mac(mac, mgmt->sa), print_mac(mac2, mgmt->da), print_mac(mac3, mgmt->bssid), tx_last_beacon); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ @@ -3015,7 +1764,7 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: Invalid SSID IE in ProbeReq " "from %s\n", - dev->name, print_mac(mac, mgmt->sa)); + sdata->dev->name, print_mac(mac, mgmt->sa)); #endif return; } @@ -3035,74 +1784,15 @@ static void ieee80211_rx_mgmt_probe_req(struct net_device *dev, memcpy(resp->da, mgmt->sa, ETH_ALEN); #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: Sending ProbeResp to %s\n", - dev->name, print_mac(mac, resp->da)); + sdata->dev->name, print_mac(mac, resp->da)); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ - ieee80211_sta_tx(dev, skb, 0); + ieee80211_tx_skb(sdata, skb, 0); } -static void ieee80211_rx_mgmt_action(struct net_device *dev, - struct ieee80211_if_sta *ifsta, - struct ieee80211_mgmt *mgmt, - size_t len, - struct ieee80211_rx_status *rx_status) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - if (len < IEEE80211_MIN_ACTION_SIZE) - return; - - switch (mgmt->u.action.category) { - case WLAN_CATEGORY_SPECTRUM_MGMT: - if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) - break; - switch (mgmt->u.action.u.chan_switch.action_code) { - case WLAN_ACTION_SPCT_MSR_REQ: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.measurement))) - break; - ieee80211_sta_process_measurement_req(dev, mgmt, len); - break; - } - break; - case WLAN_CATEGORY_BACK: - switch (mgmt->u.action.u.addba_req.action_code) { - case WLAN_ACTION_ADDBA_REQ: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.addba_req))) - break; - ieee80211_sta_process_addba_request(dev, mgmt, len); - break; - case WLAN_ACTION_ADDBA_RESP: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.addba_resp))) - break; - ieee80211_sta_process_addba_resp(dev, mgmt, len); - break; - case WLAN_ACTION_DELBA: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.delba))) - break; - ieee80211_sta_process_delba(dev, mgmt, len); - break; - } - break; - case PLINK_CATEGORY: - if (ieee80211_vif_is_mesh(&sdata->vif)) - mesh_rx_plink_frame(dev, mgmt, len, rx_status); - break; - case MESH_PATH_SEL_CATEGORY: - if (ieee80211_vif_is_mesh(&sdata->vif)) - mesh_rx_path_sel_frame(dev, mgmt, len); - break; - } -} - -void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb, +void ieee80211_sta_rx_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, struct ieee80211_rx_status *rx_status) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; + struct ieee80211_local *local = sdata->local; struct ieee80211_if_sta *ifsta; struct ieee80211_mgmt *mgmt; u16 fc; @@ -3110,7 +1800,6 @@ void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb, if (skb->len < 24) goto fail; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); ifsta = &sdata->u.sta; mgmt = (struct ieee80211_mgmt *) skb->data; @@ -3120,7 +1809,6 @@ void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb, case IEEE80211_STYPE_PROBE_REQ: case IEEE80211_STYPE_PROBE_RESP: case IEEE80211_STYPE_BEACON: - case IEEE80211_STYPE_ACTION: memcpy(skb->cb, rx_status, sizeof(*rx_status)); case IEEE80211_STYPE_AUTH: case IEEE80211_STYPE_ASSOC_RESP: @@ -3136,17 +1824,14 @@ void ieee80211_sta_rx_mgmt(struct net_device *dev, struct sk_buff *skb, kfree_skb(skb); } - -static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev, +static void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { struct ieee80211_rx_status *rx_status; - struct ieee80211_sub_if_data *sdata; struct ieee80211_if_sta *ifsta; struct ieee80211_mgmt *mgmt; u16 fc; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); ifsta = &sdata->u.sta; rx_status = (struct ieee80211_rx_status *) skb->cb; @@ -3155,17 +1840,17 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev, switch (fc & IEEE80211_FCTL_STYPE) { case IEEE80211_STYPE_PROBE_REQ: - ieee80211_rx_mgmt_probe_req(dev, ifsta, mgmt, skb->len, + ieee80211_rx_mgmt_probe_req(sdata, ifsta, mgmt, skb->len, rx_status); break; case IEEE80211_STYPE_PROBE_RESP: - ieee80211_rx_mgmt_probe_resp(dev, mgmt, skb->len, rx_status); + ieee80211_rx_mgmt_probe_resp(sdata, mgmt, skb->len, rx_status); break; case IEEE80211_STYPE_BEACON: - ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len, rx_status); + ieee80211_rx_mgmt_beacon(sdata, mgmt, skb->len, rx_status); break; case IEEE80211_STYPE_AUTH: - ieee80211_rx_mgmt_auth(dev, ifsta, mgmt, skb->len); + ieee80211_rx_mgmt_auth(sdata, ifsta, mgmt, skb->len); break; case IEEE80211_STYPE_ASSOC_RESP: ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 0); @@ -3174,13 +1859,10 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev, ieee80211_rx_mgmt_assoc_resp(sdata, ifsta, mgmt, skb->len, 1); break; case IEEE80211_STYPE_DEAUTH: - ieee80211_rx_mgmt_deauth(dev, ifsta, mgmt, skb->len); + ieee80211_rx_mgmt_deauth(sdata, ifsta, mgmt, skb->len); break; case IEEE80211_STYPE_DISASSOC: - ieee80211_rx_mgmt_disassoc(dev, ifsta, mgmt, skb->len); - break; - case IEEE80211_STYPE_ACTION: - ieee80211_rx_mgmt_action(dev, ifsta, mgmt, skb->len, rx_status); + ieee80211_rx_mgmt_disassoc(sdata, ifsta, mgmt, skb->len); break; } @@ -3188,47 +1870,11 @@ static void ieee80211_sta_rx_queued_mgmt(struct net_device *dev, } -ieee80211_rx_result -ieee80211_sta_rx_scan(struct net_device *dev, struct sk_buff *skb, - struct ieee80211_rx_status *rx_status) -{ - struct ieee80211_mgmt *mgmt; - __le16 fc; - - if (skb->len < 2) - return RX_DROP_UNUSABLE; - - mgmt = (struct ieee80211_mgmt *) skb->data; - fc = mgmt->frame_control; - - if (ieee80211_is_ctl(fc)) - return RX_CONTINUE; - - if (skb->len < 24) - return RX_DROP_MONITOR; - - if (ieee80211_is_probe_resp(fc)) { - ieee80211_rx_mgmt_probe_resp(dev, mgmt, skb->len, rx_status); - dev_kfree_skb(skb); - return RX_QUEUED; - } - - if (ieee80211_is_beacon(fc)) { - ieee80211_rx_mgmt_beacon(dev, mgmt, skb->len, rx_status); - dev_kfree_skb(skb); - return RX_QUEUED; - } - - return RX_CONTINUE; -} - - -static int ieee80211_sta_active_ibss(struct net_device *dev) +static int ieee80211_sta_active_ibss(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; int active = 0; struct sta_info *sta; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); rcu_read_lock(); @@ -3247,179 +1893,36 @@ static int ieee80211_sta_active_ibss(struct net_device *dev) } -static void ieee80211_sta_expire(struct net_device *dev, unsigned long exp_time) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sta_info *sta, *tmp; - LIST_HEAD(tmp_list); - DECLARE_MAC_BUF(mac); - unsigned long flags; - - spin_lock_irqsave(&local->sta_lock, flags); - list_for_each_entry_safe(sta, tmp, &local->sta_list, list) - if (time_after(jiffies, sta->last_rx + exp_time)) { -#ifdef CONFIG_MAC80211_IBSS_DEBUG - printk(KERN_DEBUG "%s: expiring inactive STA %s\n", - dev->name, print_mac(mac, sta->addr)); -#endif - __sta_info_unlink(&sta); - if (sta) - list_add(&sta->list, &tmp_list); - } - spin_unlock_irqrestore(&local->sta_lock, flags); - - list_for_each_entry_safe(sta, tmp, &tmp_list, list) - sta_info_destroy(sta); -} - - -static void ieee80211_sta_merge_ibss(struct net_device *dev, +static void ieee80211_sta_merge_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); - ieee80211_sta_expire(dev, IEEE80211_IBSS_INACTIVITY_LIMIT); - if (ieee80211_sta_active_ibss(dev)) + ieee80211_sta_expire(sdata, IEEE80211_IBSS_INACTIVITY_LIMIT); + if (ieee80211_sta_active_ibss(sdata)) return; printk(KERN_DEBUG "%s: No active IBSS STAs - trying to scan for other " - "IBSS networks with same SSID (merge)\n", dev->name); - ieee80211_sta_req_scan(dev, ifsta->ssid, ifsta->ssid_len); -} - - -#ifdef CONFIG_MAC80211_MESH -static void ieee80211_mesh_housekeeping(struct net_device *dev, - struct ieee80211_if_sta *ifsta) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - bool free_plinks; - - ieee80211_sta_expire(dev, IEEE80211_MESH_PEER_INACTIVITY_LIMIT); - mesh_path_expire(dev); - - free_plinks = mesh_plink_availables(sdata); - if (free_plinks != sdata->u.sta.accepting_plinks) - ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); - - mod_timer(&ifsta->timer, jiffies + - IEEE80211_MESH_HOUSEKEEPING_INTERVAL); + "IBSS networks with same SSID (merge)\n", sdata->dev->name); + ieee80211_request_scan(sdata, ifsta->ssid, ifsta->ssid_len); } -void ieee80211_start_mesh(struct net_device *dev) -{ - struct ieee80211_if_sta *ifsta; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - ifsta = &sdata->u.sta; - ifsta->state = IEEE80211_MESH_UP; - ieee80211_sta_timer((unsigned long)sdata); - ieee80211_if_config(sdata, IEEE80211_IFCC_BEACON); -} -#endif - - -void ieee80211_sta_timer(unsigned long data) +static void ieee80211_sta_timer(unsigned long data) { struct ieee80211_sub_if_data *sdata = (struct ieee80211_sub_if_data *) data; struct ieee80211_if_sta *ifsta = &sdata->u.sta; - struct ieee80211_local *local = wdev_priv(&sdata->wdev); + struct ieee80211_local *local = sdata->local; set_bit(IEEE80211_STA_REQ_RUN, &ifsta->request); queue_work(local->hw.workqueue, &ifsta->work); } -void ieee80211_sta_work(struct work_struct *work) -{ - struct ieee80211_sub_if_data *sdata = - container_of(work, struct ieee80211_sub_if_data, u.sta.work); - struct net_device *dev = sdata->dev; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_if_sta *ifsta; - struct sk_buff *skb; - - if (!netif_running(dev)) - return; - - if (local->sta_sw_scanning || local->sta_hw_scanning) - return; - - if (WARN_ON(sdata->vif.type != IEEE80211_IF_TYPE_STA && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)) - return; - ifsta = &sdata->u.sta; - - while ((skb = skb_dequeue(&ifsta->skb_queue))) - ieee80211_sta_rx_queued_mgmt(dev, skb); - -#ifdef CONFIG_MAC80211_MESH - if (ifsta->preq_queue_len && - time_after(jiffies, - ifsta->last_preq + msecs_to_jiffies(ifsta->mshcfg.dot11MeshHWMPpreqMinInterval))) - mesh_path_start_discovery(dev); -#endif - - if (ifsta->state != IEEE80211_AUTHENTICATE && - ifsta->state != IEEE80211_ASSOCIATE && - test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) { - if (ifsta->scan_ssid_len) - ieee80211_sta_start_scan(dev, ifsta->scan_ssid, ifsta->scan_ssid_len); - else - ieee80211_sta_start_scan(dev, NULL, 0); - return; - } - - if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request)) { - if (ieee80211_sta_config_auth(dev, ifsta)) - return; - clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request); - } else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request)) - return; - - switch (ifsta->state) { - case IEEE80211_DISABLED: - break; - case IEEE80211_AUTHENTICATE: - ieee80211_authenticate(dev, ifsta); - break; - case IEEE80211_ASSOCIATE: - ieee80211_associate(dev, ifsta); - break; - case IEEE80211_ASSOCIATED: - ieee80211_associated(dev, ifsta); - break; - case IEEE80211_IBSS_SEARCH: - ieee80211_sta_find_ibss(dev, ifsta); - break; - case IEEE80211_IBSS_JOINED: - ieee80211_sta_merge_ibss(dev, ifsta); - break; -#ifdef CONFIG_MAC80211_MESH - case IEEE80211_MESH_UP: - ieee80211_mesh_housekeeping(dev, ifsta); - break; -#endif - default: - WARN_ON(1); - break; - } - - if (ieee80211_privacy_mismatch(dev, ifsta)) { - printk(KERN_DEBUG "%s: privacy configuration mismatch and " - "mixed-cell disabled - disassociate\n", dev->name); - - ieee80211_send_disassoc(dev, ifsta, WLAN_REASON_UNSPECIFIED); - ieee80211_set_disassoc(dev, ifsta, 0); - } -} - - -static void ieee80211_sta_reset_auth(struct net_device *dev, +static void ieee80211_sta_reset_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; if (local->ops->reset_tsf) { /* Reset own TSF to allow time synchronization work. */ @@ -3439,29 +1942,15 @@ static void ieee80211_sta_reset_auth(struct net_device *dev, ifsta->auth_alg = WLAN_AUTH_OPEN; ifsta->auth_transaction = -1; ifsta->flags &= ~IEEE80211_STA_ASSOCIATED; - ifsta->auth_tries = ifsta->assoc_tries = 0; - netif_carrier_off(dev); + ifsta->assoc_scan_tries = 0; + ifsta->direct_probe_tries = 0; + ifsta->auth_tries = 0; + ifsta->assoc_tries = 0; + netif_tx_stop_all_queues(sdata->dev); + netif_carrier_off(sdata->dev); } -void ieee80211_sta_req_auth(struct net_device *dev, - struct ieee80211_if_sta *ifsta) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) - return; - - if ((ifsta->flags & (IEEE80211_STA_BSSID_SET | - IEEE80211_STA_AUTO_BSSID_SEL)) && - (ifsta->flags & (IEEE80211_STA_SSID_SET | - IEEE80211_STA_AUTO_SSID_SEL))) { - set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); - queue_work(local->hw.workqueue, &ifsta->work); - } -} - static int ieee80211_sta_match_ssid(struct ieee80211_if_sta *ifsta, const char *ssid, int ssid_len) { @@ -3492,81 +1981,11 @@ static int ieee80211_sta_match_ssid(struct ieee80211_if_sta *ifsta, return 0; } -static int ieee80211_sta_config_auth(struct net_device *dev, +static int ieee80211_sta_create_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_sta_bss *bss, *selected = NULL; - int top_rssi = 0, freq; - - spin_lock_bh(&local->sta_bss_lock); - freq = local->oper_channel->center_freq; - list_for_each_entry(bss, &local->sta_bss_list, list) { - if (!(bss->capability & WLAN_CAPABILITY_ESS)) - continue; - - if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | - IEEE80211_STA_AUTO_BSSID_SEL | - IEEE80211_STA_AUTO_CHANNEL_SEL)) && - (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^ - !!sdata->default_key)) - continue; - - if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) && - bss->freq != freq) - continue; - - if (!(ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) && - memcmp(bss->bssid, ifsta->bssid, ETH_ALEN)) - continue; - - if (!(ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) && - !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len)) - continue; - - if (!selected || top_rssi < bss->signal) { - selected = bss; - top_rssi = bss->signal; - } - } - if (selected) - atomic_inc(&selected->users); - spin_unlock_bh(&local->sta_bss_lock); - - if (selected) { - ieee80211_set_freq(dev, selected->freq); - if (!(ifsta->flags & IEEE80211_STA_SSID_SET)) - ieee80211_sta_set_ssid(dev, selected->ssid, - selected->ssid_len); - ieee80211_sta_set_bssid(dev, selected->bssid); - ieee80211_sta_def_wmm_params(dev, selected, 0); - ieee80211_rx_bss_put(local, selected); - ifsta->state = IEEE80211_AUTHENTICATE; - ieee80211_sta_reset_auth(dev, ifsta); - return 0; - } else { - if (ifsta->state != IEEE80211_AUTHENTICATE) { - if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) - ieee80211_sta_start_scan(dev, NULL, 0); - else - ieee80211_sta_start_scan(dev, ifsta->ssid, - ifsta->ssid_len); - ifsta->state = IEEE80211_AUTHENTICATE; - set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); - } else - ifsta->state = IEEE80211_DISABLED; - } - return -1; -} - - -static int ieee80211_sta_create_ibss(struct net_device *dev, - struct ieee80211_if_sta *ifsta) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *bss; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; + struct ieee80211_bss *bss; struct ieee80211_supported_band *sband; u8 bssid[ETH_ALEN], *pos; int i; @@ -3582,15 +2001,15 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, * random number generator get different BSSID. */ get_random_bytes(bssid, ETH_ALEN); for (i = 0; i < ETH_ALEN; i++) - bssid[i] ^= dev->dev_addr[i]; + bssid[i] ^= sdata->dev->dev_addr[i]; bssid[0] &= ~0x01; bssid[0] |= 0x02; #endif printk(KERN_DEBUG "%s: Creating new IBSS network, BSSID %s\n", - dev->name, print_mac(mac, bssid)); + sdata->dev->name, print_mac(mac, bssid)); - bss = ieee80211_rx_bss_add(dev, bssid, + bss = ieee80211_rx_bss_add(local, bssid, local->hw.conf.channel->center_freq, sdata->u.sta.ssid, sdata->u.sta.ssid_len); if (!bss) @@ -3617,17 +2036,17 @@ static int ieee80211_sta_create_ibss(struct net_device *dev, *pos++ = (u8) (rate / 5); } - ret = ieee80211_sta_join_ibss(dev, ifsta, bss); + ret = ieee80211_sta_join_ibss(sdata, ifsta, bss); ieee80211_rx_bss_put(local, bss); return ret; } -static int ieee80211_sta_find_ibss(struct net_device *dev, +static int ieee80211_sta_find_ibss(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sta_bss *bss; + struct ieee80211_local *local = sdata->local; + struct ieee80211_bss *bss; int found = 0; u8 bssid[ETH_ALEN]; int active_ibss; @@ -3637,13 +2056,13 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, if (ifsta->ssid_len == 0) return -EINVAL; - active_ibss = ieee80211_sta_active_ibss(dev); + active_ibss = ieee80211_sta_active_ibss(sdata); #ifdef CONFIG_MAC80211_IBSS_DEBUG printk(KERN_DEBUG "%s: sta_find_ibss (active_ibss=%d)\n", - dev->name, active_ibss); + sdata->dev->name, active_ibss); #endif /* CONFIG_MAC80211_IBSS_DEBUG */ - spin_lock_bh(&local->sta_bss_lock); - list_for_each_entry(bss, &local->sta_bss_list, list) { + spin_lock_bh(&local->bss_lock); + list_for_each_entry(bss, &local->bss_list, list) { if (ifsta->ssid_len != bss->ssid_len || memcmp(ifsta->ssid, bss->ssid, bss->ssid_len) != 0 || !(bss->capability & WLAN_CAPABILITY_IBSS)) @@ -3657,7 +2076,7 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, if (active_ibss || memcmp(bssid, ifsta->bssid, ETH_ALEN) != 0) break; } - spin_unlock_bh(&local->sta_bss_lock); + spin_unlock_bh(&local->bss_lock); #ifdef CONFIG_MAC80211_IBSS_DEBUG if (found) @@ -3675,15 +2094,15 @@ static int ieee80211_sta_find_ibss(struct net_device *dev, else search_freq = local->hw.conf.channel->center_freq; - bss = ieee80211_rx_bss_get(dev, bssid, search_freq, + bss = ieee80211_rx_bss_get(local, bssid, search_freq, ifsta->ssid, ifsta->ssid_len); if (!bss) goto dont_join; printk(KERN_DEBUG "%s: Selected IBSS BSSID %s" " based on configured SSID\n", - dev->name, print_mac(mac, bssid)); - ret = ieee80211_sta_join_ibss(dev, ifsta, bss); + sdata->dev->name, print_mac(mac, bssid)); + ret = ieee80211_sta_join_ibss(sdata, ifsta, bss); ieee80211_rx_bss_put(local, bss); return ret; } @@ -3694,17 +2113,17 @@ dont_join: #endif /* CONFIG_MAC80211_IBSS_DEBUG */ /* Selected IBSS not found in current scan results - try to scan */ - if (ifsta->state == IEEE80211_IBSS_JOINED && - !ieee80211_sta_active_ibss(dev)) { + if (ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED && + !ieee80211_sta_active_ibss(sdata)) { mod_timer(&ifsta->timer, jiffies + IEEE80211_IBSS_MERGE_INTERVAL); } else if (time_after(jiffies, local->last_scan_completed + IEEE80211_SCAN_INTERVAL)) { printk(KERN_DEBUG "%s: Trigger new scan to find an IBSS to " - "join\n", dev->name); - return ieee80211_sta_req_scan(dev, ifsta->ssid, + "join\n", sdata->dev->name); + return ieee80211_request_scan(sdata, ifsta->ssid, ifsta->ssid_len); - } else if (ifsta->state != IEEE80211_IBSS_JOINED) { + } else if (ifsta->state != IEEE80211_STA_MLME_IBSS_JOINED) { int interval = IEEE80211_SCAN_INTERVAL; if (time_after(jiffies, ifsta->ibss_join_req + @@ -3712,10 +2131,10 @@ dont_join: if ((ifsta->flags & IEEE80211_STA_CREATE_IBSS) && (!(local->oper_channel->flags & IEEE80211_CHAN_NO_IBSS))) - return ieee80211_sta_create_ibss(dev, ifsta); + return ieee80211_sta_create_ibss(sdata, ifsta); if (ifsta->flags & IEEE80211_STA_CREATE_IBSS) { printk(KERN_DEBUG "%s: IBSS not allowed on" - " %d MHz\n", dev->name, + " %d MHz\n", sdata->dev->name, local->hw.conf.channel->center_freq); } @@ -3724,7 +2143,7 @@ dont_join: interval = IEEE80211_SCAN_INTERVAL_SLOW; } - ifsta->state = IEEE80211_IBSS_SEARCH; + ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH; mod_timer(&ifsta->timer, jiffies + interval); return 0; } @@ -3733,620 +2152,344 @@ dont_join: } -int ieee80211_sta_set_ssid(struct net_device *dev, char *ssid, size_t len) +static int ieee80211_sta_config_auth(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *ifsta; - int res; + struct ieee80211_local *local = sdata->local; + struct ieee80211_bss *bss, *selected = NULL; + int top_rssi = 0, freq; - if (len > IEEE80211_MAX_SSID_LEN) - return -EINVAL; + spin_lock_bh(&local->bss_lock); + freq = local->oper_channel->center_freq; + list_for_each_entry(bss, &local->bss_list, list) { + if (!(bss->capability & WLAN_CAPABILITY_ESS)) + continue; - ifsta = &sdata->u.sta; + if ((ifsta->flags & (IEEE80211_STA_AUTO_SSID_SEL | + IEEE80211_STA_AUTO_BSSID_SEL | + IEEE80211_STA_AUTO_CHANNEL_SEL)) && + (!!(bss->capability & WLAN_CAPABILITY_PRIVACY) ^ + !!sdata->default_key)) + continue; - if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) { - memset(ifsta->ssid, 0, sizeof(ifsta->ssid)); - memcpy(ifsta->ssid, ssid, len); - ifsta->ssid_len = len; - ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; + if (!(ifsta->flags & IEEE80211_STA_AUTO_CHANNEL_SEL) && + bss->freq != freq) + continue; - res = 0; - /* - * Hack! MLME code needs to be cleaned up to have different - * entry points for configuration and internal selection change - */ - if (netif_running(sdata->dev)) - res = ieee80211_if_config(sdata, IEEE80211_IFCC_SSID); - if (res) { - printk(KERN_DEBUG "%s: Failed to config new SSID to " - "the low-level driver\n", dev->name); - return res; - } - } + if (!(ifsta->flags & IEEE80211_STA_AUTO_BSSID_SEL) && + memcmp(bss->bssid, ifsta->bssid, ETH_ALEN)) + continue; - if (len) - ifsta->flags |= IEEE80211_STA_SSID_SET; - else - ifsta->flags &= ~IEEE80211_STA_SSID_SET; + if (!(ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) && + !ieee80211_sta_match_ssid(ifsta, bss->ssid, bss->ssid_len)) + continue; - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && - !(ifsta->flags & IEEE80211_STA_BSSID_SET)) { - ifsta->ibss_join_req = jiffies; - ifsta->state = IEEE80211_IBSS_SEARCH; - return ieee80211_sta_find_ibss(dev, ifsta); + if (!selected || top_rssi < bss->signal) { + selected = bss; + top_rssi = bss->signal; + } } + if (selected) + atomic_inc(&selected->users); + spin_unlock_bh(&local->bss_lock); - return 0; -} + if (selected) { + ieee80211_set_freq(sdata, selected->freq); + if (!(ifsta->flags & IEEE80211_STA_SSID_SET)) + ieee80211_sta_set_ssid(sdata, selected->ssid, + selected->ssid_len); + ieee80211_sta_set_bssid(sdata, selected->bssid); + ieee80211_sta_def_wmm_params(sdata, selected); + /* Send out direct probe if no probe resp was received or + * the one we have is outdated + */ + if (!selected->last_probe_resp || + time_after(jiffies, selected->last_probe_resp + + IEEE80211_SCAN_RESULT_EXPIRE)) + ifsta->state = IEEE80211_STA_MLME_DIRECT_PROBE; + else + ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; -int ieee80211_sta_get_ssid(struct net_device *dev, char *ssid, size_t *len) -{ - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - memcpy(ssid, ifsta->ssid, ifsta->ssid_len); - *len = ifsta->ssid_len; - return 0; + ieee80211_rx_bss_put(local, selected); + ieee80211_sta_reset_auth(sdata, ifsta); + return 0; + } else { + if (ifsta->assoc_scan_tries < IEEE80211_ASSOC_SCANS_MAX_TRIES) { + ifsta->assoc_scan_tries++; + if (ifsta->flags & IEEE80211_STA_AUTO_SSID_SEL) + ieee80211_start_scan(sdata, NULL, 0); + else + ieee80211_start_scan(sdata, ifsta->ssid, + ifsta->ssid_len); + ifsta->state = IEEE80211_STA_MLME_AUTHENTICATE; + set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); + } else + ifsta->state = IEEE80211_STA_MLME_DISABLED; + } + return -1; } -int ieee80211_sta_set_bssid(struct net_device *dev, u8 *bssid) +static void ieee80211_sta_work(struct work_struct *work) { - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, u.sta.work); + struct ieee80211_local *local = sdata->local; struct ieee80211_if_sta *ifsta; - int res; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - ifsta = &sdata->u.sta; + struct sk_buff *skb; - if (memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) { - memcpy(ifsta->bssid, bssid, ETH_ALEN); - res = 0; - /* - * Hack! See also ieee80211_sta_set_ssid. - */ - if (netif_running(sdata->dev)) - res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID); - if (res) { - printk(KERN_DEBUG "%s: Failed to config new BSSID to " - "the low-level driver\n", dev->name); - return res; - } - } + if (!netif_running(sdata->dev)) + return; - if (is_valid_ether_addr(bssid)) - ifsta->flags |= IEEE80211_STA_BSSID_SET; - else - ifsta->flags &= ~IEEE80211_STA_BSSID_SET; + if (local->sw_scanning || local->hw_scanning) + return; - return 0; -} + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC)) + return; + ifsta = &sdata->u.sta; + while ((skb = skb_dequeue(&ifsta->skb_queue))) + ieee80211_sta_rx_queued_mgmt(sdata, skb); -static void ieee80211_send_nullfunc(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - int powersave) -{ - struct sk_buff *skb; - struct ieee80211_hdr *nullfunc; - __le16 fc; + if (ifsta->state != IEEE80211_STA_MLME_DIRECT_PROBE && + ifsta->state != IEEE80211_STA_MLME_AUTHENTICATE && + ifsta->state != IEEE80211_STA_MLME_ASSOCIATE && + test_and_clear_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request)) { + ieee80211_start_scan(sdata, ifsta->scan_ssid, + ifsta->scan_ssid_len); + return; + } - skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24); - if (!skb) { - printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " - "frame\n", sdata->dev->name); + if (test_and_clear_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request)) { + if (ieee80211_sta_config_auth(sdata, ifsta)) + return; + clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request); + } else if (!test_and_clear_bit(IEEE80211_STA_REQ_RUN, &ifsta->request)) return; + + switch (ifsta->state) { + case IEEE80211_STA_MLME_DISABLED: + break; + case IEEE80211_STA_MLME_DIRECT_PROBE: + ieee80211_direct_probe(sdata, ifsta); + break; + case IEEE80211_STA_MLME_AUTHENTICATE: + ieee80211_authenticate(sdata, ifsta); + break; + case IEEE80211_STA_MLME_ASSOCIATE: + ieee80211_associate(sdata, ifsta); + break; + case IEEE80211_STA_MLME_ASSOCIATED: + ieee80211_associated(sdata, ifsta); + break; + case IEEE80211_STA_MLME_IBSS_SEARCH: + ieee80211_sta_find_ibss(sdata, ifsta); + break; + case IEEE80211_STA_MLME_IBSS_JOINED: + ieee80211_sta_merge_ibss(sdata, ifsta); + break; + default: + WARN_ON(1); + break; } - skb_reserve(skb, local->hw.extra_tx_headroom); - nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24); - memset(nullfunc, 0, 24); - fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | - IEEE80211_FCTL_TODS); - if (powersave) - fc |= cpu_to_le16(IEEE80211_FCTL_PM); - nullfunc->frame_control = fc; - memcpy(nullfunc->addr1, sdata->u.sta.bssid, ETH_ALEN); - memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN); - memcpy(nullfunc->addr3, sdata->u.sta.bssid, ETH_ALEN); - - ieee80211_sta_tx(sdata->dev, skb, 0); -} + if (ieee80211_privacy_mismatch(sdata, ifsta)) { + printk(KERN_DEBUG "%s: privacy configuration mismatch and " + "mixed-cell disabled - disassociate\n", sdata->dev->name); + ieee80211_set_disassoc(sdata, ifsta, false, true, + WLAN_REASON_UNSPECIFIED); + } +} static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata) { - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - ieee80211_vif_is_mesh(&sdata->vif)) - ieee80211_sta_timer((unsigned long)sdata); + if (sdata->vif.type == NL80211_IFTYPE_STATION) + queue_work(sdata->local->hw.workqueue, + &sdata->u.sta.work); } -void ieee80211_scan_completed(struct ieee80211_hw *hw) +/* interface setup */ +void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) { - struct ieee80211_local *local = hw_to_local(hw); - struct net_device *dev = local->scan_dev; - struct ieee80211_sub_if_data *sdata; - union iwreq_data wrqu; + struct ieee80211_if_sta *ifsta; - local->last_scan_completed = jiffies; - memset(&wrqu, 0, sizeof(wrqu)); - wireless_send_event(dev, SIOCGIWSCAN, &wrqu, NULL); - - if (local->sta_hw_scanning) { - local->sta_hw_scanning = 0; - if (ieee80211_hw_config(local)) - printk(KERN_DEBUG "%s: failed to restore operational " - "channel after scan\n", dev->name); - /* Restart STA timer for HW scan case */ - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) - ieee80211_restart_sta_timer(sdata); - rcu_read_unlock(); + ifsta = &sdata->u.sta; + INIT_WORK(&ifsta->work, ieee80211_sta_work); + setup_timer(&ifsta->timer, ieee80211_sta_timer, + (unsigned long) sdata); + skb_queue_head_init(&ifsta->skb_queue); - goto done; + ifsta->capab = WLAN_CAPABILITY_ESS; + ifsta->auth_algs = IEEE80211_AUTH_ALG_OPEN | + IEEE80211_AUTH_ALG_SHARED_KEY; + ifsta->flags |= IEEE80211_STA_CREATE_IBSS | + IEEE80211_STA_AUTO_BSSID_SEL | + IEEE80211_STA_AUTO_CHANNEL_SEL; + if (ieee80211_num_regular_queues(&sdata->local->hw) >= 4) + ifsta->flags |= IEEE80211_STA_WMM_ENABLED; +} + +/* + * Add a new IBSS station, will also be called by the RX code when, + * in IBSS mode, receiving a frame from a yet-unknown station, hence + * must be callable in atomic context. + */ +struct sta_info *ieee80211_ibss_add_sta(struct ieee80211_sub_if_data *sdata, + struct sk_buff *skb, u8 *bssid, + u8 *addr, u64 supp_rates) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + DECLARE_MAC_BUF(mac); + int band = local->hw.conf.channel->band; + + /* TODO: Could consider removing the least recently used entry and + * allow new one to be added. */ + if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { + if (net_ratelimit()) { + printk(KERN_DEBUG "%s: No room for a new IBSS STA " + "entry %s\n", sdata->dev->name, print_mac(mac, addr)); + } + return NULL; } - local->sta_sw_scanning = 0; - if (ieee80211_hw_config(local)) - printk(KERN_DEBUG "%s: failed to restore operational " - "channel after scan\n", dev->name); + if (compare_ether_addr(bssid, sdata->u.sta.bssid)) + return NULL; +#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + printk(KERN_DEBUG "%s: Adding new IBSS station %s (dev=%s)\n", + wiphy_name(local->hw.wiphy), print_mac(mac, addr), sdata->dev->name); +#endif - netif_tx_lock_bh(local->mdev); - netif_addr_lock(local->mdev); - local->filter_flags &= ~FIF_BCN_PRBRESP_PROMISC; - local->ops->configure_filter(local_to_hw(local), - FIF_BCN_PRBRESP_PROMISC, - &local->filter_flags, - local->mdev->mc_count, - local->mdev->mc_list); + sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); + if (!sta) + return NULL; - netif_addr_unlock(local->mdev); - netif_tx_unlock_bh(local->mdev); + set_sta_flags(sta, WLAN_STA_AUTHORIZED); - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - /* Tell AP we're back */ - if (sdata->vif.type == IEEE80211_IF_TYPE_STA && - sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) - ieee80211_send_nullfunc(local, sdata, 0); + /* make sure mandatory rates are always added */ + sta->sta.supp_rates[band] = supp_rates | + ieee80211_mandatory_rates(local, band); - ieee80211_restart_sta_timer(sdata); + rate_control_rate_init(sta); - netif_wake_queue(sdata->dev); - } - rcu_read_unlock(); + if (sta_info_insert(sta)) + return NULL; -done: - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - struct ieee80211_if_sta *ifsta = &sdata->u.sta; - if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) || - (!(ifsta->state == IEEE80211_IBSS_JOINED) && - !ieee80211_sta_active_ibss(dev))) - ieee80211_sta_find_ibss(dev, ifsta); - } + return sta; } -EXPORT_SYMBOL(ieee80211_scan_completed); -void ieee80211_sta_scan_work(struct work_struct *work) +/* configuration hooks */ +void ieee80211_sta_req_auth(struct ieee80211_sub_if_data *sdata, + struct ieee80211_if_sta *ifsta) { - struct ieee80211_local *local = - container_of(work, struct ieee80211_local, scan_work.work); - struct net_device *dev = local->scan_dev; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - struct ieee80211_supported_band *sband; - struct ieee80211_channel *chan; - int skip; - unsigned long next_delay = 0; + struct ieee80211_local *local = sdata->local; - if (!local->sta_sw_scanning) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return; - switch (local->scan_state) { - case SCAN_SET_CHANNEL: - /* - * Get current scan band. scan_band may be IEEE80211_NUM_BANDS - * after we successfully scanned the last channel of the last - * band (and the last band is supported by the hw) - */ - if (local->scan_band < IEEE80211_NUM_BANDS) - sband = local->hw.wiphy->bands[local->scan_band]; - else - sband = NULL; - - /* - * If we are at an unsupported band and have more bands - * left to scan, advance to the next supported one. - */ - while (!sband && local->scan_band < IEEE80211_NUM_BANDS - 1) { - local->scan_band++; - sband = local->hw.wiphy->bands[local->scan_band]; - local->scan_channel_idx = 0; - } - - /* if no more bands/channels left, complete scan */ - if (!sband || local->scan_channel_idx >= sband->n_channels) { - ieee80211_scan_completed(local_to_hw(local)); - return; - } - skip = 0; - chan = &sband->channels[local->scan_channel_idx]; - - if (chan->flags & IEEE80211_CHAN_DISABLED || - (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && - chan->flags & IEEE80211_CHAN_NO_IBSS)) - skip = 1; - - if (!skip) { - local->scan_channel = chan; - if (ieee80211_hw_config(local)) { - printk(KERN_DEBUG "%s: failed to set freq to " - "%d MHz for scan\n", dev->name, - chan->center_freq); - skip = 1; - } - } - - /* advance state machine to next channel/band */ - local->scan_channel_idx++; - if (local->scan_channel_idx >= sband->n_channels) { - /* - * scan_band may end up == IEEE80211_NUM_BANDS, but - * we'll catch that case above and complete the scan - * if that is the case. - */ - local->scan_band++; - local->scan_channel_idx = 0; - } - - if (skip) - break; + if ((ifsta->flags & (IEEE80211_STA_BSSID_SET | + IEEE80211_STA_AUTO_BSSID_SEL)) && + (ifsta->flags & (IEEE80211_STA_SSID_SET | + IEEE80211_STA_AUTO_SSID_SEL))) { - next_delay = IEEE80211_PROBE_DELAY + - usecs_to_jiffies(local->hw.channel_change_time); - local->scan_state = SCAN_SEND_PROBE; - break; - case SCAN_SEND_PROBE: - next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; - local->scan_state = SCAN_SET_CHANNEL; + if (ifsta->state == IEEE80211_STA_MLME_ASSOCIATED) + ieee80211_set_disassoc(sdata, ifsta, true, true, + WLAN_REASON_DEAUTH_LEAVING); - if (local->scan_channel->flags & IEEE80211_CHAN_PASSIVE_SCAN) - break; - ieee80211_send_probe_req(dev, NULL, local->scan_ssid, - local->scan_ssid_len); - next_delay = IEEE80211_CHANNEL_TIME; - break; + set_bit(IEEE80211_STA_REQ_AUTH, &ifsta->request); + queue_work(local->hw.workqueue, &ifsta->work); } - - if (local->sta_sw_scanning) - queue_delayed_work(local->hw.workqueue, &local->scan_work, - next_delay); } - -static int ieee80211_sta_start_scan(struct net_device *dev, - u8 *ssid, size_t ssid_len) +int ieee80211_sta_set_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t len) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_sta *ifsta; + int res; - if (ssid_len > IEEE80211_MAX_SSID_LEN) + if (len > IEEE80211_MAX_SSID_LEN) return -EINVAL; - /* MLME-SCAN.request (page 118) page 144 (11.1.3.1) - * BSSType: INFRASTRUCTURE, INDEPENDENT, ANY_BSS - * BSSID: MACAddress - * SSID - * ScanType: ACTIVE, PASSIVE - * ProbeDelay: delay (in microseconds) to be used prior to transmitting - * a Probe frame during active scanning - * ChannelList - * MinChannelTime (>= ProbeDelay), in TU - * MaxChannelTime: (>= MinChannelTime), in TU - */ - - /* MLME-SCAN.confirm - * BSSDescriptionSet - * ResultCode: SUCCESS, INVALID_PARAMETERS - */ + ifsta = &sdata->u.sta; - if (local->sta_sw_scanning || local->sta_hw_scanning) { - if (local->scan_dev == dev) - return 0; - return -EBUSY; - } + if (ifsta->ssid_len != len || memcmp(ifsta->ssid, ssid, len) != 0) { + memset(ifsta->ssid, 0, sizeof(ifsta->ssid)); + memcpy(ifsta->ssid, ssid, len); + ifsta->ssid_len = len; + ifsta->flags &= ~IEEE80211_STA_PREV_BSSID_SET; - if (local->ops->hw_scan) { - int rc = local->ops->hw_scan(local_to_hw(local), - ssid, ssid_len); - if (!rc) { - local->sta_hw_scanning = 1; - local->scan_dev = dev; + res = 0; + /* + * Hack! MLME code needs to be cleaned up to have different + * entry points for configuration and internal selection change + */ + if (netif_running(sdata->dev)) + res = ieee80211_if_config(sdata, IEEE80211_IFCC_SSID); + if (res) { + printk(KERN_DEBUG "%s: Failed to config new SSID to " + "the low-level driver\n", sdata->dev->name); + return res; } - return rc; } - local->sta_sw_scanning = 1; + if (len) + ifsta->flags |= IEEE80211_STA_SSID_SET; + else + ifsta->flags &= ~IEEE80211_STA_SSID_SET; - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - netif_stop_queue(sdata->dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA && - (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED)) - ieee80211_send_nullfunc(local, sdata, 1); + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && + !(ifsta->flags & IEEE80211_STA_BSSID_SET)) { + ifsta->ibss_join_req = jiffies; + ifsta->state = IEEE80211_STA_MLME_IBSS_SEARCH; + return ieee80211_sta_find_ibss(sdata, ifsta); } - rcu_read_unlock(); - - if (ssid) { - local->scan_ssid_len = ssid_len; - memcpy(local->scan_ssid, ssid, ssid_len); - } else - local->scan_ssid_len = 0; - local->scan_state = SCAN_SET_CHANNEL; - local->scan_channel_idx = 0; - local->scan_band = IEEE80211_BAND_2GHZ; - local->scan_dev = dev; - - netif_addr_lock_bh(local->mdev); - local->filter_flags |= FIF_BCN_PRBRESP_PROMISC; - local->ops->configure_filter(local_to_hw(local), - FIF_BCN_PRBRESP_PROMISC, - &local->filter_flags, - local->mdev->mc_count, - local->mdev->mc_list); - netif_addr_unlock_bh(local->mdev); - - /* TODO: start scan as soon as all nullfunc frames are ACKed */ - queue_delayed_work(local->hw.workqueue, &local->scan_work, - IEEE80211_CHANNEL_TIME); return 0; } - -int ieee80211_sta_req_scan(struct net_device *dev, u8 *ssid, size_t ssid_len) +int ieee80211_sta_get_ssid(struct ieee80211_sub_if_data *sdata, char *ssid, size_t *len) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) - return ieee80211_sta_start_scan(dev, ssid, ssid_len); - - if (local->sta_sw_scanning || local->sta_hw_scanning) { - if (local->scan_dev == dev) - return 0; - return -EBUSY; - } - - ifsta->scan_ssid_len = ssid_len; - if (ssid_len) - memcpy(ifsta->scan_ssid, ssid, ssid_len); - set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request); - queue_work(local->hw.workqueue, &ifsta->work); + memcpy(ssid, ifsta->ssid, ifsta->ssid_len); + *len = ifsta->ssid_len; return 0; } -static char * -ieee80211_sta_scan_result(struct net_device *dev, - struct iw_request_info *info, - struct ieee80211_sta_bss *bss, - char *current_ev, char *end_buf) +int ieee80211_sta_set_bssid(struct ieee80211_sub_if_data *sdata, u8 *bssid) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct iw_event iwe; - - if (time_after(jiffies, - bss->last_update + IEEE80211_SCAN_RESULT_EXPIRE)) - return current_ev; - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWAP; - iwe.u.ap_addr.sa_family = ARPHRD_ETHER; - memcpy(iwe.u.ap_addr.sa_data, bss->bssid, ETH_ALEN); - current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, - IW_EV_ADDR_LEN); - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWESSID; - if (bss_mesh_cfg(bss)) { - iwe.u.data.length = bss_mesh_id_len(bss); - iwe.u.data.flags = 1; - current_ev = iwe_stream_add_point(info, current_ev, end_buf, - &iwe, bss_mesh_id(bss)); - } else { - iwe.u.data.length = bss->ssid_len; - iwe.u.data.flags = 1; - current_ev = iwe_stream_add_point(info, current_ev, end_buf, - &iwe, bss->ssid); - } - - if (bss->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS) - || bss_mesh_cfg(bss)) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWMODE; - if (bss_mesh_cfg(bss)) - iwe.u.mode = IW_MODE_MESH; - else if (bss->capability & WLAN_CAPABILITY_ESS) - iwe.u.mode = IW_MODE_MASTER; - else - iwe.u.mode = IW_MODE_ADHOC; - current_ev = iwe_stream_add_event(info, current_ev, end_buf, - &iwe, IW_EV_UINT_LEN); - } - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq); - iwe.u.freq.e = 0; - current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, - IW_EV_FREQ_LEN); - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWFREQ; - iwe.u.freq.m = bss->freq; - iwe.u.freq.e = 6; - current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, - IW_EV_FREQ_LEN); - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVQUAL; - iwe.u.qual.qual = bss->qual; - iwe.u.qual.level = bss->signal; - iwe.u.qual.noise = bss->noise; - iwe.u.qual.updated = local->wstats_flags; - current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, - IW_EV_QUAL_LEN); - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWENCODE; - if (bss->capability & WLAN_CAPABILITY_PRIVACY) - iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY; - else - iwe.u.data.flags = IW_ENCODE_DISABLED; - iwe.u.data.length = 0; - current_ev = iwe_stream_add_point(info, current_ev, end_buf, - &iwe, ""); - - if (bss && bss->wpa_ie) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVGENIE; - iwe.u.data.length = bss->wpa_ie_len; - current_ev = iwe_stream_add_point(info, current_ev, end_buf, - &iwe, bss->wpa_ie); - } - - if (bss && bss->rsn_ie) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVGENIE; - iwe.u.data.length = bss->rsn_ie_len; - current_ev = iwe_stream_add_point(info, current_ev, end_buf, - &iwe, bss->rsn_ie); - } - - if (bss && bss->ht_ie) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVGENIE; - iwe.u.data.length = bss->ht_ie_len; - current_ev = iwe_stream_add_point(info, current_ev, end_buf, - &iwe, bss->ht_ie); - } - - if (bss && bss->supp_rates_len > 0) { - /* display all supported rates in readable format */ - char *p = current_ev + iwe_stream_lcp_len(info); - int i; - - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = SIOCGIWRATE; - /* Those two flags are ignored... */ - iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0; - - for (i = 0; i < bss->supp_rates_len; i++) { - iwe.u.bitrate.value = ((bss->supp_rates[i] & - 0x7f) * 500000); - p = iwe_stream_add_value(info, current_ev, p, - end_buf, &iwe, IW_EV_PARAM_LEN); - } - current_ev = p; - } + struct ieee80211_if_sta *ifsta; + int res; - if (bss) { - char *buf; - buf = kmalloc(30, GFP_ATOMIC); - if (buf) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVCUSTOM; - sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->timestamp)); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVCUSTOM; - sprintf(buf, " Last beacon: %dms ago", - jiffies_to_msecs(jiffies - bss->last_update)); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, &iwe, buf); - kfree(buf); - } - } + ifsta = &sdata->u.sta; - if (bss_mesh_cfg(bss)) { - char *buf; - u8 *cfg = bss_mesh_cfg(bss); - buf = kmalloc(50, GFP_ATOMIC); - if (buf) { - memset(&iwe, 0, sizeof(iwe)); - iwe.cmd = IWEVCUSTOM; - sprintf(buf, "Mesh network (version %d)", cfg[0]); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); - sprintf(buf, "Path Selection Protocol ID: " - "0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3], - cfg[4]); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); - sprintf(buf, "Path Selection Metric ID: " - "0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7], - cfg[8]); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); - sprintf(buf, "Congestion Control Mode ID: " - "0x%02X%02X%02X%02X", cfg[9], cfg[10], - cfg[11], cfg[12]); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); - sprintf(buf, "Channel Precedence: " - "0x%02X%02X%02X%02X", cfg[13], cfg[14], - cfg[15], cfg[16]); - iwe.u.data.length = strlen(buf); - current_ev = iwe_stream_add_point(info, current_ev, - end_buf, - &iwe, buf); - kfree(buf); + if (memcmp(ifsta->bssid, bssid, ETH_ALEN) != 0) { + memcpy(ifsta->bssid, bssid, ETH_ALEN); + res = 0; + /* + * Hack! See also ieee80211_sta_set_ssid. + */ + if (netif_running(sdata->dev)) + res = ieee80211_if_config(sdata, IEEE80211_IFCC_BSSID); + if (res) { + printk(KERN_DEBUG "%s: Failed to config new BSSID to " + "the low-level driver\n", sdata->dev->name); + return res; } } - return current_ev; -} - + if (is_valid_ether_addr(bssid)) + ifsta->flags |= IEEE80211_STA_BSSID_SET; + else + ifsta->flags &= ~IEEE80211_STA_BSSID_SET; -int ieee80211_sta_scan_results(struct net_device *dev, - struct iw_request_info *info, - char *buf, size_t len) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - char *current_ev = buf; - char *end_buf = buf + len; - struct ieee80211_sta_bss *bss; - - spin_lock_bh(&local->sta_bss_lock); - list_for_each_entry(bss, &local->sta_bss_list, list) { - if (buf + len - current_ev <= IW_EV_ADDR_LEN) { - spin_unlock_bh(&local->sta_bss_lock); - return -E2BIG; - } - current_ev = ieee80211_sta_scan_result(dev, info, bss, - current_ev, end_buf); - } - spin_unlock_bh(&local->sta_bss_lock); - return current_ev - buf; + return 0; } - -int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len) +int ieee80211_sta_set_extra_ie(struct ieee80211_sub_if_data *sdata, char *ie, size_t len) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; kfree(ifsta->extra_ie); @@ -4365,109 +2508,55 @@ int ieee80211_sta_set_extra_ie(struct net_device *dev, char *ie, size_t len) return 0; } - -struct sta_info *ieee80211_ibss_add_sta(struct net_device *dev, - struct sk_buff *skb, u8 *bssid, - u8 *addr, u64 supp_rates) -{ - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct sta_info *sta; - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - DECLARE_MAC_BUF(mac); - int band = local->hw.conf.channel->band; - - /* TODO: Could consider removing the least recently used entry and - * allow new one to be added. */ - if (local->num_sta >= IEEE80211_IBSS_MAX_STA_ENTRIES) { - if (net_ratelimit()) { - printk(KERN_DEBUG "%s: No room for a new IBSS STA " - "entry %s\n", dev->name, print_mac(mac, addr)); - } - return NULL; - } - - if (compare_ether_addr(bssid, sdata->u.sta.bssid)) - return NULL; - -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG - printk(KERN_DEBUG "%s: Adding new IBSS station %s (dev=%s)\n", - wiphy_name(local->hw.wiphy), print_mac(mac, addr), dev->name); -#endif - - sta = sta_info_alloc(sdata, addr, GFP_ATOMIC); - if (!sta) - return NULL; - - set_sta_flags(sta, WLAN_STA_AUTHORIZED); - - if (supp_rates) - sta->supp_rates[band] = supp_rates; - else - sta->supp_rates[band] = sdata->u.sta.supp_rates_bits[band]; - - rate_control_rate_init(sta, local); - - if (sta_info_insert(sta)) - return NULL; - - return sta; -} - - -int ieee80211_sta_deauthenticate(struct net_device *dev, u16 reason) +int ieee80211_sta_deauthenticate(struct ieee80211_sub_if_data *sdata, u16 reason) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; printk(KERN_DEBUG "%s: deauthenticating by local choice (reason=%d)\n", - dev->name, reason); + sdata->dev->name, reason); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) return -EINVAL; - ieee80211_send_deauth(dev, ifsta, reason); - ieee80211_set_disassoc(dev, ifsta, 1); + ieee80211_set_disassoc(sdata, ifsta, true, true, reason); return 0; } - -int ieee80211_sta_disassociate(struct net_device *dev, u16 reason) +int ieee80211_sta_disassociate(struct ieee80211_sub_if_data *sdata, u16 reason) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_if_sta *ifsta = &sdata->u.sta; printk(KERN_DEBUG "%s: disassociating by local choice (reason=%d)\n", - dev->name, reason); + sdata->dev->name, reason); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EINVAL; if (!(ifsta->flags & IEEE80211_STA_ASSOCIATED)) return -1; - ieee80211_send_disassoc(dev, ifsta, reason); - ieee80211_set_disassoc(dev, ifsta, 0); + ieee80211_set_disassoc(sdata, ifsta, false, true, reason); return 0; } -void ieee80211_notify_mac(struct ieee80211_hw *hw, - enum ieee80211_notification_types notif_type) +/* scan finished notification */ +void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) { - struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata; - - switch (notif_type) { - case IEEE80211_NOTIFY_RE_ASSOC: - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) - continue; + struct ieee80211_sub_if_data *sdata = local->scan_sdata; + struct ieee80211_if_sta *ifsta; - ieee80211_sta_req_auth(sdata->dev, &sdata->u.sta); - } - rcu_read_unlock(); - break; + if (sdata && sdata->vif.type == NL80211_IFTYPE_ADHOC) { + ifsta = &sdata->u.sta; + if (!(ifsta->flags & IEEE80211_STA_BSSID_SET) || + (!(ifsta->state == IEEE80211_STA_MLME_IBSS_JOINED) && + !ieee80211_sta_active_ibss(sdata))) + ieee80211_sta_find_ibss(sdata, ifsta); } + + /* Restart STA timers */ + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) + ieee80211_restart_sta_timer(sdata); + rcu_read_unlock(); } -EXPORT_SYMBOL(ieee80211_notify_mac); diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 0388c090dfe9..5d786720d935 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -12,6 +12,7 @@ #include <linux/rtnetlink.h> #include "rate.h" #include "ieee80211_i.h" +#include "debugfs.h" struct rate_control_alg { struct list_head list; @@ -127,19 +128,46 @@ static void ieee80211_rate_control_ops_put(struct rate_control_ops *ops) module_put(ops->module); } +#ifdef CONFIG_MAC80211_DEBUGFS +static ssize_t rcname_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct rate_control_ref *ref = file->private_data; + int len = strlen(ref->ops->name); + + return simple_read_from_buffer(userbuf, count, ppos, + ref->ops->name, len); +} + +static const struct file_operations rcname_ops = { + .read = rcname_read, + .open = mac80211_open_file_generic, +}; +#endif + struct rate_control_ref *rate_control_alloc(const char *name, struct ieee80211_local *local) { + struct dentry *debugfsdir = NULL; struct rate_control_ref *ref; ref = kmalloc(sizeof(struct rate_control_ref), GFP_KERNEL); if (!ref) goto fail_ref; kref_init(&ref->kref); + ref->local = local; ref->ops = ieee80211_rate_control_ops_get(name); if (!ref->ops) goto fail_ops; - ref->priv = ref->ops->alloc(local); + +#ifdef CONFIG_MAC80211_DEBUGFS + debugfsdir = debugfs_create_dir("rc", local->hw.wiphy->debugfsdir); + local->debugfs.rcdir = debugfsdir; + local->debugfs.rcname = debugfs_create_file("name", 0400, debugfsdir, + ref, &rcname_ops); +#endif + + ref->priv = ref->ops->alloc(&local->hw, debugfsdir); if (!ref->priv) goto fail_priv; return ref; @@ -158,29 +186,46 @@ static void rate_control_release(struct kref *kref) ctrl_ref = container_of(kref, struct rate_control_ref, kref); ctrl_ref->ops->free(ctrl_ref->priv); + +#ifdef CONFIG_MAC80211_DEBUGFS + debugfs_remove(ctrl_ref->local->debugfs.rcname); + ctrl_ref->local->debugfs.rcname = NULL; + debugfs_remove(ctrl_ref->local->debugfs.rcdir); + ctrl_ref->local->debugfs.rcdir = NULL; +#endif + ieee80211_rate_control_ops_put(ctrl_ref->ops); kfree(ctrl_ref); } -void rate_control_get_rate(struct net_device *dev, +void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct sk_buff *skb, + struct sta_info *sta, struct sk_buff *skb, struct rate_selection *sel) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct rate_control_ref *ref = local->rate_ctrl; - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct sta_info *sta; + struct rate_control_ref *ref = sdata->local->rate_ctrl; + void *priv_sta = NULL; + struct ieee80211_sta *ista = NULL; int i; - rcu_read_lock(); - sta = sta_info_get(local, hdr->addr1); - sel->rate_idx = -1; sel->nonerp_idx = -1; sel->probe_idx = -1; + sel->max_rate_idx = sdata->max_ratectrl_rateidx; + + if (sta) { + ista = &sta->sta; + priv_sta = sta->rate_ctrl_priv; + } + + if (sta && sdata->force_unicast_rateidx > -1) + sel->rate_idx = sdata->force_unicast_rateidx; + else + ref->ops->get_rate(ref->priv, sband, ista, priv_sta, skb, sel); - ref->ops->get_rate(ref->priv, dev, sband, skb, sel); + if (sdata->max_ratectrl_rateidx > -1 && + sel->rate_idx > sdata->max_ratectrl_rateidx) + sel->rate_idx = sdata->max_ratectrl_rateidx; BUG_ON(sel->rate_idx < 0); @@ -191,13 +236,11 @@ void rate_control_get_rate(struct net_device *dev, if (sband->bitrates[sel->rate_idx].bitrate < rate->bitrate) break; - if (rate_supported(sta, sband->band, i) && + if (rate_supported(ista, sband->band, i) && !(rate->flags & IEEE80211_RATE_ERP_G)) sel->nonerp_idx = i; } } - - rcu_read_unlock(); } struct rate_control_ref *rate_control_get(struct rate_control_ref *ref) diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index ede7ab56f65b..d0092f847f82 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -19,77 +19,48 @@ #include "ieee80211_i.h" #include "sta_info.h" -/** - * struct rate_selection - rate selection for rate control algos - * @rate: selected transmission rate index - * @nonerp: Non-ERP rate to use instead if ERP cannot be used - * @probe: rate for probing (or -1) - * - */ -struct rate_selection { - s8 rate_idx, nonerp_idx, probe_idx; -}; - -struct rate_control_ops { - struct module *module; - const char *name; - void (*tx_status)(void *priv, struct net_device *dev, - struct sk_buff *skb); - void (*get_rate)(void *priv, struct net_device *dev, - struct ieee80211_supported_band *band, - struct sk_buff *skb, - struct rate_selection *sel); - void (*rate_init)(void *priv, void *priv_sta, - struct ieee80211_local *local, struct sta_info *sta); - void (*clear)(void *priv); - - void *(*alloc)(struct ieee80211_local *local); - void (*free)(void *priv); - void *(*alloc_sta)(void *priv, gfp_t gfp); - void (*free_sta)(void *priv, void *priv_sta); - - int (*add_attrs)(void *priv, struct kobject *kobj); - void (*remove_attrs)(void *priv, struct kobject *kobj); - void (*add_sta_debugfs)(void *priv, void *priv_sta, - struct dentry *dir); - void (*remove_sta_debugfs)(void *priv, void *priv_sta); -}; - struct rate_control_ref { + struct ieee80211_local *local; struct rate_control_ops *ops; void *priv; struct kref kref; }; -int ieee80211_rate_control_register(struct rate_control_ops *ops); -void ieee80211_rate_control_unregister(struct rate_control_ops *ops); - /* Get a reference to the rate control algorithm. If `name' is NULL, get the * first available algorithm. */ struct rate_control_ref *rate_control_alloc(const char *name, struct ieee80211_local *local); -void rate_control_get_rate(struct net_device *dev, +void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, - struct sk_buff *skb, + struct sta_info *sta, struct sk_buff *skb, struct rate_selection *sel); struct rate_control_ref *rate_control_get(struct rate_control_ref *ref); void rate_control_put(struct rate_control_ref *ref); -static inline void rate_control_tx_status(struct net_device *dev, +static inline void rate_control_tx_status(struct ieee80211_local *local, + struct ieee80211_supported_band *sband, + struct sta_info *sta, struct sk_buff *skb) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct rate_control_ref *ref = local->rate_ctrl; + struct ieee80211_sta *ista = &sta->sta; + void *priv_sta = sta->rate_ctrl_priv; - ref->ops->tx_status(ref->priv, dev, skb); + ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); } -static inline void rate_control_rate_init(struct sta_info *sta, - struct ieee80211_local *local) +static inline void rate_control_rate_init(struct sta_info *sta) { + struct ieee80211_local *local = sta->sdata->local; struct rate_control_ref *ref = sta->rate_ctrl; - ref->ops->rate_init(ref->priv, sta->rate_ctrl_priv, local, sta); + struct ieee80211_sta *ista = &sta->sta; + void *priv_sta = sta->rate_ctrl_priv; + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + + ref->ops->rate_init(ref->priv, sband, ista, priv_sta); } @@ -100,15 +71,19 @@ static inline void rate_control_clear(struct ieee80211_local *local) } static inline void *rate_control_alloc_sta(struct rate_control_ref *ref, + struct ieee80211_sta *sta, gfp_t gfp) { - return ref->ops->alloc_sta(ref->priv, gfp); + return ref->ops->alloc_sta(ref->priv, sta, gfp); } -static inline void rate_control_free_sta(struct rate_control_ref *ref, - void *priv) +static inline void rate_control_free_sta(struct sta_info *sta) { - ref->ops->free_sta(ref->priv, priv); + struct rate_control_ref *ref = sta->rate_ctrl; + struct ieee80211_sta *ista = &sta->sta; + void *priv_sta = sta->rate_ctrl_priv; + + ref->ops->free_sta(ref->priv, ista, priv_sta); } static inline void rate_control_add_sta_debugfs(struct sta_info *sta) @@ -130,31 +105,6 @@ static inline void rate_control_remove_sta_debugfs(struct sta_info *sta) #endif } -static inline int rate_supported(struct sta_info *sta, - enum ieee80211_band band, - int index) -{ - return (sta == NULL || sta->supp_rates[band] & BIT(index)); -} - -static inline s8 -rate_lowest_index(struct ieee80211_local *local, - struct ieee80211_supported_band *sband, - struct sta_info *sta) -{ - int i; - - for (i = 0; i < sband->n_bitrates; i++) - if (rate_supported(sta, sband->band, i)) - return i; - - /* warn when we cannot find a rate. */ - WARN_ON(1); - - return 0; -} - - /* functions for rate control related to a device */ int ieee80211_init_rate_ctrl_alg(struct ieee80211_local *local, const char *name); @@ -175,4 +125,18 @@ static inline void rc80211_pid_exit(void) } #endif +#ifdef CONFIG_MAC80211_RC_MINSTREL +extern int rc80211_minstrel_init(void); +extern void rc80211_minstrel_exit(void); +#else +static inline int rc80211_minstrel_init(void) +{ + return 0; +} +static inline void rc80211_minstrel_exit(void) +{ +} +#endif + + #endif /* IEEE80211_RATE_H */ diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c new file mode 100644 index 000000000000..f6d69dab07a3 --- /dev/null +++ b/net/mac80211/rc80211_minstrel.c @@ -0,0 +1,583 @@ +/* + * Copyright (C) 2008 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on minstrel.c: + * Copyright (C) 2005-2007 Derek Smithies <derek@indranet.co.nz> + * Sponsored by Indranet Technologies Ltd + * + * Based on sample.c: + * Copyright (c) 2005 John Bicket + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any + * redistribution must be conditioned upon including a substantially + * similar Disclaimer requirement for further binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGES. + */ +#include <linux/netdevice.h> +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/debugfs.h> +#include <linux/random.h> +#include <linux/ieee80211.h> +#include <net/mac80211.h> +#include "rate.h" +#include "rc80211_minstrel.h" + +#define SAMPLE_COLUMNS 10 +#define SAMPLE_TBL(_mi, _idx, _col) \ + _mi->sample_table[(_idx * SAMPLE_COLUMNS) + _col] + +/* convert mac80211 rate index to local array index */ +static inline int +rix_to_ndx(struct minstrel_sta_info *mi, int rix) +{ + int i = rix; + for (i = rix; i >= 0; i--) + if (mi->r[i].rix == rix) + break; + WARN_ON(mi->r[i].rix != rix); + return i; +} + +static inline bool +use_low_rate(struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + u16 fc; + + fc = le16_to_cpu(hdr->frame_control); + + return ((info->flags & IEEE80211_TX_CTL_NO_ACK) || + (fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || + is_multicast_ether_addr(hdr->addr1)); +} + + +static void +minstrel_update_stats(struct minstrel_priv *mp, struct minstrel_sta_info *mi) +{ + u32 max_tp = 0, index_max_tp = 0, index_max_tp2 = 0; + u32 max_prob = 0, index_max_prob = 0; + u32 usecs; + u32 p; + int i; + + mi->stats_update = jiffies; + for (i = 0; i < mi->n_rates; i++) { + struct minstrel_rate *mr = &mi->r[i]; + + usecs = mr->perfect_tx_time; + if (!usecs) + usecs = 1000000; + + /* To avoid rounding issues, probabilities scale from 0 (0%) + * to 18000 (100%) */ + if (mr->attempts) { + p = (mr->success * 18000) / mr->attempts; + mr->succ_hist += mr->success; + mr->att_hist += mr->attempts; + mr->cur_prob = p; + p = ((p * (100 - mp->ewma_level)) + (mr->probability * + mp->ewma_level)) / 100; + mr->probability = p; + mr->cur_tp = p * (1000000 / usecs); + } + + mr->last_success = mr->success; + mr->last_attempts = mr->attempts; + mr->success = 0; + mr->attempts = 0; + + /* Sample less often below the 10% chance of success. + * Sample less often above the 95% chance of success. */ + if ((mr->probability > 17100) || (mr->probability < 1800)) { + mr->adjusted_retry_count = mr->retry_count >> 1; + if (mr->adjusted_retry_count > 2) + mr->adjusted_retry_count = 2; + } else { + mr->adjusted_retry_count = mr->retry_count; + } + if (!mr->adjusted_retry_count) + mr->adjusted_retry_count = 2; + } + + for (i = 0; i < mi->n_rates; i++) { + struct minstrel_rate *mr = &mi->r[i]; + if (max_tp < mr->cur_tp) { + index_max_tp = i; + max_tp = mr->cur_tp; + } + if (max_prob < mr->probability) { + index_max_prob = i; + max_prob = mr->probability; + } + } + + max_tp = 0; + for (i = 0; i < mi->n_rates; i++) { + struct minstrel_rate *mr = &mi->r[i]; + + if (i == index_max_tp) + continue; + + if (max_tp < mr->cur_tp) { + index_max_tp2 = i; + max_tp = mr->cur_tp; + } + } + mi->max_tp_rate = index_max_tp; + mi->max_tp_rate2 = index_max_tp2; + mi->max_prob_rate = index_max_prob; +} + +static void +minstrel_tx_status(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb) +{ + struct minstrel_sta_info *mi = priv_sta; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_tx_altrate *ar = info->status.retries; + struct minstrel_priv *mp = priv; + int i, ndx, tries; + int success = 0; + + if (!info->status.excessive_retries) + success = 1; + + if (!mp->has_mrr || (ar[0].rate_idx < 0)) { + ndx = rix_to_ndx(mi, info->tx_rate_idx); + tries = info->status.retry_count + 1; + mi->r[ndx].success += success; + mi->r[ndx].attempts += tries; + return; + } + + for (i = 0; i < 4; i++) { + if (ar[i].rate_idx < 0) + break; + + ndx = rix_to_ndx(mi, ar[i].rate_idx); + mi->r[ndx].attempts += ar[i].limit + 1; + + if ((i != 3) && (ar[i + 1].rate_idx < 0)) + mi->r[ndx].success += success; + } + + if ((info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) && (i >= 0)) + mi->sample_count++; + + if (mi->sample_deferred > 0) + mi->sample_deferred--; +} + + +static inline unsigned int +minstrel_get_retry_count(struct minstrel_rate *mr, + struct ieee80211_tx_info *info) +{ + unsigned int retry = mr->adjusted_retry_count; + + if (info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) + retry = max(2U, min(mr->retry_count_rtscts, retry)); + else if (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT) + retry = max(2U, min(mr->retry_count_cts, retry)); + return retry; +} + + +static int +minstrel_get_next_sample(struct minstrel_sta_info *mi) +{ + unsigned int sample_ndx; + sample_ndx = SAMPLE_TBL(mi, mi->sample_idx, mi->sample_column); + mi->sample_idx++; + if (mi->sample_idx > (mi->n_rates - 2)) { + mi->sample_idx = 0; + mi->sample_column++; + if (mi->sample_column >= SAMPLE_COLUMNS) + mi->sample_column = 0; + } + return sample_ndx; +} + +void +minstrel_get_rate(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb, struct rate_selection *sel) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct minstrel_sta_info *mi = priv_sta; + struct minstrel_priv *mp = priv; + struct ieee80211_tx_altrate *ar = info->control.retries; + unsigned int ndx, sample_ndx = 0; + bool mrr; + bool sample_slower = false; + bool sample = false; + int i, delta; + int mrr_ndx[3]; + int sample_rate; + + if (!sta || !mi || use_low_rate(skb)) { + sel->rate_idx = rate_lowest_index(sband, sta); + return; + } + + mrr = mp->has_mrr; + + /* mac80211 does not allow mrr for RTS/CTS */ + if ((info->flags & IEEE80211_TX_CTL_USE_RTS_CTS) || + (info->flags & IEEE80211_TX_CTL_USE_CTS_PROTECT)) + mrr = false; + + if (time_after(jiffies, mi->stats_update + (mp->update_interval * + HZ) / 1000)) + minstrel_update_stats(mp, mi); + + ndx = mi->max_tp_rate; + + if (mrr) + sample_rate = mp->lookaround_rate_mrr; + else + sample_rate = mp->lookaround_rate; + + mi->packet_count++; + delta = (mi->packet_count * sample_rate / 100) - + (mi->sample_count + mi->sample_deferred / 2); + + /* delta > 0: sampling required */ + if (delta > 0) { + if (mi->packet_count >= 10000) { + mi->sample_deferred = 0; + mi->sample_count = 0; + mi->packet_count = 0; + } else if (delta > mi->n_rates * 2) { + /* With multi-rate retry, not every planned sample + * attempt actually gets used, due to the way the retry + * chain is set up - [max_tp,sample,prob,lowest] for + * sample_rate < max_tp. + * + * If there's too much sampling backlog and the link + * starts getting worse, minstrel would start bursting + * out lots of sampling frames, which would result + * in a large throughput loss. */ + mi->sample_count += (delta - mi->n_rates * 2); + } + + sample_ndx = minstrel_get_next_sample(mi); + sample = true; + sample_slower = mrr && (mi->r[sample_ndx].perfect_tx_time > + mi->r[ndx].perfect_tx_time); + + if (!sample_slower) { + ndx = sample_ndx; + mi->sample_count++; + } else { + /* Only use IEEE80211_TX_CTL_RATE_CTRL_PROBE to mark + * packets that have the sampling rate deferred to the + * second MRR stage. Increase the sample counter only + * if the deferred sample rate was actually used. + * Use the sample_deferred counter to make sure that + * the sampling is not done in large bursts */ + info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + mi->sample_deferred++; + } + } + sel->rate_idx = mi->r[ndx].rix; + info->control.retry_limit = minstrel_get_retry_count(&mi->r[ndx], info); + + if (!mrr) { + ar[0].rate_idx = mi->lowest_rix; + ar[0].limit = mp->max_retry; + ar[1].rate_idx = -1; + return; + } + + /* MRR setup */ + if (sample) { + if (sample_slower) + mrr_ndx[0] = sample_ndx; + else + mrr_ndx[0] = mi->max_tp_rate; + } else { + mrr_ndx[0] = mi->max_tp_rate2; + } + mrr_ndx[1] = mi->max_prob_rate; + mrr_ndx[2] = 0; + for (i = 0; i < 3; i++) { + ar[i].rate_idx = mi->r[mrr_ndx[i]].rix; + ar[i].limit = mi->r[mrr_ndx[i]].adjusted_retry_count; + } +} + + +static void +calc_rate_durations(struct minstrel_sta_info *mi, struct ieee80211_local *local, + struct minstrel_rate *d, struct ieee80211_rate *rate) +{ + int erp = !!(rate->flags & IEEE80211_RATE_ERP_G); + + d->perfect_tx_time = ieee80211_frame_duration(local, 1200, + rate->bitrate, erp, 1); + d->ack_time = ieee80211_frame_duration(local, 10, + rate->bitrate, erp, 1); +} + +static void +init_sample_table(struct minstrel_sta_info *mi) +{ + unsigned int i, col, new_idx; + unsigned int n_srates = mi->n_rates - 1; + u8 rnd[8]; + + mi->sample_column = 0; + mi->sample_idx = 0; + memset(mi->sample_table, 0, SAMPLE_COLUMNS * mi->n_rates); + + for (col = 0; col < SAMPLE_COLUMNS; col++) { + for (i = 0; i < n_srates; i++) { + get_random_bytes(rnd, sizeof(rnd)); + new_idx = (i + rnd[i & 7]) % n_srates; + + while (SAMPLE_TBL(mi, new_idx, col) != 0) + new_idx = (new_idx + 1) % n_srates; + + /* Don't sample the slowest rate (i.e. slowest base + * rate). We must presume that the slowest rate works + * fine, or else other management frames will also be + * failing and the link will break */ + SAMPLE_TBL(mi, new_idx, col) = i + 1; + } + } +} + +static void +minstrel_rate_init(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta) +{ + struct minstrel_sta_info *mi = priv_sta; + struct minstrel_priv *mp = priv; + struct minstrel_rate *mr_ctl; + unsigned int i, n = 0; + unsigned int t_slot = 9; /* FIXME: get real slot time */ + + mi->lowest_rix = rate_lowest_index(sband, sta); + mr_ctl = &mi->r[rix_to_ndx(mi, mi->lowest_rix)]; + mi->sp_ack_dur = mr_ctl->ack_time; + + for (i = 0; i < sband->n_bitrates; i++) { + struct minstrel_rate *mr = &mi->r[n]; + unsigned int tx_time = 0, tx_time_cts = 0, tx_time_rtscts = 0; + unsigned int tx_time_single; + unsigned int cw = mp->cw_min; + + if (!rate_supported(sta, sband->band, i)) + continue; + n++; + memset(mr, 0, sizeof(*mr)); + + mr->rix = i; + mr->bitrate = sband->bitrates[i].bitrate / 5; + calc_rate_durations(mi, hw_to_local(mp->hw), mr, + &sband->bitrates[i]); + + /* calculate maximum number of retransmissions before + * fallback (based on maximum segment size) */ + mr->retry_count = 1; + mr->retry_count_cts = 1; + mr->retry_count_rtscts = 1; + tx_time = mr->perfect_tx_time + mi->sp_ack_dur; + do { + /* add one retransmission */ + tx_time_single = mr->ack_time + mr->perfect_tx_time; + + /* contention window */ + tx_time_single += t_slot + min(cw, mp->cw_max); + cw = (cw + 1) << 1; + + tx_time += tx_time_single; + tx_time_cts += tx_time_single + mi->sp_ack_dur; + tx_time_rtscts += tx_time_single + 2 * mi->sp_ack_dur; + if ((tx_time_cts < mp->segment_size) && + (mr->retry_count_cts < mp->max_retry)) + mr->retry_count_cts++; + if ((tx_time_rtscts < mp->segment_size) && + (mr->retry_count_rtscts < mp->max_retry)) + mr->retry_count_rtscts++; + } while ((tx_time < mp->segment_size) && + (++mr->retry_count < mp->max_retry)); + mr->adjusted_retry_count = mr->retry_count; + } + + for (i = n; i < sband->n_bitrates; i++) { + struct minstrel_rate *mr = &mi->r[i]; + mr->rix = -1; + } + + mi->n_rates = n; + mi->stats_update = jiffies; + + init_sample_table(mi); +} + +static void * +minstrel_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) +{ + struct ieee80211_supported_band *sband; + struct minstrel_sta_info *mi; + struct minstrel_priv *mp = priv; + struct ieee80211_hw *hw = mp->hw; + int max_rates = 0; + int i; + + mi = kzalloc(sizeof(struct minstrel_sta_info), gfp); + if (!mi) + return NULL; + + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + sband = hw->wiphy->bands[hw->conf.channel->band]; + if (sband->n_bitrates > max_rates) + max_rates = sband->n_bitrates; + } + + mi->r = kzalloc(sizeof(struct minstrel_rate) * max_rates, gfp); + if (!mi->r) + goto error; + + mi->sample_table = kmalloc(SAMPLE_COLUMNS * max_rates, gfp); + if (!mi->sample_table) + goto error1; + + mi->stats_update = jiffies; + return mi; + +error1: + kfree(mi->r); +error: + kfree(mi); + return NULL; +} + +static void +minstrel_free_sta(void *priv, struct ieee80211_sta *sta, void *priv_sta) +{ + struct minstrel_sta_info *mi = priv_sta; + + kfree(mi->sample_table); + kfree(mi->r); + kfree(mi); +} + +static void +minstrel_clear(void *priv) +{ +} + +static void * +minstrel_alloc(struct ieee80211_hw *hw, struct dentry *debugfsdir) +{ + struct minstrel_priv *mp; + + mp = kzalloc(sizeof(struct minstrel_priv), GFP_ATOMIC); + if (!mp) + return NULL; + + /* contention window settings + * Just an approximation. Using the per-queue values would complicate + * the calculations and is probably unnecessary */ + mp->cw_min = 15; + mp->cw_max = 1023; + + /* number of packets (in %) to use for sampling other rates + * sample less often for non-mrr packets, because the overhead + * is much higher than with mrr */ + mp->lookaround_rate = 5; + mp->lookaround_rate_mrr = 10; + + /* moving average weight for EWMA */ + mp->ewma_level = 75; + + /* maximum time that the hw is allowed to stay in one MRR segment */ + mp->segment_size = 6000; + + if (hw->max_altrate_tries > 0) + mp->max_retry = hw->max_altrate_tries; + else + /* safe default, does not necessarily have to match hw properties */ + mp->max_retry = 7; + + if (hw->max_altrates >= 3) + mp->has_mrr = true; + + mp->hw = hw; + mp->update_interval = 100; + + return mp; +} + +static void +minstrel_free(void *priv) +{ + kfree(priv); +} + +static struct rate_control_ops mac80211_minstrel = { + .name = "minstrel", + .tx_status = minstrel_tx_status, + .get_rate = minstrel_get_rate, + .rate_init = minstrel_rate_init, + .clear = minstrel_clear, + .alloc = minstrel_alloc, + .free = minstrel_free, + .alloc_sta = minstrel_alloc_sta, + .free_sta = minstrel_free_sta, +#ifdef CONFIG_MAC80211_DEBUGFS + .add_sta_debugfs = minstrel_add_sta_debugfs, + .remove_sta_debugfs = minstrel_remove_sta_debugfs, +#endif +}; + +int __init +rc80211_minstrel_init(void) +{ + return ieee80211_rate_control_register(&mac80211_minstrel); +} + +void +rc80211_minstrel_exit(void) +{ + ieee80211_rate_control_unregister(&mac80211_minstrel); +} + diff --git a/net/mac80211/rc80211_minstrel.h b/net/mac80211/rc80211_minstrel.h new file mode 100644 index 000000000000..9a90a6aee043 --- /dev/null +++ b/net/mac80211/rc80211_minstrel.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2008 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __RC_MINSTREL_H +#define __RC_MINSTREL_H + +struct minstrel_rate { + int bitrate; + int rix; + + unsigned int perfect_tx_time; + unsigned int ack_time; + + unsigned int retry_count; + unsigned int retry_count_cts; + unsigned int retry_count_rtscts; + unsigned int adjusted_retry_count; + + u32 success; + u32 attempts; + u32 last_attempts; + u32 last_success; + + /* parts per thousand */ + u32 cur_prob; + u32 probability; + + /* per-rate throughput */ + u32 cur_tp; + u32 throughput; + + u64 succ_hist; + u64 att_hist; +}; + +struct minstrel_sta_info { + unsigned long stats_update; + unsigned int sp_ack_dur; + unsigned int rate_avg; + + unsigned int lowest_rix; + + unsigned int max_tp_rate; + unsigned int max_tp_rate2; + unsigned int max_prob_rate; + unsigned int packet_count; + unsigned int sample_count; + int sample_deferred; + + unsigned int sample_idx; + unsigned int sample_column; + + int n_rates; + struct minstrel_rate *r; + + /* sampling table */ + u8 *sample_table; + +#ifdef CONFIG_MAC80211_DEBUGFS + struct dentry *dbg_stats; +#endif +}; + +struct minstrel_priv { + struct ieee80211_hw *hw; + bool has_mrr; + unsigned int cw_min; + unsigned int cw_max; + unsigned int max_retry; + unsigned int ewma_level; + unsigned int segment_size; + unsigned int update_interval; + unsigned int lookaround_rate; + unsigned int lookaround_rate_mrr; +}; + +void minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir); +void minstrel_remove_sta_debugfs(void *priv, void *priv_sta); + +#endif diff --git a/net/mac80211/rc80211_minstrel_debugfs.c b/net/mac80211/rc80211_minstrel_debugfs.c new file mode 100644 index 000000000000..98f480708050 --- /dev/null +++ b/net/mac80211/rc80211_minstrel_debugfs.c @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2008 Felix Fietkau <nbd@openwrt.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on minstrel.c: + * Copyright (C) 2005-2007 Derek Smithies <derek@indranet.co.nz> + * Sponsored by Indranet Technologies Ltd + * + * Based on sample.c: + * Copyright (c) 2005 John Bicket + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * without modification. + * 2. Redistributions in binary form must reproduce at minimum a disclaimer + * similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any + * redistribution must be conditioned upon including a substantially + * similar Disclaimer requirement for further binary redistribution. + * 3. Neither the names of the above-listed copyright holders nor the names + * of any contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * NO WARRANTY + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY + * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER + * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGES. + */ +#include <linux/netdevice.h> +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/debugfs.h> +#include <linux/ieee80211.h> +#include <net/mac80211.h> +#include "rc80211_minstrel.h" + +struct minstrel_stats_info { + struct minstrel_sta_info *mi; + char buf[4096]; + size_t len; +}; + +static int +minstrel_stats_open(struct inode *inode, struct file *file) +{ + struct minstrel_sta_info *mi = inode->i_private; + struct minstrel_stats_info *ms; + unsigned int i, tp, prob, eprob; + char *p; + + ms = kmalloc(sizeof(*ms), GFP_KERNEL); + if (!ms) + return -ENOMEM; + + file->private_data = ms; + p = ms->buf; + p += sprintf(p, "rate throughput ewma prob this prob " + "this succ/attempt success attempts\n"); + for (i = 0; i < mi->n_rates; i++) { + struct minstrel_rate *mr = &mi->r[i]; + + *(p++) = (i == mi->max_tp_rate) ? 'T' : ' '; + *(p++) = (i == mi->max_tp_rate2) ? 't' : ' '; + *(p++) = (i == mi->max_prob_rate) ? 'P' : ' '; + p += sprintf(p, "%3u%s", mr->bitrate / 2, + (mr->bitrate & 1 ? ".5" : " ")); + + tp = ((mr->cur_tp * 96) / 18000) >> 10; + prob = mr->cur_prob / 18; + eprob = mr->probability / 18; + + p += sprintf(p, " %6u.%1u %6u.%1u %6u.%1u " + "%3u(%3u) %8llu %8llu\n", + tp / 10, tp % 10, + eprob / 10, eprob % 10, + prob / 10, prob % 10, + mr->last_success, + mr->last_attempts, + (unsigned long long)mr->succ_hist, + (unsigned long long)mr->att_hist); + } + p += sprintf(p, "\nTotal packet count:: ideal %d " + "lookaround %d\n\n", + mi->packet_count - mi->sample_count, + mi->sample_count); + ms->len = p - ms->buf; + + return 0; +} + +static ssize_t +minstrel_stats_read(struct file *file, char __user *buf, size_t len, loff_t *o) +{ + struct minstrel_stats_info *ms; + char *src; + + ms = file->private_data; + src = ms->buf; + + len = min(len, ms->len); + if (len <= *o) + return 0; + + src += *o; + len -= *o; + *o += len; + + if (copy_to_user(buf, src, len)) + return -EFAULT; + + return len; +} + +static int +minstrel_stats_release(struct inode *inode, struct file *file) +{ + struct minstrel_stats_info *ms = file->private_data; + + kfree(ms); + + return 0; +} + +static struct file_operations minstrel_stat_fops = { + .owner = THIS_MODULE, + .open = minstrel_stats_open, + .read = minstrel_stats_read, + .release = minstrel_stats_release, +}; + +void +minstrel_add_sta_debugfs(void *priv, void *priv_sta, struct dentry *dir) +{ + struct minstrel_sta_info *mi = priv_sta; + + mi->dbg_stats = debugfs_create_file("rc_stats", S_IRUGO, dir, mi, + &minstrel_stat_fops); +} + +void +minstrel_remove_sta_debugfs(void *priv, void *priv_sta) +{ + struct minstrel_sta_info *mi = priv_sta; + + debugfs_remove(mi->dbg_stats); +} diff --git a/net/mac80211/rc80211_pid.h b/net/mac80211/rc80211_pid.h index 0a9135b974b5..01d64d53f3b9 100644 --- a/net/mac80211/rc80211_pid.h +++ b/net/mac80211/rc80211_pid.h @@ -124,7 +124,6 @@ struct rc_pid_events_file_info { * struct rc_pid_debugfs_entries - tunable parameters * * Algorithm parameters, tunable via debugfs. - * @dir: the debugfs directory for a specific phy * @target: target percentage for failed frames * @sampling_period: error sampling interval in milliseconds * @coeff_p: absolute value of the proportional coefficient @@ -143,7 +142,6 @@ struct rc_pid_events_file_info { * ordering of rates) */ struct rc_pid_debugfs_entries { - struct dentry *dir; struct dentry *target; struct dentry *sampling_period; struct dentry *coeff_p; @@ -180,6 +178,8 @@ struct rc_pid_sta_info { u32 tx_num_failed; u32 tx_num_xmit; + int txrate_idx; + /* Average failed frames percentage error (i.e. actual vs. target * percentage), scaled by RC_PID_SMOOTHING. This value is computed * using using an exponential weighted average technique: diff --git a/net/mac80211/rc80211_pid_algo.c b/net/mac80211/rc80211_pid_algo.c index a914ba73ccf5..86eb374e3b87 100644 --- a/net/mac80211/rc80211_pid_algo.c +++ b/net/mac80211/rc80211_pid_algo.c @@ -68,17 +68,14 @@ * exhibited a worse failed frames behaviour and we'll choose the highest rate * whose failed frames behaviour is not worse than the one of the original rate * target. While at it, check that the new rate is valid. */ -static void rate_control_pid_adjust_rate(struct ieee80211_local *local, - struct sta_info *sta, int adj, +static void rate_control_pid_adjust_rate(struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, + struct rc_pid_sta_info *spinfo, int adj, struct rc_pid_rateinfo *rinfo) { - struct ieee80211_sub_if_data *sdata; - struct ieee80211_supported_band *sband; int cur_sorted, new_sorted, probe, tmp, n_bitrates, band; - int cur = sta->txrate_idx; + int cur = spinfo->txrate_idx; - sdata = sta->sdata; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; band = sband->band; n_bitrates = sband->n_bitrates; @@ -111,7 +108,7 @@ static void rate_control_pid_adjust_rate(struct ieee80211_local *local, /* Fit the rate found to the nearest supported rate. */ do { if (rate_supported(sta, band, rinfo[tmp].index)) { - sta->txrate_idx = rinfo[tmp].index; + spinfo->txrate_idx = rinfo[tmp].index; break; } if (adj < 0) @@ -121,9 +118,9 @@ static void rate_control_pid_adjust_rate(struct ieee80211_local *local, } while (tmp < n_bitrates && tmp >= 0); #ifdef CONFIG_MAC80211_DEBUGFS - rate_control_pid_event_rate_change( - &((struct rc_pid_sta_info *)sta->rate_ctrl_priv)->events, - sta->txrate_idx, sband->bitrates[sta->txrate_idx].bitrate); + rate_control_pid_event_rate_change(&spinfo->events, + spinfo->txrate_idx, + sband->bitrates[spinfo->txrate_idx].bitrate); #endif } @@ -145,15 +142,11 @@ static void rate_control_pid_normalize(struct rc_pid_info *pinfo, int l) } static void rate_control_pid_sample(struct rc_pid_info *pinfo, - struct ieee80211_local *local, - struct sta_info *sta) + struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, + struct rc_pid_sta_info *spinfo) { -#ifdef CONFIG_MAC80211_MESH - struct ieee80211_sub_if_data *sdata = sta->sdata; -#endif - struct rc_pid_sta_info *spinfo = sta->rate_ctrl_priv; struct rc_pid_rateinfo *rinfo = pinfo->rinfo; - struct ieee80211_supported_band *sband; u32 pf; s32 err_avg; u32 err_prop; @@ -162,9 +155,6 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, int adj, i, j, tmp; unsigned long period; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - spinfo = sta->rate_ctrl_priv; - /* In case nothing happened during the previous control interval, turn * the sharpening factor on. */ period = (HZ * pinfo->sampling_period + 500) / 1000; @@ -180,14 +170,15 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, if (unlikely(spinfo->tx_num_xmit == 0)) pf = spinfo->last_pf; else { + /* XXX: BAD HACK!!! */ + struct sta_info *si = container_of(sta, struct sta_info, sta); + pf = spinfo->tx_num_failed * 100 / spinfo->tx_num_xmit; -#ifdef CONFIG_MAC80211_MESH - if (pf == 100 && - sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT) - mesh_plink_broken(sta); -#endif + + if (ieee80211_vif_is_mesh(&si->sdata->vif) && pf == 100) + mesh_plink_broken(si); pf <<= RC_PID_ARITH_SHIFT; - sta->fail_avg = ((pf + (spinfo->last_pf << 3)) / 9) + si->fail_avg = ((pf + (spinfo->last_pf << 3)) / 9) >> RC_PID_ARITH_SHIFT; } @@ -195,16 +186,16 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, spinfo->tx_num_failed = 0; /* If we just switched rate, update the rate behaviour info. */ - if (pinfo->oldrate != sta->txrate_idx) { + if (pinfo->oldrate != spinfo->txrate_idx) { i = rinfo[pinfo->oldrate].rev_index; - j = rinfo[sta->txrate_idx].rev_index; + j = rinfo[spinfo->txrate_idx].rev_index; tmp = (pf - spinfo->last_pf); tmp = RC_PID_DO_ARITH_RIGHT_SHIFT(tmp, RC_PID_ARITH_SHIFT); rinfo[j].diff = rinfo[i].diff + tmp; - pinfo->oldrate = sta->txrate_idx; + pinfo->oldrate = spinfo->txrate_idx; } rate_control_pid_normalize(pinfo, sband->n_bitrates); @@ -233,43 +224,26 @@ static void rate_control_pid_sample(struct rc_pid_info *pinfo, /* Change rate. */ if (adj) - rate_control_pid_adjust_rate(local, sta, adj, rinfo); + rate_control_pid_adjust_rate(sband, sta, spinfo, adj, rinfo); } -static void rate_control_pid_tx_status(void *priv, struct net_device *dev, +static void rate_control_pid_tx_status(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, struct sk_buff *skb) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_sub_if_data *sdata; struct rc_pid_info *pinfo = priv; - struct sta_info *sta; - struct rc_pid_sta_info *spinfo; + struct rc_pid_sta_info *spinfo = priv_sta; unsigned long period; - struct ieee80211_supported_band *sband; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - rcu_read_lock(); - - sta = sta_info_get(local, hdr->addr1); - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - if (!sta) - goto unlock; - - /* Don't update the state if we're not controlling the rate. */ - sdata = sta->sdata; - if (sdata->force_unicast_rateidx > -1) { - sta->txrate_idx = sdata->max_ratectrl_rateidx; - goto unlock; - } + if (!spinfo) + return; /* Ignore all frames that were sent with a different rate than the rate * we currently advise mac80211 to use. */ - if (info->tx_rate_idx != sta->txrate_idx) - goto unlock; + if (info->tx_rate_idx != spinfo->txrate_idx) + return; - spinfo = sta->rate_ctrl_priv; spinfo->tx_num_xmit++; #ifdef CONFIG_MAC80211_DEBUGFS @@ -287,93 +261,68 @@ static void rate_control_pid_tx_status(void *priv, struct net_device *dev, spinfo->tx_num_xmit++; } - if (info->status.excessive_retries) { - sta->tx_retry_failed++; - sta->tx_num_consecutive_failures++; - sta->tx_num_mpdu_fail++; - } else { - sta->tx_num_consecutive_failures = 0; - sta->tx_num_mpdu_ok++; - } - sta->tx_retry_count += info->status.retry_count; - sta->tx_num_mpdu_fail += info->status.retry_count; - /* Update PID controller state. */ period = (HZ * pinfo->sampling_period + 500) / 1000; if (!period) period = 1; if (time_after(jiffies, spinfo->last_sample + period)) - rate_control_pid_sample(pinfo, local, sta); - - unlock: - rcu_read_unlock(); + rate_control_pid_sample(pinfo, sband, sta, spinfo); } -static void rate_control_pid_get_rate(void *priv, struct net_device *dev, - struct ieee80211_supported_band *sband, - struct sk_buff *skb, - struct rate_selection *sel) +static void +rate_control_pid_get_rate(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta, + struct sk_buff *skb, + struct rate_selection *sel) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_sub_if_data *sdata; - struct sta_info *sta; + struct rc_pid_sta_info *spinfo = priv_sta; int rateidx; u16 fc; - rcu_read_lock(); - - sta = sta_info_get(local, hdr->addr1); - /* Send management frames and broadcast/multicast data using lowest * rate. */ fc = le16_to_cpu(hdr->frame_control); - if ((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || - is_multicast_ether_addr(hdr->addr1) || !sta) { - sel->rate_idx = rate_lowest_index(local, sband, sta); - rcu_read_unlock(); + if (!sta || !spinfo || + (fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA || + is_multicast_ether_addr(hdr->addr1)) { + sel->rate_idx = rate_lowest_index(sband, sta); return; } - /* If a forced rate is in effect, select it. */ - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->force_unicast_rateidx > -1) - sta->txrate_idx = sdata->force_unicast_rateidx; - - rateidx = sta->txrate_idx; + rateidx = spinfo->txrate_idx; if (rateidx >= sband->n_bitrates) rateidx = sband->n_bitrates - 1; - sta->last_txrate_idx = rateidx; - - rcu_read_unlock(); - sel->rate_idx = rateidx; #ifdef CONFIG_MAC80211_DEBUGFS - rate_control_pid_event_tx_rate( - &((struct rc_pid_sta_info *) sta->rate_ctrl_priv)->events, + rate_control_pid_event_tx_rate(&spinfo->events, rateidx, sband->bitrates[rateidx].bitrate); #endif } -static void rate_control_pid_rate_init(void *priv, void *priv_sta, - struct ieee80211_local *local, - struct sta_info *sta) +static void +rate_control_pid_rate_init(void *priv, struct ieee80211_supported_band *sband, + struct ieee80211_sta *sta, void *priv_sta) { + struct rc_pid_sta_info *spinfo = priv_sta; + struct sta_info *si; + /* TODO: This routine should consider using RSSI from previous packets * as we need to have IEEE 802.1X auth succeed immediately after assoc.. * Until that method is implemented, we will use the lowest supported * rate as a workaround. */ - struct ieee80211_supported_band *sband; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - sta->txrate_idx = rate_lowest_index(local, sband, sta); - sta->fail_avg = 0; + spinfo->txrate_idx = rate_lowest_index(sband, sta); + /* HACK */ + si = container_of(sta, struct sta_info, sta); + si->fail_avg = 0; } -static void *rate_control_pid_alloc(struct ieee80211_local *local) +static void *rate_control_pid_alloc(struct ieee80211_hw *hw, + struct dentry *debugfsdir) { struct rc_pid_info *pinfo; struct rc_pid_rateinfo *rinfo; @@ -384,7 +333,7 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) struct rc_pid_debugfs_entries *de; #endif - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + sband = hw->wiphy->bands[hw->conf.channel->band]; pinfo = kmalloc(sizeof(*pinfo), GFP_ATOMIC); if (!pinfo) @@ -439,30 +388,28 @@ static void *rate_control_pid_alloc(struct ieee80211_local *local) #ifdef CONFIG_MAC80211_DEBUGFS de = &pinfo->dentries; - de->dir = debugfs_create_dir("rc80211_pid", - local->hw.wiphy->debugfsdir); de->target = debugfs_create_u32("target_pf", S_IRUSR | S_IWUSR, - de->dir, &pinfo->target); + debugfsdir, &pinfo->target); de->sampling_period = debugfs_create_u32("sampling_period", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->sampling_period); de->coeff_p = debugfs_create_u32("coeff_p", S_IRUSR | S_IWUSR, - de->dir, &pinfo->coeff_p); + debugfsdir, &pinfo->coeff_p); de->coeff_i = debugfs_create_u32("coeff_i", S_IRUSR | S_IWUSR, - de->dir, &pinfo->coeff_i); + debugfsdir, &pinfo->coeff_i); de->coeff_d = debugfs_create_u32("coeff_d", S_IRUSR | S_IWUSR, - de->dir, &pinfo->coeff_d); + debugfsdir, &pinfo->coeff_d); de->smoothing_shift = debugfs_create_u32("smoothing_shift", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->smoothing_shift); de->sharpen_factor = debugfs_create_u32("sharpen_factor", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->sharpen_factor); de->sharpen_duration = debugfs_create_u32("sharpen_duration", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->sharpen_duration); de->norm_offset = debugfs_create_u32("norm_offset", - S_IRUSR | S_IWUSR, de->dir, + S_IRUSR | S_IWUSR, debugfsdir, &pinfo->norm_offset); #endif @@ -484,7 +431,6 @@ static void rate_control_pid_free(void *priv) debugfs_remove(de->coeff_p); debugfs_remove(de->sampling_period); debugfs_remove(de->target); - debugfs_remove(de->dir); #endif kfree(pinfo->rinfo); @@ -495,7 +441,8 @@ static void rate_control_pid_clear(void *priv) { } -static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp) +static void *rate_control_pid_alloc_sta(void *priv, struct ieee80211_sta *sta, + gfp_t gfp) { struct rc_pid_sta_info *spinfo; @@ -513,10 +460,10 @@ static void *rate_control_pid_alloc_sta(void *priv, gfp_t gfp) return spinfo; } -static void rate_control_pid_free_sta(void *priv, void *priv_sta) +static void rate_control_pid_free_sta(void *priv, struct ieee80211_sta *sta, + void *priv_sta) { - struct rc_pid_sta_info *spinfo = priv_sta; - kfree(spinfo); + kfree(priv_sta); } static struct rate_control_ops mac80211_rcpid = { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 6db854505193..cf6b121e1bbf 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -143,6 +143,8 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* IEEE80211_RADIOTAP_FLAGS */ if (local->hw.flags & IEEE80211_HW_RX_INCLUDES_FCS) *pos |= IEEE80211_RADIOTAP_F_FCS; + if (status->flag & RX_FLAG_SHORTPRE) + *pos |= IEEE80211_RADIOTAP_F_SHORTPRE; pos++; /* IEEE80211_RADIOTAP_RATE */ @@ -155,8 +157,11 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, if (status->band == IEEE80211_BAND_5GHZ) *(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_OFDM | IEEE80211_CHAN_5GHZ); + else if (rate->flags & IEEE80211_RATE_ERP_G) + *(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_OFDM | + IEEE80211_CHAN_2GHZ); else - *(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_DYN | + *(__le16 *)pos = cpu_to_le16(IEEE80211_CHAN_CCK | IEEE80211_CHAN_2GHZ); pos += 2; @@ -290,7 +295,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (!netif_running(sdata->dev)) continue; - if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR) + if (sdata->vif.type != NL80211_IFTYPE_MONITOR) continue; if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) @@ -398,12 +403,12 @@ ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx) struct ieee80211_local *local = rx->local; struct sk_buff *skb = rx->skb; - if (unlikely(local->sta_hw_scanning)) - return ieee80211_sta_rx_scan(rx->dev, skb, rx->status); + if (unlikely(local->hw_scanning)) + return ieee80211_scan_rx(rx->sdata, skb, rx->status); - if (unlikely(local->sta_sw_scanning)) { + if (unlikely(local->sw_scanning)) { /* drop all the other packets during a software scan anyway */ - if (ieee80211_sta_rx_scan(rx->dev, skb, rx->status) + if (ieee80211_scan_rx(rx->sdata, skb, rx->status) != RX_QUEUED) dev_kfree_skb(skb); return RX_QUEUED; @@ -461,7 +466,7 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) if (ieee80211_is_data(hdr->frame_control) && is_multicast_ether_addr(hdr->addr1) && - mesh_rmc_check(hdr->addr4, msh_h_get(hdr, hdrlen), rx->dev)) + mesh_rmc_check(hdr->addr4, msh_h_get(hdr, hdrlen), rx->sdata)) return RX_DROP_MONITOR; #undef msh_h_get @@ -496,8 +501,8 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) /* Drop disallowed frame classes based on STA auth/assoc state; * IEEE 802.11, Chap 5.5. * - * 80211.o does filtering only based on association state, i.e., it - * drops Class 3 frames from not associated stations. hostapd sends + * mac80211 filters only based on association state, i.e. it drops + * Class 3 frames from not associated stations. hostapd sends * deauth/disassoc frames when needed. In addition, hostapd is * responsible for filtering on both auth and assoc states. */ @@ -507,7 +512,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) if (unlikely((ieee80211_is_data(hdr->frame_control) || ieee80211_is_pspoll(hdr->frame_control)) && - rx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS && + rx->sdata->vif.type != NL80211_IFTYPE_ADHOC && (!rx->sta || !test_sta_flags(rx->sta, WLAN_STA_ASSOC)))) { if ((!ieee80211_has_fromds(hdr->frame_control) && !ieee80211_has_tods(hdr->frame_control) && @@ -645,32 +650,28 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return result; } -static void ap_sta_ps_start(struct net_device *dev, struct sta_info *sta) +static void ap_sta_ps_start(struct sta_info *sta) { - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data *sdata = sta->sdata; DECLARE_MAC_BUF(mac); - sdata = sta->sdata; - atomic_inc(&sdata->bss->num_sta_ps); set_and_clear_sta_flags(sta, WLAN_STA_PS, WLAN_STA_PSPOLL); #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %s aid %d enters power save mode\n", - dev->name, print_mac(mac, sta->addr), sta->aid); + sdata->dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } -static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) +static int ap_sta_ps_end(struct sta_info *sta) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; struct sk_buff *skb; int sent = 0; - struct ieee80211_sub_if_data *sdata; struct ieee80211_tx_info *info; DECLARE_MAC_BUF(mac); - sdata = sta->sdata; - atomic_dec(&sdata->bss->num_sta_ps); clear_sta_flags(sta, WLAN_STA_PS | WLAN_STA_PSPOLL); @@ -680,7 +681,7 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %s aid %d exits power save mode\n", - dev->name, print_mac(mac, sta->addr), sta->aid); + sdata->dev->name, print_mac(mac, sta->sta.addr), sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ /* Send all buffered frames to the station */ @@ -696,8 +697,8 @@ static int ap_sta_ps_end(struct net_device *dev, struct sta_info *sta) sent++; #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "%s: STA %s aid %d send PS frame " - "since STA not sleeping anymore\n", dev->name, - print_mac(mac, sta->addr), sta->aid); + "since STA not sleeping anymore\n", sdata->dev->name, + print_mac(mac, sta->sta.addr), sta->sta.aid); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ info->flags |= IEEE80211_TX_CTL_REQUEUE; dev_queue_xmit(skb); @@ -710,7 +711,6 @@ static ieee80211_rx_result debug_noinline ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) { struct sta_info *sta = rx->sta; - struct net_device *dev = rx->dev; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; if (!sta) @@ -719,14 +719,14 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) /* Update last_rx only for IBSS packets which are for the current * BSSID to avoid keeping the current IBSS network alive in cases where * other STAs are using different BSSID. */ - if (rx->sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (rx->sdata->vif.type == NL80211_IFTYPE_ADHOC) { u8 *bssid = ieee80211_get_bssid(hdr, rx->skb->len, - IEEE80211_IF_TYPE_IBSS); + NL80211_IFTYPE_ADHOC); if (compare_ether_addr(bssid, rx->sdata->u.sta.bssid) == 0) sta->last_rx = jiffies; } else if (!is_multicast_ether_addr(hdr->addr1) || - rx->sdata->vif.type == IEEE80211_IF_TYPE_STA) { + rx->sdata->vif.type == NL80211_IFTYPE_STATION) { /* Update last_rx only for unicast frames in order to prevent * the Probe Request frames (the only broadcast frames from a * STA in infrastructure mode) from keeping a connection alive. @@ -746,16 +746,16 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->last_noise = rx->status->noise; if (!ieee80211_has_morefrags(hdr->frame_control) && - (rx->sdata->vif.type == IEEE80211_IF_TYPE_AP || - rx->sdata->vif.type == IEEE80211_IF_TYPE_VLAN)) { + (rx->sdata->vif.type == NL80211_IFTYPE_AP || + rx->sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) { /* Change STA power saving mode only in the end of a frame * exchange sequence */ if (test_sta_flags(sta, WLAN_STA_PS) && !ieee80211_has_pm(hdr->frame_control)) - rx->sent_ps_buffered += ap_sta_ps_end(dev, sta); + rx->sent_ps_buffered += ap_sta_ps_end(sta); else if (!test_sta_flags(sta, WLAN_STA_PS) && ieee80211_has_pm(hdr->frame_control)) - ap_sta_ps_start(dev, sta); + ap_sta_ps_start(sta); } /* Drop data::nullfunc frames silently, since they are used only to @@ -816,7 +816,7 @@ ieee80211_reassemble_add(struct ieee80211_sub_if_data *sdata, static inline struct ieee80211_fragment_entry * ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, - u16 fc, unsigned int frag, unsigned int seq, + unsigned int frag, unsigned int seq, int rx_queue, struct ieee80211_hdr *hdr) { struct ieee80211_fragment_entry *entry; @@ -825,7 +825,6 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, idx = sdata->fragment_next; for (i = 0; i < IEEE80211_FRAGMENT_MAX; i++) { struct ieee80211_hdr *f_hdr; - u16 f_fc; idx--; if (idx < 0) @@ -837,10 +836,13 @@ ieee80211_reassemble_find(struct ieee80211_sub_if_data *sdata, entry->last_frag + 1 != frag) continue; - f_hdr = (struct ieee80211_hdr *) entry->skb_list.next->data; - f_fc = le16_to_cpu(f_hdr->frame_control); + f_hdr = (struct ieee80211_hdr *)entry->skb_list.next->data; - if ((fc & IEEE80211_FCTL_FTYPE) != (f_fc & IEEE80211_FCTL_FTYPE) || + /* + * Check ftype and addresses are equal, else check next fragment + */ + if (((hdr->frame_control ^ f_hdr->frame_control) & + cpu_to_le16(IEEE80211_FCTL_FTYPE)) || compare_ether_addr(hdr->addr1, f_hdr->addr1) != 0 || compare_ether_addr(hdr->addr2, f_hdr->addr2) != 0) continue; @@ -860,16 +862,18 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr; u16 sc; + __le16 fc; unsigned int frag, seq; struct ieee80211_fragment_entry *entry; struct sk_buff *skb; DECLARE_MAC_BUF(mac); - hdr = (struct ieee80211_hdr *) rx->skb->data; + hdr = (struct ieee80211_hdr *)rx->skb->data; + fc = hdr->frame_control; sc = le16_to_cpu(hdr->seq_ctrl); frag = sc & IEEE80211_SCTL_FRAG; - if (likely((!(rx->fc & IEEE80211_FCTL_MOREFRAGS) && frag == 0) || + if (likely((!ieee80211_has_morefrags(fc) && frag == 0) || (rx->skb)->len < 24 || is_multicast_ether_addr(hdr->addr1))) { /* not fragmented */ @@ -884,7 +888,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) entry = ieee80211_reassemble_add(rx->sdata, frag, seq, rx->queue, &(rx->skb)); if (rx->key && rx->key->conf.alg == ALG_CCMP && - (rx->fc & IEEE80211_FCTL_PROTECTED)) { + ieee80211_has_protected(fc)) { /* Store CCMP PN so that we can verify that the next * fragment has a sequential PN value. */ entry->ccmp = 1; @@ -898,8 +902,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) /* This is a fragment for a frame that should already be pending in * fragment cache. Add this fragment to the end of the pending entry. */ - entry = ieee80211_reassemble_find(rx->sdata, rx->fc, frag, seq, - rx->queue, hdr); + entry = ieee80211_reassemble_find(rx->sdata, frag, seq, rx->queue, hdr); if (!entry) { I802_DEBUG_INC(rx->local->rx_handlers_drop_defrag); return RX_DROP_MONITOR; @@ -924,11 +927,11 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) memcpy(entry->last_pn, pn, CCMP_PN_LEN); } - skb_pull(rx->skb, ieee80211_get_hdrlen(rx->fc)); + skb_pull(rx->skb, ieee80211_hdrlen(fc)); __skb_queue_tail(&entry->skb_list, rx->skb); entry->last_frag = frag; entry->extra_len += rx->skb->len; - if (rx->fc & IEEE80211_FCTL_MOREFRAGS) { + if (ieee80211_has_morefrags(fc)) { rx->skb = NULL; return RX_QUEUED; } @@ -968,15 +971,14 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) struct sk_buff *skb; int no_pending_pkts; DECLARE_MAC_BUF(mac); + __le16 fc = ((struct ieee80211_hdr *)rx->skb->data)->frame_control; - if (likely(!rx->sta || - (rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_CTL || - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PSPOLL || + if (likely(!rx->sta || !ieee80211_is_pspoll(fc) || !(rx->flags & IEEE80211_RX_RA_MATCH))) return RX_CONTINUE; - if ((sdata->vif.type != IEEE80211_IF_TYPE_AP) && - (sdata->vif.type != IEEE80211_IF_TYPE_VLAN)) + if ((sdata->vif.type != NL80211_IFTYPE_AP) && + (sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) return RX_DROP_UNUSABLE; skb = skb_dequeue(&rx->sta->tx_filtered); @@ -1000,7 +1002,7 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "STA %s aid %d: PS Poll (entries after %d)\n", - print_mac(mac, rx->sta->addr), rx->sta->aid, + print_mac(mac, rx->sta->sta.addr), rx->sta->sta.aid, skb_queue_len(&rx->sta->ps_tx_buf)); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ @@ -1025,7 +1027,7 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) */ printk(KERN_DEBUG "%s: STA %s sent PS Poll even " "though there are no buffered frames for it\n", - rx->dev->name, print_mac(mac, rx->sta->addr)); + rx->dev->name, print_mac(mac, rx->sta->sta.addr)); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ } @@ -1050,7 +1052,6 @@ ieee80211_rx_h_remove_qos_control(struct ieee80211_rx_data *rx) ieee80211_hdrlen(hdr->frame_control) - IEEE80211_QOS_CTL_LEN); hdr = (struct ieee80211_hdr *)skb_pull(rx->skb, IEEE80211_QOS_CTL_LEN); /* change frame type to non QOS */ - rx->fc &= ~IEEE80211_STYPE_QOS_DATA; hdr->frame_control &= ~cpu_to_le16(IEEE80211_STYPE_QOS_DATA); return RX_CONTINUE; @@ -1067,7 +1068,7 @@ ieee80211_802_1x_port_control(struct ieee80211_rx_data *rx) } static int -ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx) +ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx, __le16 fc) { /* * Pass through unencrypted frames if the hardware has @@ -1077,9 +1078,8 @@ ieee80211_drop_unencrypted(struct ieee80211_rx_data *rx) return 0; /* Drop unencrypted frames if key is set. */ - if (unlikely(!(rx->fc & IEEE80211_FCTL_PROTECTED) && - (rx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_NULLFUNC && + if (unlikely(!ieee80211_has_protected(fc) && + !ieee80211_is_nullfunc(fc) && (rx->key || rx->sdata->drop_unencrypted))) return -EACCES; @@ -1091,7 +1091,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) { struct net_device *dev = rx->dev; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) rx->skb->data; - u16 fc, hdrlen, ethertype; + u16 hdrlen, ethertype; u8 *payload; u8 dst[ETH_ALEN]; u8 src[ETH_ALEN] __aligned(2); @@ -1102,16 +1102,10 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) DECLARE_MAC_BUF(mac3); DECLARE_MAC_BUF(mac4); - fc = rx->fc; - - if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) + if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) return -1; - hdrlen = ieee80211_get_hdrlen(fc); - - if (ieee80211_vif_is_mesh(&sdata->vif)) - hdrlen += ieee80211_get_mesh_hdrlen( - (struct ieee80211s_hdr *) (skb->data + hdrlen)); + hdrlen = ieee80211_hdrlen(hdr->frame_control); /* convert IEEE 802.11 header + possible LLC headers into Ethernet * header @@ -1122,42 +1116,38 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) * 1 0 BSSID SA DA n/a * 1 1 RA TA DA SA */ - - switch (fc & (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { - case IEEE80211_FCTL_TODS: - /* BSSID SA DA */ - memcpy(dst, hdr->addr3, ETH_ALEN); - memcpy(src, hdr->addr2, ETH_ALEN); - - if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_AP && - sdata->vif.type != IEEE80211_IF_TYPE_VLAN)) + memcpy(dst, ieee80211_get_DA(hdr), ETH_ALEN); + memcpy(src, ieee80211_get_SA(hdr), ETH_ALEN); + + switch (hdr->frame_control & + cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) { + case __constant_cpu_to_le16(IEEE80211_FCTL_TODS): + if (unlikely(sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) return -1; break; - case (IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): - /* RA TA DA SA */ - memcpy(dst, hdr->addr3, ETH_ALEN); - memcpy(src, hdr->addr4, ETH_ALEN); - - if (unlikely(sdata->vif.type != IEEE80211_IF_TYPE_WDS && - sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT)) + case __constant_cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS): + if (unlikely(sdata->vif.type != NL80211_IFTYPE_WDS && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT)) return -1; + if (ieee80211_vif_is_mesh(&sdata->vif)) { + struct ieee80211s_hdr *meshdr = (struct ieee80211s_hdr *) + (skb->data + hdrlen); + hdrlen += ieee80211_get_mesh_hdrlen(meshdr); + if (meshdr->flags & MESH_FLAGS_AE_A5_A6) { + memcpy(dst, meshdr->eaddr1, ETH_ALEN); + memcpy(src, meshdr->eaddr2, ETH_ALEN); + } + } break; - case IEEE80211_FCTL_FROMDS: - /* DA BSSID SA */ - memcpy(dst, hdr->addr1, ETH_ALEN); - memcpy(src, hdr->addr3, ETH_ALEN); - - if (sdata->vif.type != IEEE80211_IF_TYPE_STA || + case __constant_cpu_to_le16(IEEE80211_FCTL_FROMDS): + if (sdata->vif.type != NL80211_IFTYPE_STATION || (is_multicast_ether_addr(dst) && !compare_ether_addr(src, dev->dev_addr))) return -1; break; - case 0: - /* DA SA BSSID */ - memcpy(dst, hdr->addr1, ETH_ALEN); - memcpy(src, hdr->addr2, ETH_ALEN); - - if (sdata->vif.type != IEEE80211_IF_TYPE_IBSS) + case __constant_cpu_to_le16(0): + if (sdata->vif.type != NL80211_IFTYPE_ADHOC) return -1; break; } @@ -1193,7 +1183,7 @@ ieee80211_data_to_8023(struct ieee80211_rx_data *rx) /* * requires that rx->skb is a frame with ethernet header */ -static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx) +static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx, __le16 fc) { static const u8 pae_group_addr[ETH_ALEN] __aligned(2) = { 0x01, 0x80, 0xC2, 0x00, 0x00, 0x03 }; @@ -1209,7 +1199,7 @@ static bool ieee80211_frame_allowed(struct ieee80211_rx_data *rx) return true; if (ieee80211_802_1x_port_control(rx) || - ieee80211_drop_unencrypted(rx)) + ieee80211_drop_unencrypted(rx, fc)) return false; return true; @@ -1231,8 +1221,9 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) skb = rx->skb; xmit_skb = NULL; - if (local->bridge_packets && (sdata->vif.type == IEEE80211_IF_TYPE_AP || - sdata->vif.type == IEEE80211_IF_TYPE_VLAN) && + if ((sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && + !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && (rx->flags & IEEE80211_RX_RA_MATCH)) { if (is_multicast_ether_addr(ehdr->h_dest)) { /* @@ -1279,20 +1270,21 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) { struct net_device *dev = rx->dev; struct ieee80211_local *local = rx->local; - u16 fc, ethertype; + u16 ethertype; u8 *payload; struct sk_buff *skb = rx->skb, *frame = NULL; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + __le16 fc = hdr->frame_control; const struct ethhdr *eth; int remaining, err; u8 dst[ETH_ALEN]; u8 src[ETH_ALEN]; DECLARE_MAC_BUF(mac); - fc = rx->fc; - if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)) + if (unlikely(!ieee80211_is_data(fc))) return RX_CONTINUE; - if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) + if (unlikely(!ieee80211_is_data_present(fc))) return RX_DROP_MONITOR; if (!(rx->flags & IEEE80211_RX_AMSDU)) @@ -1374,7 +1366,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) memcpy(skb_push(frame, ETH_ALEN), dst, ETH_ALEN); } - if (!ieee80211_frame_allowed(rx)) { + if (!ieee80211_frame_allowed(rx, fc)) { if (skb == frame) /* last frame */ return RX_DROP_UNUSABLE; dev_kfree_skb(frame); @@ -1387,7 +1379,8 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) return RX_QUEUED; } -static ieee80211_rx_result debug_noinline +#ifdef CONFIG_MAC80211_MESH +static ieee80211_rx_result ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr; @@ -1406,6 +1399,25 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) /* illegal frame */ return RX_DROP_MONITOR; + if (mesh_hdr->flags & MESH_FLAGS_AE_A5_A6){ + struct ieee80211_sub_if_data *sdata; + struct mesh_path *mppath; + + sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); + rcu_read_lock(); + mppath = mpp_path_lookup(mesh_hdr->eaddr2, sdata); + if (!mppath) { + mpp_path_add(mesh_hdr->eaddr2, hdr->addr4, sdata); + } else { + spin_lock_bh(&mppath->state_lock); + mppath->exp_time = jiffies; + if (compare_ether_addr(mppath->mpp, hdr->addr4) != 0) + memcpy(mppath->mpp, hdr->addr4, ETH_ALEN); + spin_unlock_bh(&mppath->state_lock); + } + rcu_read_unlock(); + } + if (compare_ether_addr(rx->dev->dev_addr, hdr->addr3) == 0) return RX_CONTINUE; @@ -1413,7 +1425,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) if (rx->flags & IEEE80211_RX_RA_MATCH) { if (!mesh_hdr->ttl) - IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.sta, + IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.mesh, dropped_frames_ttl); else { struct ieee80211_hdr *fwd_hdr; @@ -1442,27 +1454,27 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) else return RX_DROP_MONITOR; } - +#endif static ieee80211_rx_result debug_noinline ieee80211_rx_h_data(struct ieee80211_rx_data *rx) { struct net_device *dev = rx->dev; - u16 fc; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + __le16 fc = hdr->frame_control; int err; - fc = rx->fc; - if (unlikely((fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA)) + if (unlikely(!ieee80211_is_data(hdr->frame_control))) return RX_CONTINUE; - if (unlikely(!WLAN_FC_DATA_PRESENT(fc))) + if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) return RX_DROP_MONITOR; err = ieee80211_data_to_8023(rx); if (unlikely(err)) return RX_DROP_UNUSABLE; - if (!ieee80211_frame_allowed(rx)) + if (!ieee80211_frame_allowed(rx, fc)) return RX_DROP_MONITOR; rx->skb->dev = dev; @@ -1520,22 +1532,97 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx) } static ieee80211_rx_result debug_noinline +ieee80211_rx_h_action(struct ieee80211_rx_data *rx) +{ + struct ieee80211_local *local = rx->local; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + int len = rx->skb->len; + + if (!ieee80211_is_action(mgmt->frame_control)) + return RX_CONTINUE; + + if (!rx->sta) + return RX_DROP_MONITOR; + + if (!(rx->flags & IEEE80211_RX_RA_MATCH)) + return RX_DROP_MONITOR; + + /* all categories we currently handle have action_code */ + if (len < IEEE80211_MIN_ACTION_SIZE + 1) + return RX_DROP_MONITOR; + + /* + * FIXME: revisit this, I'm sure we should handle most + * of these frames in other modes as well! + */ + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) + return RX_CONTINUE; + + switch (mgmt->u.action.category) { + case WLAN_CATEGORY_BACK: + switch (mgmt->u.action.u.addba_req.action_code) { + case WLAN_ACTION_ADDBA_REQ: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.addba_req))) + return RX_DROP_MONITOR; + ieee80211_process_addba_request(local, rx->sta, mgmt, len); + break; + case WLAN_ACTION_ADDBA_RESP: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.addba_resp))) + return RX_DROP_MONITOR; + ieee80211_process_addba_resp(local, rx->sta, mgmt, len); + break; + case WLAN_ACTION_DELBA: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.delba))) + return RX_DROP_MONITOR; + ieee80211_process_delba(sdata, rx->sta, mgmt, len); + break; + } + break; + case WLAN_CATEGORY_SPECTRUM_MGMT: + if (local->hw.conf.channel->band != IEEE80211_BAND_5GHZ) + return RX_DROP_MONITOR; + switch (mgmt->u.action.u.measurement.action_code) { + case WLAN_ACTION_SPCT_MSR_REQ: + if (len < (IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.measurement))) + return RX_DROP_MONITOR; + ieee80211_process_measurement_req(sdata, mgmt, len); + break; + } + break; + default: + return RX_CONTINUE; + } + + rx->sta->rx_packets++; + dev_kfree_skb(rx->skb); + return RX_QUEUED; +} + +static ieee80211_rx_result debug_noinline ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) { - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_MONITOR; - sdata = IEEE80211_DEV_TO_SUB_IF(rx->dev); - if ((sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS || - sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT) && - !(sdata->flags & IEEE80211_SDATA_USERSPACE_MLME)) - ieee80211_sta_rx_mgmt(rx->dev, rx->skb, rx->status); - else + if (ieee80211_vif_is_mesh(&sdata->vif)) + return ieee80211_mesh_rx_mgmt(sdata, rx->skb, rx->status); + + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) return RX_DROP_MONITOR; + if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) + return RX_DROP_MONITOR; + + ieee80211_sta_rx_mgmt(sdata, rx->skb, rx->status); return RX_QUEUED; } @@ -1565,7 +1652,7 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev, if (!ieee80211_has_protected(hdr->frame_control)) goto ignore; - if (rx->sdata->vif.type == IEEE80211_IF_TYPE_AP && keyidx) { + if (rx->sdata->vif.type == NL80211_IFTYPE_AP && keyidx) { /* * APs with pairwise keys should never receive Michael MIC * errors for non-zero keyidx because these are reserved for @@ -1579,7 +1666,7 @@ static void ieee80211_rx_michael_mic_report(struct net_device *dev, !ieee80211_is_auth(hdr->frame_control)) goto ignore; - mac80211_ev_michael_mic_failure(rx->dev, keyidx, hdr); + mac80211_ev_michael_mic_failure(rx->sdata, keyidx, hdr); ignore: dev_kfree_skb(rx->skb); rx->skb = NULL; @@ -1635,7 +1722,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx) if (!netif_running(sdata->dev)) continue; - if (sdata->vif.type != IEEE80211_IF_TYPE_MNTR || + if (sdata->vif.type != NL80211_IFTYPE_MONITOR || !(sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES)) continue; @@ -1694,10 +1781,13 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_sub_if_data *sdata, /* must be after MMIC verify so header is counted in MPDU mic */ CALL_RXH(ieee80211_rx_h_remove_qos_control) CALL_RXH(ieee80211_rx_h_amsdu) +#ifdef CONFIG_MAC80211_MESH if (ieee80211_vif_is_mesh(&sdata->vif)) CALL_RXH(ieee80211_rx_h_mesh_fwding); +#endif CALL_RXH(ieee80211_rx_h_data) CALL_RXH(ieee80211_rx_h_ctrl) + CALL_RXH(ieee80211_rx_h_action) CALL_RXH(ieee80211_rx_h_mgmt) #undef CALL_RXH @@ -1733,7 +1823,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, int multicast = is_multicast_ether_addr(hdr->addr1); switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: if (!bssid) return 0; if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { @@ -1748,14 +1838,10 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, rx->flags &= ~IEEE80211_RX_RA_MATCH; } break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: if (!bssid) return 0; if (ieee80211_is_beacon(hdr->frame_control)) { - if (!rx->sta) - rx->sta = ieee80211_ibss_add_sta(sdata->dev, - rx->skb, bssid, hdr->addr2, - BIT(rx->status->rate_idx)); return 1; } else if (!ieee80211_bssid_match(bssid, sdata->u.sta.bssid)) { @@ -1769,11 +1855,11 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, return 0; rx->flags &= ~IEEE80211_RX_RA_MATCH; } else if (!rx->sta) - rx->sta = ieee80211_ibss_add_sta(sdata->dev, rx->skb, + rx->sta = ieee80211_ibss_add_sta(sdata, rx->skb, bssid, hdr->addr2, BIT(rx->status->rate_idx)); break; - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_MESH_POINT: if (!multicast && compare_ether_addr(sdata->dev->dev_addr, hdr->addr1) != 0) { @@ -1783,8 +1869,8 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, rx->flags &= ~IEEE80211_RX_RA_MATCH; } break; - case IEEE80211_IF_TYPE_VLAN: - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_AP: if (!bssid) { if (compare_ether_addr(sdata->dev->dev_addr, hdr->addr1)) @@ -1796,16 +1882,17 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, rx->flags &= ~IEEE80211_RX_RA_MATCH; } break; - case IEEE80211_IF_TYPE_WDS: + case NL80211_IFTYPE_WDS: if (bssid || !ieee80211_is_data(hdr->frame_control)) return 0; if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2)) return 0; break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: /* take everything */ break; - case IEEE80211_IF_TYPE_INVALID: + case NL80211_IFTYPE_UNSPECIFIED: + case __NL80211_IFTYPE_AFTER_LAST: /* should never get here */ WARN_ON(1); break; @@ -1827,23 +1914,20 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, struct ieee80211_sub_if_data *sdata; struct ieee80211_hdr *hdr; struct ieee80211_rx_data rx; - u16 type; int prepares; struct ieee80211_sub_if_data *prev = NULL; struct sk_buff *skb_new; u8 *bssid; - hdr = (struct ieee80211_hdr *) skb->data; + hdr = (struct ieee80211_hdr *)skb->data; memset(&rx, 0, sizeof(rx)); rx.skb = skb; rx.local = local; rx.status = status; rx.rate = rate; - rx.fc = le16_to_cpu(hdr->frame_control); - type = rx.fc & IEEE80211_FCTL_FTYPE; - if (type == IEEE80211_FTYPE_DATA || type == IEEE80211_FTYPE_MGMT) + if (ieee80211_is_data(hdr->frame_control) || ieee80211_is_mgmt(hdr->frame_control)) local->dot11ReceivedFragmentCount++; rx.sta = sta_info_get(local, hdr->addr2); @@ -1857,7 +1941,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, return; } - if (unlikely(local->sta_sw_scanning || local->sta_hw_scanning)) + if (unlikely(local->sw_scanning || local->hw_scanning)) rx.flags |= IEEE80211_RX_IN_SCAN; ieee80211_parse_qos(&rx); @@ -1869,7 +1953,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (!netif_running(sdata->dev)) continue; - if (sdata->vif.type == IEEE80211_IF_TYPE_MNTR) + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) continue; bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type); @@ -1904,14 +1988,12 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, prev->dev->name); continue; } - rx.fc = le16_to_cpu(hdr->frame_control); ieee80211_invoke_rx_handlers(prev, &rx, skb_new); prev = sdata; } - if (prev) { - rx.fc = le16_to_cpu(hdr->frame_control); + if (prev) ieee80211_invoke_rx_handlers(prev, &rx, skb); - } else + else dev_kfree_skb(skb); } @@ -2080,7 +2162,7 @@ static u8 ieee80211_rx_reorder_ampdu(struct ieee80211_local *local, /* if this mpdu is fragmented - terminate rx aggregation session */ sc = le16_to_cpu(hdr->seq_ctrl); if (sc & IEEE80211_SCTL_FRAG) { - ieee80211_sta_stop_rx_ba_session(sta->sdata->dev, sta->addr, + ieee80211_sta_stop_rx_ba_session(sta->sdata, sta->sta.addr, tid, 0, WLAN_REASON_QSTA_REQUIRE_SETUP); ret = 1; goto end_reorder; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c new file mode 100644 index 000000000000..416bb41099f3 --- /dev/null +++ b/net/mac80211/scan.c @@ -0,0 +1,938 @@ +/* + * Scanning implementation + * + * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi> + * Copyright 2004, Instant802 Networks, Inc. + * Copyright 2005, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007, Michael Wu <flamingice@sourmilk.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* TODO: + * order BSS list by RSSI(?) ("quality of AP") + * scan result table filtering (by capability (privacy, IBSS/BSS, WPA/RSN IE, + * SSID) + */ + +#include <linux/wireless.h> +#include <linux/if_arp.h> +#include <net/mac80211.h> +#include <net/iw_handler.h> + +#include "ieee80211_i.h" +#include "mesh.h" + +#define IEEE80211_PROBE_DELAY (HZ / 33) +#define IEEE80211_CHANNEL_TIME (HZ / 33) +#define IEEE80211_PASSIVE_CHANNEL_TIME (HZ / 5) + +void ieee80211_rx_bss_list_init(struct ieee80211_local *local) +{ + spin_lock_init(&local->bss_lock); + INIT_LIST_HEAD(&local->bss_list); +} + +void ieee80211_rx_bss_list_deinit(struct ieee80211_local *local) +{ + struct ieee80211_bss *bss, *tmp; + + list_for_each_entry_safe(bss, tmp, &local->bss_list, list) + ieee80211_rx_bss_put(local, bss); +} + +struct ieee80211_bss * +ieee80211_rx_bss_get(struct ieee80211_local *local, u8 *bssid, int freq, + u8 *ssid, u8 ssid_len) +{ + struct ieee80211_bss *bss; + + spin_lock_bh(&local->bss_lock); + bss = local->bss_hash[STA_HASH(bssid)]; + while (bss) { + if (!bss_mesh_cfg(bss) && + !memcmp(bss->bssid, bssid, ETH_ALEN) && + bss->freq == freq && + bss->ssid_len == ssid_len && + (ssid_len == 0 || !memcmp(bss->ssid, ssid, ssid_len))) { + atomic_inc(&bss->users); + break; + } + bss = bss->hnext; + } + spin_unlock_bh(&local->bss_lock); + return bss; +} + +/* Caller must hold local->bss_lock */ +static void __ieee80211_rx_bss_hash_add(struct ieee80211_local *local, + struct ieee80211_bss *bss) +{ + u8 hash_idx; + + if (bss_mesh_cfg(bss)) + hash_idx = mesh_id_hash(bss_mesh_id(bss), + bss_mesh_id_len(bss)); + else + hash_idx = STA_HASH(bss->bssid); + + bss->hnext = local->bss_hash[hash_idx]; + local->bss_hash[hash_idx] = bss; +} + +/* Caller must hold local->bss_lock */ +static void __ieee80211_rx_bss_hash_del(struct ieee80211_local *local, + struct ieee80211_bss *bss) +{ + struct ieee80211_bss *b, *prev = NULL; + b = local->bss_hash[STA_HASH(bss->bssid)]; + while (b) { + if (b == bss) { + if (!prev) + local->bss_hash[STA_HASH(bss->bssid)] = + bss->hnext; + else + prev->hnext = bss->hnext; + break; + } + prev = b; + b = b->hnext; + } +} + +struct ieee80211_bss * +ieee80211_rx_bss_add(struct ieee80211_local *local, u8 *bssid, int freq, + u8 *ssid, u8 ssid_len) +{ + struct ieee80211_bss *bss; + + bss = kzalloc(sizeof(*bss), GFP_ATOMIC); + if (!bss) + return NULL; + atomic_set(&bss->users, 2); + memcpy(bss->bssid, bssid, ETH_ALEN); + bss->freq = freq; + if (ssid && ssid_len <= IEEE80211_MAX_SSID_LEN) { + memcpy(bss->ssid, ssid, ssid_len); + bss->ssid_len = ssid_len; + } + + spin_lock_bh(&local->bss_lock); + /* TODO: order by RSSI? */ + list_add_tail(&bss->list, &local->bss_list); + __ieee80211_rx_bss_hash_add(local, bss); + spin_unlock_bh(&local->bss_lock); + return bss; +} + +#ifdef CONFIG_MAC80211_MESH +static struct ieee80211_bss * +ieee80211_rx_mesh_bss_get(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len, + u8 *mesh_cfg, int freq) +{ + struct ieee80211_bss *bss; + + spin_lock_bh(&local->bss_lock); + bss = local->bss_hash[mesh_id_hash(mesh_id, mesh_id_len)]; + while (bss) { + if (bss_mesh_cfg(bss) && + !memcmp(bss_mesh_cfg(bss), mesh_cfg, MESH_CFG_CMP_LEN) && + bss->freq == freq && + mesh_id_len == bss->mesh_id_len && + (mesh_id_len == 0 || !memcmp(bss->mesh_id, mesh_id, + mesh_id_len))) { + atomic_inc(&bss->users); + break; + } + bss = bss->hnext; + } + spin_unlock_bh(&local->bss_lock); + return bss; +} + +static struct ieee80211_bss * +ieee80211_rx_mesh_bss_add(struct ieee80211_local *local, u8 *mesh_id, int mesh_id_len, + u8 *mesh_cfg, int mesh_config_len, int freq) +{ + struct ieee80211_bss *bss; + + if (mesh_config_len != MESH_CFG_LEN) + return NULL; + + bss = kzalloc(sizeof(*bss), GFP_ATOMIC); + if (!bss) + return NULL; + + bss->mesh_cfg = kmalloc(MESH_CFG_CMP_LEN, GFP_ATOMIC); + if (!bss->mesh_cfg) { + kfree(bss); + return NULL; + } + + if (mesh_id_len && mesh_id_len <= IEEE80211_MAX_MESH_ID_LEN) { + bss->mesh_id = kmalloc(mesh_id_len, GFP_ATOMIC); + if (!bss->mesh_id) { + kfree(bss->mesh_cfg); + kfree(bss); + return NULL; + } + memcpy(bss->mesh_id, mesh_id, mesh_id_len); + } + + atomic_set(&bss->users, 2); + memcpy(bss->mesh_cfg, mesh_cfg, MESH_CFG_CMP_LEN); + bss->mesh_id_len = mesh_id_len; + bss->freq = freq; + spin_lock_bh(&local->bss_lock); + /* TODO: order by RSSI? */ + list_add_tail(&bss->list, &local->bss_list); + __ieee80211_rx_bss_hash_add(local, bss); + spin_unlock_bh(&local->bss_lock); + return bss; +} +#endif + +static void ieee80211_rx_bss_free(struct ieee80211_bss *bss) +{ + kfree(bss->ies); + kfree(bss_mesh_id(bss)); + kfree(bss_mesh_cfg(bss)); + kfree(bss); +} + +void ieee80211_rx_bss_put(struct ieee80211_local *local, + struct ieee80211_bss *bss) +{ + local_bh_disable(); + if (!atomic_dec_and_lock(&bss->users, &local->bss_lock)) { + local_bh_enable(); + return; + } + + __ieee80211_rx_bss_hash_del(local, bss); + list_del(&bss->list); + spin_unlock_bh(&local->bss_lock); + ieee80211_rx_bss_free(bss); +} + +struct ieee80211_bss * +ieee80211_bss_info_update(struct ieee80211_local *local, + struct ieee80211_rx_status *rx_status, + struct ieee80211_mgmt *mgmt, + size_t len, + struct ieee802_11_elems *elems, + int freq, bool beacon) +{ + struct ieee80211_bss *bss; + int clen; + +#ifdef CONFIG_MAC80211_MESH + if (elems->mesh_config) + bss = ieee80211_rx_mesh_bss_get(local, elems->mesh_id, + elems->mesh_id_len, elems->mesh_config, freq); + else +#endif + bss = ieee80211_rx_bss_get(local, mgmt->bssid, freq, + elems->ssid, elems->ssid_len); + if (!bss) { +#ifdef CONFIG_MAC80211_MESH + if (elems->mesh_config) + bss = ieee80211_rx_mesh_bss_add(local, elems->mesh_id, + elems->mesh_id_len, elems->mesh_config, + elems->mesh_config_len, freq); + else +#endif + bss = ieee80211_rx_bss_add(local, mgmt->bssid, freq, + elems->ssid, elems->ssid_len); + if (!bss) + return NULL; + } else { +#if 0 + /* TODO: order by RSSI? */ + spin_lock_bh(&local->bss_lock); + list_move_tail(&bss->list, &local->bss_list); + spin_unlock_bh(&local->bss_lock); +#endif + } + + /* save the ERP value so that it is available at association time */ + if (elems->erp_info && elems->erp_info_len >= 1) { + bss->erp_value = elems->erp_info[0]; + bss->has_erp_value = 1; + } + + bss->beacon_int = le16_to_cpu(mgmt->u.beacon.beacon_int); + bss->capability = le16_to_cpu(mgmt->u.beacon.capab_info); + + if (elems->tim) { + struct ieee80211_tim_ie *tim_ie = + (struct ieee80211_tim_ie *)elems->tim; + bss->dtim_period = tim_ie->dtim_period; + } + + /* set default value for buggy APs */ + if (!elems->tim || bss->dtim_period == 0) + bss->dtim_period = 1; + + bss->supp_rates_len = 0; + if (elems->supp_rates) { + clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; + if (clen > elems->supp_rates_len) + clen = elems->supp_rates_len; + memcpy(&bss->supp_rates[bss->supp_rates_len], elems->supp_rates, + clen); + bss->supp_rates_len += clen; + } + if (elems->ext_supp_rates) { + clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; + if (clen > elems->ext_supp_rates_len) + clen = elems->ext_supp_rates_len; + memcpy(&bss->supp_rates[bss->supp_rates_len], + elems->ext_supp_rates, clen); + bss->supp_rates_len += clen; + } + + bss->band = rx_status->band; + + bss->timestamp = le64_to_cpu(mgmt->u.beacon.timestamp); + bss->last_update = jiffies; + bss->signal = rx_status->signal; + bss->noise = rx_status->noise; + bss->qual = rx_status->qual; + bss->wmm_used = elems->wmm_param || elems->wmm_info; + + if (!beacon) + bss->last_probe_resp = jiffies; + + /* + * For probe responses, or if we don't have any information yet, + * use the IEs from the beacon. + */ + if (!bss->ies || !beacon) { + if (bss->ies == NULL || bss->ies_len < elems->total_len) { + kfree(bss->ies); + bss->ies = kmalloc(elems->total_len, GFP_ATOMIC); + } + if (bss->ies) { + memcpy(bss->ies, elems->ie_start, elems->total_len); + bss->ies_len = elems->total_len; + } else + bss->ies_len = 0; + } + + return bss; +} + +ieee80211_rx_result +ieee80211_scan_rx(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, + struct ieee80211_rx_status *rx_status) +{ + struct ieee80211_mgmt *mgmt; + struct ieee80211_bss *bss; + u8 *elements; + struct ieee80211_channel *channel; + size_t baselen; + int freq; + __le16 fc; + bool presp, beacon = false; + struct ieee802_11_elems elems; + + if (skb->len < 2) + return RX_DROP_UNUSABLE; + + mgmt = (struct ieee80211_mgmt *) skb->data; + fc = mgmt->frame_control; + + if (ieee80211_is_ctl(fc)) + return RX_CONTINUE; + + if (skb->len < 24) + return RX_DROP_MONITOR; + + presp = ieee80211_is_probe_resp(fc); + if (presp) { + /* ignore ProbeResp to foreign address */ + if (memcmp(mgmt->da, sdata->dev->dev_addr, ETH_ALEN)) + return RX_DROP_MONITOR; + + presp = true; + elements = mgmt->u.probe_resp.variable; + baselen = offsetof(struct ieee80211_mgmt, u.probe_resp.variable); + } else { + beacon = ieee80211_is_beacon(fc); + baselen = offsetof(struct ieee80211_mgmt, u.beacon.variable); + elements = mgmt->u.beacon.variable; + } + + if (!presp && !beacon) + return RX_CONTINUE; + + if (baselen > skb->len) + return RX_DROP_MONITOR; + + ieee802_11_parse_elems(elements, skb->len - baselen, &elems); + + if (elems.ds_params && elems.ds_params_len == 1) + freq = ieee80211_channel_to_frequency(elems.ds_params[0]); + else + freq = rx_status->freq; + + channel = ieee80211_get_channel(sdata->local->hw.wiphy, freq); + + if (!channel || channel->flags & IEEE80211_CHAN_DISABLED) + return RX_DROP_MONITOR; + + bss = ieee80211_bss_info_update(sdata->local, rx_status, + mgmt, skb->len, &elems, + freq, beacon); + if (bss) + ieee80211_rx_bss_put(sdata->local, bss); + + dev_kfree_skb(skb); + return RX_QUEUED; +} + +static void ieee80211_send_nullfunc(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + int powersave) +{ + struct sk_buff *skb; + struct ieee80211_hdr *nullfunc; + __le16 fc; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + 24); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " + "frame\n", sdata->dev->name); + return; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + nullfunc = (struct ieee80211_hdr *) skb_put(skb, 24); + memset(nullfunc, 0, 24); + fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | + IEEE80211_FCTL_TODS); + if (powersave) + fc |= cpu_to_le16(IEEE80211_FCTL_PM); + nullfunc->frame_control = fc; + memcpy(nullfunc->addr1, sdata->u.sta.bssid, ETH_ALEN); + memcpy(nullfunc->addr2, sdata->dev->dev_addr, ETH_ALEN); + memcpy(nullfunc->addr3, sdata->u.sta.bssid, ETH_ALEN); + + ieee80211_tx_skb(sdata, skb, 0); +} + +void ieee80211_scan_completed(struct ieee80211_hw *hw) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct ieee80211_sub_if_data *sdata; + union iwreq_data wrqu; + + if (WARN_ON(!local->hw_scanning && !local->sw_scanning)) + return; + + local->last_scan_completed = jiffies; + memset(&wrqu, 0, sizeof(wrqu)); + + /* + * local->scan_sdata could have been NULLed by the interface + * down code in case we were scanning on an interface that is + * being taken down. + */ + sdata = local->scan_sdata; + if (sdata) + wireless_send_event(sdata->dev, SIOCGIWSCAN, &wrqu, NULL); + + if (local->hw_scanning) { + local->hw_scanning = false; + if (ieee80211_hw_config(local)) + printk(KERN_DEBUG "%s: failed to restore operational " + "channel after scan\n", wiphy_name(local->hw.wiphy)); + + goto done; + } + + local->sw_scanning = false; + if (ieee80211_hw_config(local)) + printk(KERN_DEBUG "%s: failed to restore operational " + "channel after scan\n", wiphy_name(local->hw.wiphy)); + + + netif_tx_lock_bh(local->mdev); + netif_addr_lock(local->mdev); + local->filter_flags &= ~FIF_BCN_PRBRESP_PROMISC; + local->ops->configure_filter(local_to_hw(local), + FIF_BCN_PRBRESP_PROMISC, + &local->filter_flags, + local->mdev->mc_count, + local->mdev->mc_list); + + netif_addr_unlock(local->mdev); + netif_tx_unlock_bh(local->mdev); + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + /* Tell AP we're back */ + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { + ieee80211_send_nullfunc(local, sdata, 0); + netif_tx_wake_all_queues(sdata->dev); + } + } else + netif_tx_wake_all_queues(sdata->dev); + } + rcu_read_unlock(); + + done: + ieee80211_mlme_notify_scan_completed(local); + ieee80211_mesh_notify_scan_completed(local); +} +EXPORT_SYMBOL(ieee80211_scan_completed); + + +void ieee80211_scan_work(struct work_struct *work) +{ + struct ieee80211_local *local = + container_of(work, struct ieee80211_local, scan_work.work); + struct ieee80211_sub_if_data *sdata = local->scan_sdata; + struct ieee80211_supported_band *sband; + struct ieee80211_channel *chan; + int skip; + unsigned long next_delay = 0; + + /* + * Avoid re-scheduling when the sdata is going away. + */ + if (!netif_running(sdata->dev)) + return; + + switch (local->scan_state) { + case SCAN_SET_CHANNEL: + /* + * Get current scan band. scan_band may be IEEE80211_NUM_BANDS + * after we successfully scanned the last channel of the last + * band (and the last band is supported by the hw) + */ + if (local->scan_band < IEEE80211_NUM_BANDS) + sband = local->hw.wiphy->bands[local->scan_band]; + else + sband = NULL; + + /* + * If we are at an unsupported band and have more bands + * left to scan, advance to the next supported one. + */ + while (!sband && local->scan_band < IEEE80211_NUM_BANDS - 1) { + local->scan_band++; + sband = local->hw.wiphy->bands[local->scan_band]; + local->scan_channel_idx = 0; + } + + /* if no more bands/channels left, complete scan */ + if (!sband || local->scan_channel_idx >= sband->n_channels) { + ieee80211_scan_completed(local_to_hw(local)); + return; + } + skip = 0; + chan = &sband->channels[local->scan_channel_idx]; + + if (chan->flags & IEEE80211_CHAN_DISABLED || + (sdata->vif.type == NL80211_IFTYPE_ADHOC && + chan->flags & IEEE80211_CHAN_NO_IBSS)) + skip = 1; + + if (!skip) { + local->scan_channel = chan; + if (ieee80211_hw_config(local)) { + printk(KERN_DEBUG "%s: failed to set freq to " + "%d MHz for scan\n", wiphy_name(local->hw.wiphy), + chan->center_freq); + skip = 1; + } + } + + /* advance state machine to next channel/band */ + local->scan_channel_idx++; + if (local->scan_channel_idx >= sband->n_channels) { + /* + * scan_band may end up == IEEE80211_NUM_BANDS, but + * we'll catch that case above and complete the scan + * if that is the case. + */ + local->scan_band++; + local->scan_channel_idx = 0; + } + + if (skip) + break; + + next_delay = IEEE80211_PROBE_DELAY + + usecs_to_jiffies(local->hw.channel_change_time); + local->scan_state = SCAN_SEND_PROBE; + break; + case SCAN_SEND_PROBE: + next_delay = IEEE80211_PASSIVE_CHANNEL_TIME; + local->scan_state = SCAN_SET_CHANNEL; + + if (local->scan_channel->flags & IEEE80211_CHAN_PASSIVE_SCAN) + break; + ieee80211_send_probe_req(sdata, NULL, local->scan_ssid, + local->scan_ssid_len); + next_delay = IEEE80211_CHANNEL_TIME; + break; + } + + queue_delayed_work(local->hw.workqueue, &local->scan_work, + next_delay); +} + + +int ieee80211_start_scan(struct ieee80211_sub_if_data *scan_sdata, + u8 *ssid, size_t ssid_len) +{ + struct ieee80211_local *local = scan_sdata->local; + struct ieee80211_sub_if_data *sdata; + + if (ssid_len > IEEE80211_MAX_SSID_LEN) + return -EINVAL; + + /* MLME-SCAN.request (page 118) page 144 (11.1.3.1) + * BSSType: INFRASTRUCTURE, INDEPENDENT, ANY_BSS + * BSSID: MACAddress + * SSID + * ScanType: ACTIVE, PASSIVE + * ProbeDelay: delay (in microseconds) to be used prior to transmitting + * a Probe frame during active scanning + * ChannelList + * MinChannelTime (>= ProbeDelay), in TU + * MaxChannelTime: (>= MinChannelTime), in TU + */ + + /* MLME-SCAN.confirm + * BSSDescriptionSet + * ResultCode: SUCCESS, INVALID_PARAMETERS + */ + + if (local->sw_scanning || local->hw_scanning) { + if (local->scan_sdata == scan_sdata) + return 0; + return -EBUSY; + } + + if (local->ops->hw_scan) { + int rc; + + local->hw_scanning = true; + rc = local->ops->hw_scan(local_to_hw(local), ssid, ssid_len); + if (rc) { + local->hw_scanning = false; + return rc; + } + local->scan_sdata = scan_sdata; + return 0; + } + + local->sw_scanning = true; + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, list) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) { + if (sdata->u.sta.flags & IEEE80211_STA_ASSOCIATED) { + netif_tx_stop_all_queues(sdata->dev); + ieee80211_send_nullfunc(local, sdata, 1); + } + } else + netif_tx_stop_all_queues(sdata->dev); + } + rcu_read_unlock(); + + if (ssid) { + local->scan_ssid_len = ssid_len; + memcpy(local->scan_ssid, ssid, ssid_len); + } else + local->scan_ssid_len = 0; + local->scan_state = SCAN_SET_CHANNEL; + local->scan_channel_idx = 0; + local->scan_band = IEEE80211_BAND_2GHZ; + local->scan_sdata = scan_sdata; + + netif_addr_lock_bh(local->mdev); + local->filter_flags |= FIF_BCN_PRBRESP_PROMISC; + local->ops->configure_filter(local_to_hw(local), + FIF_BCN_PRBRESP_PROMISC, + &local->filter_flags, + local->mdev->mc_count, + local->mdev->mc_list); + netif_addr_unlock_bh(local->mdev); + + /* TODO: start scan as soon as all nullfunc frames are ACKed */ + queue_delayed_work(local->hw.workqueue, &local->scan_work, + IEEE80211_CHANNEL_TIME); + + return 0; +} + + +int ieee80211_request_scan(struct ieee80211_sub_if_data *sdata, + u8 *ssid, size_t ssid_len) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_if_sta *ifsta; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return ieee80211_start_scan(sdata, ssid, ssid_len); + + /* + * STA has a state machine that might need to defer scanning + * while it's trying to associate/authenticate, therefore we + * queue it up to the state machine in that case. + */ + + if (local->sw_scanning || local->hw_scanning) { + if (local->scan_sdata == sdata) + return 0; + return -EBUSY; + } + + ifsta = &sdata->u.sta; + + ifsta->scan_ssid_len = ssid_len; + if (ssid_len) + memcpy(ifsta->scan_ssid, ssid, ssid_len); + set_bit(IEEE80211_STA_REQ_SCAN, &ifsta->request); + queue_work(local->hw.workqueue, &ifsta->work); + + return 0; +} + + +static void ieee80211_scan_add_ies(struct iw_request_info *info, + struct ieee80211_bss *bss, + char **current_ev, char *end_buf) +{ + u8 *pos, *end, *next; + struct iw_event iwe; + + if (bss == NULL || bss->ies == NULL) + return; + + /* + * If needed, fragment the IEs buffer (at IE boundaries) into short + * enough fragments to fit into IW_GENERIC_IE_MAX octet messages. + */ + pos = bss->ies; + end = pos + bss->ies_len; + + while (end - pos > IW_GENERIC_IE_MAX) { + next = pos + 2 + pos[1]; + while (next + 2 + next[1] - pos < IW_GENERIC_IE_MAX) + next = next + 2 + next[1]; + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVGENIE; + iwe.u.data.length = next - pos; + *current_ev = iwe_stream_add_point(info, *current_ev, + end_buf, &iwe, pos); + + pos = next; + } + + if (end > pos) { + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVGENIE; + iwe.u.data.length = end - pos; + *current_ev = iwe_stream_add_point(info, *current_ev, + end_buf, &iwe, pos); + } +} + + +static char * +ieee80211_scan_result(struct ieee80211_local *local, + struct iw_request_info *info, + struct ieee80211_bss *bss, + char *current_ev, char *end_buf) +{ + struct iw_event iwe; + char *buf; + + if (time_after(jiffies, + bss->last_update + IEEE80211_SCAN_RESULT_EXPIRE)) + return current_ev; + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWAP; + iwe.u.ap_addr.sa_family = ARPHRD_ETHER; + memcpy(iwe.u.ap_addr.sa_data, bss->bssid, ETH_ALEN); + current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, + IW_EV_ADDR_LEN); + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWESSID; + if (bss_mesh_cfg(bss)) { + iwe.u.data.length = bss_mesh_id_len(bss); + iwe.u.data.flags = 1; + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, bss_mesh_id(bss)); + } else { + iwe.u.data.length = bss->ssid_len; + iwe.u.data.flags = 1; + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, bss->ssid); + } + + if (bss->capability & (WLAN_CAPABILITY_ESS | WLAN_CAPABILITY_IBSS) + || bss_mesh_cfg(bss)) { + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWMODE; + if (bss_mesh_cfg(bss)) + iwe.u.mode = IW_MODE_MESH; + else if (bss->capability & WLAN_CAPABILITY_ESS) + iwe.u.mode = IW_MODE_MASTER; + else + iwe.u.mode = IW_MODE_ADHOC; + current_ev = iwe_stream_add_event(info, current_ev, end_buf, + &iwe, IW_EV_UINT_LEN); + } + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWFREQ; + iwe.u.freq.m = ieee80211_frequency_to_channel(bss->freq); + iwe.u.freq.e = 0; + current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, + IW_EV_FREQ_LEN); + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWFREQ; + iwe.u.freq.m = bss->freq; + iwe.u.freq.e = 6; + current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, + IW_EV_FREQ_LEN); + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVQUAL; + iwe.u.qual.qual = bss->qual; + iwe.u.qual.level = bss->signal; + iwe.u.qual.noise = bss->noise; + iwe.u.qual.updated = local->wstats_flags; + current_ev = iwe_stream_add_event(info, current_ev, end_buf, &iwe, + IW_EV_QUAL_LEN); + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWENCODE; + if (bss->capability & WLAN_CAPABILITY_PRIVACY) + iwe.u.data.flags = IW_ENCODE_ENABLED | IW_ENCODE_NOKEY; + else + iwe.u.data.flags = IW_ENCODE_DISABLED; + iwe.u.data.length = 0; + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, ""); + + ieee80211_scan_add_ies(info, bss, ¤t_ev, end_buf); + + if (bss->supp_rates_len > 0) { + /* display all supported rates in readable format */ + char *p = current_ev + iwe_stream_lcp_len(info); + int i; + + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = SIOCGIWRATE; + /* Those two flags are ignored... */ + iwe.u.bitrate.fixed = iwe.u.bitrate.disabled = 0; + + for (i = 0; i < bss->supp_rates_len; i++) { + iwe.u.bitrate.value = ((bss->supp_rates[i] & + 0x7f) * 500000); + p = iwe_stream_add_value(info, current_ev, p, + end_buf, &iwe, IW_EV_PARAM_LEN); + } + current_ev = p; + } + + buf = kmalloc(30, GFP_ATOMIC); + if (buf) { + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVCUSTOM; + sprintf(buf, "tsf=%016llx", (unsigned long long)(bss->timestamp)); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, end_buf, + &iwe, buf); + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVCUSTOM; + sprintf(buf, " Last beacon: %dms ago", + jiffies_to_msecs(jiffies - bss->last_update)); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, &iwe, buf); + kfree(buf); + } + + if (bss_mesh_cfg(bss)) { + u8 *cfg = bss_mesh_cfg(bss); + buf = kmalloc(50, GFP_ATOMIC); + if (buf) { + memset(&iwe, 0, sizeof(iwe)); + iwe.cmd = IWEVCUSTOM; + sprintf(buf, "Mesh network (version %d)", cfg[0]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + sprintf(buf, "Path Selection Protocol ID: " + "0x%02X%02X%02X%02X", cfg[1], cfg[2], cfg[3], + cfg[4]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + sprintf(buf, "Path Selection Metric ID: " + "0x%02X%02X%02X%02X", cfg[5], cfg[6], cfg[7], + cfg[8]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + sprintf(buf, "Congestion Control Mode ID: " + "0x%02X%02X%02X%02X", cfg[9], cfg[10], + cfg[11], cfg[12]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + sprintf(buf, "Channel Precedence: " + "0x%02X%02X%02X%02X", cfg[13], cfg[14], + cfg[15], cfg[16]); + iwe.u.data.length = strlen(buf); + current_ev = iwe_stream_add_point(info, current_ev, + end_buf, + &iwe, buf); + kfree(buf); + } + } + + return current_ev; +} + + +int ieee80211_scan_results(struct ieee80211_local *local, + struct iw_request_info *info, + char *buf, size_t len) +{ + char *current_ev = buf; + char *end_buf = buf + len; + struct ieee80211_bss *bss; + + spin_lock_bh(&local->bss_lock); + list_for_each_entry(bss, &local->bss_list, list) { + if (buf + len - current_ev <= IW_EV_ADDR_LEN) { + spin_unlock_bh(&local->bss_lock); + return -E2BIG; + } + current_ev = ieee80211_scan_result(local, info, bss, + current_ev, end_buf); + } + spin_unlock_bh(&local->bss_lock); + return current_ev - buf; +} diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c new file mode 100644 index 000000000000..f72bad636d8e --- /dev/null +++ b/net/mac80211/spectmgmt.c @@ -0,0 +1,86 @@ +/* + * spectrum management + * + * Copyright 2003, Jouni Malinen <jkmaline@cc.hut.fi> + * Copyright 2002-2005, Instant802 Networks, Inc. + * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> + * Copyright 2007, Michael Wu <flamingice@sourmilk.net> + * Copyright 2007-2008, Intel Corporation + * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ieee80211.h> +#include <net/wireless.h> +#include <net/mac80211.h> +#include "ieee80211_i.h" +#include "sta_info.h" +#include "wme.h" + +static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_data *sdata, + struct ieee80211_msrment_ie *request_ie, + const u8 *da, const u8 *bssid, + u8 dialog_token) +{ + struct ieee80211_local *local = sdata->local; + struct sk_buff *skb; + struct ieee80211_mgmt *msr_report; + + skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom + + sizeof(struct ieee80211_msrment_ie)); + + if (!skb) { + printk(KERN_ERR "%s: failed to allocate buffer for " + "measurement report frame\n", sdata->dev->name); + return; + } + + skb_reserve(skb, local->hw.extra_tx_headroom); + msr_report = (struct ieee80211_mgmt *)skb_put(skb, 24); + memset(msr_report, 0, 24); + memcpy(msr_report->da, da, ETH_ALEN); + memcpy(msr_report->sa, sdata->dev->dev_addr, ETH_ALEN); + memcpy(msr_report->bssid, bssid, ETH_ALEN); + msr_report->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_ACTION); + + skb_put(skb, 1 + sizeof(msr_report->u.action.u.measurement)); + msr_report->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT; + msr_report->u.action.u.measurement.action_code = + WLAN_ACTION_SPCT_MSR_RPRT; + msr_report->u.action.u.measurement.dialog_token = dialog_token; + + msr_report->u.action.u.measurement.element_id = WLAN_EID_MEASURE_REPORT; + msr_report->u.action.u.measurement.length = + sizeof(struct ieee80211_msrment_ie); + + memset(&msr_report->u.action.u.measurement.msr_elem, 0, + sizeof(struct ieee80211_msrment_ie)); + msr_report->u.action.u.measurement.msr_elem.token = request_ie->token; + msr_report->u.action.u.measurement.msr_elem.mode |= + IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED; + msr_report->u.action.u.measurement.msr_elem.type = request_ie->type; + + ieee80211_tx_skb(sdata, skb, 0); +} + +void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, + struct ieee80211_mgmt *mgmt, + size_t len) +{ + /* + * Ignoring measurement request is spec violation. + * Mandatory measurements must be reported optional + * measurements might be refused or reported incapable + * For now just refuse + * TODO: Answer basic measurement as unmeasured + */ + ieee80211_send_refuse_measurement_request(sdata, + &mgmt->u.action.u.measurement.msr_elem, + mgmt->sa, mgmt->bssid, + mgmt->u.action.u.measurement.dialog_token); +} diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index f2ba653b9d69..7fef8ea1f5ec 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -73,11 +73,11 @@ static int sta_info_hash_del(struct ieee80211_local *local, { struct sta_info *s; - s = local->sta_hash[STA_HASH(sta->addr)]; + s = local->sta_hash[STA_HASH(sta->sta.addr)]; if (!s) return -ENOENT; if (s == sta) { - rcu_assign_pointer(local->sta_hash[STA_HASH(sta->addr)], + rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], s->hnext); return 0; } @@ -93,26 +93,19 @@ static int sta_info_hash_del(struct ieee80211_local *local, } /* protected by RCU */ -static struct sta_info *__sta_info_find(struct ieee80211_local *local, - u8 *addr) +struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr) { struct sta_info *sta; sta = rcu_dereference(local->sta_hash[STA_HASH(addr)]); while (sta) { - if (compare_ether_addr(sta->addr, addr) == 0) + if (compare_ether_addr(sta->sta.addr, addr) == 0) break; sta = rcu_dereference(sta->hnext); } return sta; } -struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr) -{ - return __sta_info_find(local, addr); -} -EXPORT_SYMBOL(sta_info_get); - struct sta_info *sta_info_get_by_idx(struct ieee80211_local *local, int idx, struct net_device *dev) { @@ -146,12 +139,12 @@ static void __sta_info_free(struct ieee80211_local *local, { DECLARE_MAC_BUF(mbuf); - rate_control_free_sta(sta->rate_ctrl, sta->rate_ctrl_priv); + rate_control_free_sta(sta); rate_control_put(sta->rate_ctrl); #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Destroyed STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->addr)); + wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->sta.addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ kfree(sta); @@ -219,8 +212,8 @@ void sta_info_destroy(struct sta_info *sta) static void sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { - sta->hnext = local->sta_hash[STA_HASH(sta->addr)]; - rcu_assign_pointer(local->sta_hash[STA_HASH(sta->addr)], sta); + sta->hnext = local->sta_hash[STA_HASH(sta->sta.addr)]; + rcu_assign_pointer(local->sta_hash[STA_HASH(sta->sta.addr)], sta); } struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, @@ -231,20 +224,20 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, int i; DECLARE_MAC_BUF(mbuf); - sta = kzalloc(sizeof(*sta), gfp); + sta = kzalloc(sizeof(*sta) + local->hw.sta_data_size, gfp); if (!sta) return NULL; spin_lock_init(&sta->lock); spin_lock_init(&sta->flaglock); - memcpy(sta->addr, addr, ETH_ALEN); + memcpy(sta->sta.addr, addr, ETH_ALEN); sta->local = local; sta->sdata = sdata; sta->rate_ctrl = rate_control_get(local->rate_ctrl); sta->rate_ctrl_priv = rate_control_alloc_sta(sta->rate_ctrl, - gfp); + &sta->sta, gfp); if (!sta->rate_ctrl_priv) { rate_control_put(sta->rate_ctrl); kfree(sta); @@ -271,7 +264,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Allocated STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->addr)); + wiphy_name(local->hw.wiphy), print_mac(mbuf, sta->sta.addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ #ifdef CONFIG_MAC80211_MESH @@ -300,15 +293,15 @@ int sta_info_insert(struct sta_info *sta) goto out_free; } - if (WARN_ON(compare_ether_addr(sta->addr, sdata->dev->dev_addr) == 0 || - is_multicast_ether_addr(sta->addr))) { + if (WARN_ON(compare_ether_addr(sta->sta.addr, sdata->dev->dev_addr) == 0 || + is_multicast_ether_addr(sta->sta.addr))) { err = -EINVAL; goto out_free; } spin_lock_irqsave(&local->sta_lock, flags); /* check if STA exists already */ - if (__sta_info_find(local, sta->addr)) { + if (sta_info_get(local, sta->sta.addr)) { spin_unlock_irqrestore(&local->sta_lock, flags); err = -EEXIST; goto out_free; @@ -319,18 +312,18 @@ int sta_info_insert(struct sta_info *sta) /* notify driver */ if (local->ops->sta_notify) { - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); local->ops->sta_notify(local_to_hw(local), &sdata->vif, - STA_NOTIFY_ADD, sta->addr); + STA_NOTIFY_ADD, &sta->sta); } #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Inserted STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mac, sta->addr)); + wiphy_name(local->hw.wiphy), print_mac(mac, sta->sta.addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ spin_unlock_irqrestore(&local->sta_lock, flags); @@ -379,11 +372,12 @@ static void __sta_info_set_tim_bit(struct ieee80211_if_ap *bss, { BUG_ON(!bss); - __bss_tim_set(bss, sta->aid); + __bss_tim_set(bss, sta->sta.aid); if (sta->local->ops->set_tim) { sta->local->tim_in_locked_section = true; - sta->local->ops->set_tim(local_to_hw(sta->local), sta->aid, 1); + sta->local->ops->set_tim(local_to_hw(sta->local), + &sta->sta, true); sta->local->tim_in_locked_section = false; } } @@ -404,11 +398,12 @@ static void __sta_info_clear_tim_bit(struct ieee80211_if_ap *bss, { BUG_ON(!bss); - __bss_tim_clear(bss, sta->aid); + __bss_tim_clear(bss, sta->sta.aid); if (sta->local->ops->set_tim) { sta->local->tim_in_locked_section = true; - sta->local->ops->set_tim(local_to_hw(sta->local), sta->aid, 0); + sta->local->ops->set_tim(local_to_hw(sta->local), + &sta->sta, false); sta->local->tim_in_locked_section = false; } } @@ -424,7 +419,7 @@ void sta_info_clear_tim_bit(struct sta_info *sta) spin_unlock_irqrestore(&sta->local->sta_lock, flags); } -void __sta_info_unlink(struct sta_info **sta) +static void __sta_info_unlink(struct sta_info **sta) { struct ieee80211_local *local = (*sta)->local; struct ieee80211_sub_if_data *sdata = (*sta)->sdata; @@ -456,13 +451,13 @@ void __sta_info_unlink(struct sta_info **sta) local->num_sta--; if (local->ops->sta_notify) { - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); local->ops->sta_notify(local_to_hw(local), &sdata->vif, - STA_NOTIFY_REMOVE, (*sta)->addr); + STA_NOTIFY_REMOVE, &(*sta)->sta); } if (ieee80211_vif_is_mesh(&sdata->vif)) { @@ -474,7 +469,7 @@ void __sta_info_unlink(struct sta_info **sta) #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: Removed STA %s\n", - wiphy_name(local->hw.wiphy), print_mac(mbuf, (*sta)->addr)); + wiphy_name(local->hw.wiphy), print_mac(mbuf, (*sta)->sta.addr)); #endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ /* @@ -570,7 +565,7 @@ static void sta_info_cleanup_expire_buffered(struct ieee80211_local *local, local->total_ps_buffered--; #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "Buffered frame expired (STA " - "%s)\n", print_mac(mac, sta->addr)); + "%s)\n", print_mac(mac, sta->sta.addr)); #endif dev_kfree_skb(skb); @@ -640,7 +635,12 @@ static void sta_info_debugfs_add_work(struct work_struct *work) spin_lock_irqsave(&local->sta_lock, flags); list_for_each_entry(tmp, &local->sta_list, list) { - if (!tmp->debugfs.dir) { + /* + * debugfs.add_has_run will be set by + * ieee80211_sta_debugfs_add regardless + * of what else it does. + */ + if (!tmp->debugfs.add_has_run) { sta = tmp; __sta_info_pin(sta); break; @@ -802,3 +802,40 @@ void sta_info_flush_delayed(struct ieee80211_sub_if_data *sdata) schedule_work(&local->sta_flush_work); spin_unlock_irqrestore(&local->sta_lock, flags); } + +void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, + unsigned long exp_time) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta, *tmp; + LIST_HEAD(tmp_list); + DECLARE_MAC_BUF(mac); + unsigned long flags; + + spin_lock_irqsave(&local->sta_lock, flags); + list_for_each_entry_safe(sta, tmp, &local->sta_list, list) + if (time_after(jiffies, sta->last_rx + exp_time)) { +#ifdef CONFIG_MAC80211_IBSS_DEBUG + printk(KERN_DEBUG "%s: expiring inactive STA %s\n", + sdata->dev->name, print_mac(mac, sta->sta.addr)); +#endif + __sta_info_unlink(&sta); + if (sta) + list_add(&sta->list, &tmp_list); + } + spin_unlock_irqrestore(&local->sta_lock, flags); + + list_for_each_entry_safe(sta, tmp, &tmp_list, list) + sta_info_destroy(sta); +} + +struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_hw *hw, + const u8 *addr) +{ + struct sta_info *sta = sta_info_get(hw_to_local(hw), addr); + + if (!sta) + return NULL; + return &sta->sta; +} +EXPORT_SYMBOL(ieee80211_find_sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 109db787ccb7..168a39a298bd 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -167,8 +167,6 @@ struct sta_ampdu_mlme { * @lock: used for locking all fields that require locking, see comments * in the header file. * @flaglock: spinlock for flags accesses - * @ht_info: HT capabilities of this STA - * @supp_rates: Bitmap of supported rates (per band) * @addr: MAC address of this STA * @aid: STA's unique AID (1..2007, 0 = not assigned yet), * only used in AP (and IBSS?) mode @@ -191,20 +189,15 @@ struct sta_ampdu_mlme { * @last_qual: qual of last received frame from this STA * @last_noise: noise of last received frame from this STA * @last_seq_ctrl: last received seq/frag number from this STA (per RX queue) - * @wme_rx_queue: TBD * @tx_filtered_count: TBD * @tx_retry_failed: TBD * @tx_retry_count: TBD - * @tx_num_consecutive_failures: TBD - * @tx_num_mpdu_ok: TBD - * @tx_num_mpdu_fail: TBD * @fail_avg: moving percentage of failed MSDUs * @tx_packets: number of RX/TX MSDUs * @tx_bytes: TBD * @tx_fragments: number of transmitted MPDUs - * @txrate_idx: TBD - * @last_txrate_idx: TBD - * @wme_tx_queue: TBD + * @last_txrate_idx: Index of the last used transmit rate + * @tid_seq: TBD * @ampdu_mlme: TBD * @timer_to_tid: identity mapping to ID timers * @tid_to_tx_q: map tid to tx queue @@ -217,6 +210,7 @@ struct sta_ampdu_mlme { * @plink_timeout: TBD * @plink_timer: TBD * @debugfs: debug filesystem info + * @sta: station information we share with the driver */ struct sta_info { /* General information, mostly static */ @@ -229,10 +223,7 @@ struct sta_info { void *rate_ctrl_priv; spinlock_t lock; spinlock_t flaglock; - struct ieee80211_ht_info ht_info; - u64 supp_rates[IEEE80211_NUM_BANDS]; - u8 addr[ETH_ALEN]; - u16 aid; + u16 listen_interval; /* @@ -265,17 +256,10 @@ struct sta_info { int last_qual; int last_noise; __le16 last_seq_ctrl[NUM_RX_DATA_QUEUES]; -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - unsigned int wme_rx_queue[NUM_RX_DATA_QUEUES]; -#endif /* Updated from TX status path only, no locking requirements */ unsigned long tx_filtered_count; unsigned long tx_retry_failed, tx_retry_count; - /* TODO: update in generic code not rate control? */ - u32 tx_num_consecutive_failures; - u32 tx_num_mpdu_ok; - u32 tx_num_mpdu_fail; /* moving percentage of failed MSDUs */ unsigned int fail_avg; @@ -283,12 +267,8 @@ struct sta_info { unsigned long tx_packets; unsigned long tx_bytes; unsigned long tx_fragments; - int txrate_idx; - int last_txrate_idx; + unsigned int last_txrate_idx; u16 tid_seq[IEEE80211_QOS_CTL_TID_MASK + 1]; -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - unsigned int wme_tx_queue[NUM_RX_DATA_QUEUES]; -#endif /* * Aggregation information, locked with lock. @@ -319,13 +299,13 @@ struct sta_info { struct dentry *num_ps_buf_frames; struct dentry *inactive_ms; struct dentry *last_seq_ctrl; -#ifdef CONFIG_MAC80211_DEBUG_COUNTERS - struct dentry *wme_rx_queue; - struct dentry *wme_tx_queue; -#endif struct dentry *agg_status; + bool add_has_run; } debugfs; #endif + + /* keep last! */ + struct ieee80211_sta sta; }; static inline enum plink_state sta_plink_state(struct sta_info *sta) @@ -425,7 +405,7 @@ static inline u32 get_sta_flags(struct sta_info *sta) /* * Get a STA info, must have be under RCU read lock. */ -struct sta_info *sta_info_get(struct ieee80211_local *local, u8 *addr); +struct sta_info *sta_info_get(struct ieee80211_local *local, const u8 *addr); /* * Get STA info by index, BROKEN! */ @@ -451,7 +431,6 @@ int sta_info_insert(struct sta_info *sta); * has already unlinked it. */ void sta_info_unlink(struct sta_info **sta); -void __sta_info_unlink(struct sta_info **sta); void sta_info_destroy(struct sta_info *sta); void sta_info_set_tim_bit(struct sta_info *sta); @@ -463,5 +442,7 @@ void sta_info_stop(struct ieee80211_local *local); int sta_info_flush(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); void sta_info_flush_delayed(struct ieee80211_sub_if_data *sdata); +void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, + unsigned long exp_time); #endif /* STA_INFO_H */ diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 995f7af3d25e..34b32bc8f609 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -304,7 +304,7 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) { u8 bcast[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - u8 *sta_addr = key->sta->addr; + u8 *sta_addr = key->sta->sta.addr; if (is_multicast_ether_addr(ra)) sta_addr = bcast; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4788f7b91f49..1460537faf33 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -38,43 +38,6 @@ /* misc utils */ -#ifdef CONFIG_MAC80211_LOWTX_FRAME_DUMP -static void ieee80211_dump_frame(const char *ifname, const char *title, - const struct sk_buff *skb) -{ - const struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - unsigned int hdrlen; - DECLARE_MAC_BUF(mac); - - printk(KERN_DEBUG "%s: %s (len=%d)", ifname, title, skb->len); - if (skb->len < 4) { - printk("\n"); - return; - } - - hdrlen = ieee80211_hdrlen(hdr->frame_control); - if (hdrlen > skb->len) - hdrlen = skb->len; - if (hdrlen >= 4) - printk(" FC=0x%04x DUR=0x%04x", - le16_to_cpu(hdr->frame_control), le16_to_cpu(hdr->duration_id)); - if (hdrlen >= 10) - printk(" A1=%s", print_mac(mac, hdr->addr1)); - if (hdrlen >= 16) - printk(" A2=%s", print_mac(mac, hdr->addr2)); - if (hdrlen >= 24) - printk(" A3=%s", print_mac(mac, hdr->addr3)); - if (hdrlen >= 30) - printk(" A4=%s", print_mac(mac, hdr->addr4)); - printk("\n"); -} -#else /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ -static inline void ieee80211_dump_frame(const char *ifname, const char *title, - struct sk_buff *skb) -{ -} -#endif /* CONFIG_MAC80211_LOWTX_FRAME_DUMP */ - static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, int next_frag_len) { @@ -82,6 +45,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, struct ieee80211_rate *txrate; struct ieee80211_local *local = tx->local; struct ieee80211_supported_band *sband; + struct ieee80211_hdr *hdr; sband = local->hw.wiphy->bands[tx->channel->band]; txrate = &sband->bitrates[tx->rate_idx]; @@ -107,10 +71,10 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, * at the highest possible rate belonging to the PHY rates in the * BSSBasicRateSet */ - - if ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_CTL) { + hdr = (struct ieee80211_hdr *)tx->skb->data; + if (ieee80211_is_ctl(hdr->frame_control)) { /* TODO: These control frames are not currently sent by - * 80211.o, but should they be implemented, this function + * mac80211, but should they be implemented, this function * needs to be updated to support duration field calculation. * * RTS: time needed to transmit pending data/mgmt frame plus @@ -152,7 +116,7 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, if (r->bitrate > txrate->bitrate) break; - if (tx->sdata->basic_rates & BIT(i)) + if (tx->sdata->bss_conf.basic_rates & BIT(i)) rate = r->bitrate; switch (sband->band) { @@ -201,11 +165,10 @@ static __le16 ieee80211_duration(struct ieee80211_tx_data *tx, int group_addr, return cpu_to_le16(dur); } -static int inline is_ieee80211_device(struct net_device *dev, - struct net_device *master) +static int inline is_ieee80211_device(struct ieee80211_local *local, + struct net_device *dev) { - return (wdev_priv(dev->ieee80211_ptr) == - wdev_priv(master->ieee80211_ptr)); + return local == wdev_priv(dev->ieee80211_ptr); } /* tx handlers */ @@ -213,21 +176,19 @@ static int inline is_ieee80211_device(struct net_device *dev, static ieee80211_tx_result debug_noinline ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) { -#ifdef CONFIG_MAC80211_VERBOSE_DEBUG + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; -#endif /* CONFIG_MAC80211_VERBOSE_DEBUG */ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); u32 sta_flags; if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) return TX_CONTINUE; - if (unlikely(tx->local->sta_sw_scanning) && - ((tx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (tx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_PROBE_REQ)) + if (unlikely(tx->local->sw_scanning) && + !ieee80211_is_probe_req(hdr->frame_control)) return TX_DROP; - if (tx->sdata->vif.type == IEEE80211_IF_TYPE_MESH_POINT) + if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT) return TX_CONTINUE; if (tx->flags & IEEE80211_TX_PS_BUFFERED) @@ -237,8 +198,8 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) if (likely(tx->flags & IEEE80211_TX_UNICAST)) { if (unlikely(!(sta_flags & WLAN_STA_ASSOC) && - tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - (tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA)) { + tx->sdata->vif.type != NL80211_IFTYPE_ADHOC && + ieee80211_is_data(hdr->frame_control))) { #ifdef CONFIG_MAC80211_VERBOSE_DEBUG DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "%s: dropped data frame to not " @@ -249,9 +210,9 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) return TX_DROP; } } else { - if (unlikely((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_DATA && + if (unlikely(ieee80211_is_data(hdr->frame_control) && tx->local->num_sta == 0 && - tx->sdata->vif.type != IEEE80211_IF_TYPE_IBSS)) { + tx->sdata->vif.type != NL80211_IFTYPE_ADHOC)) { /* * No associated STAs - no need to send multicast * frames. @@ -282,7 +243,7 @@ static void purge_old_ps_buffers(struct ieee80211_local *local) list_for_each_entry_rcu(sdata, &local->interfaces, list) { struct ieee80211_if_ap *ap; - if (sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_AP) continue; ap = &sdata->u.ap; skb = skb_dequeue(&ap->ps_bc_buf); @@ -315,6 +276,7 @@ static ieee80211_tx_result ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; /* * broadcast/multicast frame @@ -329,7 +291,7 @@ ieee80211_tx_h_multicast_ps_buf(struct ieee80211_tx_data *tx) return TX_CONTINUE; /* no buffering for ordered frames */ - if (tx->fc & IEEE80211_FCTL_ORDER) + if (ieee80211_has_order(hdr->frame_control)) return TX_CONTINUE; /* no stations in PS mode */ @@ -367,12 +329,11 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) { struct sta_info *sta = tx->sta; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; u32 staflags; DECLARE_MAC_BUF(mac); - if (unlikely(!sta || - ((tx->fc & IEEE80211_FCTL_FTYPE) == IEEE80211_FTYPE_MGMT && - (tx->fc & IEEE80211_FCTL_STYPE) == IEEE80211_STYPE_PROBE_RESP))) + if (unlikely(!sta || ieee80211_is_probe_resp(hdr->frame_control))) return TX_CONTINUE; staflags = get_sta_flags(sta); @@ -382,7 +343,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) #ifdef CONFIG_MAC80211_VERBOSE_PS_DEBUG printk(KERN_DEBUG "STA %s aid %d: PS buffer (entries " "before %d)\n", - print_mac(mac, sta->addr), sta->aid, + print_mac(mac, sta->sta.addr), sta->sta.aid, skb_queue_len(&sta->ps_tx_buf)); #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) @@ -393,7 +354,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) if (net_ratelimit()) { printk(KERN_DEBUG "%s: STA %s TX " "buffer full - dropping oldest frame\n", - tx->dev->name, print_mac(mac, sta->addr)); + tx->dev->name, print_mac(mac, sta->sta.addr)); } #endif dev_kfree_skb(old); @@ -412,7 +373,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) else if (unlikely(test_sta_flags(sta, WLAN_STA_PS))) { printk(KERN_DEBUG "%s: STA %s in PS mode, but pspoll " "set -> send frame\n", tx->dev->name, - print_mac(mac, sta->addr)); + print_mac(mac, sta->sta.addr)); } #endif /* CONFIG_MAC80211_VERBOSE_PS_DEBUG */ clear_sta_flags(sta, WLAN_STA_PSPOLL); @@ -437,7 +398,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) { struct ieee80211_key *key; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - u16 fc = tx->fc; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; if (unlikely(tx->skb->do_not_encrypt)) tx->key = NULL; @@ -454,22 +415,16 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key = NULL; if (tx->key) { - u16 ftype, stype; - tx->key->tx_rx_count++; /* TODO: add threshold stuff again */ switch (tx->key->conf.alg) { case ALG_WEP: - ftype = fc & IEEE80211_FCTL_FTYPE; - stype = fc & IEEE80211_FCTL_STYPE; - - if (ftype == IEEE80211_FTYPE_MGMT && - stype == IEEE80211_STYPE_AUTH) + if (ieee80211_is_auth(hdr->frame_control)) break; case ALG_TKIP: case ALG_CCMP: - if (!WLAN_FC_DATA_PRESENT(fc)) + if (!ieee80211_is_data_present(hdr->frame_control)) tx->key = NULL; break; } @@ -491,20 +446,24 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) sband = tx->local->hw.wiphy->bands[tx->channel->band]; if (likely(tx->rate_idx < 0)) { - rate_control_get_rate(tx->dev, sband, tx->skb, &rsel); + rate_control_get_rate(tx->sdata, sband, tx->sta, + tx->skb, &rsel); + if (tx->sta) + tx->sta->last_txrate_idx = rsel.rate_idx; tx->rate_idx = rsel.rate_idx; if (unlikely(rsel.probe_idx >= 0)) { info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; tx->flags |= IEEE80211_TX_PROBE_LAST_FRAG; - info->control.alt_retry_rate_idx = tx->rate_idx; + info->control.retries[0].rate_idx = tx->rate_idx; + info->control.retries[0].limit = tx->local->hw.max_altrate_tries; tx->rate_idx = rsel.probe_idx; - } else - info->control.alt_retry_rate_idx = -1; + } else if (info->control.retries[0].limit == 0) + info->control.retries[0].rate_idx = -1; if (unlikely(tx->rate_idx < 0)) return TX_DROP; } else - info->control.alt_retry_rate_idx = -1; + info->control.retries[0].rate_idx = -1; if (tx->sdata->bss_conf.use_cts_prot && (tx->flags & IEEE80211_TX_FRAGMENTED) && (rsel.nonerp_idx >= 0)) { @@ -535,7 +494,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) sband = tx->local->hw.wiphy->bands[tx->channel->band]; if (tx->sta) - info->control.aid = tx->sta->aid; + info->control.sta = &tx->sta->sta; if (!info->control.retry_limit) { if (!is_multicast_ether_addr(hdr->addr1)) { @@ -563,7 +522,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) * frames. * TODO: The last fragment could still use multiple retry * rates. */ - info->control.alt_retry_rate_idx = -1; + info->control.retries[0].rate_idx = -1; } /* Use CTS protection for unicast frames sent using extended rates if @@ -593,7 +552,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) int idx; /* Do not use multiple retry rates when using RTS/CTS */ - info->control.alt_retry_rate_idx = -1; + info->control.retries[0].rate_idx = -1; /* Use min(data rate, max base rate) as CTS/RTS rate */ rate = &sband->bitrates[tx->rate_idx]; @@ -601,7 +560,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) for (idx = 0; idx < sband->n_bitrates; idx++) { if (sband->bitrates[idx].bitrate > rate->bitrate) continue; - if (tx->sdata->basic_rates & BIT(idx) && + if (tx->sdata->bss_conf.basic_rates & BIT(idx) && (baserate < 0 || (sband->bitrates[baserate].bitrate < sband->bitrates[idx].bitrate))) @@ -615,7 +574,7 @@ ieee80211_tx_h_misc(struct ieee80211_tx_data *tx) } if (tx->sta) - info->control.aid = tx->sta->aid; + info->control.sta = &tx->sta->sta; return TX_CONTINUE; } @@ -629,7 +588,14 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) u8 *qc; int tid; - /* only for injected frames */ + /* + * Packet injection may want to control the sequence + * number, if we have no matching interface then we + * neither assign one ourselves nor ask the driver to. + */ + if (unlikely(!info->control.vif)) + return TX_CONTINUE; + if (unlikely(ieee80211_is_ctl(hdr->frame_control))) return TX_CONTINUE; @@ -854,7 +820,6 @@ __ieee80211_parse_tx_radiotap(struct ieee80211_tx_data *tx, sband = tx->local->hw.wiphy->bands[tx->channel->band]; skb->do_not_encrypt = 1; - info->flags |= IEEE80211_TX_CTL_INJECTED; tx->flags &= ~IEEE80211_TX_FRAGMENTED; /* @@ -986,7 +951,7 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, /* process and remove the injection radiotap header */ sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (unlikely(sdata->vif.type == IEEE80211_IF_TYPE_MNTR)) { + if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) { if (__ieee80211_parse_tx_radiotap(tx, skb) == TX_DROP) return TX_DROP; @@ -1000,7 +965,6 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, hdr = (struct ieee80211_hdr *) skb->data; tx->sta = sta_info_get(local, hdr->addr1); - tx->fc = le16_to_cpu(hdr->frame_control); if (is_multicast_ether_addr(hdr->addr1)) { tx->flags &= ~IEEE80211_TX_UNICAST; @@ -1025,7 +989,7 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, else if (test_and_clear_sta_flags(tx->sta, WLAN_STA_CLEAR_PS_FILT)) info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; - hdrlen = ieee80211_get_hdrlen(tx->fc); + hdrlen = ieee80211_hdrlen(hdr->frame_control); if (skb->len > hdrlen + sizeof(rfc1042_header) + 2) { u8 *pos = &skb->data[hdrlen + sizeof(rfc1042_header)]; tx->ethertype = (pos[0] << 8) | pos[1]; @@ -1038,14 +1002,14 @@ __ieee80211_tx_prepare(struct ieee80211_tx_data *tx, /* * NB: @tx is uninitialised when passed in here */ -static int ieee80211_tx_prepare(struct ieee80211_tx_data *tx, - struct sk_buff *skb, - struct net_device *mdev) +static int ieee80211_tx_prepare(struct ieee80211_local *local, + struct ieee80211_tx_data *tx, + struct sk_buff *skb) { struct net_device *dev; dev = dev_get_by_index(&init_net, skb->iif); - if (unlikely(dev && !is_ieee80211_device(dev, mdev))) { + if (unlikely(dev && !is_ieee80211_device(local, dev))) { dev_put(dev); dev = NULL; } @@ -1068,8 +1032,6 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, return IEEE80211_TX_AGAIN; info = IEEE80211_SKB_CB(skb); - ieee80211_dump_frame(wiphy_name(local->hw.wiphy), - "TX to low-level driver", skb); ret = local->ops->tx(local_to_hw(local), skb); if (ret) return IEEE80211_TX_AGAIN; @@ -1099,9 +1061,6 @@ static int __ieee80211_tx(struct ieee80211_local *local, struct sk_buff *skb, ~IEEE80211_TX_CTL_RATE_CTRL_PROBE; } - ieee80211_dump_frame(wiphy_name(local->hw.wiphy), - "TX to low-level driver", - tx->extra_frag[i]); ret = local->ops->tx(local_to_hw(local), tx->extra_frag[i]); if (ret) @@ -1297,20 +1256,26 @@ static int ieee80211_skb_resize(struct ieee80211_local *local, return 0; } -int ieee80211_master_start_xmit(struct sk_buff *skb, - struct net_device *dev) +int ieee80211_master_start_xmit(struct sk_buff *skb, struct net_device *dev) { + struct ieee80211_master_priv *mpriv = netdev_priv(dev); + struct ieee80211_local *local = mpriv->local; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; struct net_device *odev = NULL; struct ieee80211_sub_if_data *osdata; int headroom; bool may_encrypt; + enum { + NOT_MONITOR, + FOUND_SDATA, + UNKNOWN_ADDRESS, + } monitor_iface = NOT_MONITOR; int ret; if (skb->iif) odev = dev_get_by_index(&init_net, skb->iif); - if (unlikely(odev && !is_ieee80211_device(odev, dev))) { + if (unlikely(odev && !is_ieee80211_device(local, odev))) { dev_put(odev); odev = NULL; } @@ -1331,15 +1296,55 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, if (ieee80211_vif_is_mesh(&osdata->vif) && ieee80211_is_data(hdr->frame_control)) { - if (ieee80211_is_data(hdr->frame_control)) { - if (is_multicast_ether_addr(hdr->addr3)) - memcpy(hdr->addr1, hdr->addr3, ETH_ALEN); - else - if (mesh_nexthop_lookup(skb, odev)) - return 0; - if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0) - IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.sta, - fwded_frames); + if (is_multicast_ether_addr(hdr->addr3)) + memcpy(hdr->addr1, hdr->addr3, ETH_ALEN); + else + if (mesh_nexthop_lookup(skb, osdata)) + return 0; + if (memcmp(odev->dev_addr, hdr->addr4, ETH_ALEN) != 0) + IEEE80211_IFSTA_MESH_CTR_INC(&osdata->u.mesh, + fwded_frames); + } else if (unlikely(osdata->vif.type == NL80211_IFTYPE_MONITOR)) { + struct ieee80211_sub_if_data *sdata; + int hdrlen; + u16 len_rthdr; + + info->flags |= IEEE80211_TX_CTL_INJECTED; + monitor_iface = UNKNOWN_ADDRESS; + + len_rthdr = ieee80211_get_radiotap_len(skb->data); + hdr = (struct ieee80211_hdr *)skb->data + len_rthdr; + hdrlen = ieee80211_hdrlen(hdr->frame_control); + + /* check the header is complete in the frame */ + if (likely(skb->len >= len_rthdr + hdrlen)) { + /* + * We process outgoing injected frames that have a + * local address we handle as though they are our + * own frames. + * This code here isn't entirely correct, the local + * MAC address is not necessarily enough to find + * the interface to use; for that proper VLAN/WDS + * support we will need a different mechanism. + */ + + rcu_read_lock(); + list_for_each_entry_rcu(sdata, &local->interfaces, + list) { + if (!netif_running(sdata->dev)) + continue; + if (compare_ether_addr(sdata->dev->dev_addr, + hdr->addr2)) { + dev_hold(sdata->dev); + dev_put(odev); + osdata = sdata; + odev = osdata->dev; + skb->iif = sdata->dev->ifindex; + monitor_iface = FOUND_SDATA; + break; + } + } + rcu_read_unlock(); } } @@ -1357,7 +1362,12 @@ int ieee80211_master_start_xmit(struct sk_buff *skb, return 0; } - info->control.vif = &osdata->vif; + if (osdata->vif.type == NL80211_IFTYPE_AP_VLAN) + osdata = container_of(osdata->bss, + struct ieee80211_sub_if_data, + u.ap); + if (likely(monitor_iface != UNKNOWN_ADDRESS)) + info->control.vif = &osdata->vif; ret = ieee80211_tx(odev, skb); dev_put(odev); @@ -1437,8 +1447,8 @@ fail: int ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata; + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_local *local = sdata->local; int ret = 1, head_need; u16 ethertype, hdrlen, meshhdrlen = 0; __le16 fc; @@ -1450,7 +1460,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, struct sta_info *sta; u32 sta_flags = 0; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); if (unlikely(skb->len < ETH_HLEN)) { ret = 0; goto fail; @@ -1465,8 +1474,8 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_AP: - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); /* DA BSSID SA */ memcpy(hdr.addr1, skb->data, ETH_ALEN); @@ -1474,7 +1483,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); hdrlen = 24; break; - case IEEE80211_IF_TYPE_WDS: + case NL80211_IFTYPE_WDS: fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); /* RA TA DA SA */ memcpy(hdr.addr1, sdata->u.wds.remote_addr, ETH_ALEN); @@ -1484,24 +1493,56 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, hdrlen = 30; break; #ifdef CONFIG_MAC80211_MESH - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_MESH_POINT: fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); - /* RA TA DA SA */ - memset(hdr.addr1, 0, ETH_ALEN); - memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); - memcpy(hdr.addr3, skb->data, ETH_ALEN); - memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); - if (!sdata->u.sta.mshcfg.dot11MeshTTL) { + if (!sdata->u.mesh.mshcfg.dot11MeshTTL) { /* Do not send frames with mesh_ttl == 0 */ - sdata->u.sta.mshstats.dropped_frames_ttl++; + sdata->u.mesh.mshstats.dropped_frames_ttl++; ret = 0; goto fail; } - meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata); + memset(&mesh_hdr, 0, sizeof(mesh_hdr)); + + if (compare_ether_addr(dev->dev_addr, + skb->data + ETH_ALEN) == 0) { + /* RA TA DA SA */ + memset(hdr.addr1, 0, ETH_ALEN); + memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr3, skb->data, ETH_ALEN); + memcpy(hdr.addr4, skb->data + ETH_ALEN, ETH_ALEN); + meshhdrlen = ieee80211_new_mesh_header(&mesh_hdr, sdata); + } else { + /* packet from other interface */ + struct mesh_path *mppath; + + memset(hdr.addr1, 0, ETH_ALEN); + memcpy(hdr.addr2, dev->dev_addr, ETH_ALEN); + memcpy(hdr.addr4, dev->dev_addr, ETH_ALEN); + + if (is_multicast_ether_addr(skb->data)) + memcpy(hdr.addr3, skb->data, ETH_ALEN); + else { + rcu_read_lock(); + mppath = mpp_path_lookup(skb->data, sdata); + if (mppath) + memcpy(hdr.addr3, mppath->mpp, ETH_ALEN); + else + memset(hdr.addr3, 0xff, ETH_ALEN); + rcu_read_unlock(); + } + + mesh_hdr.flags |= MESH_FLAGS_AE_A5_A6; + mesh_hdr.ttl = sdata->u.mesh.mshcfg.dot11MeshTTL; + put_unaligned(cpu_to_le32(sdata->u.mesh.mesh_seqnum), &mesh_hdr.seqnum); + memcpy(mesh_hdr.eaddr1, skb->data, ETH_ALEN); + memcpy(mesh_hdr.eaddr2, skb->data + ETH_ALEN, ETH_ALEN); + sdata->u.mesh.mesh_seqnum++; + meshhdrlen = 18; + } hdrlen = 30; break; #endif - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: fc |= cpu_to_le16(IEEE80211_FCTL_TODS); /* BSSID SA DA */ memcpy(hdr.addr1, sdata->u.sta.bssid, ETH_ALEN); @@ -1509,7 +1550,7 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, memcpy(hdr.addr3, skb->data, ETH_ALEN); hdrlen = 24; break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: /* DA SA BSSID */ memcpy(hdr.addr1, skb->data, ETH_ALEN); memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); @@ -1588,19 +1629,6 @@ int ieee80211_subif_start_xmit(struct sk_buff *skb, nh_pos -= skip_header_bytes; h_pos -= skip_header_bytes; - /* TODO: implement support for fragments so that there is no need to - * reallocate and copy payload; it might be enough to support one - * extra fragment that would be copied in the beginning of the frame - * data.. anyway, it would be nice to include this into skb structure - * somehow - * - * There are few options for this: - * use skb->cb as an extra space for 802.11 header - * allocate new buffer if not enough headroom - * make sure that there is enough headroom in every skb by increasing - * build in headroom in __dev_alloc_skb() (linux/skbuff.h) and - * alloc_skb() (net/core/skbuff.c) - */ head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb); /* @@ -1823,10 +1851,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, struct rate_selection rsel; struct beacon_data *beacon; struct ieee80211_supported_band *sband; - struct ieee80211_mgmt *mgmt; - int *num_beacons; enum ieee80211_band band = local->hw.conf.channel->band; - u8 *pos; sband = local->hw.wiphy->bands[band]; @@ -1835,7 +1860,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, sdata = vif_to_sdata(vif); bdev = sdata->dev; - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { + if (sdata->vif.type == NL80211_IFTYPE_AP) { ap = &sdata->u.ap; beacon = rcu_dereference(ap->beacon); if (ap && beacon) { @@ -1873,11 +1898,9 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, if (beacon->tail) memcpy(skb_put(skb, beacon->tail_len), beacon->tail, beacon->tail_len); - - num_beacons = &ap->num_beacons; } else goto out; - } else if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { struct ieee80211_hdr *hdr; ifsta = &sdata->u.sta; @@ -1889,11 +1912,13 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, goto out; hdr = (struct ieee80211_hdr *) skb->data; - hdr->frame_control = IEEE80211_FC(IEEE80211_FTYPE_MGMT, - IEEE80211_STYPE_BEACON); + hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_BEACON); - num_beacons = &ifsta->num_beacons; } else if (ieee80211_vif_is_mesh(&sdata->vif)) { + struct ieee80211_mgmt *mgmt; + u8 *pos; + /* headroom, head length, tail length and maximum TIM length */ skb = dev_alloc_skb(local->tx_headroom + 400); if (!skb) @@ -1916,9 +1941,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, *pos++ = WLAN_EID_SSID; *pos++ = 0x0; - mesh_mgmt_ies_add(skb, sdata->dev); - - num_beacons = &sdata->u.sta.num_beacons; + mesh_mgmt_ies_add(skb, sdata); } else { WARN_ON(1); goto out; @@ -1929,7 +1952,7 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, skb->do_not_encrypt = 1; info->band = band; - rate_control_get_rate(local->mdev, sband, skb, &rsel); + rate_control_get_rate(sdata, sband, NULL, skb, &rsel); if (unlikely(rsel.rate_idx < 0)) { if (net_ratelimit()) { @@ -1955,7 +1978,6 @@ struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, info->antenna_sel_tx = local->hw.conf.antenna_sel_tx; info->control.retry_limit = 1; - (*num_beacons)++; out: rcu_read_unlock(); return skb; @@ -2017,7 +2039,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, rcu_read_lock(); beacon = rcu_dereference(bss->beacon); - if (sdata->vif.type != IEEE80211_IF_TYPE_AP || !beacon || !beacon->head) + if (sdata->vif.type != NL80211_IFTYPE_AP || !beacon || !beacon->head) goto out; if (bss->dtim_count != 0) @@ -2039,7 +2061,7 @@ ieee80211_get_buffered_bc(struct ieee80211_hw *hw, cpu_to_le16(IEEE80211_FCTL_MOREDATA); } - if (!ieee80211_tx_prepare(&tx, skb, local->mdev)) + if (!ieee80211_tx_prepare(local, &tx, skb)) break; dev_kfree_skb_any(skb); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 0d463c80c404..cee4884b9d06 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -43,7 +43,7 @@ const unsigned char bridge_tunnel_header[] __aligned(2) = u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, - enum ieee80211_if_types type) + enum nl80211_iftype type) { __le16 fc = hdr->frame_control; @@ -77,10 +77,10 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, if (ieee80211_is_back_req(fc)) { switch (type) { - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: return hdr->addr2; - case IEEE80211_IF_TYPE_AP: - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: return hdr->addr1; default: break; /* fall through to the return */ @@ -91,45 +91,6 @@ u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len, return NULL; } -int ieee80211_get_hdrlen(u16 fc) -{ - int hdrlen = 24; - - switch (fc & IEEE80211_FCTL_FTYPE) { - case IEEE80211_FTYPE_DATA: - if ((fc & IEEE80211_FCTL_FROMDS) && (fc & IEEE80211_FCTL_TODS)) - hdrlen = 30; /* Addr4 */ - /* - * The QoS Control field is two bytes and its presence is - * indicated by the IEEE80211_STYPE_QOS_DATA bit. Add 2 to - * hdrlen if that bit is set. - * This works by masking out the bit and shifting it to - * bit position 1 so the result has the value 0 or 2. - */ - hdrlen += (fc & IEEE80211_STYPE_QOS_DATA) - >> (ilog2(IEEE80211_STYPE_QOS_DATA)-1); - break; - case IEEE80211_FTYPE_CTL: - /* - * ACK and CTS are 10 bytes, all others 16. To see how - * to get this condition consider - * subtype mask: 0b0000000011110000 (0x00F0) - * ACK subtype: 0b0000000011010000 (0x00D0) - * CTS subtype: 0b0000000011000000 (0x00C0) - * bits that matter: ^^^ (0x00E0) - * value of those: 0b0000000011000000 (0x00C0) - */ - if ((fc & 0xE0) == 0xC0) - hdrlen = 10; - else - hdrlen = 16; - break; - } - - return hdrlen; -} -EXPORT_SYMBOL(ieee80211_get_hdrlen); - unsigned int ieee80211_hdrlen(__le16 fc) { unsigned int hdrlen = 24; @@ -270,16 +231,21 @@ __le16 ieee80211_generic_frame_duration(struct ieee80211_hw *hw, struct ieee80211_rate *rate) { struct ieee80211_local *local = hw_to_local(hw); - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_sub_if_data *sdata; u16 dur; int erp; + bool short_preamble = false; erp = 0; - if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) - erp = rate->flags & IEEE80211_RATE_ERP_G; + if (vif) { + sdata = vif_to_sdata(vif); + short_preamble = sdata->bss_conf.use_short_preamble; + if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) + erp = rate->flags & IEEE80211_RATE_ERP_G; + } dur = ieee80211_frame_duration(local, frame_len, rate->bitrate, erp, - sdata->bss_conf.use_short_preamble); + short_preamble); return cpu_to_le16(dur); } @@ -291,7 +257,7 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_rate *rate; - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_sub_if_data *sdata; bool short_preamble; int erp; u16 dur; @@ -299,13 +265,17 @@ __le16 ieee80211_rts_duration(struct ieee80211_hw *hw, sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - short_preamble = sdata->bss_conf.use_short_preamble; + short_preamble = false; rate = &sband->bitrates[frame_txctl->control.rts_cts_rate_idx]; erp = 0; - if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) - erp = rate->flags & IEEE80211_RATE_ERP_G; + if (vif) { + sdata = vif_to_sdata(vif); + short_preamble = sdata->bss_conf.use_short_preamble; + if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) + erp = rate->flags & IEEE80211_RATE_ERP_G; + } /* CTS duration */ dur = ieee80211_frame_duration(local, 10, rate->bitrate, @@ -328,7 +298,7 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, { struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_rate *rate; - struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_sub_if_data *sdata; bool short_preamble; int erp; u16 dur; @@ -336,12 +306,16 @@ __le16 ieee80211_ctstoself_duration(struct ieee80211_hw *hw, sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - short_preamble = sdata->bss_conf.use_short_preamble; + short_preamble = false; rate = &sband->bitrates[frame_txctl->control.rts_cts_rate_idx]; erp = 0; - if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) - erp = rate->flags & IEEE80211_RATE_ERP_G; + if (vif) { + sdata = vif_to_sdata(vif); + short_preamble = sdata->bss_conf.use_short_preamble; + if (sdata->flags & IEEE80211_SDATA_OPERATING_GMODE) + erp = rate->flags & IEEE80211_RATE_ERP_G; + } /* Data frame duration */ dur = ieee80211_frame_duration(local, frame_len, rate->bitrate, @@ -386,6 +360,13 @@ void ieee80211_stop_queues(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_stop_queues); +int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue) +{ + struct ieee80211_local *local = hw_to_local(hw); + return __netif_subqueue_stopped(local->mdev, queue); +} +EXPORT_SYMBOL(ieee80211_queue_stopped); + void ieee80211_wake_queues(struct ieee80211_hw *hw) { int i; @@ -408,15 +389,16 @@ void ieee80211_iterate_active_interfaces( list_for_each_entry(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_INVALID: - case IEEE80211_IF_TYPE_MNTR: - case IEEE80211_IF_TYPE_VLAN: + case __NL80211_IFTYPE_AFTER_LAST: + case NL80211_IFTYPE_UNSPECIFIED: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_AP_VLAN: continue; - case IEEE80211_IF_TYPE_AP: - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - case IEEE80211_IF_TYPE_WDS: - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_MESH_POINT: break; } if (netif_running(sdata->dev)) @@ -441,15 +423,16 @@ void ieee80211_iterate_active_interfaces_atomic( list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_INVALID: - case IEEE80211_IF_TYPE_MNTR: - case IEEE80211_IF_TYPE_VLAN: + case __NL80211_IFTYPE_AFTER_LAST: + case NL80211_IFTYPE_UNSPECIFIED: + case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_AP_VLAN: continue; - case IEEE80211_IF_TYPE_AP: - case IEEE80211_IF_TYPE_STA: - case IEEE80211_IF_TYPE_IBSS: - case IEEE80211_IF_TYPE_WDS: - case IEEE80211_IF_TYPE_MESH_POINT: + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_MESH_POINT: break; } if (netif_running(sdata->dev)) @@ -460,3 +443,243 @@ void ieee80211_iterate_active_interfaces_atomic( rcu_read_unlock(); } EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic); + +void ieee802_11_parse_elems(u8 *start, size_t len, + struct ieee802_11_elems *elems) +{ + size_t left = len; + u8 *pos = start; + + memset(elems, 0, sizeof(*elems)); + elems->ie_start = start; + elems->total_len = len; + + while (left >= 2) { + u8 id, elen; + + id = *pos++; + elen = *pos++; + left -= 2; + + if (elen > left) + return; + + switch (id) { + case WLAN_EID_SSID: + elems->ssid = pos; + elems->ssid_len = elen; + break; + case WLAN_EID_SUPP_RATES: + elems->supp_rates = pos; + elems->supp_rates_len = elen; + break; + case WLAN_EID_FH_PARAMS: + elems->fh_params = pos; + elems->fh_params_len = elen; + break; + case WLAN_EID_DS_PARAMS: + elems->ds_params = pos; + elems->ds_params_len = elen; + break; + case WLAN_EID_CF_PARAMS: + elems->cf_params = pos; + elems->cf_params_len = elen; + break; + case WLAN_EID_TIM: + elems->tim = pos; + elems->tim_len = elen; + break; + case WLAN_EID_IBSS_PARAMS: + elems->ibss_params = pos; + elems->ibss_params_len = elen; + break; + case WLAN_EID_CHALLENGE: + elems->challenge = pos; + elems->challenge_len = elen; + break; + case WLAN_EID_WPA: + if (elen >= 4 && pos[0] == 0x00 && pos[1] == 0x50 && + pos[2] == 0xf2) { + /* Microsoft OUI (00:50:F2) */ + if (pos[3] == 1) { + /* OUI Type 1 - WPA IE */ + elems->wpa = pos; + elems->wpa_len = elen; + } else if (elen >= 5 && pos[3] == 2) { + if (pos[4] == 0) { + elems->wmm_info = pos; + elems->wmm_info_len = elen; + } else if (pos[4] == 1) { + elems->wmm_param = pos; + elems->wmm_param_len = elen; + } + } + } + break; + case WLAN_EID_RSN: + elems->rsn = pos; + elems->rsn_len = elen; + break; + case WLAN_EID_ERP_INFO: + elems->erp_info = pos; + elems->erp_info_len = elen; + break; + case WLAN_EID_EXT_SUPP_RATES: + elems->ext_supp_rates = pos; + elems->ext_supp_rates_len = elen; + break; + case WLAN_EID_HT_CAPABILITY: + if (elen >= sizeof(struct ieee80211_ht_cap)) + elems->ht_cap_elem = (void *)pos; + break; + case WLAN_EID_HT_EXTRA_INFO: + if (elen >= sizeof(struct ieee80211_ht_addt_info)) + elems->ht_info_elem = (void *)pos; + break; + case WLAN_EID_MESH_ID: + elems->mesh_id = pos; + elems->mesh_id_len = elen; + break; + case WLAN_EID_MESH_CONFIG: + elems->mesh_config = pos; + elems->mesh_config_len = elen; + break; + case WLAN_EID_PEER_LINK: + elems->peer_link = pos; + elems->peer_link_len = elen; + break; + case WLAN_EID_PREQ: + elems->preq = pos; + elems->preq_len = elen; + break; + case WLAN_EID_PREP: + elems->prep = pos; + elems->prep_len = elen; + break; + case WLAN_EID_PERR: + elems->perr = pos; + elems->perr_len = elen; + break; + case WLAN_EID_CHANNEL_SWITCH: + elems->ch_switch_elem = pos; + elems->ch_switch_elem_len = elen; + break; + case WLAN_EID_QUIET: + if (!elems->quiet_elem) { + elems->quiet_elem = pos; + elems->quiet_elem_len = elen; + } + elems->num_of_quiet_elem++; + break; + case WLAN_EID_COUNTRY: + elems->country_elem = pos; + elems->country_elem_len = elen; + break; + case WLAN_EID_PWR_CONSTRAINT: + elems->pwr_constr_elem = pos; + elems->pwr_constr_elem_len = elen; + break; + default: + break; + } + + left -= elen; + pos += elen; + } +} + +void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_tx_queue_params qparam; + int i; + + if (!local->ops->conf_tx) + return; + + memset(&qparam, 0, sizeof(qparam)); + + qparam.aifs = 2; + + if (local->hw.conf.channel->band == IEEE80211_BAND_2GHZ && + !(sdata->flags & IEEE80211_SDATA_OPERATING_GMODE)) + qparam.cw_min = 31; + else + qparam.cw_min = 15; + + qparam.cw_max = 1023; + qparam.txop = 0; + + for (i = 0; i < local_to_hw(local)->queues; i++) + local->ops->conf_tx(local_to_hw(local), i, &qparam); +} + +void ieee80211_tx_skb(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb, + int encrypt) +{ + skb->dev = sdata->local->mdev; + skb_set_mac_header(skb, 0); + skb_set_network_header(skb, 0); + skb_set_transport_header(skb, 0); + + skb->iif = sdata->dev->ifindex; + skb->do_not_encrypt = !encrypt; + + dev_queue_xmit(skb); +} + +int ieee80211_set_freq(struct ieee80211_sub_if_data *sdata, int freqMHz) +{ + int ret = -EINVAL; + struct ieee80211_channel *chan; + struct ieee80211_local *local = sdata->local; + + chan = ieee80211_get_channel(local->hw.wiphy, freqMHz); + + if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) { + if (sdata->vif.type == NL80211_IFTYPE_ADHOC && + chan->flags & IEEE80211_CHAN_NO_IBSS) { + printk(KERN_DEBUG "%s: IBSS not allowed on frequency " + "%d MHz\n", sdata->dev->name, chan->center_freq); + return ret; + } + local->oper_channel = chan; + + if (local->sw_scanning || local->hw_scanning) + ret = 0; + else + ret = ieee80211_hw_config(local); + + rate_control_clear(local); + } + + return ret; +} + +u64 ieee80211_mandatory_rates(struct ieee80211_local *local, + enum ieee80211_band band) +{ + struct ieee80211_supported_band *sband; + struct ieee80211_rate *bitrates; + u64 mandatory_rates; + enum ieee80211_rate_flags mandatory_flag; + int i; + + sband = local->hw.wiphy->bands[band]; + if (!sband) { + WARN_ON(1); + sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; + } + + if (band == IEEE80211_BAND_2GHZ) + mandatory_flag = IEEE80211_RATE_MANDATORY_B; + else + mandatory_flag = IEEE80211_RATE_MANDATORY_A; + + bitrates = sband->bitrates; + mandatory_rates = 0; + for (i = 0; i < sband->n_bitrates; i++) + if (bitrates[i].flags & mandatory_flag) + mandatory_rates |= BIT(i); + return mandatory_rates; +} diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 5c2bf0a3d4db..f0e2d3ecb5c4 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -228,11 +228,10 @@ int ieee80211_wep_decrypt(struct ieee80211_local *local, struct sk_buff *skb, return -1; hdrlen = ieee80211_hdrlen(hdr->frame_control); - - if (skb->len < 8 + hdrlen) + if (skb->len < hdrlen + WEP_IV_LEN + WEP_ICV_LEN) return -1; - len = skb->len - hdrlen - 8; + len = skb->len - hdrlen - WEP_IV_LEN - WEP_ICV_LEN; keyidx = skb->data[hdrlen + 3] >> 6; @@ -292,9 +291,10 @@ u8 * ieee80211_wep_is_weak_iv(struct sk_buff *skb, struct ieee80211_key *key) ieee80211_rx_result ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) { - if ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_DATA && - ((rx->fc & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT || - (rx->fc & IEEE80211_FCTL_STYPE) != IEEE80211_STYPE_AUTH)) + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + + if (!ieee80211_is_data(hdr->frame_control) && + !ieee80211_is_auth(hdr->frame_control)) return RX_CONTINUE; if (!(rx->status->flag & RX_FLAG_DECRYPTED)) { @@ -303,7 +303,7 @@ ieee80211_crypto_wep_decrypt(struct ieee80211_rx_data *rx) } else if (!(rx->status->flag & RX_FLAG_IV_STRIPPED)) { ieee80211_wep_remove_iv(rx->local, rx->skb, rx->key); /* remove ICV */ - skb_trim(rx->skb, rx->skb->len - 4); + skb_trim(rx->skb, rx->skb->len - WEP_ICV_LEN); } return RX_CONTINUE; @@ -313,9 +313,6 @@ static int wep_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - info->control.iv_len = WEP_IV_LEN; - info->control.icv_len = WEP_ICV_LEN; - if (!(tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE)) { if (ieee80211_wep_encrypt(tx->local, skb, tx->key)) return -1; diff --git a/net/mac80211/wext.c b/net/mac80211/wext.c index 34fa8ed1e784..742f811ca416 100644 --- a/net/mac80211/wext.c +++ b/net/mac80211/wext.c @@ -27,22 +27,19 @@ #include "aes_ccm.h" -static int ieee80211_set_encryption(struct net_device *dev, u8 *sta_addr, +static int ieee80211_set_encryption(struct ieee80211_sub_if_data *sdata, u8 *sta_addr, int idx, int alg, int remove, int set_tx_key, const u8 *_key, size_t key_len) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_local *local = sdata->local; struct sta_info *sta; struct ieee80211_key *key; - struct ieee80211_sub_if_data *sdata; int err; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (idx < 0 || idx >= NUM_DEFAULT_KEYS) { printk(KERN_DEBUG "%s: set_encrypt - invalid idx=%d\n", - dev->name, idx); + sdata->dev->name, idx); return -EINVAL; } @@ -125,13 +122,13 @@ static int ieee80211_ioctl_siwgenie(struct net_device *dev, if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) return -EOPNOTSUPP; - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - int ret = ieee80211_sta_set_extra_ie(dev, extra, data->length); + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { + int ret = ieee80211_sta_set_extra_ie(sdata, extra, data->length); if (ret) return ret; sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; - ieee80211_sta_req_auth(dev, &sdata->u.sta); + ieee80211_sta_req_auth(sdata, &sdata->u.sta); return 0; } @@ -276,21 +273,21 @@ static int ieee80211_ioctl_siwmode(struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); int type; - if (sdata->vif.type == IEEE80211_IF_TYPE_VLAN) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) return -EOPNOTSUPP; switch (*mode) { case IW_MODE_INFRA: - type = IEEE80211_IF_TYPE_STA; + type = NL80211_IFTYPE_STATION; break; case IW_MODE_ADHOC: - type = IEEE80211_IF_TYPE_IBSS; + type = NL80211_IFTYPE_ADHOC; break; case IW_MODE_REPEAT: - type = IEEE80211_IF_TYPE_WDS; + type = NL80211_IFTYPE_WDS; break; case IW_MODE_MONITOR: - type = IEEE80211_IF_TYPE_MNTR; + type = NL80211_IFTYPE_MONITOR; break; default: return -EINVAL; @@ -308,22 +305,22 @@ static int ieee80211_ioctl_giwmode(struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); switch (sdata->vif.type) { - case IEEE80211_IF_TYPE_AP: + case NL80211_IFTYPE_AP: *mode = IW_MODE_MASTER; break; - case IEEE80211_IF_TYPE_STA: + case NL80211_IFTYPE_STATION: *mode = IW_MODE_INFRA; break; - case IEEE80211_IF_TYPE_IBSS: + case NL80211_IFTYPE_ADHOC: *mode = IW_MODE_ADHOC; break; - case IEEE80211_IF_TYPE_MNTR: + case NL80211_IFTYPE_MONITOR: *mode = IW_MODE_MONITOR; break; - case IEEE80211_IF_TYPE_WDS: + case NL80211_IFTYPE_WDS: *mode = IW_MODE_REPEAT; break; - case IEEE80211_IF_TYPE_VLAN: + case NL80211_IFTYPE_AP_VLAN: *mode = IW_MODE_SECOND; /* FIXME */ break; default: @@ -333,60 +330,31 @@ static int ieee80211_ioctl_giwmode(struct net_device *dev, return 0; } -int ieee80211_set_freq(struct net_device *dev, int freqMHz) -{ - int ret = -EINVAL; - struct ieee80211_channel *chan; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - - chan = ieee80211_get_channel(local->hw.wiphy, freqMHz); - - if (chan && !(chan->flags & IEEE80211_CHAN_DISABLED)) { - if (sdata->vif.type == IEEE80211_IF_TYPE_IBSS && - chan->flags & IEEE80211_CHAN_NO_IBSS) { - printk(KERN_DEBUG "%s: IBSS not allowed on frequency " - "%d MHz\n", dev->name, chan->center_freq); - return ret; - } - local->oper_channel = chan; - - if (local->sta_sw_scanning || local->sta_hw_scanning) - ret = 0; - else - ret = ieee80211_hw_config(local); - - rate_control_clear(local); - } - - return ret; -} - static int ieee80211_ioctl_siwfreq(struct net_device *dev, struct iw_request_info *info, struct iw_freq *freq, char *extra) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) + if (sdata->vif.type == NL80211_IFTYPE_STATION) sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_CHANNEL_SEL; /* freq->e == 0: freq->m = channel; otherwise freq = m * 10^e */ if (freq->e == 0) { if (freq->m < 0) { - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) + if (sdata->vif.type == NL80211_IFTYPE_STATION) sdata->u.sta.flags |= IEEE80211_STA_AUTO_CHANNEL_SEL; return 0; } else - return ieee80211_set_freq(dev, + return ieee80211_set_freq(sdata, ieee80211_channel_to_frequency(freq->m)); } else { int i, div = 1000000; for (i = 0; i < freq->e; i++) div /= 10; if (div > 0) - return ieee80211_set_freq(dev, freq->m / div); + return ieee80211_set_freq(sdata, freq->m / div); else return -EINVAL; } @@ -418,8 +386,8 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, len--; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { int ret; if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { if (len > IEEE80211_MAX_SSID_LEN) @@ -432,14 +400,14 @@ static int ieee80211_ioctl_siwessid(struct net_device *dev, sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_SSID_SEL; else sdata->u.sta.flags |= IEEE80211_STA_AUTO_SSID_SEL; - ret = ieee80211_sta_set_ssid(dev, ssid, len); + ret = ieee80211_sta_set_ssid(sdata, ssid, len); if (ret) return ret; - ieee80211_sta_req_auth(dev, &sdata->u.sta); + ieee80211_sta_req_auth(sdata, &sdata->u.sta); return 0; } - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { + if (sdata->vif.type == NL80211_IFTYPE_AP) { memcpy(sdata->u.ap.ssid, ssid, len); memset(sdata->u.ap.ssid + len, 0, IEEE80211_MAX_SSID_LEN - len); @@ -458,9 +426,9 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev, struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - int res = ieee80211_sta_get_ssid(dev, ssid, &len); + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { + int res = ieee80211_sta_get_ssid(sdata, ssid, &len); if (res == 0) { data->length = len; data->flags = 1; @@ -469,7 +437,7 @@ static int ieee80211_ioctl_giwessid(struct net_device *dev, return res; } - if (sdata->vif.type == IEEE80211_IF_TYPE_AP) { + if (sdata->vif.type == NL80211_IFTYPE_AP) { len = sdata->u.ap.ssid_len; if (len > IW_ESSID_MAX_SIZE) len = IW_ESSID_MAX_SIZE; @@ -489,8 +457,8 @@ static int ieee80211_ioctl_siwap(struct net_device *dev, struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { int ret; if (sdata->flags & IEEE80211_SDATA_USERSPACE_MLME) { memcpy(sdata->u.sta.bssid, (u8 *) &ap_addr->sa_data, @@ -504,12 +472,12 @@ static int ieee80211_ioctl_siwap(struct net_device *dev, sdata->u.sta.flags |= IEEE80211_STA_AUTO_BSSID_SEL; else sdata->u.sta.flags &= ~IEEE80211_STA_AUTO_BSSID_SEL; - ret = ieee80211_sta_set_bssid(dev, (u8 *) &ap_addr->sa_data); + ret = ieee80211_sta_set_bssid(sdata, (u8 *) &ap_addr->sa_data); if (ret) return ret; - ieee80211_sta_req_auth(dev, &sdata->u.sta); + ieee80211_sta_req_auth(sdata, &sdata->u.sta); return 0; - } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) { + } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { /* * If it is necessary to update the WDS peer address * while the interface is running, then we need to do @@ -537,10 +505,10 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, struct ieee80211_sub_if_data *sdata; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) { - if (sdata->u.sta.state == IEEE80211_ASSOCIATED || - sdata->u.sta.state == IEEE80211_IBSS_JOINED) { + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) { + if (sdata->u.sta.state == IEEE80211_STA_MLME_ASSOCIATED || + sdata->u.sta.state == IEEE80211_STA_MLME_IBSS_JOINED) { ap_addr->sa_family = ARPHRD_ETHER; memcpy(&ap_addr->sa_data, sdata->u.sta.bssid, ETH_ALEN); return 0; @@ -548,7 +516,7 @@ static int ieee80211_ioctl_giwap(struct net_device *dev, memset(&ap_addr->sa_data, 0, ETH_ALEN); return 0; } - } else if (sdata->vif.type == IEEE80211_IF_TYPE_WDS) { + } else if (sdata->vif.type == NL80211_IFTYPE_WDS) { ap_addr->sa_family = ARPHRD_ETHER; memcpy(&ap_addr->sa_data, sdata->u.wds.remote_addr, ETH_ALEN); return 0; @@ -570,10 +538,10 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev, if (!netif_running(dev)) return -ENETDOWN; - if (sdata->vif.type != IEEE80211_IF_TYPE_STA && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS && - sdata->vif.type != IEEE80211_IF_TYPE_MESH_POINT && - sdata->vif.type != IEEE80211_IF_TYPE_AP) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT && + sdata->vif.type != NL80211_IFTYPE_AP) return -EOPNOTSUPP; /* if SSID was specified explicitly then use that */ @@ -584,7 +552,7 @@ static int ieee80211_ioctl_siwscan(struct net_device *dev, ssid_len = req->essid_len; } - return ieee80211_sta_req_scan(dev, ssid, ssid_len); + return ieee80211_request_scan(sdata, ssid, ssid_len); } @@ -594,11 +562,14 @@ static int ieee80211_ioctl_giwscan(struct net_device *dev, { int res; struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); + struct ieee80211_sub_if_data *sdata; + + sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (local->sta_sw_scanning || local->sta_hw_scanning) + if (local->sw_scanning || local->hw_scanning) return -EAGAIN; - res = ieee80211_sta_scan_results(dev, info, extra, data->length); + res = ieee80211_scan_results(local, info, extra, data->length); if (res >= 0) { data->length = res; return 0; @@ -656,7 +627,7 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; @@ -665,8 +636,8 @@ static int ieee80211_ioctl_giwrate(struct net_device *dev, sta = sta_info_get(local, sdata->u.sta.bssid); - if (sta && sta->txrate_idx < sband->n_bitrates) - rate->value = sband->bitrates[sta->txrate_idx].bitrate; + if (sta && sta->last_txrate_idx < sband->n_bitrates) + rate->value = sband->bitrates[sta->last_txrate_idx].bitrate; else rate->value = 0; @@ -804,7 +775,7 @@ static int ieee80211_ioctl_siwfrag(struct net_device *dev, * configure it here */ if (local->ops->set_frag_threshold) - local->ops->set_frag_threshold( + return local->ops->set_frag_threshold( local_to_hw(local), local->fragmentation_threshold); @@ -887,17 +858,17 @@ static int ieee80211_ioctl_siwmlme(struct net_device *dev, struct iw_mlme *mlme = (struct iw_mlme *) extra; sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != IEEE80211_IF_TYPE_STA && - sdata->vif.type != IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type != NL80211_IFTYPE_STATION && + sdata->vif.type != NL80211_IFTYPE_ADHOC) return -EINVAL; switch (mlme->cmd) { case IW_MLME_DEAUTH: /* TODO: mlme->addr.sa_data */ - return ieee80211_sta_deauthenticate(dev, mlme->reason_code); + return ieee80211_sta_deauthenticate(sdata, mlme->reason_code); case IW_MLME_DISASSOC: /* TODO: mlme->addr.sa_data */ - return ieee80211_sta_disassociate(dev, mlme->reason_code); + return ieee80211_sta_disassociate(sdata, mlme->reason_code); default: return -EOPNOTSUPP; } @@ -938,7 +909,7 @@ static int ieee80211_ioctl_siwencode(struct net_device *dev, } return ieee80211_set_encryption( - dev, bcaddr, + sdata, bcaddr, idx, alg, remove, !sdata->default_key, keybuf, erq->length); @@ -983,7 +954,7 @@ static int ieee80211_ioctl_giwencode(struct net_device *dev, erq->length = sdata->keys[idx]->conf.keylen; erq->flags |= IW_ENCODE_ENABLED; - if (sdata->vif.type == IEEE80211_IF_TYPE_STA) { + if (sdata->vif.type == NL80211_IFTYPE_STATION) { struct ieee80211_if_sta *ifsta = &sdata->u.sta; switch (ifsta->auth_alg) { case WLAN_AUTH_OPEN: @@ -1057,7 +1028,7 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, sdata->drop_unencrypted = !!data->value; break; case IW_AUTH_PRIVACY_INVOKED: - if (sdata->vif.type != IEEE80211_IF_TYPE_STA) + if (sdata->vif.type != NL80211_IFTYPE_STATION) ret = -EINVAL; else { sdata->u.sta.flags &= ~IEEE80211_STA_PRIVACY_INVOKED; @@ -1072,8 +1043,8 @@ static int ieee80211_ioctl_siwauth(struct net_device *dev, } break; case IW_AUTH_80211_AUTH_ALG: - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) sdata->u.sta.auth_algs = data->value; else ret = -EOPNOTSUPP; @@ -1095,8 +1066,8 @@ static struct iw_statistics *ieee80211_get_wireless_stats(struct net_device *dev rcu_read_lock(); - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) sta = sta_info_get(local, sdata->u.sta.bssid); if (!sta) { wstats->discard.fragment = 0; @@ -1126,8 +1097,8 @@ static int ieee80211_ioctl_giwauth(struct net_device *dev, switch (data->flags & IW_AUTH_INDEX) { case IW_AUTH_80211_AUTH_ALG: - if (sdata->vif.type == IEEE80211_IF_TYPE_STA || - sdata->vif.type == IEEE80211_IF_TYPE_IBSS) + if (sdata->vif.type == NL80211_IFTYPE_STATION || + sdata->vif.type == NL80211_IFTYPE_ADHOC) data->value = sdata->u.sta.auth_algs; else ret = -EOPNOTSUPP; @@ -1184,7 +1155,7 @@ static int ieee80211_ioctl_siwencodeext(struct net_device *dev, } else idx--; - return ieee80211_set_encryption(dev, ext->addr.sa_data, idx, alg, + return ieee80211_set_encryption(sdata, ext->addr.sa_data, idx, alg, remove, ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c index 4310e2f65661..139b5f267b34 100644 --- a/net/mac80211/wme.c +++ b/net/mac80211/wme.c @@ -39,7 +39,7 @@ static unsigned int classify_1d(struct sk_buff *skb) return skb->priority - 256; switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): dscp = ip_hdr(skb)->tos & 0xfc; break; @@ -47,8 +47,6 @@ static unsigned int classify_1d(struct sk_buff *skb) return 0; } - if (dscp & 0x1c) - return 0; return dscp >> 5; } @@ -75,9 +73,8 @@ static int wme_downgrade_ac(struct sk_buff *skb) /* Indicate which queue to use. */ -static u16 classify80211(struct sk_buff *skb, struct net_device *dev) +static u16 classify80211(struct ieee80211_local *local, struct sk_buff *skb) { - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; if (!ieee80211_is_data(hdr->frame_control)) { @@ -115,14 +112,15 @@ static u16 classify80211(struct sk_buff *skb, struct net_device *dev) u16 ieee80211_select_queue(struct net_device *dev, struct sk_buff *skb) { + struct ieee80211_master_priv *mpriv = netdev_priv(dev); + struct ieee80211_local *local = mpriv->local; struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct sta_info *sta; u16 queue; u8 tid; - queue = classify80211(skb, dev); + queue = classify80211(local, skb); if (unlikely(queue >= local->hw.queues)) queue = local->hw.queues - 1; @@ -212,7 +210,7 @@ int ieee80211_ht_agg_queue_add(struct ieee80211_local *local, DECLARE_MAC_BUF(mac); printk(KERN_DEBUG "allocated aggregation queue" " %d tid %d addr %s pool=0x%lX\n", - i, tid, print_mac(mac, sta->addr), + i, tid, print_mac(mac, sta->sta.addr), local->queue_pool[0]); } #endif /* CONFIG_MAC80211_HT_DEBUG */ diff --git a/net/mac80211/wme.h b/net/mac80211/wme.h index 04de28c071a6..bc62f28a4d3d 100644 --- a/net/mac80211/wme.h +++ b/net/mac80211/wme.h @@ -1,5 +1,4 @@ /* - * IEEE 802.11 driver (80211.o) - QoS datatypes * Copyright 2004, Instant802 Networks, Inc. * Copyright 2005, Devicescape Software, Inc. * @@ -14,8 +13,6 @@ #include <linux/netdevice.h> #include "ieee80211_i.h" -#define QOS_CONTROL_LEN 2 - #define QOS_CONTROL_ACK_POLICY_NORMAL 0 #define QOS_CONTROL_ACK_POLICY_NOACK 1 diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 2f33df0dcccf..6db649480e8f 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -127,7 +127,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_UNUSABLE; - mac80211_ev_michael_mic_failure(rx->dev, rx->key->conf.keyidx, + mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx, (void *) skb->data); return RX_DROP_UNUSABLE; } @@ -152,9 +152,6 @@ static int tkip_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) int len, tail; u8 *pos; - info->control.icv_len = TKIP_ICV_LEN; - info->control.iv_len = TKIP_IV_LEN; - if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { /* hwaccel - with no need for preallocated room for IV/ICV */ @@ -256,7 +253,7 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) res = ieee80211_tkip_decrypt_data(rx->local->wep_rx_tfm, key, skb->data + hdrlen, - skb->len - hdrlen, rx->sta->addr, + skb->len - hdrlen, rx->sta->sta.addr, hdr->addr1, hwaccel, rx->queue, &rx->tkip_iv32, &rx->tkip_iv16); @@ -374,9 +371,6 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) u8 *pos, *pn; int i; - info->control.icv_len = CCMP_MIC_LEN; - info->control.iv_len = CCMP_HDR_LEN; - if ((tx->key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE) && !(tx->key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) { /* hwaccel - with no need for preallocated room for CCMP " diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index ee898e74808d..25dcef9f2194 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -38,10 +38,11 @@ config NF_CONNTRACK To compile it as a module, choose M here. If unsure, say N. +if NF_CONNTRACK + config NF_CT_ACCT bool "Connection tracking flow accounting" depends on NETFILTER_ADVANCED - depends on NF_CONNTRACK help If this option is enabled, the connection tracking code will keep per-flow packet and byte counters. @@ -63,7 +64,6 @@ config NF_CT_ACCT config NF_CONNTRACK_MARK bool 'Connection mark tracking support' depends on NETFILTER_ADVANCED - depends on NF_CONNTRACK help This option enables support for connection marks, used by the `CONNMARK' target and `connmark' match. Similar to the mark value @@ -72,7 +72,7 @@ config NF_CONNTRACK_MARK config NF_CONNTRACK_SECMARK bool 'Connection tracking security mark support' - depends on NF_CONNTRACK && NETWORK_SECMARK + depends on NETWORK_SECMARK default m if NETFILTER_ADVANCED=n help This option enables security markings to be applied to @@ -85,7 +85,6 @@ config NF_CONNTRACK_SECMARK config NF_CONNTRACK_EVENTS bool "Connection tracking events" - depends on NF_CONNTRACK depends on NETFILTER_ADVANCED help If this option is enabled, the connection tracking code will @@ -96,7 +95,7 @@ config NF_CONNTRACK_EVENTS config NF_CT_PROTO_DCCP tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)' - depends on EXPERIMENTAL && NF_CONNTRACK + depends on EXPERIMENTAL depends on NETFILTER_ADVANCED default IP_DCCP help @@ -107,11 +106,10 @@ config NF_CT_PROTO_DCCP config NF_CT_PROTO_GRE tristate - depends on NF_CONNTRACK config NF_CT_PROTO_SCTP tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' - depends on EXPERIMENTAL && NF_CONNTRACK + depends on EXPERIMENTAL depends on NETFILTER_ADVANCED default IP_SCTP help @@ -123,7 +121,6 @@ config NF_CT_PROTO_SCTP config NF_CT_PROTO_UDPLITE tristate 'UDP-Lite protocol connection tracking support' - depends on NF_CONNTRACK depends on NETFILTER_ADVANCED help With this option enabled, the layer 3 independent connection @@ -134,7 +131,6 @@ config NF_CT_PROTO_UDPLITE config NF_CONNTRACK_AMANDA tristate "Amanda backup protocol support" - depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select TEXTSEARCH select TEXTSEARCH_KMP @@ -150,7 +146,6 @@ config NF_CONNTRACK_AMANDA config NF_CONNTRACK_FTP tristate "FTP protocol support" - depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n help Tracking FTP connections is problematic: special helpers are @@ -165,7 +160,7 @@ config NF_CONNTRACK_FTP config NF_CONNTRACK_H323 tristate "H.323 protocol support" - depends on NF_CONNTRACK && (IPV6 || IPV6=n) + depends on (IPV6 || IPV6=n) depends on NETFILTER_ADVANCED help H.323 is a VoIP signalling protocol from ITU-T. As one of the most @@ -185,7 +180,6 @@ config NF_CONNTRACK_H323 config NF_CONNTRACK_IRC tristate "IRC protocol support" - depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n help There is a commonly-used extension to IRC called @@ -201,7 +195,6 @@ config NF_CONNTRACK_IRC config NF_CONNTRACK_NETBIOS_NS tristate "NetBIOS name service protocol support" - depends on NF_CONNTRACK depends on NETFILTER_ADVANCED help NetBIOS name service requests are sent as broadcast messages from an @@ -221,7 +214,6 @@ config NF_CONNTRACK_NETBIOS_NS config NF_CONNTRACK_PPTP tristate "PPtP protocol support" - depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NF_CT_PROTO_GRE help @@ -241,7 +233,7 @@ config NF_CONNTRACK_PPTP config NF_CONNTRACK_SANE tristate "SANE protocol support (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + depends on EXPERIMENTAL depends on NETFILTER_ADVANCED help SANE is a protocol for remote access to scanners as implemented @@ -255,7 +247,6 @@ config NF_CONNTRACK_SANE config NF_CONNTRACK_SIP tristate "SIP protocol support" - depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n help SIP is an application-layer control protocol that can establish, @@ -268,7 +259,6 @@ config NF_CONNTRACK_SIP config NF_CONNTRACK_TFTP tristate "TFTP protocol support" - depends on NF_CONNTRACK depends on NETFILTER_ADVANCED help TFTP connection tracking helper, this is required depending @@ -280,13 +270,28 @@ config NF_CONNTRACK_TFTP config NF_CT_NETLINK tristate 'Connection tracking netlink interface' - depends on NF_CONNTRACK select NETFILTER_NETLINK - depends on NF_NAT=n || NF_NAT default m if NETFILTER_ADVANCED=n help This option enables support for a netlink-based userspace interface +# transparent proxy support +config NETFILTER_TPROXY + tristate "Transparent proxying support (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on IP_NF_MANGLE + depends on NETFILTER_ADVANCED + help + This option enables transparent proxying support, that is, + support for handling non-locally bound IPv4 TCP and UDP sockets. + For it to work you will have to configure certain iptables rules + and use policy routing. For more information on how to set it up + see Documentation/networking/tproxy.txt. + + To compile it as a module, choose M here. If unsure, say N. + +endif # NF_CONNTRACK + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n @@ -294,11 +299,12 @@ config NETFILTER_XTABLES This is required if you intend to use any of ip_tables, ip6_tables or arp_tables. +if NETFILTER_XTABLES + # alphabetically ordered list of targets config NETFILTER_XT_TARGET_CLASSIFY tristate '"CLASSIFY" target support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option adds a `CLASSIFY' target, which enables the user to set @@ -311,8 +317,6 @@ config NETFILTER_XT_TARGET_CLASSIFY config NETFILTER_XT_TARGET_CONNMARK tristate '"CONNMARK" target support' - depends on NETFILTER_XTABLES - depends on IP_NF_MANGLE || IP6_NF_MANGLE depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NF_CONNTRACK_MARK @@ -325,9 +329,20 @@ config NETFILTER_XT_TARGET_CONNMARK <file:Documentation/kbuild/modules.txt>. The module will be called ipt_CONNMARK.ko. If unsure, say `N'. +config NETFILTER_XT_TARGET_CONNSECMARK + tristate '"CONNSECMARK" target support' + depends on NF_CONNTRACK && NF_CONNTRACK_SECMARK + default m if NETFILTER_ADVANCED=n + help + The CONNSECMARK target copies security markings from packets + to connections, and restores security markings from connections + to packets (if the packets are not already marked). This would + normally be used in conjunction with the SECMARK target. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_DSCP tristate '"DSCP" and "TOS" target support' - depends on NETFILTER_XTABLES depends on IP_NF_MANGLE || IP6_NF_MANGLE depends on NETFILTER_ADVANCED help @@ -344,7 +359,6 @@ config NETFILTER_XT_TARGET_DSCP config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' - depends on NETFILTER_XTABLES default m if NETFILTER_ADVANCED=n help This option adds a `MARK' target, which allows you to create rules @@ -356,21 +370,8 @@ config NETFILTER_XT_TARGET_MARK To compile it as a module, choose M here. If unsure, say N. -config NETFILTER_XT_TARGET_NFQUEUE - tristate '"NFQUEUE" target Support' - depends on NETFILTER_XTABLES - depends on NETFILTER_ADVANCED - help - This target replaced the old obsolete QUEUE target. - - As opposed to QUEUE, it supports 65535 different queues, - not just one. - - To compile it as a module, choose M here. If unsure, say N. - config NETFILTER_XT_TARGET_NFLOG tristate '"NFLOG" target support' - depends on NETFILTER_XTABLES default m if NETFILTER_ADVANCED=n help This option enables the NFLOG target, which allows to LOG @@ -380,9 +381,19 @@ config NETFILTER_XT_TARGET_NFLOG To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_NFQUEUE + tristate '"NFQUEUE" target Support' + depends on NETFILTER_ADVANCED + help + This target replaced the old obsolete QUEUE target. + + As opposed to QUEUE, it supports 65535 different queues, + not just one. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_NOTRACK tristate '"NOTRACK" target support' - depends on NETFILTER_XTABLES depends on IP_NF_RAW || IP6_NF_RAW depends on NF_CONNTRACK depends on NETFILTER_ADVANCED @@ -397,7 +408,6 @@ config NETFILTER_XT_TARGET_NOTRACK config NETFILTER_XT_TARGET_RATEEST tristate '"RATEEST" target support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option adds a `RATEEST' target, which allows to measure @@ -406,9 +416,23 @@ config NETFILTER_XT_TARGET_RATEEST To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_TPROXY + tristate '"TPROXY" target support (EXPERIMENTAL)' + depends on EXPERIMENTAL + depends on NETFILTER_TPROXY + depends on NETFILTER_XTABLES + depends on NETFILTER_ADVANCED + select NF_DEFRAG_IPV4 + help + This option adds a `TPROXY' target, which is somewhat similar to + REDIRECT. It can only be used in the mangle table and is useful + to redirect traffic to a transparent proxy. It does _not_ depend + on Netfilter connection tracking and NAT, unlike REDIRECT. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_TRACE tristate '"TRACE" target support' - depends on NETFILTER_XTABLES depends on IP_NF_RAW || IP6_NF_RAW depends on NETFILTER_ADVANCED help @@ -421,7 +445,7 @@ config NETFILTER_XT_TARGET_TRACE config NETFILTER_XT_TARGET_SECMARK tristate '"SECMARK" target support' - depends on NETFILTER_XTABLES && NETWORK_SECMARK + depends on NETWORK_SECMARK default m if NETFILTER_ADVANCED=n help The SECMARK target allows security marking of network @@ -429,21 +453,9 @@ config NETFILTER_XT_TARGET_SECMARK To compile it as a module, choose M here. If unsure, say N. -config NETFILTER_XT_TARGET_CONNSECMARK - tristate '"CONNSECMARK" target support' - depends on NETFILTER_XTABLES && NF_CONNTRACK && NF_CONNTRACK_SECMARK - default m if NETFILTER_ADVANCED=n - help - The CONNSECMARK target copies security markings from packets - to connections, and restores security markings from connections - to packets (if the packets are not already marked). This would - normally be used in conjunction with the SECMARK target. - - To compile it as a module, choose M here. If unsure, say N. - config NETFILTER_XT_TARGET_TCPMSS tristate '"TCPMSS" target support' - depends on NETFILTER_XTABLES && (IPV6 || IPV6=n) + depends on (IPV6 || IPV6=n) default m if NETFILTER_ADVANCED=n ---help--- This option adds a `TCPMSS' target, which allows you to alter the @@ -470,7 +482,7 @@ config NETFILTER_XT_TARGET_TCPMSS config NETFILTER_XT_TARGET_TCPOPTSTRIP tristate '"TCPOPTSTRIP" target support (EXPERIMENTAL)' - depends on EXPERIMENTAL && NETFILTER_XTABLES + depends on EXPERIMENTAL depends on IP_NF_MANGLE || IP6_NF_MANGLE depends on NETFILTER_ADVANCED help @@ -479,7 +491,6 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP config NETFILTER_XT_MATCH_COMMENT tristate '"comment" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option adds a `comment' dummy-match, which allows you to put @@ -490,7 +501,6 @@ config NETFILTER_XT_MATCH_COMMENT config NETFILTER_XT_MATCH_CONNBYTES tristate '"connbytes" per-connection counter match support' - depends on NETFILTER_XTABLES depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NF_CT_ACCT @@ -503,7 +513,6 @@ config NETFILTER_XT_MATCH_CONNBYTES config NETFILTER_XT_MATCH_CONNLIMIT tristate '"connlimit" match support"' - depends on NETFILTER_XTABLES depends on NF_CONNTRACK depends on NETFILTER_ADVANCED ---help--- @@ -512,7 +521,6 @@ config NETFILTER_XT_MATCH_CONNLIMIT config NETFILTER_XT_MATCH_CONNMARK tristate '"connmark" connection mark match support' - depends on NETFILTER_XTABLES depends on NF_CONNTRACK depends on NETFILTER_ADVANCED select NF_CONNTRACK_MARK @@ -526,7 +534,6 @@ config NETFILTER_XT_MATCH_CONNMARK config NETFILTER_XT_MATCH_CONNTRACK tristate '"conntrack" connection tracking match support' - depends on NETFILTER_XTABLES depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n help @@ -540,7 +547,6 @@ config NETFILTER_XT_MATCH_CONNTRACK config NETFILTER_XT_MATCH_DCCP tristate '"dccp" protocol match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED default IP_DCCP help @@ -553,7 +559,6 @@ config NETFILTER_XT_MATCH_DCCP config NETFILTER_XT_MATCH_DSCP tristate '"dscp" and "tos" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option adds a `DSCP' match, which allows you to match against @@ -569,7 +574,6 @@ config NETFILTER_XT_MATCH_DSCP config NETFILTER_XT_MATCH_ESP tristate '"esp" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This match extension allows you to match a range of SPIs @@ -577,9 +581,23 @@ config NETFILTER_XT_MATCH_ESP To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_HASHLIMIT + tristate '"hashlimit" match support' + depends on (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) + depends on NETFILTER_ADVANCED + help + This option adds a `hashlimit' match. + + As opposed to `limit', this match dynamically creates a hash table + of limit buckets, based on your selection of source/destination + addresses and/or ports. + + It enables you to express policies like `10kpps for any given + destination address' or `500pps from any given source address' + with a single rule. + config NETFILTER_XT_MATCH_HELPER tristate '"helper" match support' - depends on NETFILTER_XTABLES depends on NF_CONNTRACK depends on NETFILTER_ADVANCED help @@ -590,7 +608,6 @@ config NETFILTER_XT_MATCH_HELPER config NETFILTER_XT_MATCH_IPRANGE tristate '"iprange" address range match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED ---help--- This option adds a "iprange" match, which allows you to match based on @@ -601,7 +618,6 @@ config NETFILTER_XT_MATCH_IPRANGE config NETFILTER_XT_MATCH_LENGTH tristate '"length" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option allows you to match the length of a packet against a @@ -611,7 +627,6 @@ config NETFILTER_XT_MATCH_LENGTH config NETFILTER_XT_MATCH_LIMIT tristate '"limit" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help limit matching allows you to control the rate at which a rule can be @@ -622,7 +637,6 @@ config NETFILTER_XT_MATCH_LIMIT config NETFILTER_XT_MATCH_MAC tristate '"mac" address match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help MAC matching allows you to match packets based on the source @@ -632,7 +646,6 @@ config NETFILTER_XT_MATCH_MAC config NETFILTER_XT_MATCH_MARK tristate '"mark" match support' - depends on NETFILTER_XTABLES default m if NETFILTER_ADVANCED=n help Netfilter mark matching allows you to match packets based on the @@ -641,9 +654,18 @@ config NETFILTER_XT_MATCH_MARK To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_MULTIPORT + tristate '"multiport" Multiple port match support' + depends on NETFILTER_ADVANCED + help + Multiport matching allows you to match TCP or UDP packets based on + a series of source or destination ports: normally a rule can only + match a single range of ports. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_OWNER tristate '"owner" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED ---help--- Socket owner matching allows you to match locally-generated packets @@ -652,7 +674,7 @@ config NETFILTER_XT_MATCH_OWNER config NETFILTER_XT_MATCH_POLICY tristate 'IPsec "policy" match support' - depends on NETFILTER_XTABLES && XFRM + depends on XFRM default m if NETFILTER_ADVANCED=n help Policy matching allows you to match packets based on the @@ -661,20 +683,9 @@ config NETFILTER_XT_MATCH_POLICY To compile it as a module, choose M here. If unsure, say N. -config NETFILTER_XT_MATCH_MULTIPORT - tristate '"multiport" Multiple port match support' - depends on NETFILTER_XTABLES - depends on NETFILTER_ADVANCED - help - Multiport matching allows you to match TCP or UDP packets based on - a series of source or destination ports: normally a rule can only - match a single range of ports. - - To compile it as a module, choose M here. If unsure, say N. - config NETFILTER_XT_MATCH_PHYSDEV tristate '"physdev" match support' - depends on NETFILTER_XTABLES && BRIDGE && BRIDGE_NETFILTER + depends on BRIDGE && BRIDGE_NETFILTER depends on NETFILTER_ADVANCED help Physdev packet matching matches against the physical bridge ports @@ -684,7 +695,6 @@ config NETFILTER_XT_MATCH_PHYSDEV config NETFILTER_XT_MATCH_PKTTYPE tristate '"pkttype" packet type match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help Packet type matching allows you to match a packet by @@ -697,7 +707,6 @@ config NETFILTER_XT_MATCH_PKTTYPE config NETFILTER_XT_MATCH_QUOTA tristate '"quota" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option adds a `quota' match, which allows to match on a @@ -708,7 +717,6 @@ config NETFILTER_XT_MATCH_QUOTA config NETFILTER_XT_MATCH_RATEEST tristate '"rateest" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED select NETFILTER_XT_TARGET_RATEEST help @@ -719,7 +727,6 @@ config NETFILTER_XT_MATCH_RATEEST config NETFILTER_XT_MATCH_REALM tristate '"realm" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED select NET_CLS_ROUTE help @@ -732,9 +739,26 @@ config NETFILTER_XT_MATCH_REALM If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_RECENT + tristate '"recent" match support' + depends on NETFILTER_ADVANCED + ---help--- + This match is used for creating one or many lists of recently + used addresses and then matching against that/those list(s). + + Short options are available by using 'iptables -m recent -h' + Official Website: <http://snowman.net/projects/ipt_recent/> + +config NETFILTER_XT_MATCH_RECENT_PROC_COMPAT + bool 'Enable obsolete /proc/net/ipt_recent' + depends on NETFILTER_XT_MATCH_RECENT && PROC_FS + ---help--- + This option enables the old /proc/net/ipt_recent interface, + which has been obsoleted by /proc/net/xt_recent. + config NETFILTER_XT_MATCH_SCTP tristate '"sctp" protocol match support (EXPERIMENTAL)' - depends on NETFILTER_XTABLES && EXPERIMENTAL + depends on EXPERIMENTAL depends on NETFILTER_ADVANCED default IP_SCTP help @@ -745,9 +769,23 @@ config NETFILTER_XT_MATCH_SCTP If you want to compile it as a module, say M here and read <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. +config NETFILTER_XT_MATCH_SOCKET + tristate '"socket" match support (EXPERIMENTAL)' + depends on EXPERIMENTAL + depends on NETFILTER_TPROXY + depends on NETFILTER_XTABLES + depends on NETFILTER_ADVANCED + select NF_DEFRAG_IPV4 + help + This option adds a `socket' match, which can be used to match + packets for which a TCP or UDP socket lookup finds a valid socket. + It can be used in combination with the MARK target and policy + routing to implement full featured non-locally bound sockets. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_STATE tristate '"state" match support' - depends on NETFILTER_XTABLES depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n help @@ -759,7 +797,6 @@ config NETFILTER_XT_MATCH_STATE config NETFILTER_XT_MATCH_STATISTIC tristate '"statistic" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option adds a `statistic' match, which allows you to match @@ -769,7 +806,6 @@ config NETFILTER_XT_MATCH_STATISTIC config NETFILTER_XT_MATCH_STRING tristate '"string" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED select TEXTSEARCH select TEXTSEARCH_KMP @@ -783,7 +819,6 @@ config NETFILTER_XT_MATCH_STRING config NETFILTER_XT_MATCH_TCPMSS tristate '"tcpmss" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED help This option adds a `tcpmss' match, which allows you to examine the @@ -794,7 +829,6 @@ config NETFILTER_XT_MATCH_TCPMSS config NETFILTER_XT_MATCH_TIME tristate '"time" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED ---help--- This option adds a "time" match, which allows you to match based on @@ -809,7 +843,6 @@ config NETFILTER_XT_MATCH_TIME config NETFILTER_XT_MATCH_U32 tristate '"u32" match support' - depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED ---help--- u32 allows you to extract quantities of up to 4 bytes from a packet, @@ -821,20 +854,8 @@ config NETFILTER_XT_MATCH_U32 Details and examples are in the kernel module source. -config NETFILTER_XT_MATCH_HASHLIMIT - tristate '"hashlimit" match support' - depends on NETFILTER_XTABLES && (IP6_NF_IPTABLES || IP6_NF_IPTABLES=n) - depends on NETFILTER_ADVANCED - help - This option adds a `hashlimit' match. - - As opposed to `limit', this match dynamically creates a hash table - of limit buckets, based on your selection of source/destination - addresses and/or ports. - - It enables you to express policies like `10kpps for any given - destination address' or `500pps from any given source address' - with a single rule. +endif # NETFILTER_XTABLES endmenu +source "net/netfilter/ipvs/Kconfig" diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 3bd2cc556aea..da3d909e053f 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -34,6 +34,9 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o +# transparent proxy support +obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o @@ -48,6 +51,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o @@ -76,10 +80,15 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_POLICY) += xt_policy.o obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o +obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT) += xt_recent.o obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o +obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o obj-$(CONFIG_NETFILTER_XT_MATCH_TIME) += xt_time.o obj-$(CONFIG_NETFILTER_XT_MATCH_U32) += xt_u32.o + +# IPVS +obj-$(CONFIG_IP_VS) += ipvs/ diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 292fa28146fb..a90ac83c5918 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -26,7 +26,7 @@ static DEFINE_MUTEX(afinfo_mutex); -const struct nf_afinfo *nf_afinfo[NPROTO] __read_mostly; +const struct nf_afinfo *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly; EXPORT_SYMBOL(nf_afinfo); int nf_register_afinfo(const struct nf_afinfo *afinfo) @@ -51,7 +51,7 @@ void nf_unregister_afinfo(const struct nf_afinfo *afinfo) } EXPORT_SYMBOL_GPL(nf_unregister_afinfo); -struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS] __read_mostly; +struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly; EXPORT_SYMBOL(nf_hooks); static DEFINE_MUTEX(nf_hook_mutex); @@ -113,7 +113,7 @@ EXPORT_SYMBOL(nf_unregister_hooks); unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, - int hook, + unsigned int hook, const struct net_device *indev, const struct net_device *outdev, struct list_head **i, @@ -155,7 +155,7 @@ unsigned int nf_iterate(struct list_head *head, /* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. */ -int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, +int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), @@ -165,14 +165,6 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, unsigned int verdict; int ret = 0; -#ifdef CONFIG_NET_NS - struct net *net; - - net = indev == NULL ? dev_net(outdev) : dev_net(indev); - if (net != &init_net) - return 1; -#endif - /* We may already have this, but read-locks nest anyway */ rcu_read_lock(); @@ -264,7 +256,7 @@ EXPORT_SYMBOL(proc_net_netfilter); void __init netfilter_init(void) { int i, h; - for (i = 0; i < NPROTO; i++) { + for (i = 0; i < ARRAY_SIZE(nf_hooks); i++) { for (h = 0; h < NF_MAX_HOOKS; h++) INIT_LIST_HEAD(&nf_hooks[i][h]); } diff --git a/net/ipv4/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 09d0c3f35669..79a698052218 100644 --- a/net/ipv4/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -2,8 +2,8 @@ # IP Virtual Server configuration # menuconfig IP_VS - tristate "IP virtual server support (EXPERIMENTAL)" - depends on NETFILTER + tristate "IP virtual server support" + depends on NET && INET && NETFILTER ---help--- IP Virtual Server support will let you build a high-performance virtual server based on cluster of two or more real servers. This @@ -24,6 +24,16 @@ menuconfig IP_VS if IP_VS +config IP_VS_IPV6 + bool "IPv6 support for IPVS" + depends on EXPERIMENTAL && (IPV6 = y || IP_VS = IPV6) + ---help--- + Add IPv6 support to IPVS. This is incomplete and might be dangerous. + + See http://www.mindbasket.com/ipvs for more information. + + Say N if unsure. + config IP_VS_DEBUG bool "IP virtual server debugging" ---help--- @@ -33,7 +43,8 @@ config IP_VS_DEBUG config IP_VS_TAB_BITS int "IPVS connection table size (the Nth power of 2)" - default "12" + range 8 20 + default 12 ---help--- The IPVS connection hash table uses the chaining scheme to handle hash collisions. Using a big IPVS connection hash table will greatly @@ -71,14 +82,20 @@ config IP_VS_PROTO_UDP This option enables support for load balancing UDP transport protocol. Say Y if unsure. +config IP_VS_PROTO_AH_ESP + bool + depends on UNDEFINED + config IP_VS_PROTO_ESP bool "ESP load balancing support" + select IP_VS_PROTO_AH_ESP ---help--- This option enables support for load balancing ESP (Encapsulation Security Payload) transport protocol. Say Y if unsure. config IP_VS_PROTO_AH bool "AH load balancing support" + select IP_VS_PROTO_AH_ESP ---help--- This option enables support for load balancing AH (Authentication Header) transport protocol. Say Y if unsure. diff --git a/net/ipv4/ipvs/Makefile b/net/netfilter/ipvs/Makefile index 30e85de9ffff..73a46fe1fe4c 100644 --- a/net/ipv4/ipvs/Makefile +++ b/net/netfilter/ipvs/Makefile @@ -6,8 +6,7 @@ ip_vs_proto-objs-y := ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TCP) += ip_vs_proto_tcp.o ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o -ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o -ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o +ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ diff --git a/net/ipv4/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c index 201b8ea3020d..201b8ea3020d 100644 --- a/net/ipv4/ipvs/ip_vs_app.c +++ b/net/netfilter/ipvs/ip_vs_app.c diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 44a6872dc245..9a24332fbed8 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -114,9 +114,18 @@ static inline void ct_write_unlock_bh(unsigned key) /* * Returns hash value for IPVS connection entry */ -static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port) +static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, + const union nf_inet_addr *addr, + __be16 port) { - return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd) +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), + (__force u32)port, proto, ip_vs_conn_rnd) + & IP_VS_CONN_TAB_MASK; +#endif + return jhash_3words((__force u32)addr->ip, (__force u32)port, proto, + ip_vs_conn_rnd) & IP_VS_CONN_TAB_MASK; } @@ -131,7 +140,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) int ret; /* Hash by protocol, client address and port */ - hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); + hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); @@ -162,7 +171,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) int ret; /* unhash it and decrease its reference counter */ - hash = ip_vs_conn_hashkey(cp->protocol, cp->caddr, cp->cport); + hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); ct_write_lock(hash); @@ -187,20 +196,23 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) * d_addr, d_port: pkt dest address (load balancer) */ static inline struct ip_vs_conn *__ip_vs_conn_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp; - hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); + hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (s_addr==cp->caddr && s_port==cp->cport && - d_port==cp->vport && d_addr==cp->vaddr && + if (cp->af == af && + ip_vs_addr_equal(af, s_addr, &cp->caddr) && + ip_vs_addr_equal(af, d_addr, &cp->vaddr) && + s_port == cp->cport && d_port == cp->vport && ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && - protocol==cp->protocol) { + protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); ct_read_unlock(hash); @@ -214,39 +226,44 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get } struct ip_vs_conn *ip_vs_conn_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { struct ip_vs_conn *cp; - cp = __ip_vs_conn_in_get(protocol, s_addr, s_port, d_addr, d_port); + cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port); if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) - cp = __ip_vs_conn_in_get(protocol, s_addr, 0, d_addr, d_port); + cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr, + d_port); - IP_VS_DBG(9, "lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", - ip_vs_proto_name(protocol), - NIPQUAD(s_addr), ntohs(s_port), - NIPQUAD(d_addr), ntohs(d_port), - cp?"hit":"not hit"); + IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n", + ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), + IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), + cp ? "hit" : "not hit"); return cp; } /* Get reference to connection template */ struct ip_vs_conn *ip_vs_ct_in_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp; - hash = ip_vs_conn_hashkey(protocol, s_addr, s_port); + hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (s_addr==cp->caddr && s_port==cp->cport && - d_port==cp->vport && d_addr==cp->vaddr && + if (cp->af == af && + ip_vs_addr_equal(af, s_addr, &cp->caddr) && + ip_vs_addr_equal(af, d_addr, &cp->vaddr) && + s_port == cp->cport && d_port == cp->vport && cp->flags & IP_VS_CONN_F_TEMPLATE && - protocol==cp->protocol) { + protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); goto out; @@ -257,11 +274,11 @@ struct ip_vs_conn *ip_vs_ct_in_get out: ct_read_unlock(hash); - IP_VS_DBG(9, "template lookup/in %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", - ip_vs_proto_name(protocol), - NIPQUAD(s_addr), ntohs(s_port), - NIPQUAD(d_addr), ntohs(d_port), - cp?"hit":"not hit"); + IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", + ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), + IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), + cp ? "hit" : "not hit"); return cp; } @@ -273,7 +290,8 @@ struct ip_vs_conn *ip_vs_ct_in_get * d_addr, d_port: pkt dest address (foreign host) */ struct ip_vs_conn *ip_vs_conn_out_get -(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) +(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, + const union nf_inet_addr *d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp, *ret=NULL; @@ -281,13 +299,15 @@ struct ip_vs_conn *ip_vs_conn_out_get /* * Check for "full" addressed entries */ - hash = ip_vs_conn_hashkey(protocol, d_addr, d_port); + hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (d_addr == cp->caddr && d_port == cp->cport && - s_port == cp->dport && s_addr == cp->daddr && + if (cp->af == af && + ip_vs_addr_equal(af, d_addr, &cp->caddr) && + ip_vs_addr_equal(af, s_addr, &cp->daddr) && + d_port == cp->cport && s_port == cp->dport && protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); @@ -298,11 +318,11 @@ struct ip_vs_conn *ip_vs_conn_out_get ct_read_unlock(hash); - IP_VS_DBG(9, "lookup/out %s %u.%u.%u.%u:%d->%u.%u.%u.%u:%d %s\n", - ip_vs_proto_name(protocol), - NIPQUAD(s_addr), ntohs(s_port), - NIPQUAD(d_addr), ntohs(d_port), - ret?"hit":"not hit"); + IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", + ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), + IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), + ret ? "hit" : "not hit"); return ret; } @@ -369,6 +389,33 @@ static inline void ip_vs_bind_xmit(struct ip_vs_conn *cp) } } +#ifdef CONFIG_IP_VS_IPV6 +static inline void ip_vs_bind_xmit_v6(struct ip_vs_conn *cp) +{ + switch (IP_VS_FWD_METHOD(cp)) { + case IP_VS_CONN_F_MASQ: + cp->packet_xmit = ip_vs_nat_xmit_v6; + break; + + case IP_VS_CONN_F_TUNNEL: + cp->packet_xmit = ip_vs_tunnel_xmit_v6; + break; + + case IP_VS_CONN_F_DROUTE: + cp->packet_xmit = ip_vs_dr_xmit_v6; + break; + + case IP_VS_CONN_F_LOCALNODE: + cp->packet_xmit = ip_vs_null_xmit; + break; + + case IP_VS_CONN_F_BYPASS: + cp->packet_xmit = ip_vs_bypass_xmit_v6; + break; + } +} +#endif + static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest) { @@ -402,16 +449,16 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) cp->flags |= atomic_read(&dest->conn_flags); cp->dest = dest; - IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " - "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " - "dest->refcnt:%d\n", - ip_vs_proto_name(cp->protocol), - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), - NIPQUAD(cp->daddr), ntohs(cp->dport), - ip_vs_fwd_tag(cp), cp->state, - cp->flags, atomic_read(&cp->refcnt), - atomic_read(&dest->refcnt)); + IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d " + "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " + "dest->refcnt:%d\n", + ip_vs_proto_name(cp->protocol), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), + IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), + ip_vs_fwd_tag(cp), cp->state, + cp->flags, atomic_read(&cp->refcnt), + atomic_read(&dest->refcnt)); /* Update the connection counters */ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { @@ -444,8 +491,9 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp) struct ip_vs_dest *dest; if ((cp) && (!cp->dest)) { - dest = ip_vs_find_dest(cp->daddr, cp->dport, - cp->vaddr, cp->vport, cp->protocol); + dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport, + &cp->vaddr, cp->vport, + cp->protocol); ip_vs_bind_dest(cp, dest); return dest; } else @@ -464,16 +512,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp) if (!dest) return; - IP_VS_DBG(7, "Unbind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " - "d:%u.%u.%u.%u:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " - "dest->refcnt:%d\n", - ip_vs_proto_name(cp->protocol), - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), - NIPQUAD(cp->daddr), ntohs(cp->dport), - ip_vs_fwd_tag(cp), cp->state, - cp->flags, atomic_read(&cp->refcnt), - atomic_read(&dest->refcnt)); + IP_VS_DBG_BUF(7, "Unbind-dest %s c:%s:%d v:%s:%d " + "d:%s:%d fwd:%c s:%u conn->flags:%X conn->refcnt:%d " + "dest->refcnt:%d\n", + ip_vs_proto_name(cp->protocol), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), ntohs(cp->vport), + IP_VS_DBG_ADDR(cp->af, &cp->daddr), ntohs(cp->dport), + ip_vs_fwd_tag(cp), cp->state, + cp->flags, atomic_read(&cp->refcnt), + atomic_read(&dest->refcnt)); /* Update the connection counters */ if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) { @@ -526,13 +574,16 @@ int ip_vs_check_template(struct ip_vs_conn *ct) !(dest->flags & IP_VS_DEST_F_AVAILABLE) || (sysctl_ip_vs_expire_quiescent_template && (atomic_read(&dest->weight) == 0))) { - IP_VS_DBG(9, "check_template: dest not available for " - "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d " - "-> d:%u.%u.%u.%u:%d\n", - ip_vs_proto_name(ct->protocol), - NIPQUAD(ct->caddr), ntohs(ct->cport), - NIPQUAD(ct->vaddr), ntohs(ct->vport), - NIPQUAD(ct->daddr), ntohs(ct->dport)); + IP_VS_DBG_BUF(9, "check_template: dest not available for " + "protocol %s s:%s:%d v:%s:%d " + "-> d:%s:%d\n", + ip_vs_proto_name(ct->protocol), + IP_VS_DBG_ADDR(ct->af, &ct->caddr), + ntohs(ct->cport), + IP_VS_DBG_ADDR(ct->af, &ct->vaddr), + ntohs(ct->vport), + IP_VS_DBG_ADDR(ct->af, &ct->daddr), + ntohs(ct->dport)); /* * Invalidate the connection template @@ -625,8 +676,9 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) * Create a new connection entry and hash it into the ip_vs_conn_tab */ struct ip_vs_conn * -ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport, - __be32 daddr, __be16 dport, unsigned flags, +ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, + const union nf_inet_addr *vaddr, __be16 vport, + const union nf_inet_addr *daddr, __be16 dport, unsigned flags, struct ip_vs_dest *dest) { struct ip_vs_conn *cp; @@ -640,12 +692,13 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport INIT_LIST_HEAD(&cp->c_list); setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); + cp->af = af; cp->protocol = proto; - cp->caddr = caddr; + ip_vs_addr_copy(af, &cp->caddr, caddr); cp->cport = cport; - cp->vaddr = vaddr; + ip_vs_addr_copy(af, &cp->vaddr, vaddr); cp->vport = vport; - cp->daddr = daddr; + ip_vs_addr_copy(af, &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; spin_lock_init(&cp->lock); @@ -672,7 +725,12 @@ ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport cp->timeout = 3*HZ; /* Bind its packet transmitter */ - ip_vs_bind_xmit(cp); +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + ip_vs_bind_xmit_v6(cp); + else +#endif + ip_vs_bind_xmit(cp); if (unlikely(pp && atomic_read(&pp->appcnt))) ip_vs_bind_app(cp, pp); @@ -760,12 +818,26 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) else { const struct ip_vs_conn *cp = v; - seq_printf(seq, - "%-3s %08X %04X %08X %04X %08X %04X %-11s %7lu\n", +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + seq_printf(seq, + "%-3s " NIP6_FMT " %04X " NIP6_FMT + " %04X " NIP6_FMT " %04X %-11s %7lu\n", + ip_vs_proto_name(cp->protocol), + NIP6(cp->caddr.in6), ntohs(cp->cport), + NIP6(cp->vaddr.in6), ntohs(cp->vport), + NIP6(cp->daddr.in6), ntohs(cp->dport), + ip_vs_state_name(cp->protocol, cp->state), + (cp->timer.expires-jiffies)/HZ); + else +#endif + seq_printf(seq, + "%-3s %08X %04X %08X %04X" + " %08X %04X %-11s %7lu\n", ip_vs_proto_name(cp->protocol), - ntohl(cp->caddr), ntohs(cp->cport), - ntohl(cp->vaddr), ntohs(cp->vport), - ntohl(cp->daddr), ntohs(cp->dport), + ntohl(cp->caddr.ip), ntohs(cp->cport), + ntohl(cp->vaddr.ip), ntohs(cp->vport), + ntohl(cp->daddr.ip), ntohs(cp->dport), ip_vs_state_name(cp->protocol, cp->state), (cp->timer.expires-jiffies)/HZ); } @@ -809,12 +881,27 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) else { const struct ip_vs_conn *cp = v; - seq_printf(seq, - "%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n", +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + seq_printf(seq, + "%-3s " NIP6_FMT " %04X " NIP6_FMT + " %04X " NIP6_FMT " %04X %-11s %-6s %7lu\n", + ip_vs_proto_name(cp->protocol), + NIP6(cp->caddr.in6), ntohs(cp->cport), + NIP6(cp->vaddr.in6), ntohs(cp->vport), + NIP6(cp->daddr.in6), ntohs(cp->dport), + ip_vs_state_name(cp->protocol, cp->state), + ip_vs_origin_name(cp->flags), + (cp->timer.expires-jiffies)/HZ); + else +#endif + seq_printf(seq, + "%-3s %08X %04X %08X %04X " + "%08X %04X %-11s %-6s %7lu\n", ip_vs_proto_name(cp->protocol), - ntohl(cp->caddr), ntohs(cp->cport), - ntohl(cp->vaddr), ntohs(cp->vport), - ntohl(cp->daddr), ntohs(cp->dport), + ntohl(cp->caddr.ip), ntohs(cp->cport), + ntohl(cp->vaddr.ip), ntohs(cp->vport), + ntohl(cp->daddr.ip), ntohs(cp->dport), ip_vs_state_name(cp->protocol, cp->state), ip_vs_origin_name(cp->flags), (cp->timer.expires-jiffies)/HZ); diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index a7879eafc3b5..958abf3e5f8c 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -39,6 +39,11 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> +#ifdef CONFIG_IP_VS_IPV6 +#include <net/ipv6.h> +#include <linux/netfilter_ipv6.h> +#endif + #include <net/ip_vs.h> @@ -60,6 +65,7 @@ EXPORT_SYMBOL(ip_vs_get_debug_level); /* ID used in ICMP lookups */ #define icmp_id(icmph) (((icmph)->un).echo.id) +#define icmpv6_id(icmph) (icmph->icmp6_dataun.u_echo.identifier) const char *ip_vs_proto_name(unsigned proto) { @@ -74,6 +80,10 @@ const char *ip_vs_proto_name(unsigned proto) return "TCP"; case IPPROTO_ICMP: return "ICMP"; +#ifdef CONFIG_IP_VS_IPV6 + case IPPROTO_ICMPV6: + return "ICMPv6"; +#endif default: sprintf(buf, "IP_%d", proto); return buf; @@ -92,18 +102,18 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_dest *dest = cp->dest; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { spin_lock(&dest->stats.lock); - dest->stats.inpkts++; - dest->stats.inbytes += skb->len; + dest->stats.ustats.inpkts++; + dest->stats.ustats.inbytes += skb->len; spin_unlock(&dest->stats.lock); spin_lock(&dest->svc->stats.lock); - dest->svc->stats.inpkts++; - dest->svc->stats.inbytes += skb->len; + dest->svc->stats.ustats.inpkts++; + dest->svc->stats.ustats.inbytes += skb->len; spin_unlock(&dest->svc->stats.lock); spin_lock(&ip_vs_stats.lock); - ip_vs_stats.inpkts++; - ip_vs_stats.inbytes += skb->len; + ip_vs_stats.ustats.inpkts++; + ip_vs_stats.ustats.inbytes += skb->len; spin_unlock(&ip_vs_stats.lock); } } @@ -115,18 +125,18 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_dest *dest = cp->dest; if (dest && (dest->flags & IP_VS_DEST_F_AVAILABLE)) { spin_lock(&dest->stats.lock); - dest->stats.outpkts++; - dest->stats.outbytes += skb->len; + dest->stats.ustats.outpkts++; + dest->stats.ustats.outbytes += skb->len; spin_unlock(&dest->stats.lock); spin_lock(&dest->svc->stats.lock); - dest->svc->stats.outpkts++; - dest->svc->stats.outbytes += skb->len; + dest->svc->stats.ustats.outpkts++; + dest->svc->stats.ustats.outbytes += skb->len; spin_unlock(&dest->svc->stats.lock); spin_lock(&ip_vs_stats.lock); - ip_vs_stats.outpkts++; - ip_vs_stats.outbytes += skb->len; + ip_vs_stats.ustats.outpkts++; + ip_vs_stats.ustats.outbytes += skb->len; spin_unlock(&ip_vs_stats.lock); } } @@ -136,15 +146,15 @@ static inline void ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) { spin_lock(&cp->dest->stats.lock); - cp->dest->stats.conns++; + cp->dest->stats.ustats.conns++; spin_unlock(&cp->dest->stats.lock); spin_lock(&svc->stats.lock); - svc->stats.conns++; + svc->stats.ustats.conns++; spin_unlock(&svc->stats.lock); spin_lock(&ip_vs_stats.lock); - ip_vs_stats.conns++; + ip_vs_stats.ustats.conns++; spin_unlock(&ip_vs_stats.lock); } @@ -173,20 +183,28 @@ ip_vs_sched_persist(struct ip_vs_service *svc, __be16 ports[2]) { struct ip_vs_conn *cp = NULL; - struct iphdr *iph = ip_hdr(skb); + struct ip_vs_iphdr iph; struct ip_vs_dest *dest; struct ip_vs_conn *ct; - __be16 dport; /* destination port to forward */ - __be32 snet; /* source network of the client, after masking */ + __be16 dport; /* destination port to forward */ + union nf_inet_addr snet; /* source network of the client, + after masking */ + + ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); /* Mask saddr with the netmask to adjust template granularity */ - snet = iph->saddr & svc->netmask; +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) + ipv6_addr_prefix(&snet.in6, &iph.saddr.in6, svc->netmask); + else +#endif + snet.ip = iph.saddr.ip & svc->netmask; - IP_VS_DBG(6, "p-schedule: src %u.%u.%u.%u:%u dest %u.%u.%u.%u:%u " - "mnet %u.%u.%u.%u\n", - NIPQUAD(iph->saddr), ntohs(ports[0]), - NIPQUAD(iph->daddr), ntohs(ports[1]), - NIPQUAD(snet)); + IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u " + "mnet %s\n", + IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]), + IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]), + IP_VS_DBG_ADDR(svc->af, &snet)); /* * As far as we know, FTP is a very complicated network protocol, and @@ -204,11 +222,11 @@ ip_vs_sched_persist(struct ip_vs_service *svc, if (ports[1] == svc->port) { /* Check if a template already exists */ if (svc->port != FTPPORT) - ct = ip_vs_ct_in_get(iph->protocol, snet, 0, - iph->daddr, ports[1]); + ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, + &iph.daddr, ports[1]); else - ct = ip_vs_ct_in_get(iph->protocol, snet, 0, - iph->daddr, 0); + ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, + &iph.daddr, 0); if (!ct || !ip_vs_check_template(ct)) { /* @@ -228,18 +246,18 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * for ftp service. */ if (svc->port != FTPPORT) - ct = ip_vs_conn_new(iph->protocol, - snet, 0, - iph->daddr, + ct = ip_vs_conn_new(svc->af, iph.protocol, + &snet, 0, + &iph.daddr, ports[1], - dest->addr, dest->port, + &dest->addr, dest->port, IP_VS_CONN_F_TEMPLATE, dest); else - ct = ip_vs_conn_new(iph->protocol, - snet, 0, - iph->daddr, 0, - dest->addr, 0, + ct = ip_vs_conn_new(svc->af, iph.protocol, + &snet, 0, + &iph.daddr, 0, + &dest->addr, 0, IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) @@ -258,12 +276,16 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * fwmark template: <IPPROTO_IP,caddr,0,fwmark,0,daddr,0> * port zero template: <protocol,caddr,0,vaddr,0,daddr,0> */ - if (svc->fwmark) - ct = ip_vs_ct_in_get(IPPROTO_IP, snet, 0, - htonl(svc->fwmark), 0); - else - ct = ip_vs_ct_in_get(iph->protocol, snet, 0, - iph->daddr, 0); + if (svc->fwmark) { + union nf_inet_addr fwmark = { + .all = { 0, 0, 0, htonl(svc->fwmark) } + }; + + ct = ip_vs_ct_in_get(svc->af, IPPROTO_IP, &snet, 0, + &fwmark, 0); + } else + ct = ip_vs_ct_in_get(svc->af, iph.protocol, &snet, 0, + &iph.daddr, 0); if (!ct || !ip_vs_check_template(ct)) { /* @@ -282,18 +304,22 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a template according to the service */ - if (svc->fwmark) - ct = ip_vs_conn_new(IPPROTO_IP, - snet, 0, - htonl(svc->fwmark), 0, - dest->addr, 0, + if (svc->fwmark) { + union nf_inet_addr fwmark = { + .all = { 0, 0, 0, htonl(svc->fwmark) } + }; + + ct = ip_vs_conn_new(svc->af, IPPROTO_IP, + &snet, 0, + &fwmark, 0, + &dest->addr, 0, IP_VS_CONN_F_TEMPLATE, dest); - else - ct = ip_vs_conn_new(iph->protocol, - snet, 0, - iph->daddr, 0, - dest->addr, 0, + } else + ct = ip_vs_conn_new(svc->af, iph.protocol, + &snet, 0, + &iph.daddr, 0, + &dest->addr, 0, IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) @@ -310,10 +336,10 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a new connection according to the template */ - cp = ip_vs_conn_new(iph->protocol, - iph->saddr, ports[0], - iph->daddr, ports[1], - dest->addr, dport, + cp = ip_vs_conn_new(svc->af, iph.protocol, + &iph.saddr, ports[0], + &iph.daddr, ports[1], + &dest->addr, dport, 0, dest); if (cp == NULL) { @@ -342,12 +368,12 @@ struct ip_vs_conn * ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { struct ip_vs_conn *cp = NULL; - struct iphdr *iph = ip_hdr(skb); + struct ip_vs_iphdr iph; struct ip_vs_dest *dest; __be16 _ports[2], *pptr; - pptr = skb_header_pointer(skb, iph->ihl*4, - sizeof(_ports), _ports); + ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); + pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); if (pptr == NULL) return NULL; @@ -377,22 +403,22 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* * Create a connection entry. */ - cp = ip_vs_conn_new(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1], - dest->addr, dest->port?dest->port:pptr[1], + cp = ip_vs_conn_new(svc->af, iph.protocol, + &iph.saddr, pptr[0], + &iph.daddr, pptr[1], + &dest->addr, dest->port ? dest->port : pptr[1], 0, dest); if (cp == NULL) return NULL; - IP_VS_DBG(6, "Schedule fwd:%c c:%u.%u.%u.%u:%u v:%u.%u.%u.%u:%u " - "d:%u.%u.%u.%u:%u conn->flags:%X conn->refcnt:%d\n", - ip_vs_fwd_tag(cp), - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), - NIPQUAD(cp->daddr), ntohs(cp->dport), - cp->flags, atomic_read(&cp->refcnt)); + IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " + "d:%s:%u conn->flags:%X conn->refcnt:%d\n", + ip_vs_fwd_tag(cp), + IP_VS_DBG_ADDR(svc->af, &cp->caddr), ntohs(cp->cport), + IP_VS_DBG_ADDR(svc->af, &cp->vaddr), ntohs(cp->vport), + IP_VS_DBG_ADDR(svc->af, &cp->daddr), ntohs(cp->dport), + cp->flags, atomic_read(&cp->refcnt)); ip_vs_conn_stats(cp, svc); return cp; @@ -408,31 +434,39 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp) { __be16 _ports[2], *pptr; - struct iphdr *iph = ip_hdr(skb); + struct ip_vs_iphdr iph; + int unicast; + ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); - pptr = skb_header_pointer(skb, iph->ihl*4, - sizeof(_ports), _ports); + pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); if (pptr == NULL) { ip_vs_service_put(svc); return NF_DROP; } +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) + unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST; + else +#endif + unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST); + /* if it is fwmark-based service, the cache_bypass sysctl is up - and the destination is RTN_UNICAST (and not local), then create + and the destination is a non-local unicast, then create a cache_bypass connection entry */ - if (sysctl_ip_vs_cache_bypass && svc->fwmark - && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) { + if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { int ret, cs; struct ip_vs_conn *cp; + union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; ip_vs_service_put(svc); /* create a new connection entry */ IP_VS_DBG(6, "ip_vs_leave: create a cache_bypass entry\n"); - cp = ip_vs_conn_new(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1], - 0, 0, + cp = ip_vs_conn_new(svc->af, iph.protocol, + &iph.saddr, pptr[0], + &iph.daddr, pptr[1], + &daddr, 0, IP_VS_CONN_F_BYPASS, NULL); if (cp == NULL) @@ -473,7 +507,14 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, * created, the TCP RST packet cannot be sent, instead that * ICMP_PORT_UNREACH is sent here no matter it is TCP/UDP. --WZ */ - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, + skb->dev); + else +#endif + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + return NF_DROP; } @@ -512,6 +553,14 @@ static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user) return err; } +#ifdef CONFIG_IP_VS_IPV6 +static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user) +{ + /* TODO IPv6: Find out what to do here for IPv6 */ + return 0; +} +#endif + /* * Packet has been made sufficiently writable in caller * - inout: 1=in->out, 0=out->in @@ -526,14 +575,14 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, struct iphdr *ciph = (struct iphdr *)(icmph + 1); if (inout) { - iph->saddr = cp->vaddr; + iph->saddr = cp->vaddr.ip; ip_send_check(iph); - ciph->daddr = cp->vaddr; + ciph->daddr = cp->vaddr.ip; ip_send_check(ciph); } else { - iph->daddr = cp->daddr; + iph->daddr = cp->daddr.ip; ip_send_check(iph); - ciph->saddr = cp->daddr; + ciph->saddr = cp->daddr.ip; ip_send_check(ciph); } @@ -560,21 +609,112 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, "Forwarding altered incoming ICMP"); } +#ifdef CONFIG_IP_VS_IPV6 +void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, int inout) +{ + struct ipv6hdr *iph = ipv6_hdr(skb); + unsigned int icmp_offset = sizeof(struct ipv6hdr); + struct icmp6hdr *icmph = (struct icmp6hdr *)(skb_network_header(skb) + + icmp_offset); + struct ipv6hdr *ciph = (struct ipv6hdr *)(icmph + 1); + + if (inout) { + iph->saddr = cp->vaddr.in6; + ciph->daddr = cp->vaddr.in6; + } else { + iph->daddr = cp->daddr.in6; + ciph->saddr = cp->daddr.in6; + } + + /* the TCP/UDP port */ + if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) { + __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr); + + if (inout) + ports[1] = cp->vport; + else + ports[0] = cp->dport; + } + + /* And finally the ICMP checksum */ + icmph->icmp6_cksum = 0; + /* TODO IPv6: is this correct for ICMPv6? */ + ip_vs_checksum_complete(skb, icmp_offset); + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (inout) + IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, + "Forwarding altered outgoing ICMPv6"); + else + IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, + "Forwarding altered incoming ICMPv6"); +} +#endif + +/* Handle relevant response ICMP messages - forward to the right + * destination host. Used for NAT and local client. + */ +static int handle_response_icmp(int af, struct sk_buff *skb, + union nf_inet_addr *snet, + __u8 protocol, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, + unsigned int offset, unsigned int ihl) +{ + unsigned int verdict = NF_DROP; + + if (IP_VS_FWD_METHOD(cp) != 0) { + IP_VS_ERR("shouldn't reach here, because the box is on the " + "half connection in the tun/dr module.\n"); + } + + /* Ensure the checksum is correct */ + if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { + /* Failed checksum! */ + IP_VS_DBG_BUF(1, "Forward ICMP: failed checksum from %s!\n", + IP_VS_DBG_ADDR(af, snet)); + goto out; + } + + if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol) + offset += 2 * sizeof(__u16); + if (!skb_make_writable(skb, offset)) + goto out; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + ip_vs_nat_icmp_v6(skb, pp, cp, 1); + else +#endif + ip_vs_nat_icmp(skb, pp, cp, 1); + + /* do the statistics and put it back */ + ip_vs_out_stats(cp, skb); + + skb->ipvs_property = 1; + verdict = NF_ACCEPT; + +out: + __ip_vs_conn_put(cp); + + return verdict; +} + /* * Handle ICMP messages in the inside-to-outside direction (outgoing). - * Find any that might be relevant, check against existing connections, - * forward to the right destination host if relevant. + * Find any that might be relevant, check against existing connections. * Currently handles error types - unreachable, quench, ttl exceeded. - * (Only used in VS/NAT) */ static int ip_vs_out_icmp(struct sk_buff *skb, int *related) { struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ + struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; - unsigned int offset, ihl, verdict; + unsigned int offset, ihl; + union nf_inet_addr snet; *related = 1; @@ -627,102 +767,231 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related) offset += cih->ihl * 4; + ip_vs_fill_iphdr(AF_INET, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_out_get(skb, pp, cih, offset, 1); + cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); if (!cp) return NF_ACCEPT; - verdict = NF_DROP; + snet.ip = iph->saddr; + return handle_response_icmp(AF_INET, skb, &snet, cih->protocol, cp, + pp, offset, ihl); +} - if (IP_VS_FWD_METHOD(cp) != 0) { - IP_VS_ERR("shouldn't reach here, because the box is on the " - "half connection in the tun/dr module.\n"); +#ifdef CONFIG_IP_VS_IPV6 +static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related) +{ + struct ipv6hdr *iph; + struct icmp6hdr _icmph, *ic; + struct ipv6hdr _ciph, *cih; /* The ip header contained + within the ICMP */ + struct ip_vs_iphdr ciph; + struct ip_vs_conn *cp; + struct ip_vs_protocol *pp; + unsigned int offset; + union nf_inet_addr snet; + + *related = 1; + + /* reassemble IP fragments */ + if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { + if (ip_vs_gather_frags_v6(skb, IP_DEFRAG_VS_OUT)) + return NF_STOLEN; } - /* Ensure the checksum is correct */ - if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) { - /* Failed checksum! */ - IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n", - NIPQUAD(iph->saddr)); - goto out; + iph = ipv6_hdr(skb); + offset = sizeof(struct ipv6hdr); + ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); + if (ic == NULL) + return NF_DROP; + + IP_VS_DBG(12, "Outgoing ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n", + ic->icmp6_type, ntohs(icmpv6_id(ic)), + NIP6(iph->saddr), NIP6(iph->daddr)); + + /* + * Work through seeing if this is for us. + * These checks are supposed to be in an order that means easy + * things are checked first to speed up processing.... however + * this means that some packets will manage to get a long way + * down this stack and then be rejected, but that's life. + */ + if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && + (ic->icmp6_type != ICMPV6_PKT_TOOBIG) && + (ic->icmp6_type != ICMPV6_TIME_EXCEED)) { + *related = 0; + return NF_ACCEPT; } - if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol) - offset += 2 * sizeof(__u16); - if (!skb_make_writable(skb, offset)) - goto out; + /* Now find the contained IP header */ + offset += sizeof(_icmph); + cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); + if (cih == NULL) + return NF_ACCEPT; /* The packet looks wrong, ignore */ - ip_vs_nat_icmp(skb, pp, cp, 1); + pp = ip_vs_proto_get(cih->nexthdr); + if (!pp) + return NF_ACCEPT; - /* do the statistics and put it back */ - ip_vs_out_stats(cp, skb); + /* Is the embedded protocol header present? */ + /* TODO: we don't support fragmentation at the moment anyways */ + if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) + return NF_ACCEPT; - skb->ipvs_property = 1; - verdict = NF_ACCEPT; + IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for"); - out: - __ip_vs_conn_put(cp); + offset += sizeof(struct ipv6hdr); - return verdict; + ip_vs_fill_iphdr(AF_INET6, cih, &ciph); + /* The embedded headers contain source and dest in reverse order */ + cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); + if (!cp) + return NF_ACCEPT; + + ipv6_addr_copy(&snet.in6, &iph->saddr); + return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp, + pp, offset, sizeof(struct ipv6hdr)); } +#endif -static inline int is_tcp_reset(const struct sk_buff *skb) +static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len) { struct tcphdr _tcph, *th; - th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, nh_len, sizeof(_tcph), &_tcph); if (th == NULL) return 0; return th->rst; } +/* Handle response packets: rewrite addresses and send away... + * Used for NAT and local client. + */ +static unsigned int +handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, + struct ip_vs_conn *cp, int ihl) +{ + IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); + + if (!skb_make_writable(skb, ihl)) + goto drop; + + /* mangle the packet */ + if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) + goto drop; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + ipv6_hdr(skb)->saddr = cp->vaddr.in6; + else +#endif + { + ip_hdr(skb)->saddr = cp->vaddr.ip; + ip_send_check(ip_hdr(skb)); + } + + /* For policy routing, packets originating from this + * machine itself may be routed differently to packets + * passing through. We want this packet to be routed as + * if it came from this machine itself. So re-compute + * the routing information. + */ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + if (ip6_route_me_harder(skb) != 0) + goto drop; + } else +#endif + if (ip_route_me_harder(skb, RTN_LOCAL) != 0) + goto drop; + + IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); + + ip_vs_out_stats(cp, skb); + ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); + ip_vs_conn_put(cp); + + skb->ipvs_property = 1; + + LeaveFunction(11); + return NF_ACCEPT; + +drop: + ip_vs_conn_put(cp); + kfree_skb(skb); + return NF_STOLEN; +} + /* * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. - * Check if outgoing packet belongs to the established ip_vs_conn, - * rewrite addresses of the packet and send it on its way... + * Check if outgoing packet belongs to the established ip_vs_conn. */ static unsigned int ip_vs_out(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct iphdr *iph; + struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; - int ihl; + int af; EnterFunction(11); + af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; + if (skb->ipvs_property) return NF_ACCEPT; - iph = ip_hdr(skb); - if (unlikely(iph->protocol == IPPROTO_ICMP)) { - int related, verdict = ip_vs_out_icmp(skb, &related); + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { + int related, verdict = ip_vs_out_icmp_v6(skb, &related); - if (related) - return verdict; - iph = ip_hdr(skb); - } + if (related) + return verdict; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + } + } else +#endif + if (unlikely(iph.protocol == IPPROTO_ICMP)) { + int related, verdict = ip_vs_out_icmp(skb, &related); - pp = ip_vs_proto_get(iph->protocol); + if (related) + return verdict; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + } + + pp = ip_vs_proto_get(iph.protocol); if (unlikely(!pp)) return NF_ACCEPT; /* reassemble IP fragments */ - if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) && - !pp->dont_defrag)) { - if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) - return NF_STOLEN; - iph = ip_hdr(skb); - } +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { + int related, verdict = ip_vs_out_icmp_v6(skb, &related); + + if (related) + return verdict; - ihl = iph->ihl << 2; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + } + } else +#endif + if (unlikely(ip_hdr(skb)->frag_off & htons(IP_MF|IP_OFFSET) && + !pp->dont_defrag)) { + if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT)) + return NF_STOLEN; + + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + } /* * Check if the packet belongs to an existing entry */ - cp = pp->conn_out_get(skb, pp, iph, ihl, 0); + cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); if (unlikely(!cp)) { if (sysctl_ip_vs_nat_icmp_send && @@ -730,21 +999,31 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, pp->protocol == IPPROTO_UDP)) { __be16 _ports[2], *pptr; - pptr = skb_header_pointer(skb, ihl, + pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); if (pptr == NULL) return NF_ACCEPT; /* Not for me */ - if (ip_vs_lookup_real_service(iph->protocol, - iph->saddr, pptr[0])) { + if (ip_vs_lookup_real_service(af, iph.protocol, + &iph.saddr, + pptr[0])) { /* * Notify the real server: there is no * existing entry if it is not RST * packet or not TCP packet. */ - if (iph->protocol != IPPROTO_TCP - || !is_tcp_reset(skb)) { - icmp_send(skb,ICMP_DEST_UNREACH, - ICMP_PORT_UNREACH, 0); + if (iph.protocol != IPPROTO_TCP + || !is_tcp_reset(skb, iph.len)) { +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + icmpv6_send(skb, + ICMPV6_DEST_UNREACH, + ICMPV6_PORT_UNREACH, + 0, skb->dev); + else +#endif + icmp_send(skb, + ICMP_DEST_UNREACH, + ICMP_PORT_UNREACH, 0); return NF_DROP; } } @@ -754,41 +1033,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, return NF_ACCEPT; } - IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); - - if (!skb_make_writable(skb, ihl)) - goto drop; - - /* mangle the packet */ - if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) - goto drop; - ip_hdr(skb)->saddr = cp->vaddr; - ip_send_check(ip_hdr(skb)); - - /* For policy routing, packets originating from this - * machine itself may be routed differently to packets - * passing through. We want this packet to be routed as - * if it came from this machine itself. So re-compute - * the routing information. - */ - if (ip_route_me_harder(skb, RTN_LOCAL) != 0) - goto drop; - - IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); - - ip_vs_out_stats(cp, skb); - ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); - ip_vs_conn_put(cp); - - skb->ipvs_property = 1; - - LeaveFunction(11); - return NF_ACCEPT; - - drop: - ip_vs_conn_put(cp); - kfree_skb(skb); - return NF_STOLEN; + return handle_response(af, skb, pp, cp, iph.len); } @@ -804,9 +1049,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) struct iphdr *iph; struct icmphdr _icmph, *ic; struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */ + struct ip_vs_iphdr ciph; struct ip_vs_conn *cp; struct ip_vs_protocol *pp; unsigned int offset, ihl, verdict; + union nf_inet_addr snet; *related = 1; @@ -860,10 +1107,20 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) offset += cih->ihl * 4; + ip_vs_fill_iphdr(AF_INET, cih, &ciph); /* The embedded headers contain source and dest in reverse order */ - cp = pp->conn_in_get(skb, pp, cih, offset, 1); - if (!cp) + cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1); + if (!cp) { + /* The packet could also belong to a local client */ + cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1); + if (cp) { + snet.ip = iph->saddr; + return handle_response_icmp(AF_INET, skb, &snet, + cih->protocol, cp, pp, + offset, ihl); + } return NF_ACCEPT; + } verdict = NF_DROP; @@ -888,6 +1145,105 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) return verdict; } +#ifdef CONFIG_IP_VS_IPV6 +static int +ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) +{ + struct ipv6hdr *iph; + struct icmp6hdr _icmph, *ic; + struct ipv6hdr _ciph, *cih; /* The ip header contained + within the ICMP */ + struct ip_vs_iphdr ciph; + struct ip_vs_conn *cp; + struct ip_vs_protocol *pp; + unsigned int offset, verdict; + union nf_inet_addr snet; + + *related = 1; + + /* reassemble IP fragments */ + if (ipv6_hdr(skb)->nexthdr == IPPROTO_FRAGMENT) { + if (ip_vs_gather_frags_v6(skb, hooknum == NF_INET_LOCAL_IN ? + IP_DEFRAG_VS_IN : + IP_DEFRAG_VS_FWD)) + return NF_STOLEN; + } + + iph = ipv6_hdr(skb); + offset = sizeof(struct ipv6hdr); + ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph); + if (ic == NULL) + return NF_DROP; + + IP_VS_DBG(12, "Incoming ICMPv6 (%d,%d) " NIP6_FMT "->" NIP6_FMT "\n", + ic->icmp6_type, ntohs(icmpv6_id(ic)), + NIP6(iph->saddr), NIP6(iph->daddr)); + + /* + * Work through seeing if this is for us. + * These checks are supposed to be in an order that means easy + * things are checked first to speed up processing.... however + * this means that some packets will manage to get a long way + * down this stack and then be rejected, but that's life. + */ + if ((ic->icmp6_type != ICMPV6_DEST_UNREACH) && + (ic->icmp6_type != ICMPV6_PKT_TOOBIG) && + (ic->icmp6_type != ICMPV6_TIME_EXCEED)) { + *related = 0; + return NF_ACCEPT; + } + + /* Now find the contained IP header */ + offset += sizeof(_icmph); + cih = skb_header_pointer(skb, offset, sizeof(_ciph), &_ciph); + if (cih == NULL) + return NF_ACCEPT; /* The packet looks wrong, ignore */ + + pp = ip_vs_proto_get(cih->nexthdr); + if (!pp) + return NF_ACCEPT; + + /* Is the embedded protocol header present? */ + /* TODO: we don't support fragmentation at the moment anyways */ + if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) + return NF_ACCEPT; + + IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for"); + + offset += sizeof(struct ipv6hdr); + + ip_vs_fill_iphdr(AF_INET6, cih, &ciph); + /* The embedded headers contain source and dest in reverse order */ + cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1); + if (!cp) { + /* The packet could also belong to a local client */ + cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1); + if (cp) { + ipv6_addr_copy(&snet.in6, &iph->saddr); + return handle_response_icmp(AF_INET6, skb, &snet, + cih->nexthdr, + cp, pp, offset, + sizeof(struct ipv6hdr)); + } + return NF_ACCEPT; + } + + verdict = NF_DROP; + + /* do the statistics and put it back */ + ip_vs_in_stats(cp, skb); + if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr) + offset += 2 * sizeof(__u16); + verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset); + /* do not touch skb anymore */ + + __ip_vs_conn_put(cp); + + return verdict; +} +#endif + + /* * Check if it's for virtual services, look it up, * and send it on its way... @@ -897,50 +1253,54 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct iphdr *iph; + struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; - int ret, restart; - int ihl; + int ret, restart, af; + + af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; + + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); /* - * Big tappo: only PACKET_HOST (neither loopback nor mcasts) - * ... don't know why 1st test DOES NOT include 2nd (?) + * Big tappo: only PACKET_HOST, including loopback for local client + * Don't handle local packets on IPv6 for now */ - if (unlikely(skb->pkt_type != PACKET_HOST - || skb->dev->flags & IFF_LOOPBACK || skb->sk)) { - IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n", - skb->pkt_type, - ip_hdr(skb)->protocol, - NIPQUAD(ip_hdr(skb)->daddr)); + if (unlikely(skb->pkt_type != PACKET_HOST)) { + IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n", + skb->pkt_type, + iph.protocol, + IP_VS_DBG_ADDR(af, &iph.daddr)); return NF_ACCEPT; } - iph = ip_hdr(skb); - if (unlikely(iph->protocol == IPPROTO_ICMP)) { + if (unlikely(iph.protocol == IPPROTO_ICMP)) { int related, verdict = ip_vs_in_icmp(skb, &related, hooknum); if (related) return verdict; - iph = ip_hdr(skb); + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } /* Protocol supported? */ - pp = ip_vs_proto_get(iph->protocol); + pp = ip_vs_proto_get(iph.protocol); if (unlikely(!pp)) return NF_ACCEPT; - ihl = iph->ihl << 2; - /* * Check if the packet belongs to an existing connection entry */ - cp = pp->conn_in_get(skb, pp, iph, ihl, 0); + cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0); if (unlikely(!cp)) { int v; - if (!pp->conn_schedule(skb, pp, &v, &cp)) + /* For local client packets, it could be a response */ + cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); + if (cp) + return handle_response(af, skb, pp, cp, iph.len); + + if (!pp->conn_schedule(af, skb, pp, &v, &cp)) return v; } @@ -984,7 +1344,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, * encorage the standby servers to update the connections timeout */ atomic_inc(&cp->in_pkts); - if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && + if (af == AF_INET && + (ip_vs_sync_state & IP_VS_STATE_MASTER) && (((cp->protocol != IPPROTO_TCP || cp->state == IP_VS_TCP_S_ESTABLISHED) && (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1] @@ -1023,6 +1384,21 @@ ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb, return ip_vs_in_icmp(skb, &r, hooknum); } +#ifdef CONFIG_IP_VS_IPV6 +static unsigned int +ip_vs_forward_icmp_v6(unsigned int hooknum, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + int r; + + if (ipv6_hdr(skb)->nexthdr != IPPROTO_ICMPV6) + return NF_ACCEPT; + + return ip_vs_in_icmp_v6(skb, &r, hooknum); +} +#endif + static struct nf_hook_ops ip_vs_ops[] __read_mostly = { /* After packet filtering, forward packet through VS/DR, VS/TUN, @@ -1060,6 +1436,43 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_POST_ROUTING, .priority = NF_IP_PRI_NAT_SRC-1, }, +#ifdef CONFIG_IP_VS_IPV6 + /* After packet filtering, forward packet through VS/DR, VS/TUN, + * or VS/NAT(change destination), so that filtering rules can be + * applied to IPVS. */ + { + .hook = ip_vs_in, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_LOCAL_IN, + .priority = 100, + }, + /* After packet filtering, change source only for VS/NAT */ + { + .hook = ip_vs_out, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_FORWARD, + .priority = 100, + }, + /* After packet filtering (but before ip_vs_out_icmp), catch icmp + * destined for 0.0.0.0/0, which is for incoming IPVS connections */ + { + .hook = ip_vs_forward_icmp_v6, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_FORWARD, + .priority = 99, + }, + /* Before the netfilter connection tracking, exit from POST_ROUTING */ + { + .hook = ip_vs_post_routing, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP6_PRI_NAT_SRC-1, + }, +#endif }; @@ -1070,10 +1483,12 @@ static int __init ip_vs_init(void) { int ret; + ip_vs_estimator_init(); + ret = ip_vs_control_init(); if (ret < 0) { IP_VS_ERR("can't setup control.\n"); - goto cleanup_nothing; + goto cleanup_estimator; } ip_vs_protocol_init(); @@ -1106,7 +1521,8 @@ static int __init ip_vs_init(void) cleanup_protocol: ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); - cleanup_nothing: + cleanup_estimator: + ip_vs_estimator_cleanup(); return ret; } @@ -1117,6 +1533,7 @@ static void __exit ip_vs_cleanup(void) ip_vs_app_cleanup(); ip_vs_protocol_cleanup(); ip_vs_control_cleanup(); + ip_vs_estimator_cleanup(); IP_VS_INFO("ipvs unloaded.\n"); } diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 6379705a8dcb..0302cf3e5039 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -35,8 +35,13 @@ #include <net/net_namespace.h> #include <net/ip.h> +#ifdef CONFIG_IP_VS_IPV6 +#include <net/ipv6.h> +#include <net/ip6_route.h> +#endif #include <net/route.h> #include <net/sock.h> +#include <net/genetlink.h> #include <asm/uaccess.h> @@ -90,6 +95,26 @@ int ip_vs_get_debug_level(void) } #endif +#ifdef CONFIG_IP_VS_IPV6 +/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */ +static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr) +{ + struct rt6_info *rt; + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip6_u = { + .daddr = *addr, + .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } }, + }; + + rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); + if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) + return 1; + + return 0; +} +#endif /* * update_defense_level is called from keventd and from sysctl, * so it needs to protect itself from softirqs @@ -281,11 +306,19 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); * Returns hash value for virtual service */ static __inline__ unsigned -ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port) +ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr, + __be16 port) { register unsigned porth = ntohs(port); + __be32 addr_fold = addr->ip; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + addr_fold = addr->ip6[0]^addr->ip6[1]^ + addr->ip6[2]^addr->ip6[3]; +#endif - return (proto^ntohl(addr)^(porth>>IP_VS_SVC_TAB_BITS)^porth) + return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth) & IP_VS_SVC_TAB_MASK; } @@ -316,7 +349,8 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) /* * Hash it by <protocol,addr,port> in ip_vs_svc_table */ - hash = ip_vs_svc_hashkey(svc->protocol, svc->addr, svc->port); + hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr, + svc->port); list_add(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* @@ -362,17 +396,19 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) /* * Get service by {proto,addr,port} in the service table. */ -static __inline__ struct ip_vs_service * -__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport) +static inline struct ip_vs_service * +__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr, + __be16 vport) { unsigned hash; struct ip_vs_service *svc; /* Check for "full" addressed entries */ - hash = ip_vs_svc_hashkey(protocol, vaddr, vport); + hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport); list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){ - if ((svc->addr == vaddr) + if ((svc->af == af) + && ip_vs_addr_equal(af, &svc->addr, vaddr) && (svc->port == vport) && (svc->protocol == protocol)) { /* HIT */ @@ -388,7 +424,8 @@ __ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport) /* * Get service by {fwmark} in the service table. */ -static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) +static inline struct ip_vs_service * +__ip_vs_svc_fwm_get(int af, __u32 fwmark) { unsigned hash; struct ip_vs_service *svc; @@ -397,7 +434,7 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) hash = ip_vs_svc_fwm_hashkey(fwmark); list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) { - if (svc->fwmark == fwmark) { + if (svc->fwmark == fwmark && svc->af == af) { /* HIT */ atomic_inc(&svc->usecnt); return svc; @@ -408,7 +445,8 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) } struct ip_vs_service * -ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) +ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, + const union nf_inet_addr *vaddr, __be16 vport) { struct ip_vs_service *svc; @@ -417,14 +455,14 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) /* * Check the table hashed by fwmark first */ - if (fwmark && (svc = __ip_vs_svc_fwm_get(fwmark))) + if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark))) goto out; /* * Check the table hashed by <protocol,addr,port> * for "full" addressed entries */ - svc = __ip_vs_service_get(protocol, vaddr, vport); + svc = __ip_vs_service_get(af, protocol, vaddr, vport); if (svc == NULL && protocol == IPPROTO_TCP @@ -434,7 +472,7 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) * Check if ftp service entry exists, the packet * might belong to FTP data connections. */ - svc = __ip_vs_service_get(protocol, vaddr, FTPPORT); + svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT); } if (svc == NULL @@ -442,16 +480,16 @@ ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) /* * Check if the catch-all port (port zero) exists */ - svc = __ip_vs_service_get(protocol, vaddr, 0); + svc = __ip_vs_service_get(af, protocol, vaddr, 0); } out: read_unlock(&__ip_vs_svc_lock); - IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n", - fwmark, ip_vs_proto_name(protocol), - NIPQUAD(vaddr), ntohs(vport), - svc?"hit":"not hit"); + IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n", + fwmark, ip_vs_proto_name(protocol), + IP_VS_DBG_ADDR(af, vaddr), ntohs(vport), + svc ? "hit" : "not hit"); return svc; } @@ -478,11 +516,20 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) /* * Returns hash value for real service */ -static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port) +static inline unsigned ip_vs_rs_hashkey(int af, + const union nf_inet_addr *addr, + __be16 port) { register unsigned porth = ntohs(port); + __be32 addr_fold = addr->ip; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + addr_fold = addr->ip6[0]^addr->ip6[1]^ + addr->ip6[2]^addr->ip6[3]; +#endif - return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth) + return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth) & IP_VS_RTAB_MASK; } @@ -502,7 +549,8 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest) * Hash by proto,addr,port, * which are the parameters of the real service. */ - hash = ip_vs_rs_hashkey(dest->addr, dest->port); + hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port); + list_add(&dest->d_list, &ip_vs_rtable[hash]); return 1; @@ -529,7 +577,9 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) * Lookup real service by <proto,addr,port> in the real service table. */ struct ip_vs_dest * -ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) +ip_vs_lookup_real_service(int af, __u16 protocol, + const union nf_inet_addr *daddr, + __be16 dport) { unsigned hash; struct ip_vs_dest *dest; @@ -538,11 +588,12 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) * Check for "full" addressed entries * Return the first found entry */ - hash = ip_vs_rs_hashkey(daddr, dport); + hash = ip_vs_rs_hashkey(af, daddr, dport); read_lock(&__ip_vs_rs_lock); list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) { - if ((dest->addr == daddr) + if ((dest->af == af) + && ip_vs_addr_equal(af, &dest->addr, daddr) && (dest->port == dport) && ((dest->protocol == protocol) || dest->vfwmark)) { @@ -560,7 +611,8 @@ ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) * Lookup destination by {addr,port} in the given service */ static struct ip_vs_dest * -ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) +ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, + __be16 dport) { struct ip_vs_dest *dest; @@ -568,7 +620,9 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * Find the destination for the given service */ list_for_each_entry(dest, &svc->destinations, n_list) { - if ((dest->addr == daddr) && (dest->port == dport)) { + if ((dest->af == svc->af) + && ip_vs_addr_equal(svc->af, &dest->addr, daddr) + && (dest->port == dport)) { /* HIT */ return dest; } @@ -587,13 +641,15 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * ip_vs_lookup_real_service() looked promissing, but * seems not working as expected. */ -struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, - __be32 vaddr, __be16 vport, __u16 protocol) +struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr, + __be16 dport, + const union nf_inet_addr *vaddr, + __be16 vport, __u16 protocol) { struct ip_vs_dest *dest; struct ip_vs_service *svc; - svc = ip_vs_service_get(0, protocol, vaddr, vport); + svc = ip_vs_service_get(af, 0, protocol, vaddr, vport); if (!svc) return NULL; dest = ip_vs_lookup_dest(svc, daddr, dport); @@ -614,7 +670,8 @@ struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport, * scheduling. */ static struct ip_vs_dest * -ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) +ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr, + __be16 dport) { struct ip_vs_dest *dest, *nxt; @@ -622,17 +679,19 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * Find the destination in trash */ list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) { - IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, " - "dest->refcnt=%d\n", - dest->vfwmark, - NIPQUAD(dest->addr), ntohs(dest->port), - atomic_read(&dest->refcnt)); - if (dest->addr == daddr && + IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, " + "dest->refcnt=%d\n", + dest->vfwmark, + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port), + atomic_read(&dest->refcnt)); + if (dest->af == svc->af && + ip_vs_addr_equal(svc->af, &dest->addr, daddr) && dest->port == dport && dest->vfwmark == svc->fwmark && dest->protocol == svc->protocol && (svc->fwmark || - (dest->vaddr == svc->addr && + (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) && dest->vport == svc->port))) { /* HIT */ return dest; @@ -642,10 +701,11 @@ ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) * Try to purge the destination from trash if not referenced */ if (atomic_read(&dest->refcnt) == 1) { - IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u " - "from trash\n", - dest->vfwmark, - NIPQUAD(dest->addr), ntohs(dest->port)); + IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u " + "from trash\n", + dest->vfwmark, + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port)); list_del(&dest->n_list); ip_vs_dst_reset(dest); __ip_vs_unbind_svc(dest); @@ -684,18 +744,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats) { spin_lock_bh(&stats->lock); - stats->conns = 0; - stats->inpkts = 0; - stats->outpkts = 0; - stats->inbytes = 0; - stats->outbytes = 0; - - stats->cps = 0; - stats->inpps = 0; - stats->outpps = 0; - stats->inbps = 0; - stats->outbps = 0; - + memset(&stats->ustats, 0, sizeof(stats->ustats)); ip_vs_zero_estimator(stats); spin_unlock_bh(&stats->lock); @@ -706,7 +755,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats) */ static void __ip_vs_update_dest(struct ip_vs_service *svc, - struct ip_vs_dest *dest, struct ip_vs_dest_user *udest) + struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest) { int conn_flags; @@ -715,10 +764,18 @@ __ip_vs_update_dest(struct ip_vs_service *svc, conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; /* check if local node and update the flags */ - if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) { - conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) - | IP_VS_CONN_F_LOCALNODE; - } +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) { + if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) { + conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) + | IP_VS_CONN_F_LOCALNODE; + } + } else +#endif + if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) { + conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) + | IP_VS_CONN_F_LOCALNODE; + } /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) { @@ -759,7 +816,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, * Create a destination for the given service */ static int -ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, +ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest, struct ip_vs_dest **dest_p) { struct ip_vs_dest *dest; @@ -767,9 +824,20 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, EnterFunction(2); - atype = inet_addr_type(&init_net, udest->addr); - if (atype != RTN_LOCAL && atype != RTN_UNICAST) - return -EINVAL; +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) { + atype = ipv6_addr_type(&udest->addr.in6); + if ((!(atype & IPV6_ADDR_UNICAST) || + atype & IPV6_ADDR_LINKLOCAL) && + !__ip_vs_addr_is_local_v6(&udest->addr.in6)) + return -EINVAL; + } else +#endif + { + atype = inet_addr_type(&init_net, udest->addr.ip); + if (atype != RTN_LOCAL && atype != RTN_UNICAST) + return -EINVAL; + } dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC); if (dest == NULL) { @@ -777,11 +845,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, return -ENOMEM; } + dest->af = svc->af; dest->protocol = svc->protocol; dest->vaddr = svc->addr; dest->vport = svc->port; dest->vfwmark = svc->fwmark; - dest->addr = udest->addr; + ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr); dest->port = udest->port; atomic_set(&dest->activeconns, 0); @@ -806,10 +875,10 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest, * Add a destination into an existing service */ static int -ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) +ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - __be32 daddr = udest->addr; + union nf_inet_addr daddr; __be16 dport = udest->port; int ret; @@ -826,10 +895,13 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) return -ERANGE; } + ip_vs_addr_copy(svc->af, &daddr, &udest->addr); + /* * Check if the dest already exists in the list */ - dest = ip_vs_lookup_dest(svc, daddr, dport); + dest = ip_vs_lookup_dest(svc, &daddr, dport); + if (dest != NULL) { IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n"); return -EEXIST; @@ -839,15 +911,17 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) * Check if the dest already exists in the trash and * is from the same service */ - dest = ip_vs_trash_get_dest(svc, daddr, dport); + dest = ip_vs_trash_get_dest(svc, &daddr, dport); + if (dest != NULL) { - IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, " - "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n", - NIPQUAD(daddr), ntohs(dport), - atomic_read(&dest->refcnt), - dest->vfwmark, - NIPQUAD(dest->vaddr), - ntohs(dest->vport)); + IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, " + "dest->refcnt=%d, service %u/%s:%u\n", + IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport), + atomic_read(&dest->refcnt), + dest->vfwmark, + IP_VS_DBG_ADDR(svc->af, &dest->vaddr), + ntohs(dest->vport)); + __ip_vs_update_dest(svc, dest, udest); /* @@ -868,7 +942,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) svc->num_dests++; /* call the update_service function of its scheduler */ - svc->scheduler->update_service(svc); + if (svc->scheduler->update_service) + svc->scheduler->update_service(svc); write_unlock_bh(&__ip_vs_svc_lock); return 0; @@ -898,7 +973,8 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) svc->num_dests++; /* call the update_service function of its scheduler */ - svc->scheduler->update_service(svc); + if (svc->scheduler->update_service) + svc->scheduler->update_service(svc); write_unlock_bh(&__ip_vs_svc_lock); @@ -912,10 +988,10 @@ ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) * Edit a destination in the given service */ static int -ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) +ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - __be32 daddr = udest->addr; + union nf_inet_addr daddr; __be16 dport = udest->port; EnterFunction(2); @@ -931,10 +1007,13 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) return -ERANGE; } + ip_vs_addr_copy(svc->af, &daddr, &udest->addr); + /* * Lookup the destination list */ - dest = ip_vs_lookup_dest(svc, daddr, dport); + dest = ip_vs_lookup_dest(svc, &daddr, dport); + if (dest == NULL) { IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n"); return -ENOENT; @@ -948,7 +1027,8 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1); /* call the update_service, because server weight may be changed */ - svc->scheduler->update_service(svc); + if (svc->scheduler->update_service) + svc->scheduler->update_service(svc); write_unlock_bh(&__ip_vs_svc_lock); @@ -987,10 +1067,11 @@ static void __ip_vs_del_dest(struct ip_vs_dest *dest) atomic_dec(&dest->svc->refcnt); kfree(dest); } else { - IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, " - "dest->refcnt=%d\n", - NIPQUAD(dest->addr), ntohs(dest->port), - atomic_read(&dest->refcnt)); + IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, " + "dest->refcnt=%d\n", + IP_VS_DBG_ADDR(dest->af, &dest->addr), + ntohs(dest->port), + atomic_read(&dest->refcnt)); list_add(&dest->n_list, &ip_vs_dest_trash); atomic_inc(&dest->refcnt); } @@ -1011,12 +1092,12 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, */ list_del(&dest->n_list); svc->num_dests--; - if (svcupd) { - /* - * Call the update_service function of its scheduler - */ - svc->scheduler->update_service(svc); - } + + /* + * Call the update_service function of its scheduler + */ + if (svcupd && svc->scheduler->update_service) + svc->scheduler->update_service(svc); } @@ -1024,15 +1105,15 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, * Delete a destination server in the given service */ static int -ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest) +ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) { struct ip_vs_dest *dest; - __be32 daddr = udest->addr; __be16 dport = udest->port; EnterFunction(2); - dest = ip_vs_lookup_dest(svc, daddr, dport); + dest = ip_vs_lookup_dest(svc, &udest->addr, dport); + if (dest == NULL) { IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n"); return -ENOENT; @@ -1067,7 +1148,8 @@ ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest) * Add a service into the service hash table */ static int -ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) +ip_vs_add_service(struct ip_vs_service_user_kern *u, + struct ip_vs_service **svc_p) { int ret = 0; struct ip_vs_scheduler *sched = NULL; @@ -1085,6 +1167,19 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) goto out_mod_dec; } +#ifdef CONFIG_IP_VS_IPV6 + if (u->af == AF_INET6) { + if (!sched->supports_ipv6) { + ret = -EAFNOSUPPORT; + goto out_err; + } + if ((u->netmask < 1) || (u->netmask > 128)) { + ret = -EINVAL; + goto out_err; + } + } +#endif + svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC); if (svc == NULL) { IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n"); @@ -1096,8 +1191,9 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) atomic_set(&svc->usecnt, 1); atomic_set(&svc->refcnt, 0); + svc->af = u->af; svc->protocol = u->protocol; - svc->addr = u->addr; + ip_vs_addr_copy(svc->af, &svc->addr, &u->addr); svc->port = u->port; svc->fwmark = u->fwmark; svc->flags = u->flags; @@ -1121,7 +1217,10 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) atomic_inc(&ip_vs_nullsvc_counter); ip_vs_new_estimator(&svc->stats); - ip_vs_num_services++; + + /* Count only IPv4 services for old get/setsockopt interface */ + if (svc->af == AF_INET) + ip_vs_num_services++; /* Hash the service into the service table */ write_lock_bh(&__ip_vs_svc_lock); @@ -1156,7 +1255,7 @@ ip_vs_add_service(struct ip_vs_service_user *u, struct ip_vs_service **svc_p) * Edit a service and bind it with a new scheduler */ static int -ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) +ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) { struct ip_vs_scheduler *sched, *old_sched; int ret = 0; @@ -1172,6 +1271,19 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) } old_sched = sched; +#ifdef CONFIG_IP_VS_IPV6 + if (u->af == AF_INET6) { + if (!sched->supports_ipv6) { + ret = -EAFNOSUPPORT; + goto out; + } + if ((u->netmask < 1) || (u->netmask > 128)) { + ret = -EINVAL; + goto out; + } + } +#endif + write_lock_bh(&__ip_vs_svc_lock); /* @@ -1193,7 +1305,7 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) */ if ((ret = ip_vs_unbind_scheduler(svc))) { old_sched = sched; - goto out; + goto out_unlock; } /* @@ -1212,12 +1324,15 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user *u) */ ip_vs_bind_scheduler(svc, old_sched); old_sched = sched; - goto out; + goto out_unlock; } } - out: + out_unlock: write_unlock_bh(&__ip_vs_svc_lock); +#ifdef CONFIG_IP_VS_IPV6 + out: +#endif if (old_sched) ip_vs_scheduler_put(old_sched); @@ -1236,7 +1351,10 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; - ip_vs_num_services--; + /* Count only IPv4 services for old get/setsockopt interface */ + if (svc->af == AF_INET) + ip_vs_num_services--; + ip_vs_kill_estimator(&svc->stats); /* Unbind scheduler */ @@ -1671,6 +1789,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) } static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) +__acquires(__ip_vs_svc_lock) { read_lock_bh(&__ip_vs_svc_lock); @@ -1724,6 +1843,7 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) +__releases(__ip_vs_svc_lock) { read_unlock_bh(&__ip_vs_svc_lock); } @@ -1744,15 +1864,25 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; - if (iter->table == ip_vs_svc_table) - seq_printf(seq, "%s %08X:%04X %s ", - ip_vs_proto_name(svc->protocol), - ntohl(svc->addr), - ntohs(svc->port), - svc->scheduler->name); - else + if (iter->table == ip_vs_svc_table) { +#ifdef CONFIG_IP_VS_IPV6 + if (svc->af == AF_INET6) + seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ", + ip_vs_proto_name(svc->protocol), + NIP6(svc->addr.in6), + ntohs(svc->port), + svc->scheduler->name); + else +#endif + seq_printf(seq, "%s %08X:%04X %s ", + ip_vs_proto_name(svc->protocol), + ntohl(svc->addr.ip), + ntohs(svc->port), + svc->scheduler->name); + } else { seq_printf(seq, "FWM %08X %s ", svc->fwmark, svc->scheduler->name); + } if (svc->flags & IP_VS_SVC_F_PERSISTENT) seq_printf(seq, "persistent %d %08X\n", @@ -1762,13 +1892,29 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) seq_putc(seq, '\n'); list_for_each_entry(dest, &svc->destinations, n_list) { - seq_printf(seq, - " -> %08X:%04X %-7s %-6d %-10d %-10d\n", - ntohl(dest->addr), ntohs(dest->port), - ip_vs_fwd_name(atomic_read(&dest->conn_flags)), - atomic_read(&dest->weight), - atomic_read(&dest->activeconns), - atomic_read(&dest->inactconns)); +#ifdef CONFIG_IP_VS_IPV6 + if (dest->af == AF_INET6) + seq_printf(seq, + " -> [" NIP6_FMT "]:%04X" + " %-7s %-6d %-10d %-10d\n", + NIP6(dest->addr.in6), + ntohs(dest->port), + ip_vs_fwd_name(atomic_read(&dest->conn_flags)), + atomic_read(&dest->weight), + atomic_read(&dest->activeconns), + atomic_read(&dest->inactconns)); + else +#endif + seq_printf(seq, + " -> %08X:%04X " + "%-7s %-6d %-10d %-10d\n", + ntohl(dest->addr.ip), + ntohs(dest->port), + ip_vs_fwd_name(atomic_read(&dest->conn_flags)), + atomic_read(&dest->weight), + atomic_read(&dest->activeconns), + atomic_read(&dest->inactconns)); + } } return 0; @@ -1812,20 +1958,20 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v) " Conns Packets Packets Bytes Bytes\n"); spin_lock_bh(&ip_vs_stats.lock); - seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns, - ip_vs_stats.inpkts, ip_vs_stats.outpkts, - (unsigned long long) ip_vs_stats.inbytes, - (unsigned long long) ip_vs_stats.outbytes); + seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns, + ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts, + (unsigned long long) ip_vs_stats.ustats.inbytes, + (unsigned long long) ip_vs_stats.ustats.outbytes); /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */ seq_puts(seq, " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n"); seq_printf(seq,"%8X %8X %8X %16X %16X\n", - ip_vs_stats.cps, - ip_vs_stats.inpps, - ip_vs_stats.outpps, - ip_vs_stats.inbps, - ip_vs_stats.outbps); + ip_vs_stats.ustats.cps, + ip_vs_stats.ustats.inpps, + ip_vs_stats.ustats.outpps, + ip_vs_stats.ustats.inbps, + ip_vs_stats.ustats.outbps); spin_unlock_bh(&ip_vs_stats.lock); return 0; @@ -1900,14 +2046,44 @@ static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = { [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN, }; +static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, + struct ip_vs_service_user *usvc_compat) +{ + usvc->af = AF_INET; + usvc->protocol = usvc_compat->protocol; + usvc->addr.ip = usvc_compat->addr; + usvc->port = usvc_compat->port; + usvc->fwmark = usvc_compat->fwmark; + + /* Deep copy of sched_name is not needed here */ + usvc->sched_name = usvc_compat->sched_name; + + usvc->flags = usvc_compat->flags; + usvc->timeout = usvc_compat->timeout; + usvc->netmask = usvc_compat->netmask; +} + +static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, + struct ip_vs_dest_user *udest_compat) +{ + udest->addr.ip = udest_compat->addr; + udest->port = udest_compat->port; + udest->conn_flags = udest_compat->conn_flags; + udest->weight = udest_compat->weight; + udest->u_threshold = udest_compat->u_threshold; + udest->l_threshold = udest_compat->l_threshold; +} + static int do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) { int ret; unsigned char arg[MAX_ARG_LEN]; - struct ip_vs_service_user *usvc; + struct ip_vs_service_user *usvc_compat; + struct ip_vs_service_user_kern usvc; struct ip_vs_service *svc; - struct ip_vs_dest_user *udest; + struct ip_vs_dest_user *udest_compat; + struct ip_vs_dest_user_kern udest; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -1947,35 +2123,40 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) goto out_unlock; } - usvc = (struct ip_vs_service_user *)arg; - udest = (struct ip_vs_dest_user *)(usvc + 1); + usvc_compat = (struct ip_vs_service_user *)arg; + udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1); + + /* We only use the new structs internally, so copy userspace compat + * structs to extended internal versions */ + ip_vs_copy_usvc_compat(&usvc, usvc_compat); + ip_vs_copy_udest_compat(&udest, udest_compat); if (cmd == IP_VS_SO_SET_ZERO) { /* if no service address is set, zero counters in all */ - if (!usvc->fwmark && !usvc->addr && !usvc->port) { + if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) { ret = ip_vs_zero_all(); goto out_unlock; } } /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */ - if (usvc->protocol!=IPPROTO_TCP && usvc->protocol!=IPPROTO_UDP) { + if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) { IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n", - usvc->protocol, NIPQUAD(usvc->addr), - ntohs(usvc->port), usvc->sched_name); + usvc.protocol, NIPQUAD(usvc.addr.ip), + ntohs(usvc.port), usvc.sched_name); ret = -EFAULT; goto out_unlock; } /* Lookup the exact service by <protocol, addr, port> or fwmark */ - if (usvc->fwmark == 0) - svc = __ip_vs_service_get(usvc->protocol, - usvc->addr, usvc->port); + if (usvc.fwmark == 0) + svc = __ip_vs_service_get(usvc.af, usvc.protocol, + &usvc.addr, usvc.port); else - svc = __ip_vs_svc_fwm_get(usvc->fwmark); + svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); if (cmd != IP_VS_SO_SET_ADD - && (svc == NULL || svc->protocol != usvc->protocol)) { + && (svc == NULL || svc->protocol != usvc.protocol)) { ret = -ESRCH; goto out_unlock; } @@ -1985,10 +2166,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) if (svc != NULL) ret = -EEXIST; else - ret = ip_vs_add_service(usvc, &svc); + ret = ip_vs_add_service(&usvc, &svc); break; case IP_VS_SO_SET_EDIT: - ret = ip_vs_edit_service(svc, usvc); + ret = ip_vs_edit_service(svc, &usvc); break; case IP_VS_SO_SET_DEL: ret = ip_vs_del_service(svc); @@ -1999,13 +2180,13 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) ret = ip_vs_zero_service(svc); break; case IP_VS_SO_SET_ADDDEST: - ret = ip_vs_add_dest(svc, udest); + ret = ip_vs_add_dest(svc, &udest); break; case IP_VS_SO_SET_EDITDEST: - ret = ip_vs_edit_dest(svc, udest); + ret = ip_vs_edit_dest(svc, &udest); break; case IP_VS_SO_SET_DELDEST: - ret = ip_vs_del_dest(svc, udest); + ret = ip_vs_del_dest(svc, &udest); break; default: ret = -EINVAL; @@ -2028,7 +2209,7 @@ static void ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src) { spin_lock_bh(&src->lock); - memcpy(dst, src, (char*)&src->lock - (char*)src); + memcpy(dst, &src->ustats, sizeof(*dst)); spin_unlock_bh(&src->lock); } @@ -2036,7 +2217,7 @@ static void ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) { dst->protocol = src->protocol; - dst->addr = src->addr; + dst->addr = src->addr.ip; dst->port = src->port; dst->fwmark = src->fwmark; strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name)); @@ -2058,6 +2239,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + /* Only expose IPv4 entries to old interface */ + if (svc->af != AF_INET) + continue; + if (count >= get->num_services) goto out; memset(&entry, 0, sizeof(entry)); @@ -2073,6 +2258,10 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get, for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { + /* Only expose IPv4 entries to old interface */ + if (svc->af != AF_INET) + continue; + if (count >= get->num_services) goto out; memset(&entry, 0, sizeof(entry)); @@ -2094,13 +2283,15 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, struct ip_vs_get_dests __user *uptr) { struct ip_vs_service *svc; + union nf_inet_addr addr = { .ip = get->addr }; int ret = 0; if (get->fwmark) - svc = __ip_vs_svc_fwm_get(get->fwmark); + svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark); else - svc = __ip_vs_service_get(get->protocol, - get->addr, get->port); + svc = __ip_vs_service_get(AF_INET, get->protocol, &addr, + get->port); + if (svc) { int count = 0; struct ip_vs_dest *dest; @@ -2110,7 +2301,7 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get, if (count >= get->num_dests) break; - entry.addr = dest->addr; + entry.addr = dest->addr.ip; entry.port = dest->port; entry.conn_flags = atomic_read(&dest->conn_flags); entry.weight = atomic_read(&dest->weight); @@ -2235,13 +2426,15 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { struct ip_vs_service_entry *entry; struct ip_vs_service *svc; + union nf_inet_addr addr; entry = (struct ip_vs_service_entry *)arg; + addr.ip = entry->addr; if (entry->fwmark) - svc = __ip_vs_svc_fwm_get(entry->fwmark); + svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark); else - svc = __ip_vs_service_get(entry->protocol, - entry->addr, entry->port); + svc = __ip_vs_service_get(AF_INET, entry->protocol, + &addr, entry->port); if (svc) { ip_vs_copy_service(entry, svc); if (copy_to_user(user, entry, sizeof(*entry)) != 0) @@ -2320,6 +2513,875 @@ static struct nf_sockopt_ops ip_vs_sockopts = { .owner = THIS_MODULE, }; +/* + * Generic Netlink interface + */ + +/* IPVS genetlink family */ +static struct genl_family ip_vs_genl_family = { + .id = GENL_ID_GENERATE, + .hdrsize = 0, + .name = IPVS_GENL_NAME, + .version = IPVS_GENL_VERSION, + .maxattr = IPVS_CMD_MAX, +}; + +/* Policy used for first-level command attributes */ +static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = { + [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED }, + [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED }, + [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED }, + [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 }, + [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 }, + [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 }, +}; + +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */ +static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = { + [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 }, + [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING, + .len = IP_VS_IFNAME_MAXLEN }, + [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 }, +}; + +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */ +static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = { + [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 }, + [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 }, + [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY, + .len = sizeof(union nf_inet_addr) }, + [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 }, + [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, + [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, + .len = IP_VS_SCHEDNAME_MAXLEN }, + [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, + .len = sizeof(struct ip_vs_flags) }, + [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, + [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 }, + [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED }, +}; + +/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */ +static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = { + [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY, + .len = sizeof(union nf_inet_addr) }, + [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 }, + [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 }, + [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED }, +}; + +static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type, + struct ip_vs_stats *stats) +{ + struct nlattr *nl_stats = nla_nest_start(skb, container_type); + if (!nl_stats) + return -EMSGSIZE; + + spin_lock_bh(&stats->lock); + + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes); + NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps); + NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps); + + spin_unlock_bh(&stats->lock); + + nla_nest_end(skb, nl_stats); + + return 0; + +nla_put_failure: + spin_unlock_bh(&stats->lock); + nla_nest_cancel(skb, nl_stats); + return -EMSGSIZE; +} + +static int ip_vs_genl_fill_service(struct sk_buff *skb, + struct ip_vs_service *svc) +{ + struct nlattr *nl_service; + struct ip_vs_flags flags = { .flags = svc->flags, + .mask = ~0 }; + + nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE); + if (!nl_service) + return -EMSGSIZE; + + NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af); + + if (svc->fwmark) { + NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark); + } else { + NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol); + NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr); + NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port); + } + + NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name); + NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags); + NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ); + NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask); + + if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats)) + goto nla_put_failure; + + nla_nest_end(skb, nl_service); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nl_service); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_service(struct sk_buff *skb, + struct ip_vs_service *svc, + struct netlink_callback *cb) +{ + void *hdr; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + &ip_vs_genl_family, NLM_F_MULTI, + IPVS_CMD_NEW_SERVICE); + if (!hdr) + return -EMSGSIZE; + + if (ip_vs_genl_fill_service(skb, svc) < 0) + goto nla_put_failure; + + return genlmsg_end(skb, hdr); + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_services(struct sk_buff *skb, + struct netlink_callback *cb) +{ + int idx = 0, i; + int start = cb->args[0]; + struct ip_vs_service *svc; + + mutex_lock(&__ip_vs_mutex); + for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { + list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { + if (++idx <= start) + continue; + if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { + idx--; + goto nla_put_failure; + } + } + } + + for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { + list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { + if (++idx <= start) + continue; + if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { + idx--; + goto nla_put_failure; + } + } + } + +nla_put_failure: + mutex_unlock(&__ip_vs_mutex); + cb->args[0] = idx; + + return skb->len; +} + +static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, + struct nlattr *nla, int full_entry) +{ + struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1]; + struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr; + + /* Parse mandatory identifying service fields first */ + if (nla == NULL || + nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy)) + return -EINVAL; + + nla_af = attrs[IPVS_SVC_ATTR_AF]; + nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL]; + nla_addr = attrs[IPVS_SVC_ATTR_ADDR]; + nla_port = attrs[IPVS_SVC_ATTR_PORT]; + nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK]; + + if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr)))) + return -EINVAL; + + usvc->af = nla_get_u16(nla_af); +#ifdef CONFIG_IP_VS_IPV6 + if (usvc->af != AF_INET && usvc->af != AF_INET6) +#else + if (usvc->af != AF_INET) +#endif + return -EAFNOSUPPORT; + + if (nla_fwmark) { + usvc->protocol = IPPROTO_TCP; + usvc->fwmark = nla_get_u32(nla_fwmark); + } else { + usvc->protocol = nla_get_u16(nla_protocol); + nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr)); + usvc->port = nla_get_u16(nla_port); + usvc->fwmark = 0; + } + + /* If a full entry was requested, check for the additional fields */ + if (full_entry) { + struct nlattr *nla_sched, *nla_flags, *nla_timeout, + *nla_netmask; + struct ip_vs_flags flags; + struct ip_vs_service *svc; + + nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; + nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; + nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; + nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; + + if (!(nla_sched && nla_flags && nla_timeout && nla_netmask)) + return -EINVAL; + + nla_memcpy(&flags, nla_flags, sizeof(flags)); + + /* prefill flags from service if it already exists */ + if (usvc->fwmark) + svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark); + else + svc = __ip_vs_service_get(usvc->af, usvc->protocol, + &usvc->addr, usvc->port); + if (svc) { + usvc->flags = svc->flags; + ip_vs_service_put(svc); + } else + usvc->flags = 0; + + /* set new flags from userland */ + usvc->flags = (usvc->flags & ~flags.mask) | + (flags.flags & flags.mask); + usvc->sched_name = nla_data(nla_sched); + usvc->timeout = nla_get_u32(nla_timeout); + usvc->netmask = nla_get_u32(nla_netmask); + } + + return 0; +} + +static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla) +{ + struct ip_vs_service_user_kern usvc; + int ret; + + ret = ip_vs_genl_parse_service(&usvc, nla, 0); + if (ret) + return ERR_PTR(ret); + + if (usvc.fwmark) + return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); + else + return __ip_vs_service_get(usvc.af, usvc.protocol, + &usvc.addr, usvc.port); +} + +static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest) +{ + struct nlattr *nl_dest; + + nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST); + if (!nl_dest) + return -EMSGSIZE; + + NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr); + NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port); + + NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD, + atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight)); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS, + atomic_read(&dest->activeconns)); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS, + atomic_read(&dest->inactconns)); + NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS, + atomic_read(&dest->persistconns)); + + if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats)) + goto nla_put_failure; + + nla_nest_end(skb, nl_dest); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nl_dest); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, + struct netlink_callback *cb) +{ + void *hdr; + + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + &ip_vs_genl_family, NLM_F_MULTI, + IPVS_CMD_NEW_DEST); + if (!hdr) + return -EMSGSIZE; + + if (ip_vs_genl_fill_dest(skb, dest) < 0) + goto nla_put_failure; + + return genlmsg_end(skb, hdr); + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_dests(struct sk_buff *skb, + struct netlink_callback *cb) +{ + int idx = 0; + int start = cb->args[0]; + struct ip_vs_service *svc; + struct ip_vs_dest *dest; + struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1]; + + mutex_lock(&__ip_vs_mutex); + + /* Try to find the service for which to dump destinations */ + if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs, + IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy)) + goto out_err; + + svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]); + if (IS_ERR(svc) || svc == NULL) + goto out_err; + + /* Dump the destinations */ + list_for_each_entry(dest, &svc->destinations, n_list) { + if (++idx <= start) + continue; + if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) { + idx--; + goto nla_put_failure; + } + } + +nla_put_failure: + cb->args[0] = idx; + ip_vs_service_put(svc); + +out_err: + mutex_unlock(&__ip_vs_mutex); + + return skb->len; +} + +static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest, + struct nlattr *nla, int full_entry) +{ + struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1]; + struct nlattr *nla_addr, *nla_port; + + /* Parse mandatory identifying destination fields first */ + if (nla == NULL || + nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy)) + return -EINVAL; + + nla_addr = attrs[IPVS_DEST_ATTR_ADDR]; + nla_port = attrs[IPVS_DEST_ATTR_PORT]; + + if (!(nla_addr && nla_port)) + return -EINVAL; + + nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr)); + udest->port = nla_get_u16(nla_port); + + /* If a full entry was requested, check for the additional fields */ + if (full_entry) { + struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh, + *nla_l_thresh; + + nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD]; + nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT]; + nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH]; + nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH]; + + if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh)) + return -EINVAL; + + udest->conn_flags = nla_get_u32(nla_fwd) + & IP_VS_CONN_F_FWD_MASK; + udest->weight = nla_get_u32(nla_weight); + udest->u_threshold = nla_get_u32(nla_u_thresh); + udest->l_threshold = nla_get_u32(nla_l_thresh); + } + + return 0; +} + +static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state, + const char *mcast_ifn, __be32 syncid) +{ + struct nlattr *nl_daemon; + + nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON); + if (!nl_daemon) + return -EMSGSIZE; + + NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state); + NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn); + NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid); + + nla_nest_end(skb, nl_daemon); + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nl_daemon); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, + const char *mcast_ifn, __be32 syncid, + struct netlink_callback *cb) +{ + void *hdr; + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + &ip_vs_genl_family, NLM_F_MULTI, + IPVS_CMD_NEW_DAEMON); + if (!hdr) + return -EMSGSIZE; + + if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) + goto nla_put_failure; + + return genlmsg_end(skb, hdr); + +nla_put_failure: + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; +} + +static int ip_vs_genl_dump_daemons(struct sk_buff *skb, + struct netlink_callback *cb) +{ + mutex_lock(&__ip_vs_mutex); + if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) { + if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER, + ip_vs_master_mcast_ifn, + ip_vs_master_syncid, cb) < 0) + goto nla_put_failure; + + cb->args[0] = 1; + } + + if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) { + if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP, + ip_vs_backup_mcast_ifn, + ip_vs_backup_syncid, cb) < 0) + goto nla_put_failure; + + cb->args[1] = 1; + } + +nla_put_failure: + mutex_unlock(&__ip_vs_mutex); + + return skb->len; +} + +static int ip_vs_genl_new_daemon(struct nlattr **attrs) +{ + if (!(attrs[IPVS_DAEMON_ATTR_STATE] && + attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && + attrs[IPVS_DAEMON_ATTR_SYNC_ID])) + return -EINVAL; + + return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]), + nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), + nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID])); +} + +static int ip_vs_genl_del_daemon(struct nlattr **attrs) +{ + if (!attrs[IPVS_DAEMON_ATTR_STATE]) + return -EINVAL; + + return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE])); +} + +static int ip_vs_genl_set_config(struct nlattr **attrs) +{ + struct ip_vs_timeout_user t; + + __ip_vs_get_timeouts(&t); + + if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]) + t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]); + + if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]) + t.tcp_fin_timeout = + nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]); + + if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]) + t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]); + + return ip_vs_set_timeout(&t); +} + +static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct ip_vs_service *svc = NULL; + struct ip_vs_service_user_kern usvc; + struct ip_vs_dest_user_kern udest; + int ret = 0, cmd; + int need_full_svc = 0, need_full_dest = 0; + + cmd = info->genlhdr->cmd; + + mutex_lock(&__ip_vs_mutex); + + if (cmd == IPVS_CMD_FLUSH) { + ret = ip_vs_flush(); + goto out; + } else if (cmd == IPVS_CMD_SET_CONFIG) { + ret = ip_vs_genl_set_config(info->attrs); + goto out; + } else if (cmd == IPVS_CMD_NEW_DAEMON || + cmd == IPVS_CMD_DEL_DAEMON) { + + struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1]; + + if (!info->attrs[IPVS_CMD_ATTR_DAEMON] || + nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX, + info->attrs[IPVS_CMD_ATTR_DAEMON], + ip_vs_daemon_policy)) { + ret = -EINVAL; + goto out; + } + + if (cmd == IPVS_CMD_NEW_DAEMON) + ret = ip_vs_genl_new_daemon(daemon_attrs); + else + ret = ip_vs_genl_del_daemon(daemon_attrs); + goto out; + } else if (cmd == IPVS_CMD_ZERO && + !info->attrs[IPVS_CMD_ATTR_SERVICE]) { + ret = ip_vs_zero_all(); + goto out; + } + + /* All following commands require a service argument, so check if we + * received a valid one. We need a full service specification when + * adding / editing a service. Only identifying members otherwise. */ + if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) + need_full_svc = 1; + + ret = ip_vs_genl_parse_service(&usvc, + info->attrs[IPVS_CMD_ATTR_SERVICE], + need_full_svc); + if (ret) + goto out; + + /* Lookup the exact service by <protocol, addr, port> or fwmark */ + if (usvc.fwmark == 0) + svc = __ip_vs_service_get(usvc.af, usvc.protocol, + &usvc.addr, usvc.port); + else + svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark); + + /* Unless we're adding a new service, the service must already exist */ + if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) { + ret = -ESRCH; + goto out; + } + + /* Destination commands require a valid destination argument. For + * adding / editing a destination, we need a full destination + * specification. */ + if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST || + cmd == IPVS_CMD_DEL_DEST) { + if (cmd != IPVS_CMD_DEL_DEST) + need_full_dest = 1; + + ret = ip_vs_genl_parse_dest(&udest, + info->attrs[IPVS_CMD_ATTR_DEST], + need_full_dest); + if (ret) + goto out; + } + + switch (cmd) { + case IPVS_CMD_NEW_SERVICE: + if (svc == NULL) + ret = ip_vs_add_service(&usvc, &svc); + else + ret = -EEXIST; + break; + case IPVS_CMD_SET_SERVICE: + ret = ip_vs_edit_service(svc, &usvc); + break; + case IPVS_CMD_DEL_SERVICE: + ret = ip_vs_del_service(svc); + break; + case IPVS_CMD_NEW_DEST: + ret = ip_vs_add_dest(svc, &udest); + break; + case IPVS_CMD_SET_DEST: + ret = ip_vs_edit_dest(svc, &udest); + break; + case IPVS_CMD_DEL_DEST: + ret = ip_vs_del_dest(svc, &udest); + break; + case IPVS_CMD_ZERO: + ret = ip_vs_zero_service(svc); + break; + default: + ret = -EINVAL; + } + +out: + if (svc) + ip_vs_service_put(svc); + mutex_unlock(&__ip_vs_mutex); + + return ret; +} + +static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *msg; + void *reply; + int ret, cmd, reply_cmd; + + cmd = info->genlhdr->cmd; + + if (cmd == IPVS_CMD_GET_SERVICE) + reply_cmd = IPVS_CMD_NEW_SERVICE; + else if (cmd == IPVS_CMD_GET_INFO) + reply_cmd = IPVS_CMD_SET_INFO; + else if (cmd == IPVS_CMD_GET_CONFIG) + reply_cmd = IPVS_CMD_SET_CONFIG; + else { + IP_VS_ERR("unknown Generic Netlink command\n"); + return -EINVAL; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + mutex_lock(&__ip_vs_mutex); + + reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd); + if (reply == NULL) + goto nla_put_failure; + + switch (cmd) { + case IPVS_CMD_GET_SERVICE: + { + struct ip_vs_service *svc; + + svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]); + if (IS_ERR(svc)) { + ret = PTR_ERR(svc); + goto out_err; + } else if (svc) { + ret = ip_vs_genl_fill_service(msg, svc); + ip_vs_service_put(svc); + if (ret) + goto nla_put_failure; + } else { + ret = -ESRCH; + goto out_err; + } + + break; + } + + case IPVS_CMD_GET_CONFIG: + { + struct ip_vs_timeout_user t; + + __ip_vs_get_timeouts(&t); +#ifdef CONFIG_IP_VS_PROTO_TCP + NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout); + NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN, + t.tcp_fin_timeout); +#endif +#ifdef CONFIG_IP_VS_PROTO_UDP + NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout); +#endif + + break; + } + + case IPVS_CMD_GET_INFO: + NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE); + NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, + IP_VS_CONN_TAB_SIZE); + break; + } + + genlmsg_end(msg, reply); + ret = genlmsg_unicast(msg, info->snd_pid); + goto out; + +nla_put_failure: + IP_VS_ERR("not enough space in Netlink message\n"); + ret = -EMSGSIZE; + +out_err: + nlmsg_free(msg); +out: + mutex_unlock(&__ip_vs_mutex); + + return ret; +} + + +static struct genl_ops ip_vs_genl_ops[] __read_mostly = { + { + .cmd = IPVS_CMD_NEW_SERVICE, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_SET_SERVICE, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_DEL_SERVICE, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_SERVICE, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_get_cmd, + .dumpit = ip_vs_genl_dump_services, + .policy = ip_vs_cmd_policy, + }, + { + .cmd = IPVS_CMD_NEW_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_SET_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_DEL_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_DEST, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .dumpit = ip_vs_genl_dump_dests, + }, + { + .cmd = IPVS_CMD_NEW_DAEMON, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_DEL_DAEMON, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_DAEMON, + .flags = GENL_ADMIN_PERM, + .dumpit = ip_vs_genl_dump_daemons, + }, + { + .cmd = IPVS_CMD_SET_CONFIG, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_GET_CONFIG, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_get_cmd, + }, + { + .cmd = IPVS_CMD_GET_INFO, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_get_cmd, + }, + { + .cmd = IPVS_CMD_ZERO, + .flags = GENL_ADMIN_PERM, + .policy = ip_vs_cmd_policy, + .doit = ip_vs_genl_set_cmd, + }, + { + .cmd = IPVS_CMD_FLUSH, + .flags = GENL_ADMIN_PERM, + .doit = ip_vs_genl_set_cmd, + }, +}; + +static int __init ip_vs_genl_register(void) +{ + int ret, i; + + ret = genl_register_family(&ip_vs_genl_family); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) { + ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]); + if (ret) + goto err_out; + } + return 0; + +err_out: + genl_unregister_family(&ip_vs_genl_family); + return ret; +} + +static void ip_vs_genl_unregister(void) +{ + genl_unregister_family(&ip_vs_genl_family); +} + +/* End of Generic Netlink interface definitions */ + int __init ip_vs_control_init(void) { @@ -2334,6 +3396,13 @@ int __init ip_vs_control_init(void) return ret; } + ret = ip_vs_genl_register(); + if (ret) { + IP_VS_ERR("cannot register Generic Netlink interface.\n"); + nf_unregister_sockopt(&ip_vs_sockopts); + return ret; + } + proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops); proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops); @@ -2368,6 +3437,7 @@ void ip_vs_control_cleanup(void) unregister_sysctl_table(sysctl_header); proc_net_remove(&init_net, "ip_vs_stats"); proc_net_remove(&init_net, "ip_vs"); + ip_vs_genl_unregister(); nf_unregister_sockopt(&ip_vs_sockopts); LeaveFunction(2); } diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/netfilter/ipvs/ip_vs_dh.c index fa66824d264f..a16943fd72f1 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/netfilter/ipvs/ip_vs_dh.c @@ -218,7 +218,7 @@ ip_vs_dh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "DH: destination IP address %u.%u.%u.%u " "--> server %u.%u.%u.%u:%d\n", NIPQUAD(iph->daddr), - NIPQUAD(dest->addr), + NIPQUAD(dest->addr.ip), ntohs(dest->port)); return dest; @@ -234,6 +234,9 @@ static struct ip_vs_scheduler ip_vs_dh_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 0, +#endif .init_service = ip_vs_dh_init_svc, .done_service = ip_vs_dh_done_svc, .update_service = ip_vs_dh_update_svc, diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 5a20f93bd7f9..2eb2860dabb5 100644 --- a/net/ipv4/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -65,37 +65,37 @@ static void estimation_timer(unsigned long arg) s = container_of(e, struct ip_vs_stats, est); spin_lock(&s->lock); - n_conns = s->conns; - n_inpkts = s->inpkts; - n_outpkts = s->outpkts; - n_inbytes = s->inbytes; - n_outbytes = s->outbytes; + n_conns = s->ustats.conns; + n_inpkts = s->ustats.inpkts; + n_outpkts = s->ustats.outpkts; + n_inbytes = s->ustats.inbytes; + n_outbytes = s->ustats.outbytes; /* scaled by 2^10, but divided 2 seconds */ rate = (n_conns - e->last_conns)<<9; e->last_conns = n_conns; e->cps += ((long)rate - (long)e->cps)>>2; - s->cps = (e->cps+0x1FF)>>10; + s->ustats.cps = (e->cps+0x1FF)>>10; rate = (n_inpkts - e->last_inpkts)<<9; e->last_inpkts = n_inpkts; e->inpps += ((long)rate - (long)e->inpps)>>2; - s->inpps = (e->inpps+0x1FF)>>10; + s->ustats.inpps = (e->inpps+0x1FF)>>10; rate = (n_outpkts - e->last_outpkts)<<9; e->last_outpkts = n_outpkts; e->outpps += ((long)rate - (long)e->outpps)>>2; - s->outpps = (e->outpps+0x1FF)>>10; + s->ustats.outpps = (e->outpps+0x1FF)>>10; rate = (n_inbytes - e->last_inbytes)<<4; e->last_inbytes = n_inbytes; e->inbps += ((long)rate - (long)e->inbps)>>2; - s->inbps = (e->inbps+0xF)>>5; + s->ustats.inbps = (e->inbps+0xF)>>5; rate = (n_outbytes - e->last_outbytes)<<4; e->last_outbytes = n_outbytes; e->outbps += ((long)rate - (long)e->outbps)>>2; - s->outbps = (e->outbps+0xF)>>5; + s->ustats.outbps = (e->outbps+0xF)>>5; spin_unlock(&s->lock); } spin_unlock(&est_lock); @@ -108,24 +108,22 @@ void ip_vs_new_estimator(struct ip_vs_stats *stats) INIT_LIST_HEAD(&est->list); - est->last_conns = stats->conns; - est->cps = stats->cps<<10; + est->last_conns = stats->ustats.conns; + est->cps = stats->ustats.cps<<10; - est->last_inpkts = stats->inpkts; - est->inpps = stats->inpps<<10; + est->last_inpkts = stats->ustats.inpkts; + est->inpps = stats->ustats.inpps<<10; - est->last_outpkts = stats->outpkts; - est->outpps = stats->outpps<<10; + est->last_outpkts = stats->ustats.outpkts; + est->outpps = stats->ustats.outpps<<10; - est->last_inbytes = stats->inbytes; - est->inbps = stats->inbps<<5; + est->last_inbytes = stats->ustats.inbytes; + est->inbps = stats->ustats.inbps<<5; - est->last_outbytes = stats->outbytes; - est->outbps = stats->outbps<<5; + est->last_outbytes = stats->ustats.outbytes; + est->outbps = stats->ustats.outbps<<5; spin_lock_bh(&est_lock); - if (list_empty(&est_list)) - mod_timer(&est_timer, jiffies + 2 * HZ); list_add(&est->list, &est_list); spin_unlock_bh(&est_lock); } @@ -136,11 +134,6 @@ void ip_vs_kill_estimator(struct ip_vs_stats *stats) spin_lock_bh(&est_lock); list_del(&est->list); - while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) { - spin_unlock_bh(&est_lock); - cpu_relax(); - spin_lock_bh(&est_lock); - } spin_unlock_bh(&est_lock); } @@ -160,3 +153,14 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats) est->inbps = 0; est->outbps = 0; } + +int __init ip_vs_estimator_init(void) +{ + mod_timer(&est_timer, jiffies + 2 * HZ); + return 0; +} + +void ip_vs_estimator_cleanup(void) +{ + del_timer_sync(&est_timer); +} diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index c1c758e4f733..2e7dbd8b73a4 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -140,13 +140,21 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, struct tcphdr *th; char *data, *data_limit; char *start, *end; - __be32 from; + union nf_inet_addr from; __be16 port; struct ip_vs_conn *n_cp; char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ unsigned buf_len; int ret; +#ifdef CONFIG_IP_VS_IPV6 + /* This application helper doesn't work with IPv6 yet, + * so turn this into a no-op for IPv6 packets + */ + if (cp->af == AF_INET6) + return 1; +#endif + *diff = 0; /* Only useful for established sessions */ @@ -166,24 +174,25 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, if (ip_vs_ftp_get_addrport(data, data_limit, SERVER_STRING, sizeof(SERVER_STRING)-1, ')', - &from, &port, + &from.ip, &port, &start, &end) != 1) return 1; IP_VS_DBG(7, "PASV response (%u.%u.%u.%u:%d) -> " "%u.%u.%u.%u:%d detected\n", - NIPQUAD(from), ntohs(port), NIPQUAD(cp->caddr), 0); + NIPQUAD(from.ip), ntohs(port), + NIPQUAD(cp->caddr.ip), 0); /* * Now update or create an connection entry for it */ - n_cp = ip_vs_conn_out_get(iph->protocol, from, port, - cp->caddr, 0); + n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port, + &cp->caddr, 0); if (!n_cp) { - n_cp = ip_vs_conn_new(IPPROTO_TCP, - cp->caddr, 0, - cp->vaddr, port, - from, port, + n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP, + &cp->caddr, 0, + &cp->vaddr, port, + &from, port, IP_VS_CONN_F_NO_CPORT, cp->dest); if (!n_cp) @@ -196,9 +205,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, /* * Replace the old passive address with the new one */ - from = n_cp->vaddr; + from.ip = n_cp->vaddr.ip; port = n_cp->vport; - sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from), + sprintf(buf, "%d,%d,%d,%d,%d,%d", NIPQUAD(from.ip), (ntohs(port)>>8)&255, ntohs(port)&255); buf_len = strlen(buf); @@ -243,10 +252,18 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, struct tcphdr *th; char *data, *data_start, *data_limit; char *start, *end; - __be32 to; + union nf_inet_addr to; __be16 port; struct ip_vs_conn *n_cp; +#ifdef CONFIG_IP_VS_IPV6 + /* This application helper doesn't work with IPv6 yet, + * so turn this into a no-op for IPv6 packets + */ + if (cp->af == AF_INET6) + return 1; +#endif + /* no diff required for incoming packets */ *diff = 0; @@ -291,12 +308,12 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, */ if (ip_vs_ftp_get_addrport(data_start, data_limit, CLIENT_STRING, sizeof(CLIENT_STRING)-1, - '\r', &to, &port, + '\r', &to.ip, &port, &start, &end) != 1) return 1; IP_VS_DBG(7, "PORT %u.%u.%u.%u:%d detected\n", - NIPQUAD(to), ntohs(port)); + NIPQUAD(to.ip), ntohs(port)); /* Passive mode off */ cp->app_data = NULL; @@ -306,16 +323,16 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, */ IP_VS_DBG(7, "protocol %s %u.%u.%u.%u:%d %u.%u.%u.%u:%d\n", ip_vs_proto_name(iph->protocol), - NIPQUAD(to), ntohs(port), NIPQUAD(cp->vaddr), 0); + NIPQUAD(to.ip), ntohs(port), NIPQUAD(cp->vaddr.ip), 0); - n_cp = ip_vs_conn_in_get(iph->protocol, - to, port, - cp->vaddr, htons(ntohs(cp->vport)-1)); + n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol, + &to, port, + &cp->vaddr, htons(ntohs(cp->vport)-1)); if (!n_cp) { - n_cp = ip_vs_conn_new(IPPROTO_TCP, - to, port, - cp->vaddr, htons(ntohs(cp->vport)-1), - cp->daddr, htons(ntohs(cp->dport)-1), + n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP, + &to, port, + &cp->vaddr, htons(ntohs(cp->vport)-1), + &cp->daddr, htons(ntohs(cp->dport)-1), 0, cp->dest); if (!n_cp) diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index 7a6a319f544a..6ecef3518cac 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -96,7 +96,6 @@ struct ip_vs_lblc_entry { * IPVS lblc hash table */ struct ip_vs_lblc_table { - rwlock_t lock; /* lock for this table */ struct list_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ atomic_t entries; /* number of entries */ int max_size; /* maximum size of entries */ @@ -123,31 +122,6 @@ static ctl_table vs_vars_table[] = { static struct ctl_table_header * sysctl_header; -/* - * new/free a ip_vs_lblc_entry, which is a mapping of a destionation - * IP address to a server. - */ -static inline struct ip_vs_lblc_entry * -ip_vs_lblc_new(__be32 daddr, struct ip_vs_dest *dest) -{ - struct ip_vs_lblc_entry *en; - - en = kmalloc(sizeof(struct ip_vs_lblc_entry), GFP_ATOMIC); - if (en == NULL) { - IP_VS_ERR("ip_vs_lblc_new(): no memory\n"); - return NULL; - } - - INIT_LIST_HEAD(&en->list); - en->addr = daddr; - - atomic_inc(&dest->refcnt); - en->dest = dest; - - return en; -} - - static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) { list_del(&en->list); @@ -173,55 +147,66 @@ static inline unsigned ip_vs_lblc_hashkey(__be32 addr) * Hash an entry in the ip_vs_lblc_table. * returns bool success. */ -static int +static void ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) { - unsigned hash; - - if (!list_empty(&en->list)) { - IP_VS_ERR("ip_vs_lblc_hash(): request for already hashed, " - "called from %p\n", __builtin_return_address(0)); - return 0; - } - - /* - * Hash by destination IP address - */ - hash = ip_vs_lblc_hashkey(en->addr); + unsigned hash = ip_vs_lblc_hashkey(en->addr); - write_lock(&tbl->lock); list_add(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); - write_unlock(&tbl->lock); - - return 1; } /* - * Get ip_vs_lblc_entry associated with supplied parameters. + * Get ip_vs_lblc_entry associated with supplied parameters. Called under read + * lock */ static inline struct ip_vs_lblc_entry * ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr) { - unsigned hash; + unsigned hash = ip_vs_lblc_hashkey(addr); struct ip_vs_lblc_entry *en; - hash = ip_vs_lblc_hashkey(addr); + list_for_each_entry(en, &tbl->bucket[hash], list) + if (en->addr == addr) + return en; - read_lock(&tbl->lock); + return NULL; +} - list_for_each_entry(en, &tbl->bucket[hash], list) { - if (en->addr == addr) { - /* HIT */ - read_unlock(&tbl->lock); - return en; + +/* + * Create or update an ip_vs_lblc_entry, which is a mapping of a destination IP + * address to a server. Called under write lock. + */ +static inline struct ip_vs_lblc_entry * +ip_vs_lblc_new(struct ip_vs_lblc_table *tbl, __be32 daddr, + struct ip_vs_dest *dest) +{ + struct ip_vs_lblc_entry *en; + + en = ip_vs_lblc_get(tbl, daddr); + if (!en) { + en = kmalloc(sizeof(*en), GFP_ATOMIC); + if (!en) { + IP_VS_ERR("ip_vs_lblc_new(): no memory\n"); + return NULL; } - } - read_unlock(&tbl->lock); + en->addr = daddr; + en->lastuse = jiffies; - return NULL; + atomic_inc(&dest->refcnt); + en->dest = dest; + + ip_vs_lblc_hash(tbl, en); + } else if (en->dest != dest) { + atomic_dec(&en->dest->refcnt); + atomic_inc(&dest->refcnt); + en->dest = dest; + } + + return en; } @@ -230,30 +215,29 @@ ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr) */ static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl) { - int i; struct ip_vs_lblc_entry *en, *nxt; + int i; for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { - write_lock(&tbl->lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { ip_vs_lblc_free(en); atomic_dec(&tbl->entries); } - write_unlock(&tbl->lock); } } -static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl) +static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) { + struct ip_vs_lblc_table *tbl = svc->sched_data; + struct ip_vs_lblc_entry *en, *nxt; unsigned long now = jiffies; int i, j; - struct ip_vs_lblc_entry *en, *nxt; for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLC_TAB_MASK; - write_lock(&tbl->lock); + write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + sysctl_ip_vs_lblc_expiration)) @@ -262,7 +246,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl) ip_vs_lblc_free(en); atomic_dec(&tbl->entries); } - write_unlock(&tbl->lock); + write_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -281,17 +265,16 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_lblc_table *tbl) */ static void ip_vs_lblc_check_expire(unsigned long data) { - struct ip_vs_lblc_table *tbl; + struct ip_vs_service *svc = (struct ip_vs_service *) data; + struct ip_vs_lblc_table *tbl = svc->sched_data; unsigned long now = jiffies; int goal; int i, j; struct ip_vs_lblc_entry *en, *nxt; - tbl = (struct ip_vs_lblc_table *)data; - if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { /* do full expiration check */ - ip_vs_lblc_full_check(tbl); + ip_vs_lblc_full_check(svc); tbl->counter = 1; goto out; } @@ -308,7 +291,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLC_TAB_MASK; - write_lock(&tbl->lock); + write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_before(now, en->lastuse + ENTRY_TIMEOUT)) continue; @@ -317,7 +300,7 @@ static void ip_vs_lblc_check_expire(unsigned long data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&tbl->lock); + write_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -336,15 +319,14 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) /* * Allocate the ip_vs_lblc_table for this service */ - tbl = kmalloc(sizeof(struct ip_vs_lblc_table), GFP_ATOMIC); + tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); if (tbl == NULL) { IP_VS_ERR("ip_vs_lblc_init_svc(): no memory\n"); return -ENOMEM; } svc->sched_data = tbl; IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) allocated for " - "current service\n", - sizeof(struct ip_vs_lblc_table)); + "current service\n", sizeof(*tbl)); /* * Initialize the hash buckets @@ -352,7 +334,6 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) for (i=0; i<IP_VS_LBLC_TAB_SIZE; i++) { INIT_LIST_HEAD(&tbl->bucket[i]); } - rwlock_init(&tbl->lock); tbl->max_size = IP_VS_LBLC_TAB_SIZE*16; tbl->rover = 0; tbl->counter = 1; @@ -361,9 +342,8 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) * Hook periodic timer for garbage collection */ setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire, - (unsigned long)tbl); - tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; - add_timer(&tbl->periodic_timer); + (unsigned long)svc); + mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL); return 0; } @@ -380,22 +360,16 @@ static int ip_vs_lblc_done_svc(struct ip_vs_service *svc) ip_vs_lblc_flush(tbl); /* release the table itself */ - kfree(svc->sched_data); + kfree(tbl); IP_VS_DBG(6, "LBLC hash table (memory=%Zdbytes) released\n", - sizeof(struct ip_vs_lblc_table)); + sizeof(*tbl)); return 0; } -static int ip_vs_lblc_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline struct ip_vs_dest * -__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) +__ip_vs_lblc_schedule(struct ip_vs_service *svc, struct iphdr *iph) { struct ip_vs_dest *dest, *least; int loh, doh; @@ -448,7 +422,7 @@ __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) IP_VS_DBG(6, "LBLC: server %d.%d.%d.%d:%d " "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), + NIPQUAD(least->addr.ip), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); @@ -484,47 +458,55 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) static struct ip_vs_dest * ip_vs_lblc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { - struct ip_vs_dest *dest; - struct ip_vs_lblc_table *tbl; - struct ip_vs_lblc_entry *en; + struct ip_vs_lblc_table *tbl = svc->sched_data; struct iphdr *iph = ip_hdr(skb); + struct ip_vs_dest *dest = NULL; + struct ip_vs_lblc_entry *en; IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n"); - tbl = (struct ip_vs_lblc_table *)svc->sched_data; + /* First look in our cache */ + read_lock(&svc->sched_lock); en = ip_vs_lblc_get(tbl, iph->daddr); - if (en == NULL) { - dest = __ip_vs_wlc_schedule(svc, iph); - if (dest == NULL) { - IP_VS_DBG(1, "no destination available\n"); - return NULL; - } - en = ip_vs_lblc_new(iph->daddr, dest); - if (en == NULL) { - return NULL; - } - ip_vs_lblc_hash(tbl, en); - } else { - dest = en->dest; - if (!(dest->flags & IP_VS_DEST_F_AVAILABLE) - || atomic_read(&dest->weight) <= 0 - || is_overloaded(dest, svc)) { - dest = __ip_vs_wlc_schedule(svc, iph); - if (dest == NULL) { - IP_VS_DBG(1, "no destination available\n"); - return NULL; - } - atomic_dec(&en->dest->refcnt); - atomic_inc(&dest->refcnt); - en->dest = dest; - } + if (en) { + /* We only hold a read lock, but this is atomic */ + en->lastuse = jiffies; + + /* + * If the destination is not available, i.e. it's in the trash, + * we must ignore it, as it may be removed from under our feet, + * if someone drops our reference count. Our caller only makes + * sure that destinations, that are not in the trash, are not + * moved to the trash, while we are scheduling. But anyone can + * free up entries from the trash at any time. + */ + + if (en->dest->flags & IP_VS_DEST_F_AVAILABLE) + dest = en->dest; + } + read_unlock(&svc->sched_lock); + + /* If the destination has a weight and is not overloaded, use it */ + if (dest && atomic_read(&dest->weight) > 0 && !is_overloaded(dest, svc)) + goto out; + + /* No cache entry or it is invalid, time to schedule */ + dest = __ip_vs_lblc_schedule(svc, iph); + if (!dest) { + IP_VS_DBG(1, "no destination available\n"); + return NULL; } - en->lastuse = jiffies; + /* If we fail to create a cache entry, we'll just use the valid dest */ + write_lock(&svc->sched_lock); + ip_vs_lblc_new(tbl, iph->daddr, dest); + write_unlock(&svc->sched_lock); + +out: IP_VS_DBG(6, "LBLC: destination IP address %u.%u.%u.%u " "--> server %u.%u.%u.%u:%d\n", - NIPQUAD(en->addr), - NIPQUAD(dest->addr), + NIPQUAD(iph->daddr), + NIPQUAD(dest->addr.ip), ntohs(dest->port)); return dest; @@ -540,9 +522,11 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 0, +#endif .init_service = ip_vs_lblc_init_svc, .done_service = ip_vs_lblc_done_svc, - .update_service = ip_vs_lblc_update_svc, .schedule = ip_vs_lblc_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index c234e73968a6..1f75ea83bcf8 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -106,7 +106,7 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) return NULL; } - e = kmalloc(sizeof(struct ip_vs_dest_list), GFP_ATOMIC); + e = kmalloc(sizeof(*e), GFP_ATOMIC); if (e == NULL) { IP_VS_ERR("ip_vs_dest_set_insert(): no memory\n"); return NULL; @@ -116,11 +116,9 @@ ip_vs_dest_set_insert(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) e->dest = dest; /* link it to the list */ - write_lock(&set->lock); e->next = set->list; set->list = e; atomic_inc(&set->size); - write_unlock(&set->lock); set->lastmod = jiffies; return e; @@ -131,7 +129,6 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) { struct ip_vs_dest_list *e, **ep; - write_lock(&set->lock); for (ep=&set->list, e=*ep; e!=NULL; e=*ep) { if (e->dest == dest) { /* HIT */ @@ -144,7 +141,6 @@ ip_vs_dest_set_erase(struct ip_vs_dest_set *set, struct ip_vs_dest *dest) } ep = &e->next; } - write_unlock(&set->lock); } static void ip_vs_dest_set_eraseall(struct ip_vs_dest_set *set) @@ -174,7 +170,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) if (set == NULL) return NULL; - read_lock(&set->lock); /* select the first destination server, whose weight > 0 */ for (e=set->list; e!=NULL; e=e->next) { least = e->dest; @@ -188,7 +183,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) goto nextstage; } } - read_unlock(&set->lock); return NULL; /* find the destination with the weighted least load */ @@ -207,11 +201,10 @@ static inline struct ip_vs_dest *ip_vs_dest_set_min(struct ip_vs_dest_set *set) loh = doh; } } - read_unlock(&set->lock); IP_VS_DBG(6, "ip_vs_dest_set_min: server %d.%d.%d.%d:%d " "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), + NIPQUAD(least->addr.ip), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); @@ -229,7 +222,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) if (set == NULL) return NULL; - read_lock(&set->lock); /* select the first destination server, whose weight > 0 */ for (e=set->list; e!=NULL; e=e->next) { most = e->dest; @@ -239,7 +231,6 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) goto nextstage; } } - read_unlock(&set->lock); return NULL; /* find the destination with the weighted most load */ @@ -256,11 +247,10 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) moh = doh; } } - read_unlock(&set->lock); IP_VS_DBG(6, "ip_vs_dest_set_max: server %d.%d.%d.%d:%d " "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(most->addr), ntohs(most->port), + NIPQUAD(most->addr.ip), ntohs(most->port), atomic_read(&most->activeconns), atomic_read(&most->refcnt), atomic_read(&most->weight), moh); @@ -284,7 +274,6 @@ struct ip_vs_lblcr_entry { * IPVS lblcr hash table */ struct ip_vs_lblcr_table { - rwlock_t lock; /* lock for this table */ struct list_head bucket[IP_VS_LBLCR_TAB_SIZE]; /* hash bucket */ atomic_t entries; /* number of entries */ int max_size; /* maximum size of entries */ @@ -311,32 +300,6 @@ static ctl_table vs_vars_table[] = { static struct ctl_table_header * sysctl_header; -/* - * new/free a ip_vs_lblcr_entry, which is a mapping of a destination - * IP address to a server. - */ -static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__be32 daddr) -{ - struct ip_vs_lblcr_entry *en; - - en = kmalloc(sizeof(struct ip_vs_lblcr_entry), GFP_ATOMIC); - if (en == NULL) { - IP_VS_ERR("ip_vs_lblcr_new(): no memory\n"); - return NULL; - } - - INIT_LIST_HEAD(&en->list); - en->addr = daddr; - - /* initilize its dest set */ - atomic_set(&(en->set.size), 0); - en->set.list = NULL; - rwlock_init(&en->set.lock); - - return en; -} - - static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) { list_del(&en->list); @@ -358,55 +321,68 @@ static inline unsigned ip_vs_lblcr_hashkey(__be32 addr) * Hash an entry in the ip_vs_lblcr_table. * returns bool success. */ -static int +static void ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) { - unsigned hash; + unsigned hash = ip_vs_lblcr_hashkey(en->addr); - if (!list_empty(&en->list)) { - IP_VS_ERR("ip_vs_lblcr_hash(): request for already hashed, " - "called from %p\n", __builtin_return_address(0)); - return 0; - } - - /* - * Hash by destination IP address - */ - hash = ip_vs_lblcr_hashkey(en->addr); - - write_lock(&tbl->lock); list_add(&en->list, &tbl->bucket[hash]); atomic_inc(&tbl->entries); - write_unlock(&tbl->lock); - - return 1; } /* - * Get ip_vs_lblcr_entry associated with supplied parameters. + * Get ip_vs_lblcr_entry associated with supplied parameters. Called under + * read lock. */ static inline struct ip_vs_lblcr_entry * ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr) { - unsigned hash; + unsigned hash = ip_vs_lblcr_hashkey(addr); struct ip_vs_lblcr_entry *en; - hash = ip_vs_lblcr_hashkey(addr); + list_for_each_entry(en, &tbl->bucket[hash], list) + if (en->addr == addr) + return en; - read_lock(&tbl->lock); + return NULL; +} - list_for_each_entry(en, &tbl->bucket[hash], list) { - if (en->addr == addr) { - /* HIT */ - read_unlock(&tbl->lock); - return en; + +/* + * Create or update an ip_vs_lblcr_entry, which is a mapping of a destination + * IP address to a server. Called under write lock. + */ +static inline struct ip_vs_lblcr_entry * +ip_vs_lblcr_new(struct ip_vs_lblcr_table *tbl, __be32 daddr, + struct ip_vs_dest *dest) +{ + struct ip_vs_lblcr_entry *en; + + en = ip_vs_lblcr_get(tbl, daddr); + if (!en) { + en = kmalloc(sizeof(*en), GFP_ATOMIC); + if (!en) { + IP_VS_ERR("ip_vs_lblcr_new(): no memory\n"); + return NULL; } + + en->addr = daddr; + en->lastuse = jiffies; + + /* initilize its dest set */ + atomic_set(&(en->set.size), 0); + en->set.list = NULL; + rwlock_init(&en->set.lock); + + ip_vs_lblcr_hash(tbl, en); } - read_unlock(&tbl->lock); + write_lock(&en->set.lock); + ip_vs_dest_set_insert(&en->set, dest); + write_unlock(&en->set.lock); - return NULL; + return en; } @@ -418,19 +394,18 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl) int i; struct ip_vs_lblcr_entry *en, *nxt; + /* No locking required, only called during cleanup. */ for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { - write_lock(&tbl->lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[i], list) { ip_vs_lblcr_free(en); - atomic_dec(&tbl->entries); } - write_unlock(&tbl->lock); } } -static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) +static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) { + struct ip_vs_lblcr_table *tbl = svc->sched_data; unsigned long now = jiffies; int i, j; struct ip_vs_lblcr_entry *en, *nxt; @@ -438,7 +413,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLCR_TAB_MASK; - write_lock(&tbl->lock); + write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration, now)) @@ -447,7 +422,7 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) ip_vs_lblcr_free(en); atomic_dec(&tbl->entries); } - write_unlock(&tbl->lock); + write_unlock(&svc->sched_lock); } tbl->rover = j; } @@ -466,17 +441,16 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_lblcr_table *tbl) */ static void ip_vs_lblcr_check_expire(unsigned long data) { - struct ip_vs_lblcr_table *tbl; + struct ip_vs_service *svc = (struct ip_vs_service *) data; + struct ip_vs_lblcr_table *tbl = svc->sched_data; unsigned long now = jiffies; int goal; int i, j; struct ip_vs_lblcr_entry *en, *nxt; - tbl = (struct ip_vs_lblcr_table *)data; - if ((tbl->counter % COUNT_FOR_FULL_EXPIRATION) == 0) { /* do full expiration check */ - ip_vs_lblcr_full_check(tbl); + ip_vs_lblcr_full_check(svc); tbl->counter = 1; goto out; } @@ -493,7 +467,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) { j = (j + 1) & IP_VS_LBLCR_TAB_MASK; - write_lock(&tbl->lock); + write_lock(&svc->sched_lock); list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) { if (time_before(now, en->lastuse+ENTRY_TIMEOUT)) continue; @@ -502,7 +476,7 @@ static void ip_vs_lblcr_check_expire(unsigned long data) atomic_dec(&tbl->entries); goal--; } - write_unlock(&tbl->lock); + write_unlock(&svc->sched_lock); if (goal <= 0) break; } @@ -520,15 +494,14 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) /* * Allocate the ip_vs_lblcr_table for this service */ - tbl = kmalloc(sizeof(struct ip_vs_lblcr_table), GFP_ATOMIC); + tbl = kmalloc(sizeof(*tbl), GFP_ATOMIC); if (tbl == NULL) { IP_VS_ERR("ip_vs_lblcr_init_svc(): no memory\n"); return -ENOMEM; } svc->sched_data = tbl; IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) allocated for " - "current service\n", - sizeof(struct ip_vs_lblcr_table)); + "current service\n", sizeof(*tbl)); /* * Initialize the hash buckets @@ -536,7 +509,6 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) { INIT_LIST_HEAD(&tbl->bucket[i]); } - rwlock_init(&tbl->lock); tbl->max_size = IP_VS_LBLCR_TAB_SIZE*16; tbl->rover = 0; tbl->counter = 1; @@ -545,9 +517,8 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) * Hook periodic timer for garbage collection */ setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire, - (unsigned long)tbl); - tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL; - add_timer(&tbl->periodic_timer); + (unsigned long)svc); + mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL); return 0; } @@ -564,22 +535,16 @@ static int ip_vs_lblcr_done_svc(struct ip_vs_service *svc) ip_vs_lblcr_flush(tbl); /* release the table itself */ - kfree(svc->sched_data); + kfree(tbl); IP_VS_DBG(6, "LBLCR hash table (memory=%Zdbytes) released\n", - sizeof(struct ip_vs_lblcr_table)); + sizeof(*tbl)); return 0; } -static int ip_vs_lblcr_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline struct ip_vs_dest * -__ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) +__ip_vs_lblcr_schedule(struct ip_vs_service *svc, struct iphdr *iph) { struct ip_vs_dest *dest, *least; int loh, doh; @@ -633,7 +598,7 @@ __ip_vs_wlc_schedule(struct ip_vs_service *svc, struct iphdr *iph) IP_VS_DBG(6, "LBLCR: server %d.%d.%d.%d:%d " "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), + NIPQUAD(least->addr.ip), ntohs(least->port), atomic_read(&least->activeconns), atomic_read(&least->refcnt), atomic_read(&least->weight), loh); @@ -669,51 +634,79 @@ is_overloaded(struct ip_vs_dest *dest, struct ip_vs_service *svc) static struct ip_vs_dest * ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) { - struct ip_vs_dest *dest; - struct ip_vs_lblcr_table *tbl; - struct ip_vs_lblcr_entry *en; + struct ip_vs_lblcr_table *tbl = svc->sched_data; struct iphdr *iph = ip_hdr(skb); + struct ip_vs_dest *dest = NULL; + struct ip_vs_lblcr_entry *en; IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n"); - tbl = (struct ip_vs_lblcr_table *)svc->sched_data; + /* First look in our cache */ + read_lock(&svc->sched_lock); en = ip_vs_lblcr_get(tbl, iph->daddr); - if (en == NULL) { - dest = __ip_vs_wlc_schedule(svc, iph); - if (dest == NULL) { - IP_VS_DBG(1, "no destination available\n"); - return NULL; - } - en = ip_vs_lblcr_new(iph->daddr); - if (en == NULL) { - return NULL; - } - ip_vs_dest_set_insert(&en->set, dest); - ip_vs_lblcr_hash(tbl, en); - } else { + if (en) { + /* We only hold a read lock, but this is atomic */ + en->lastuse = jiffies; + + /* Get the least loaded destination */ + read_lock(&en->set.lock); dest = ip_vs_dest_set_min(&en->set); - if (!dest || is_overloaded(dest, svc)) { - dest = __ip_vs_wlc_schedule(svc, iph); - if (dest == NULL) { - IP_VS_DBG(1, "no destination available\n"); - return NULL; - } - ip_vs_dest_set_insert(&en->set, dest); - } + read_unlock(&en->set.lock); + + /* More than one destination + enough time passed by, cleanup */ if (atomic_read(&en->set.size) > 1 && - jiffies-en->set.lastmod > sysctl_ip_vs_lblcr_expiration) { + time_after(jiffies, en->set.lastmod + + sysctl_ip_vs_lblcr_expiration)) { struct ip_vs_dest *m; + + write_lock(&en->set.lock); m = ip_vs_dest_set_max(&en->set); if (m) ip_vs_dest_set_erase(&en->set, m); + write_unlock(&en->set.lock); } + + /* If the destination is not overloaded, use it */ + if (dest && !is_overloaded(dest, svc)) { + read_unlock(&svc->sched_lock); + goto out; + } + + /* The cache entry is invalid, time to schedule */ + dest = __ip_vs_lblcr_schedule(svc, iph); + if (!dest) { + IP_VS_DBG(1, "no destination available\n"); + read_unlock(&svc->sched_lock); + return NULL; + } + + /* Update our cache entry */ + write_lock(&en->set.lock); + ip_vs_dest_set_insert(&en->set, dest); + write_unlock(&en->set.lock); + } + read_unlock(&svc->sched_lock); + + if (dest) + goto out; + + /* No cache entry, time to schedule */ + dest = __ip_vs_lblcr_schedule(svc, iph); + if (!dest) { + IP_VS_DBG(1, "no destination available\n"); + return NULL; } - en->lastuse = jiffies; + /* If we fail to create a cache entry, we'll just use the valid dest */ + write_lock(&svc->sched_lock); + ip_vs_lblcr_new(tbl, iph->daddr, dest); + write_unlock(&svc->sched_lock); + +out: IP_VS_DBG(6, "LBLCR: destination IP address %u.%u.%u.%u " "--> server %u.%u.%u.%u:%d\n", - NIPQUAD(en->addr), - NIPQUAD(dest->addr), + NIPQUAD(iph->daddr), + NIPQUAD(dest->addr.ip), ntohs(dest->port)); return dest; @@ -729,9 +722,11 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 0, +#endif .init_service = ip_vs_lblcr_init_svc, .done_service = ip_vs_lblcr_done_svc, - .update_service = ip_vs_lblcr_update_svc, .schedule = ip_vs_lblcr_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_lc.c b/net/netfilter/ipvs/ip_vs_lc.c index ebcdbf75ac65..b69f808ac461 100644 --- a/net/ipv4/ipvs/ip_vs_lc.c +++ b/net/netfilter/ipvs/ip_vs_lc.c @@ -20,24 +20,6 @@ #include <net/ip_vs.h> -static int ip_vs_lc_init_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int ip_vs_lc_done_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int ip_vs_lc_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline unsigned int ip_vs_lc_dest_overhead(struct ip_vs_dest *dest) { @@ -85,10 +67,10 @@ ip_vs_lc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } if (least) - IP_VS_DBG(6, "LC: server %u.%u.%u.%u:%u activeconns %d inactconns %d\n", - NIPQUAD(least->addr), ntohs(least->port), - atomic_read(&least->activeconns), - atomic_read(&least->inactconns)); + IP_VS_DBG_BUF(6, "LC: server %s:%u activeconns %d inactconns %d\n", + IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), + atomic_read(&least->activeconns), + atomic_read(&least->inactconns)); return least; } @@ -99,9 +81,9 @@ static struct ip_vs_scheduler ip_vs_lc_scheduler = { .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list), - .init_service = ip_vs_lc_init_svc, - .done_service = ip_vs_lc_done_svc, - .update_service = ip_vs_lc_update_svc, +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .schedule = ip_vs_lc_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_nq.c b/net/netfilter/ipvs/ip_vs_nq.c index 92f3a6770031..9a2d8033f08f 100644 --- a/net/ipv4/ipvs/ip_vs_nq.c +++ b/net/netfilter/ipvs/ip_vs_nq.c @@ -37,27 +37,6 @@ #include <net/ip_vs.h> -static int -ip_vs_nq_init_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_nq_done_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_nq_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline unsigned int ip_vs_nq_dest_overhead(struct ip_vs_dest *dest) { @@ -120,12 +99,12 @@ ip_vs_nq_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) return NULL; out: - IP_VS_DBG(6, "NQ: server %u.%u.%u.%u:%u " - "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), - atomic_read(&least->activeconns), - atomic_read(&least->refcnt), - atomic_read(&least->weight), loh); + IP_VS_DBG_BUF(6, "NQ: server %s:%u " + "activeconns %d refcnt %d weight %d overhead %d\n", + IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), + atomic_read(&least->activeconns), + atomic_read(&least->refcnt), + atomic_read(&least->weight), loh); return least; } @@ -137,9 +116,9 @@ static struct ip_vs_scheduler ip_vs_nq_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list), - .init_service = ip_vs_nq_init_svc, - .done_service = ip_vs_nq_done_svc, - .update_service = ip_vs_nq_update_svc, +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .schedule = ip_vs_nq_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 6099a88fc200..0791f9e08feb 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -151,11 +151,11 @@ const char * ip_vs_state_name(__u16 proto, int state) } -void -ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, - const struct sk_buff *skb, - int offset, - const char *msg) +static void +ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp, + const struct sk_buff *skb, + int offset, + const char *msg) { char buf[128]; struct iphdr _iph, *ih; @@ -189,6 +189,61 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); } +#ifdef CONFIG_IP_VS_IPV6 +static void +ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, + const struct sk_buff *skb, + int offset, + const char *msg) +{ + char buf[192]; + struct ipv6hdr _iph, *ih; + + ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); + if (ih == NULL) + sprintf(buf, "%s TRUNCATED", pp->name); + else if (ih->nexthdr == IPPROTO_FRAGMENT) + sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT " frag", + pp->name, NIP6(ih->saddr), + NIP6(ih->daddr)); + else { + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr), + sizeof(_ports), _ports); + if (pptr == NULL) + sprintf(buf, "%s TRUNCATED " NIP6_FMT "->" NIP6_FMT, + pp->name, + NIP6(ih->saddr), + NIP6(ih->daddr)); + else + sprintf(buf, "%s " NIP6_FMT ":%u->" NIP6_FMT ":%u", + pp->name, + NIP6(ih->saddr), + ntohs(pptr[0]), + NIP6(ih->daddr), + ntohs(pptr[1])); + } + + printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); +} +#endif + + +void +ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, + const struct sk_buff *skb, + int offset, + const char *msg) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (skb->protocol == htons(ETH_P_IPV6)) + ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg); + else +#endif + ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg); +} + int __init ip_vs_protocol_init(void) { diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c new file mode 100644 index 000000000000..80ab0c8e5b4a --- /dev/null +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -0,0 +1,235 @@ +/* + * ip_vs_proto_ah_esp.c: AH/ESP IPSec load balancing support for IPVS + * + * Authors: Julian Anastasov <ja@ssi.bg>, February 2002 + * Wensong Zhang <wensong@linuxvirtualserver.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation; + * + */ + +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> + +#include <net/ip_vs.h> + + +/* TODO: + +struct isakmp_hdr { + __u8 icookie[8]; + __u8 rcookie[8]; + __u8 np; + __u8 version; + __u8 xchgtype; + __u8 flags; + __u32 msgid; + __u32 length; +}; + +*/ + +#define PORT_ISAKMP 500 + + +static struct ip_vs_conn * +ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, unsigned int proto_off, + int inverse) +{ + struct ip_vs_conn *cp; + + if (likely(!inverse)) { + cp = ip_vs_conn_in_get(af, IPPROTO_UDP, + &iph->saddr, + htons(PORT_ISAKMP), + &iph->daddr, + htons(PORT_ISAKMP)); + } else { + cp = ip_vs_conn_in_get(af, IPPROTO_UDP, + &iph->daddr, + htons(PORT_ISAKMP), + &iph->saddr, + htons(PORT_ISAKMP)); + } + + if (!cp) { + /* + * We are not sure if the packet is from our + * service, so our conn_schedule hook should return NF_ACCEPT + */ + IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet " + "%s%s %s->%s\n", + inverse ? "ICMP+" : "", + pp->name, + IP_VS_DBG_ADDR(af, &iph->saddr), + IP_VS_DBG_ADDR(af, &iph->daddr)); + } + + return cp; +} + + +static struct ip_vs_conn * +ah_esp_conn_out_get(int af, const struct sk_buff *skb, + struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, + int inverse) +{ + struct ip_vs_conn *cp; + + if (likely(!inverse)) { + cp = ip_vs_conn_out_get(af, IPPROTO_UDP, + &iph->saddr, + htons(PORT_ISAKMP), + &iph->daddr, + htons(PORT_ISAKMP)); + } else { + cp = ip_vs_conn_out_get(af, IPPROTO_UDP, + &iph->daddr, + htons(PORT_ISAKMP), + &iph->saddr, + htons(PORT_ISAKMP)); + } + + if (!cp) { + IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " + "%s%s %s->%s\n", + inverse ? "ICMP+" : "", + pp->name, + IP_VS_DBG_ADDR(af, &iph->saddr), + IP_VS_DBG_ADDR(af, &iph->daddr)); + } + + return cp; +} + + +static int +ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, + int *verdict, struct ip_vs_conn **cpp) +{ + /* + * AH/ESP is only related traffic. Pass the packet to IP stack. + */ + *verdict = NF_ACCEPT; + return 0; +} + + +static void +ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb, + int offset, const char *msg) +{ + char buf[256]; + struct iphdr _iph, *ih; + + ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); + if (ih == NULL) + sprintf(buf, "%s TRUNCATED", pp->name); + else + sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u", + pp->name, NIPQUAD(ih->saddr), + NIPQUAD(ih->daddr)); + + printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); +} + +#ifdef CONFIG_IP_VS_IPV6 +static void +ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb, + int offset, const char *msg) +{ + char buf[256]; + struct ipv6hdr _iph, *ih; + + ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); + if (ih == NULL) + sprintf(buf, "%s TRUNCATED", pp->name); + else + sprintf(buf, "%s " NIP6_FMT "->" NIP6_FMT, + pp->name, NIP6(ih->saddr), + NIP6(ih->daddr)); + + printk(KERN_DEBUG "IPVS: %s: %s\n", msg, buf); +} +#endif + +static void +ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, + int offset, const char *msg) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (skb->protocol == htons(ETH_P_IPV6)) + ah_esp_debug_packet_v6(pp, skb, offset, msg); + else +#endif + ah_esp_debug_packet_v4(pp, skb, offset, msg); +} + + +static void ah_esp_init(struct ip_vs_protocol *pp) +{ + /* nothing to do now */ +} + + +static void ah_esp_exit(struct ip_vs_protocol *pp) +{ + /* nothing to do now */ +} + + +#ifdef CONFIG_IP_VS_PROTO_AH +struct ip_vs_protocol ip_vs_protocol_ah = { + .name = "AH", + .protocol = IPPROTO_AH, + .num_states = 1, + .dont_defrag = 1, + .init = ah_esp_init, + .exit = ah_esp_exit, + .conn_schedule = ah_esp_conn_schedule, + .conn_in_get = ah_esp_conn_in_get, + .conn_out_get = ah_esp_conn_out_get, + .snat_handler = NULL, + .dnat_handler = NULL, + .csum_check = NULL, + .state_transition = NULL, + .register_app = NULL, + .unregister_app = NULL, + .app_conn_bind = NULL, + .debug_packet = ah_esp_debug_packet, + .timeout_change = NULL, /* ISAKMP */ + .set_state_timeout = NULL, +}; +#endif + +#ifdef CONFIG_IP_VS_PROTO_ESP +struct ip_vs_protocol ip_vs_protocol_esp = { + .name = "ESP", + .protocol = IPPROTO_ESP, + .num_states = 1, + .dont_defrag = 1, + .init = ah_esp_init, + .exit = ah_esp_exit, + .conn_schedule = ah_esp_conn_schedule, + .conn_in_get = ah_esp_conn_in_get, + .conn_out_get = ah_esp_conn_out_get, + .snat_handler = NULL, + .dnat_handler = NULL, + .csum_check = NULL, + .state_transition = NULL, + .register_app = NULL, + .unregister_app = NULL, + .app_conn_bind = NULL, + .debug_packet = ah_esp_debug_packet, + .timeout_change = NULL, /* ISAKMP */ +}; +#endif diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index d0ea467986a0..dd4566ea2bff 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -18,6 +18,7 @@ #include <linux/tcp.h> /* for tcphdr */ #include <net/ip.h> #include <net/tcp.h> /* for csum_tcpudp_magic */ +#include <net/ip6_checksum.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> @@ -25,8 +26,9 @@ static struct ip_vs_conn * -tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) +tcp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, unsigned int proto_off, + int inverse) { __be16 _ports[2], *pptr; @@ -35,19 +37,20 @@ tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, return NULL; if (likely(!inverse)) { - return ip_vs_conn_in_get(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1]); + return ip_vs_conn_in_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); } else { - return ip_vs_conn_in_get(iph->protocol, - iph->daddr, pptr[1], - iph->saddr, pptr[0]); + return ip_vs_conn_in_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); } } static struct ip_vs_conn * -tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) +tcp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, unsigned int proto_off, + int inverse) { __be16 _ports[2], *pptr; @@ -56,34 +59,36 @@ tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, return NULL; if (likely(!inverse)) { - return ip_vs_conn_out_get(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1]); + return ip_vs_conn_out_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); } else { - return ip_vs_conn_out_get(iph->protocol, - iph->daddr, pptr[1], - iph->saddr, pptr[0]); + return ip_vs_conn_out_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); } } static int -tcp_conn_schedule(struct sk_buff *skb, - struct ip_vs_protocol *pp, +tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { struct ip_vs_service *svc; struct tcphdr _tcph, *th; + struct ip_vs_iphdr iph; - th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); + + th = skb_header_pointer(skb, iph.len, sizeof(_tcph), &_tcph); if (th == NULL) { *verdict = NF_DROP; return 0; } if (th->syn && - (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, - ip_hdr(skb)->daddr, th->dest))) { + (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, + th->dest))) { if (ip_vs_todrop()) { /* * It seems that we are very loaded. @@ -110,22 +115,62 @@ tcp_conn_schedule(struct sk_buff *skb, static inline void -tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip, +tcp_fast_csum_update(int af, struct tcphdr *tcph, + const union nf_inet_addr *oldip, + const union nf_inet_addr *newip, __be16 oldport, __be16 newport) { +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + tcph->check = + csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, + ip_vs_check_diff2(oldport, newport, + ~csum_unfold(tcph->check)))); + else +#endif tcph->check = - csum_fold(ip_vs_check_diff4(oldip, newip, + csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, ip_vs_check_diff2(oldport, newport, ~csum_unfold(tcph->check)))); } +static inline void +tcp_partial_csum_update(int af, struct tcphdr *tcph, + const union nf_inet_addr *oldip, + const union nf_inet_addr *newip, + __be16 oldlen, __be16 newlen) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + tcph->check = + csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, + ip_vs_check_diff2(oldlen, newlen, + ~csum_unfold(tcph->check)))); + else +#endif + tcph->check = + csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, + ip_vs_check_diff2(oldlen, newlen, + ~csum_unfold(tcph->check)))); +} + + static int tcp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct tcphdr *tcph; - const unsigned int tcphoff = ip_hdrlen(skb); + unsigned int tcphoff; + int oldlen; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + tcphoff = sizeof(struct ipv6hdr); + else +#endif + tcphoff = ip_hdrlen(skb); + oldlen = skb->len - tcphoff; /* csum_check requires unshared skb */ if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) @@ -133,7 +178,7 @@ tcp_snat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ @@ -141,13 +186,17 @@ tcp_snat_handler(struct sk_buff *skb, return 0; } - tcph = (void *)ip_hdr(skb) + tcphoff; + tcph = (void *)skb_network_header(skb) + tcphoff; tcph->source = cp->vport; /* Adjust TCP checksums */ - if (!cp->app) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { + tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, + htonl(oldlen), + htonl(skb->len - tcphoff)); + } else if (!cp->app) { /* Only port and addr are changed, do fast csum update */ - tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, + tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -155,9 +204,20 @@ tcp_snat_handler(struct sk_buff *skb, /* full checksum calculation */ tcph->check = 0; skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); - tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, - skb->len - tcphoff, - cp->protocol, skb->csum); +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + tcph->check = csum_ipv6_magic(&cp->vaddr.in6, + &cp->caddr.in6, + skb->len - tcphoff, + cp->protocol, skb->csum); + else +#endif + tcph->check = csum_tcpudp_magic(cp->vaddr.ip, + cp->caddr.ip, + skb->len - tcphoff, + cp->protocol, + skb->csum); + IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, tcph->check, (char*)&(tcph->check) - (char*)tcph); @@ -171,7 +231,16 @@ tcp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct tcphdr *tcph; - const unsigned int tcphoff = ip_hdrlen(skb); + unsigned int tcphoff; + int oldlen; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + tcphoff = sizeof(struct ipv6hdr); + else +#endif + tcphoff = ip_hdrlen(skb); + oldlen = skb->len - tcphoff; /* csum_check requires unshared skb */ if (!skb_make_writable(skb, tcphoff+sizeof(*tcph))) @@ -179,7 +248,7 @@ tcp_dnat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* @@ -190,15 +259,19 @@ tcp_dnat_handler(struct sk_buff *skb, return 0; } - tcph = (void *)ip_hdr(skb) + tcphoff; + tcph = (void *)skb_network_header(skb) + tcphoff; tcph->dest = cp->dport; /* * Adjust TCP checksums */ - if (!cp->app) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { + tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, + htonl(oldlen), + htonl(skb->len - tcphoff)); + } else if (!cp->app) { /* Only port and addr are changed, do fast csum update */ - tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, + tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -206,9 +279,19 @@ tcp_dnat_handler(struct sk_buff *skb, /* full checksum calculation */ tcph->check = 0; skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); - tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, - skb->len - tcphoff, - cp->protocol, skb->csum); +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + tcph->check = csum_ipv6_magic(&cp->caddr.in6, + &cp->daddr.in6, + skb->len - tcphoff, + cp->protocol, skb->csum); + else +#endif + tcph->check = csum_tcpudp_magic(cp->caddr.ip, + cp->daddr.ip, + skb->len - tcphoff, + cp->protocol, + skb->csum); skb->ip_summed = CHECKSUM_UNNECESSARY; } return 1; @@ -216,21 +299,43 @@ tcp_dnat_handler(struct sk_buff *skb, static int -tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) +tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) { - const unsigned int tcphoff = ip_hdrlen(skb); + unsigned int tcphoff; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + tcphoff = sizeof(struct ipv6hdr); + else +#endif + tcphoff = ip_hdrlen(skb); switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); case CHECKSUM_COMPLETE: - if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr, - skb->len - tcphoff, - ip_hdr(skb)->protocol, skb->csum)) { - IP_VS_DBG_RL_PKT(0, pp, skb, 0, - "Failed checksum for"); - return 0; - } +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - tcphoff, + ipv6_hdr(skb)->nexthdr, + skb->csum)) { + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "Failed checksum for"); + return 0; + } + } else +#endif + if (csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - tcphoff, + ip_hdr(skb)->protocol, + skb->csum)) { + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "Failed checksum for"); + return 0; + } break; default: /* No need to checksum. */ @@ -419,19 +524,23 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp, if (new_state != cp->state) { struct ip_vs_dest *dest = cp->dest; - IP_VS_DBG(8, "%s %s [%c%c%c%c] %u.%u.%u.%u:%d->" - "%u.%u.%u.%u:%d state: %s->%s conn->refcnt:%d\n", - pp->name, - (state_off==TCP_DIR_OUTPUT)?"output ":"input ", - th->syn? 'S' : '.', - th->fin? 'F' : '.', - th->ack? 'A' : '.', - th->rst? 'R' : '.', - NIPQUAD(cp->daddr), ntohs(cp->dport), - NIPQUAD(cp->caddr), ntohs(cp->cport), - tcp_state_name(cp->state), - tcp_state_name(new_state), - atomic_read(&cp->refcnt)); + IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->" + "%s:%d state: %s->%s conn->refcnt:%d\n", + pp->name, + ((state_off == TCP_DIR_OUTPUT) ? + "output " : "input "), + th->syn ? 'S' : '.', + th->fin ? 'F' : '.', + th->ack ? 'A' : '.', + th->rst ? 'R' : '.', + IP_VS_DBG_ADDR(cp->af, &cp->daddr), + ntohs(cp->dport), + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + tcp_state_name(cp->state), + tcp_state_name(new_state), + atomic_read(&cp->refcnt)); + if (dest) { if (!(cp->flags & IP_VS_CONN_F_INACTIVE) && (new_state != IP_VS_TCP_S_ESTABLISHED)) { @@ -461,7 +570,13 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction, { struct tcphdr _tcph, *th; - th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); +#ifdef CONFIG_IP_VS_IPV6 + int ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr); +#else + int ihl = ip_hdrlen(skb); +#endif + + th = skb_header_pointer(skb, ihl, sizeof(_tcph), &_tcph); if (th == NULL) return 0; @@ -546,12 +661,15 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) break; spin_unlock(&tcp_app_lock); - IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" - "%u.%u.%u.%u:%u to app %s on port %u\n", - __func__, - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), - inc->name, ntohs(inc->port)); + IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" + "%s:%u to app %s on port %u\n", + __func__, + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport), + inc->name, ntohs(inc->port)); + cp->app = inc; if (inc->init_conn) result = inc->init_conn(inc, cp); diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index c6be5d56823f..6eb6039d6343 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -22,10 +22,12 @@ #include <net/ip_vs.h> #include <net/ip.h> +#include <net/ip6_checksum.h> static struct ip_vs_conn * -udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) +udp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, unsigned int proto_off, + int inverse) { struct ip_vs_conn *cp; __be16 _ports[2], *pptr; @@ -35,13 +37,13 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, return NULL; if (likely(!inverse)) { - cp = ip_vs_conn_in_get(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1]); + cp = ip_vs_conn_in_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); } else { - cp = ip_vs_conn_in_get(iph->protocol, - iph->daddr, pptr[1], - iph->saddr, pptr[0]); + cp = ip_vs_conn_in_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); } return cp; @@ -49,25 +51,25 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, static struct ip_vs_conn * -udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, - const struct iphdr *iph, unsigned int proto_off, int inverse) +udp_conn_out_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, + const struct ip_vs_iphdr *iph, unsigned int proto_off, + int inverse) { struct ip_vs_conn *cp; __be16 _ports[2], *pptr; - pptr = skb_header_pointer(skb, ip_hdrlen(skb), - sizeof(_ports), _ports); + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) return NULL; if (likely(!inverse)) { - cp = ip_vs_conn_out_get(iph->protocol, - iph->saddr, pptr[0], - iph->daddr, pptr[1]); + cp = ip_vs_conn_out_get(af, iph->protocol, + &iph->saddr, pptr[0], + &iph->daddr, pptr[1]); } else { - cp = ip_vs_conn_out_get(iph->protocol, - iph->daddr, pptr[1], - iph->saddr, pptr[0]); + cp = ip_vs_conn_out_get(af, iph->protocol, + &iph->daddr, pptr[1], + &iph->saddr, pptr[0]); } return cp; @@ -75,21 +77,24 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, static int -udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, +udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, int *verdict, struct ip_vs_conn **cpp) { struct ip_vs_service *svc; struct udphdr _udph, *uh; + struct ip_vs_iphdr iph; + + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); - uh = skb_header_pointer(skb, ip_hdrlen(skb), - sizeof(_udph), &_udph); + uh = skb_header_pointer(skb, iph.len, sizeof(_udph), &_udph); if (uh == NULL) { *verdict = NF_DROP; return 0; } - if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, - ip_hdr(skb)->daddr, uh->dest))) { + svc = ip_vs_service_get(af, skb->mark, iph.protocol, + &iph.daddr, uh->dest); + if (svc) { if (ip_vs_todrop()) { /* * It seems that we are very loaded. @@ -116,23 +121,63 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, static inline void -udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, +udp_fast_csum_update(int af, struct udphdr *uhdr, + const union nf_inet_addr *oldip, + const union nf_inet_addr *newip, __be16 oldport, __be16 newport) { - uhdr->check = - csum_fold(ip_vs_check_diff4(oldip, newip, - ip_vs_check_diff2(oldport, newport, - ~csum_unfold(uhdr->check)))); +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + uhdr->check = + csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, + ip_vs_check_diff2(oldport, newport, + ~csum_unfold(uhdr->check)))); + else +#endif + uhdr->check = + csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, + ip_vs_check_diff2(oldport, newport, + ~csum_unfold(uhdr->check)))); if (!uhdr->check) uhdr->check = CSUM_MANGLED_0; } +static inline void +udp_partial_csum_update(int af, struct udphdr *uhdr, + const union nf_inet_addr *oldip, + const union nf_inet_addr *newip, + __be16 oldlen, __be16 newlen) +{ +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + uhdr->check = + csum_fold(ip_vs_check_diff16(oldip->ip6, newip->ip6, + ip_vs_check_diff2(oldlen, newlen, + ~csum_unfold(uhdr->check)))); + else +#endif + uhdr->check = + csum_fold(ip_vs_check_diff4(oldip->ip, newip->ip, + ip_vs_check_diff2(oldlen, newlen, + ~csum_unfold(uhdr->check)))); +} + + static int udp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct udphdr *udph; - const unsigned int udphoff = ip_hdrlen(skb); + unsigned int udphoff; + int oldlen; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + udphoff = sizeof(struct ipv6hdr); + else +#endif + udphoff = ip_hdrlen(skb); + oldlen = skb->len - udphoff; /* csum_check requires unshared skb */ if (!skb_make_writable(skb, udphoff+sizeof(*udph))) @@ -140,7 +185,7 @@ udp_snat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* @@ -150,15 +195,19 @@ udp_snat_handler(struct sk_buff *skb, return 0; } - udph = (void *)ip_hdr(skb) + udphoff; + udph = (void *)skb_network_header(skb) + udphoff; udph->source = cp->vport; /* * Adjust UDP checksums */ - if (!cp->app && (udph->check != 0)) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { + udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, + htonl(oldlen), + htonl(skb->len - udphoff)); + } else if (!cp->app && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ - udp_fast_csum_update(udph, cp->daddr, cp->vaddr, + udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -166,9 +215,19 @@ udp_snat_handler(struct sk_buff *skb, /* full checksum calculation */ udph->check = 0; skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); - udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, - skb->len - udphoff, - cp->protocol, skb->csum); +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + udph->check = csum_ipv6_magic(&cp->vaddr.in6, + &cp->caddr.in6, + skb->len - udphoff, + cp->protocol, skb->csum); + else +#endif + udph->check = csum_tcpudp_magic(cp->vaddr.ip, + cp->caddr.ip, + skb->len - udphoff, + cp->protocol, + skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", @@ -184,7 +243,16 @@ udp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp) { struct udphdr *udph; - unsigned int udphoff = ip_hdrlen(skb); + unsigned int udphoff; + int oldlen; + +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + udphoff = sizeof(struct ipv6hdr); + else +#endif + udphoff = ip_hdrlen(skb); + oldlen = skb->len - udphoff; /* csum_check requires unshared skb */ if (!skb_make_writable(skb, udphoff+sizeof(*udph))) @@ -192,7 +260,7 @@ udp_dnat_handler(struct sk_buff *skb, if (unlikely(cp->app != NULL)) { /* Some checks before mangling */ - if (pp->csum_check && !pp->csum_check(skb, pp)) + if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* @@ -203,15 +271,19 @@ udp_dnat_handler(struct sk_buff *skb, return 0; } - udph = (void *)ip_hdr(skb) + udphoff; + udph = (void *)skb_network_header(skb) + udphoff; udph->dest = cp->dport; /* * Adjust UDP checksums */ - if (!cp->app && (udph->check != 0)) { + if (skb->ip_summed == CHECKSUM_PARTIAL) { + udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, + htonl(oldlen), + htonl(skb->len - udphoff)); + } else if (!cp->app && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ - udp_fast_csum_update(udph, cp->vaddr, cp->daddr, + udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; @@ -219,9 +291,19 @@ udp_dnat_handler(struct sk_buff *skb, /* full checksum calculation */ udph->check = 0; skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); - udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, - skb->len - udphoff, - cp->protocol, skb->csum); +#ifdef CONFIG_IP_VS_IPV6 + if (cp->af == AF_INET6) + udph->check = csum_ipv6_magic(&cp->caddr.in6, + &cp->daddr.in6, + skb->len - udphoff, + cp->protocol, skb->csum); + else +#endif + udph->check = csum_tcpudp_magic(cp->caddr.ip, + cp->daddr.ip, + skb->len - udphoff, + cp->protocol, + skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -231,10 +313,17 @@ udp_dnat_handler(struct sk_buff *skb, static int -udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) +udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) { struct udphdr _udph, *uh; - const unsigned int udphoff = ip_hdrlen(skb); + unsigned int udphoff; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + udphoff = sizeof(struct ipv6hdr); + else +#endif + udphoff = ip_hdrlen(skb); uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); if (uh == NULL) @@ -246,15 +335,28 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); case CHECKSUM_COMPLETE: - if (csum_tcpudp_magic(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, - skb->len - udphoff, - ip_hdr(skb)->protocol, - skb->csum)) { - IP_VS_DBG_RL_PKT(0, pp, skb, 0, - "Failed checksum for"); - return 0; - } +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) { + if (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + skb->len - udphoff, + ipv6_hdr(skb)->nexthdr, + skb->csum)) { + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "Failed checksum for"); + return 0; + } + } else +#endif + if (csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + skb->len - udphoff, + ip_hdr(skb)->protocol, + skb->csum)) { + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "Failed checksum for"); + return 0; + } break; default: /* No need to checksum. */ @@ -340,12 +442,15 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) break; spin_unlock(&udp_app_lock); - IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" - "%u.%u.%u.%u:%u to app %s on port %u\n", - __func__, - NIPQUAD(cp->caddr), ntohs(cp->cport), - NIPQUAD(cp->vaddr), ntohs(cp->vport), - inc->name, ntohs(inc->port)); + IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" + "%s:%u to app %s on port %u\n", + __func__, + IP_VS_DBG_ADDR(cp->af, &cp->caddr), + ntohs(cp->cport), + IP_VS_DBG_ADDR(cp->af, &cp->vaddr), + ntohs(cp->vport), + inc->name, ntohs(inc->port)); + cp->app = inc; if (inc->init_conn) result = inc->init_conn(inc, cp); diff --git a/net/ipv4/ipvs/ip_vs_rr.c b/net/netfilter/ipvs/ip_vs_rr.c index 358110d17e59..a22195f68ac4 100644 --- a/net/ipv4/ipvs/ip_vs_rr.c +++ b/net/netfilter/ipvs/ip_vs_rr.c @@ -32,12 +32,6 @@ static int ip_vs_rr_init_svc(struct ip_vs_service *svc) } -static int ip_vs_rr_done_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static int ip_vs_rr_update_svc(struct ip_vs_service *svc) { svc->sched_data = &svc->destinations; @@ -80,11 +74,11 @@ ip_vs_rr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) out: svc->sched_data = q; write_unlock(&svc->sched_lock); - IP_VS_DBG(6, "RR: server %u.%u.%u.%u:%u " - "activeconns %d refcnt %d weight %d\n", - NIPQUAD(dest->addr), ntohs(dest->port), - atomic_read(&dest->activeconns), - atomic_read(&dest->refcnt), atomic_read(&dest->weight)); + IP_VS_DBG_BUF(6, "RR: server %s:%u " + "activeconns %d refcnt %d weight %d\n", + IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), + atomic_read(&dest->activeconns), + atomic_read(&dest->refcnt), atomic_read(&dest->weight)); return dest; } @@ -95,8 +89,10 @@ static struct ip_vs_scheduler ip_vs_rr_scheduler = { .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .init_service = ip_vs_rr_init_svc, - .done_service = ip_vs_rr_done_svc, .update_service = ip_vs_rr_update_svc, .schedule = ip_vs_rr_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c index a46ad9e35016..a46ad9e35016 100644 --- a/net/ipv4/ipvs/ip_vs_sched.c +++ b/net/netfilter/ipvs/ip_vs_sched.c diff --git a/net/ipv4/ipvs/ip_vs_sed.c b/net/netfilter/ipvs/ip_vs_sed.c index 77663d84cbd1..7d2f22f04b83 100644 --- a/net/ipv4/ipvs/ip_vs_sed.c +++ b/net/netfilter/ipvs/ip_vs_sed.c @@ -41,27 +41,6 @@ #include <net/ip_vs.h> -static int -ip_vs_sed_init_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_sed_done_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_sed_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline unsigned int ip_vs_sed_dest_overhead(struct ip_vs_dest *dest) { @@ -122,12 +101,12 @@ ip_vs_sed_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } } - IP_VS_DBG(6, "SED: server %u.%u.%u.%u:%u " - "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), - atomic_read(&least->activeconns), - atomic_read(&least->refcnt), - atomic_read(&least->weight), loh); + IP_VS_DBG_BUF(6, "SED: server %s:%u " + "activeconns %d refcnt %d weight %d overhead %d\n", + IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), + atomic_read(&least->activeconns), + atomic_read(&least->refcnt), + atomic_read(&least->weight), loh); return least; } @@ -139,9 +118,9 @@ static struct ip_vs_scheduler ip_vs_sed_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list), - .init_service = ip_vs_sed_init_svc, - .done_service = ip_vs_sed_done_svc, - .update_service = ip_vs_sed_update_svc, +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .schedule = ip_vs_sed_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 7b979e228056..1d96de27fefd 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -215,7 +215,7 @@ ip_vs_sh_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) IP_VS_DBG(6, "SH: source IP address %u.%u.%u.%u " "--> server %u.%u.%u.%u:%d\n", NIPQUAD(iph->saddr), - NIPQUAD(dest->addr), + NIPQUAD(dest->addr.ip), ntohs(dest->port)); return dest; @@ -231,6 +231,9 @@ static struct ip_vs_scheduler ip_vs_sh_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 0, +#endif .init_service = ip_vs_sh_init_svc, .done_service = ip_vs_sh_done_svc, .update_service = ip_vs_sh_update_svc, diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index a652da2c3200..de5e7e118eed 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -30,6 +30,7 @@ #include <linux/err.h> #include <linux/kthread.h> #include <linux/wait.h> +#include <linux/kernel.h> #include <net/ip.h> #include <net/sock.h> @@ -99,6 +100,7 @@ struct ip_vs_sync_thread_data { */ #define SYNC_MESG_HEADER_LEN 4 +#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */ struct ip_vs_sync_mesg { __u8 nr_conns; @@ -256,9 +258,9 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp) s->cport = cp->cport; s->vport = cp->vport; s->dport = cp->dport; - s->caddr = cp->caddr; - s->vaddr = cp->vaddr; - s->daddr = cp->daddr; + s->caddr = cp->caddr.ip; + s->vaddr = cp->vaddr.ip; + s->daddr = cp->daddr.ip; s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); s->state = htons(cp->state); if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { @@ -366,21 +368,28 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) } if (!(flags & IP_VS_CONN_F_TEMPLATE)) - cp = ip_vs_conn_in_get(s->protocol, - s->caddr, s->cport, - s->vaddr, s->vport); + cp = ip_vs_conn_in_get(AF_INET, s->protocol, + (union nf_inet_addr *)&s->caddr, + s->cport, + (union nf_inet_addr *)&s->vaddr, + s->vport); else - cp = ip_vs_ct_in_get(s->protocol, - s->caddr, s->cport, - s->vaddr, s->vport); + cp = ip_vs_ct_in_get(AF_INET, s->protocol, + (union nf_inet_addr *)&s->caddr, + s->cport, + (union nf_inet_addr *)&s->vaddr, + s->vport); if (!cp) { /* * Find the appropriate destination for the connection. * If it is not found the connection will remain unbound * but still handled. */ - dest = ip_vs_find_dest(s->daddr, s->dport, - s->vaddr, s->vport, + dest = ip_vs_find_dest(AF_INET, + (union nf_inet_addr *)&s->daddr, + s->dport, + (union nf_inet_addr *)&s->vaddr, + s->vport, s->protocol); /* Set the approprite ativity flag */ if (s->protocol == IPPROTO_TCP) { @@ -389,10 +398,13 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) else flags &= ~IP_VS_CONN_F_INACTIVE; } - cp = ip_vs_conn_new(s->protocol, - s->caddr, s->cport, - s->vaddr, s->vport, - s->daddr, s->dport, + cp = ip_vs_conn_new(AF_INET, s->protocol, + (union nf_inet_addr *)&s->caddr, + s->cport, + (union nf_inet_addr *)&s->vaddr, + s->vport, + (union nf_inet_addr *)&s->daddr, + s->dport, flags, dest); if (dest) atomic_dec(&dest->refcnt); @@ -506,8 +518,8 @@ static int set_sync_mesg_maxlen(int sync_state) num = (dev->mtu - sizeof(struct iphdr) - sizeof(struct udphdr) - SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; - sync_send_mesg_maxlen = - SYNC_MESG_HEADER_LEN + SIMPLE_CONN_SIZE * num; + sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN + + SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF); IP_VS_DBG(7, "setting the maximum length of sync sending " "message %d.\n", sync_send_mesg_maxlen); } else if (sync_state == IP_VS_STATE_BACKUP) { diff --git a/net/ipv4/ipvs/ip_vs_wlc.c b/net/netfilter/ipvs/ip_vs_wlc.c index 9b0ef86bb1f7..8c596e712599 100644 --- a/net/ipv4/ipvs/ip_vs_wlc.c +++ b/net/netfilter/ipvs/ip_vs_wlc.c @@ -25,27 +25,6 @@ #include <net/ip_vs.h> -static int -ip_vs_wlc_init_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_wlc_done_svc(struct ip_vs_service *svc) -{ - return 0; -} - - -static int -ip_vs_wlc_update_svc(struct ip_vs_service *svc) -{ - return 0; -} - - static inline unsigned int ip_vs_wlc_dest_overhead(struct ip_vs_dest *dest) { @@ -110,12 +89,12 @@ ip_vs_wlc_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } } - IP_VS_DBG(6, "WLC: server %u.%u.%u.%u:%u " - "activeconns %d refcnt %d weight %d overhead %d\n", - NIPQUAD(least->addr), ntohs(least->port), - atomic_read(&least->activeconns), - atomic_read(&least->refcnt), - atomic_read(&least->weight), loh); + IP_VS_DBG_BUF(6, "WLC: server %s:%u " + "activeconns %d refcnt %d weight %d overhead %d\n", + IP_VS_DBG_ADDR(svc->af, &least->addr), ntohs(least->port), + atomic_read(&least->activeconns), + atomic_read(&least->refcnt), + atomic_read(&least->weight), loh); return least; } @@ -127,9 +106,9 @@ static struct ip_vs_scheduler ip_vs_wlc_scheduler = .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list), - .init_service = ip_vs_wlc_init_svc, - .done_service = ip_vs_wlc_done_svc, - .update_service = ip_vs_wlc_update_svc, +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .schedule = ip_vs_wlc_schedule, }; diff --git a/net/ipv4/ipvs/ip_vs_wrr.c b/net/netfilter/ipvs/ip_vs_wrr.c index 0d86a79b87b5..7ea92fed50bf 100644 --- a/net/ipv4/ipvs/ip_vs_wrr.c +++ b/net/netfilter/ipvs/ip_vs_wrr.c @@ -195,12 +195,12 @@ ip_vs_wrr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) } } - IP_VS_DBG(6, "WRR: server %u.%u.%u.%u:%u " - "activeconns %d refcnt %d weight %d\n", - NIPQUAD(dest->addr), ntohs(dest->port), - atomic_read(&dest->activeconns), - atomic_read(&dest->refcnt), - atomic_read(&dest->weight)); + IP_VS_DBG_BUF(6, "WRR: server %s:%u " + "activeconns %d refcnt %d weight %d\n", + IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port), + atomic_read(&dest->activeconns), + atomic_read(&dest->refcnt), + atomic_read(&dest->weight)); out: write_unlock(&svc->sched_lock); @@ -213,6 +213,9 @@ static struct ip_vs_scheduler ip_vs_wrr_scheduler = { .refcnt = ATOMIC_INIT(0), .module = THIS_MODULE, .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list), +#ifdef CONFIG_IP_VS_IPV6 + .supports_ipv6 = 1, +#endif .init_service = ip_vs_wrr_init_svc, .done_service = ip_vs_wrr_done_svc, .update_service = ip_vs_wrr_update_svc, diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 9892d4aca42e..e90d52f199bc 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -20,6 +20,9 @@ #include <net/udp.h> #include <net/icmp.h> /* for icmp_send */ #include <net/route.h> /* for ip_route_output */ +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <linux/icmpv6.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> @@ -47,7 +50,8 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie) if (!dst) return NULL; - if ((dst->obsolete || rtos != dest->dst_rtos) && + if ((dst->obsolete + || (dest->af == AF_INET && rtos != dest->dst_rtos)) && dst->ops->check(dst, cookie) == NULL) { dest->dst_cache = NULL; dst_release(dst); @@ -71,7 +75,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) .oif = 0, .nl_u = { .ip4_u = { - .daddr = dest->addr, + .daddr = dest->addr.ip, .saddr = 0, .tos = rtos, } }, }; @@ -80,12 +84,12 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) spin_unlock(&dest->dst_lock); IP_VS_DBG_RL("ip_route_output error, " "dest: %u.%u.%u.%u\n", - NIPQUAD(dest->addr)); + NIPQUAD(dest->addr.ip)); return NULL; } __ip_vs_dst_set(dest, rtos, dst_clone(&rt->u.dst)); IP_VS_DBG(10, "new dst %u.%u.%u.%u, refcnt=%d, rtos=%X\n", - NIPQUAD(dest->addr), + NIPQUAD(dest->addr.ip), atomic_read(&rt->u.dst.__refcnt), rtos); } spin_unlock(&dest->dst_lock); @@ -94,14 +98,14 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) .oif = 0, .nl_u = { .ip4_u = { - .daddr = cp->daddr, + .daddr = cp->daddr.ip, .saddr = 0, .tos = rtos, } }, }; if (ip_route_output_key(&init_net, &rt, &fl)) { IP_VS_DBG_RL("ip_route_output error, dest: " - "%u.%u.%u.%u\n", NIPQUAD(cp->daddr)); + "%u.%u.%u.%u\n", NIPQUAD(cp->daddr.ip)); return NULL; } } @@ -109,6 +113,70 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) return rt; } +#ifdef CONFIG_IP_VS_IPV6 +static struct rt6_info * +__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) +{ + struct rt6_info *rt; /* Route to the other host */ + struct ip_vs_dest *dest = cp->dest; + + if (dest) { + spin_lock(&dest->dst_lock); + rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0); + if (!rt) { + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip6_u = { + .daddr = dest->addr.in6, + .saddr = { + .s6_addr32 = + { 0, 0, 0, 0 }, + }, + }, + }, + }; + + rt = (struct rt6_info *)ip6_route_output(&init_net, + NULL, &fl); + if (!rt) { + spin_unlock(&dest->dst_lock); + IP_VS_DBG_RL("ip6_route_output error, " + "dest: " NIP6_FMT "\n", + NIP6(dest->addr.in6)); + return NULL; + } + __ip_vs_dst_set(dest, 0, dst_clone(&rt->u.dst)); + IP_VS_DBG(10, "new dst " NIP6_FMT ", refcnt=%d\n", + NIP6(dest->addr.in6), + atomic_read(&rt->u.dst.__refcnt)); + } + spin_unlock(&dest->dst_lock); + } else { + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip6_u = { + .daddr = cp->daddr.in6, + .saddr = { + .s6_addr32 = { 0, 0, 0, 0 }, + }, + }, + }, + }; + + rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); + if (!rt) { + IP_VS_DBG_RL("ip6_route_output error, dest: " + NIP6_FMT "\n", NIP6(cp->daddr.in6)); + return NULL; + } + } + + return rt; +} +#endif + /* * Release dest->dst_cache before a dest is removed @@ -123,11 +191,11 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) dst_release(old_dst); } -#define IP_VS_XMIT(skb, rt) \ +#define IP_VS_XMIT(pf, skb, rt) \ do { \ (skb)->ipvs_property = 1; \ skb_forward_csum(skb); \ - NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, (skb), NULL, \ + NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ (rt)->u.dst.dev, dst_output); \ } while (0) @@ -200,7 +268,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(skb, rt); + IP_VS_XMIT(PF_INET, skb, rt); LeaveFunction(10); return NF_STOLEN; @@ -213,6 +281,70 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, return NF_STOLEN; } +#ifdef CONFIG_IP_VS_IPV6 +int +ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) +{ + struct rt6_info *rt; /* Route to the other host */ + struct ipv6hdr *iph = ipv6_hdr(skb); + int mtu; + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip6_u = { + .daddr = iph->daddr, + .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } }, + }; + + EnterFunction(10); + + rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); + if (!rt) { + IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): ip6_route_output error, " + "dest: " NIP6_FMT "\n", NIP6(iph->daddr)); + goto tx_error_icmp; + } + + /* MTU checking */ + mtu = dst_mtu(&rt->u.dst); + if (skb->len > mtu) { + dst_release(&rt->u.dst); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + IP_VS_DBG_RL("ip_vs_bypass_xmit_v6(): frag needed\n"); + goto tx_error; + } + + /* + * Call ip_send_check because we are not sure it is called + * after ip_defrag. Is copy-on-write needed? + */ + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(skb == NULL)) { + dst_release(&rt->u.dst); + return NF_STOLEN; + } + + /* drop old route */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; + + IP_VS_XMIT(PF_INET6, skb, rt); + + LeaveFunction(10); + return NF_STOLEN; + + tx_error_icmp: + dst_link_failure(skb); + tx_error: + kfree_skb(skb); + LeaveFunction(10); + return NF_STOLEN; +} +#endif /* * NAT transmitter (only for outside-to-inside nat forwarding) @@ -264,7 +396,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) goto tx_error; - ip_hdr(skb)->daddr = cp->daddr; + ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); @@ -276,7 +408,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(skb, rt); + IP_VS_XMIT(PF_INET, skb, rt); LeaveFunction(10); return NF_STOLEN; @@ -292,6 +424,83 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; } +#ifdef CONFIG_IP_VS_IPV6 +int +ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) +{ + struct rt6_info *rt; /* Route to the other host */ + int mtu; + + EnterFunction(10); + + /* check if it is a connection of no-client-port */ + if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { + __be16 _pt, *p; + p = skb_header_pointer(skb, sizeof(struct ipv6hdr), + sizeof(_pt), &_pt); + if (p == NULL) + goto tx_error; + ip_vs_conn_fill_cport(cp, *p); + IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); + } + + rt = __ip_vs_get_out_rt_v6(cp); + if (!rt) + goto tx_error_icmp; + + /* MTU checking */ + mtu = dst_mtu(&rt->u.dst); + if (skb->len > mtu) { + dst_release(&rt->u.dst); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + IP_VS_DBG_RL_PKT(0, pp, skb, 0, + "ip_vs_nat_xmit_v6(): frag needed for"); + goto tx_error; + } + + /* copy-on-write the packet before mangling it */ + if (!skb_make_writable(skb, sizeof(struct ipv6hdr))) + goto tx_error_put; + + if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + goto tx_error_put; + + /* drop old route */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* mangle the packet */ + if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) + goto tx_error; + ipv6_hdr(skb)->daddr = cp->daddr.in6; + + IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); + + /* FIXME: when application helper enlarges the packet and the length + is larger than the MTU of outgoing device, there will be still + MTU problem. */ + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; + + IP_VS_XMIT(PF_INET6, skb, rt); + + LeaveFunction(10); + return NF_STOLEN; + +tx_error_icmp: + dst_link_failure(skb); +tx_error: + LeaveFunction(10); + kfree_skb(skb); + return NF_STOLEN; +tx_error_put: + dst_release(&rt->u.dst); + goto tx_error; +} +#endif + /* * IP Tunneling transmitter @@ -423,6 +632,113 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, return NF_STOLEN; } +#ifdef CONFIG_IP_VS_IPV6 +int +ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) +{ + struct rt6_info *rt; /* Route to the other host */ + struct net_device *tdev; /* Device to other host */ + struct ipv6hdr *old_iph = ipv6_hdr(skb); + sk_buff_data_t old_transport_header = skb->transport_header; + struct ipv6hdr *iph; /* Our new IP header */ + unsigned int max_headroom; /* The extra header space needed */ + int mtu; + + EnterFunction(10); + + if (skb->protocol != htons(ETH_P_IPV6)) { + IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): protocol error, " + "ETH_P_IPV6: %d, skb protocol: %d\n", + htons(ETH_P_IPV6), skb->protocol); + goto tx_error; + } + + rt = __ip_vs_get_out_rt_v6(cp); + if (!rt) + goto tx_error_icmp; + + tdev = rt->u.dst.dev; + + mtu = dst_mtu(&rt->u.dst) - sizeof(struct ipv6hdr); + /* TODO IPv6: do we need this check in IPv6? */ + if (mtu < 1280) { + dst_release(&rt->u.dst); + IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): mtu less than 1280\n"); + goto tx_error; + } + if (skb->dst) + skb->dst->ops->update_pmtu(skb->dst, mtu); + + if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + dst_release(&rt->u.dst); + IP_VS_DBG_RL("ip_vs_tunnel_xmit_v6(): frag needed\n"); + goto tx_error; + } + + /* + * Okay, now see if we can stuff it in the buffer as-is. + */ + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct ipv6hdr); + + if (skb_headroom(skb) < max_headroom + || skb_cloned(skb) || skb_shared(skb)) { + struct sk_buff *new_skb = + skb_realloc_headroom(skb, max_headroom); + if (!new_skb) { + dst_release(&rt->u.dst); + kfree_skb(skb); + IP_VS_ERR_RL("ip_vs_tunnel_xmit_v6(): no memory\n"); + return NF_STOLEN; + } + kfree_skb(skb); + skb = new_skb; + old_iph = ipv6_hdr(skb); + } + + skb->transport_header = old_transport_header; + + skb_push(skb, sizeof(struct ipv6hdr)); + skb_reset_network_header(skb); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + + /* drop old route */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* + * Push down and install the IPIP header. + */ + iph = ipv6_hdr(skb); + iph->version = 6; + iph->nexthdr = IPPROTO_IPV6; + iph->payload_len = old_iph->payload_len; + be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); + iph->priority = old_iph->priority; + memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); + iph->daddr = rt->rt6i_dst.addr; + iph->saddr = cp->vaddr.in6; /* rt->rt6i_src.addr; */ + iph->hop_limit = old_iph->hop_limit; + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; + + ip6_local_out(skb); + + LeaveFunction(10); + + return NF_STOLEN; + +tx_error_icmp: + dst_link_failure(skb); +tx_error: + kfree_skb(skb); + LeaveFunction(10); + return NF_STOLEN; +} +#endif + /* * Direct Routing transmitter @@ -467,7 +783,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(skb, rt); + IP_VS_XMIT(PF_INET, skb, rt); LeaveFunction(10); return NF_STOLEN; @@ -480,6 +796,60 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, return NF_STOLEN; } +#ifdef CONFIG_IP_VS_IPV6 +int +ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp) +{ + struct rt6_info *rt; /* Route to the other host */ + int mtu; + + EnterFunction(10); + + rt = __ip_vs_get_out_rt_v6(cp); + if (!rt) + goto tx_error_icmp; + + /* MTU checking */ + mtu = dst_mtu(&rt->u.dst); + if (skb->len > mtu) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + dst_release(&rt->u.dst); + IP_VS_DBG_RL("ip_vs_dr_xmit_v6(): frag needed\n"); + goto tx_error; + } + + /* + * Call ip_send_check because we are not sure it is called + * after ip_defrag. Is copy-on-write needed? + */ + skb = skb_share_check(skb, GFP_ATOMIC); + if (unlikely(skb == NULL)) { + dst_release(&rt->u.dst); + return NF_STOLEN; + } + + /* drop old route */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; + + IP_VS_XMIT(PF_INET6, skb, rt); + + LeaveFunction(10); + return NF_STOLEN; + +tx_error_icmp: + dst_link_failure(skb); +tx_error: + kfree_skb(skb); + LeaveFunction(10); + return NF_STOLEN; +} +#endif + /* * ICMP packet transmitter @@ -540,7 +910,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(skb, rt); + IP_VS_XMIT(PF_INET, skb, rt); rc = NF_STOLEN; goto out; @@ -557,3 +927,79 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_rt_put(rt); goto tx_error; } + +#ifdef CONFIG_IP_VS_IPV6 +int +ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct ip_vs_protocol *pp, int offset) +{ + struct rt6_info *rt; /* Route to the other host */ + int mtu; + int rc; + + EnterFunction(10); + + /* The ICMP packet for VS/TUN, VS/DR and LOCALNODE will be + forwarded directly here, because there is no need to + translate address/port back */ + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { + if (cp->packet_xmit) + rc = cp->packet_xmit(skb, cp, pp); + else + rc = NF_ACCEPT; + /* do not touch skb anymore */ + atomic_inc(&cp->in_pkts); + goto out; + } + + /* + * mangle and send the packet here (only for VS/NAT) + */ + + rt = __ip_vs_get_out_rt_v6(cp); + if (!rt) + goto tx_error_icmp; + + /* MTU checking */ + mtu = dst_mtu(&rt->u.dst); + if (skb->len > mtu) { + dst_release(&rt->u.dst); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n"); + goto tx_error; + } + + /* copy-on-write the packet before mangling it */ + if (!skb_make_writable(skb, offset)) + goto tx_error_put; + + if (skb_cow(skb, rt->u.dst.dev->hard_header_len)) + goto tx_error_put; + + /* drop the old route when skb is not shared */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + ip_vs_nat_icmp_v6(skb, pp, cp, 0); + + /* Another hack: avoid icmp_send in ip_fragment */ + skb->local_df = 1; + + IP_VS_XMIT(PF_INET6, skb, rt); + + rc = NF_STOLEN; + goto out; + +tx_error_icmp: + dst_link_failure(skb); +tx_error: + dev_kfree_skb(skb); + rc = NF_STOLEN; +out: + LeaveFunction(10); + return rc; +tx_error_put: + dst_release(&rt->u.dst); + goto tx_error; +} +#endif diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 59bd8b903a19..b92df5c1dfcf 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -22,19 +22,17 @@ #define NF_CT_ACCT_DEFAULT 0 #endif -int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT; -EXPORT_SYMBOL_GPL(nf_ct_acct); +static int nf_ct_acct __read_mostly = NF_CT_ACCT_DEFAULT; module_param_named(acct, nf_ct_acct, bool, 0644); MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); #ifdef CONFIG_SYSCTL -static struct ctl_table_header *acct_sysctl_header; static struct ctl_table acct_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_acct", - .data = &nf_ct_acct, + .data = &init_net.ct.sysctl_acct, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -64,41 +62,87 @@ static struct nf_ct_ext_type acct_extend __read_mostly = { .id = NF_CT_EXT_ACCT, }; -int nf_conntrack_acct_init(void) +#ifdef CONFIG_SYSCTL +static int nf_conntrack_acct_init_sysctl(struct net *net) { - int ret; + struct ctl_table *table; -#ifdef CONFIG_NF_CT_ACCT - printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Plase use\n"); - printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n"); - printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); -#endif + table = kmemdup(acct_sysctl_table, sizeof(acct_sysctl_table), + GFP_KERNEL); + if (!table) + goto out; + + table[0].data = &net->ct.sysctl_acct; - ret = nf_ct_extend_register(&acct_extend); - if (ret < 0) { - printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); - return ret; + net->ct.acct_sysctl_header = register_net_sysctl_table(net, + nf_net_netfilter_sysctl_path, table); + if (!net->ct.acct_sysctl_header) { + printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n"); + goto out_register; } + return 0; -#ifdef CONFIG_SYSCTL - acct_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path, - acct_sysctl_table); +out_register: + kfree(table); +out: + return -ENOMEM; +} - if (!acct_sysctl_header) { - nf_ct_extend_unregister(&acct_extend); +static void nf_conntrack_acct_fini_sysctl(struct net *net) +{ + struct ctl_table *table; - printk(KERN_ERR "nf_conntrack_acct: can't register to sysctl.\n"); - return -ENOMEM; - } + table = net->ct.acct_sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(net->ct.acct_sysctl_header); + kfree(table); +} +#else +static int nf_conntrack_acct_init_sysctl(struct net *net) +{ + return 0; +} + +static void nf_conntrack_acct_fini_sysctl(struct net *net) +{ +} +#endif + +int nf_conntrack_acct_init(struct net *net) +{ + int ret; + + net->ct.sysctl_acct = nf_ct_acct; + + if (net_eq(net, &init_net)) { +#ifdef CONFIG_NF_CT_ACCT + printk(KERN_WARNING "CONFIG_NF_CT_ACCT is deprecated and will be removed soon. Please use\n"); + printk(KERN_WARNING "nf_conntrack.acct=1 kernel paramater, acct=1 nf_conntrack module option or\n"); + printk(KERN_WARNING "sysctl net.netfilter.nf_conntrack_acct=1 to enable it.\n"); #endif + ret = nf_ct_extend_register(&acct_extend); + if (ret < 0) { + printk(KERN_ERR "nf_conntrack_acct: Unable to register extension\n"); + goto out_extend_register; + } + } + + ret = nf_conntrack_acct_init_sysctl(net); + if (ret < 0) + goto out_sysctl; + return 0; + +out_sysctl: + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&acct_extend); +out_extend_register: + return ret; } -void nf_conntrack_acct_fini(void) +void nf_conntrack_acct_fini(struct net *net) { -#ifdef CONFIG_SYSCTL - unregister_sysctl_table(acct_sysctl_header); -#endif - nf_ct_extend_unregister(&acct_extend); + nf_conntrack_acct_fini_sysctl(net); + if (net_eq(net, &init_net)) + nf_ct_extend_unregister(&acct_extend); } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9d1830da8e84..622d7c671cb7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -38,36 +38,30 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_extend.h> #include <net/netfilter/nf_conntrack_acct.h> +#include <net/netfilter/nf_nat.h> #define NF_CONNTRACK_VERSION "0.5.0" +unsigned int +(*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, + enum nf_nat_manip_type manip, + struct nlattr *attr) __read_mostly; +EXPORT_SYMBOL_GPL(nfnetlink_parse_nat_setup_hook); + DEFINE_SPINLOCK(nf_conntrack_lock); EXPORT_SYMBOL_GPL(nf_conntrack_lock); -/* nf_conntrack_standalone needs this */ -atomic_t nf_conntrack_count = ATOMIC_INIT(0); -EXPORT_SYMBOL_GPL(nf_conntrack_count); - unsigned int nf_conntrack_htable_size __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); -struct hlist_head *nf_conntrack_hash __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_hash); - struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); -unsigned int nf_ct_log_invalid __read_mostly; -HLIST_HEAD(unconfirmed); -static int nf_conntrack_vmalloc __read_mostly; static struct kmem_cache *nf_conntrack_cachep __read_mostly; -DEFINE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); -EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat); - static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -180,6 +174,7 @@ static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; + struct net *net = nf_ct_net(ct); struct nf_conntrack_l4proto *l4proto; pr_debug("destroy_conntrack(%p)\n", ct); @@ -212,7 +207,7 @@ destroy_conntrack(struct nf_conntrack *nfct) hlist_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode); } - NF_CT_STAT_INC(delete); + NF_CT_STAT_INC(net, delete); spin_unlock_bh(&nf_conntrack_lock); if (ct->master) @@ -225,6 +220,7 @@ destroy_conntrack(struct nf_conntrack *nfct) static void death_by_timeout(unsigned long ul_conntrack) { struct nf_conn *ct = (void *)ul_conntrack; + struct net *net = nf_ct_net(ct); struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_helper *helper; @@ -239,14 +235,14 @@ static void death_by_timeout(unsigned long ul_conntrack) spin_lock_bh(&nf_conntrack_lock); /* Inside lock so preempt is disabled on module removal path. * Otherwise we can get spurious warnings. */ - NF_CT_STAT_INC(delete_list); + NF_CT_STAT_INC(net, delete_list); clean_from_lists(ct); spin_unlock_bh(&nf_conntrack_lock); nf_ct_put(ct); } struct nf_conntrack_tuple_hash * -__nf_conntrack_find(const struct nf_conntrack_tuple *tuple) +__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_node *n; @@ -256,13 +252,13 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple) * at least once for the stats anyway. */ local_bh_disable(); - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) { + hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { if (nf_ct_tuple_equal(tuple, &h->tuple)) { - NF_CT_STAT_INC(found); + NF_CT_STAT_INC(net, found); local_bh_enable(); return h; } - NF_CT_STAT_INC(searched); + NF_CT_STAT_INC(net, searched); } local_bh_enable(); @@ -272,13 +268,13 @@ EXPORT_SYMBOL_GPL(__nf_conntrack_find); /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * -nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple) +nf_conntrack_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; rcu_read_lock(); - h = __nf_conntrack_find(tuple); + h = __nf_conntrack_find(net, tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) @@ -294,10 +290,12 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int hash, unsigned int repl_hash) { + struct net *net = nf_ct_net(ct); + hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, - &nf_conntrack_hash[hash]); + &net->ct.hash[hash]); hlist_add_head_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnode, - &nf_conntrack_hash[repl_hash]); + &net->ct.hash[repl_hash]); } void nf_conntrack_hash_insert(struct nf_conn *ct) @@ -323,8 +321,10 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct nf_conn_help *help; struct hlist_node *n; enum ip_conntrack_info ctinfo; + struct net *net; ct = nf_ct_get(skb, &ctinfo); + net = nf_ct_net(ct); /* ipt_REJECT uses nf_conntrack_attach to attach related ICMP/TCP RST packets in other direction. Actual packet @@ -351,11 +351,11 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. */ - hlist_for_each_entry(h, n, &nf_conntrack_hash[hash], hnode) + hlist_for_each_entry(h, n, &net->ct.hash[hash], hnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, &h->tuple)) goto out; - hlist_for_each_entry(h, n, &nf_conntrack_hash[repl_hash], hnode) + hlist_for_each_entry(h, n, &net->ct.hash[repl_hash], hnode) if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, &h->tuple)) goto out; @@ -371,22 +371,22 @@ __nf_conntrack_confirm(struct sk_buff *skb) add_timer(&ct->timeout); atomic_inc(&ct->ct_general.use); set_bit(IPS_CONFIRMED_BIT, &ct->status); - NF_CT_STAT_INC(insert); + NF_CT_STAT_INC(net, insert); spin_unlock_bh(&nf_conntrack_lock); help = nfct_help(ct); if (help && help->helper) - nf_conntrack_event_cache(IPCT_HELPER, skb); + nf_conntrack_event_cache(IPCT_HELPER, ct); #ifdef CONFIG_NF_NAT_NEEDED if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_NATINFO, skb); + nf_conntrack_event_cache(IPCT_NATINFO, ct); #endif nf_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, skb); + IPCT_RELATED : IPCT_NEW, ct); return NF_ACCEPT; out: - NF_CT_STAT_INC(insert_failed); + NF_CT_STAT_INC(net, insert_failed); spin_unlock_bh(&nf_conntrack_lock); return NF_DROP; } @@ -398,6 +398,7 @@ int nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) { + struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_node *n; unsigned int hash = hash_conntrack(tuple); @@ -406,14 +407,14 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, * least once for the stats anyway. */ rcu_read_lock_bh(); - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], hnode) { + hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && nf_ct_tuple_equal(tuple, &h->tuple)) { - NF_CT_STAT_INC(found); + NF_CT_STAT_INC(net, found); rcu_read_unlock_bh(); return 1; } - NF_CT_STAT_INC(searched); + NF_CT_STAT_INC(net, searched); } rcu_read_unlock_bh(); @@ -425,7 +426,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken); /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. */ -static noinline int early_drop(unsigned int hash) +static noinline int early_drop(struct net *net, unsigned int hash) { /* Use oldest entry, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; @@ -436,7 +437,7 @@ static noinline int early_drop(unsigned int hash) rcu_read_lock(); for (i = 0; i < nf_conntrack_htable_size; i++) { - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash], + hlist_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnode) { tmp = nf_ct_tuplehash_to_ctrack(h); if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) @@ -458,13 +459,14 @@ static noinline int early_drop(unsigned int hash) if (del_timer(&ct->timeout)) { death_by_timeout((unsigned long)ct); dropped = 1; - NF_CT_STAT_INC_ATOMIC(early_drop); + NF_CT_STAT_INC_ATOMIC(net, early_drop); } nf_ct_put(ct); return dropped; } -struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, +struct nf_conn *nf_conntrack_alloc(struct net *net, + const struct nf_conntrack_tuple *orig, const struct nf_conntrack_tuple *repl, gfp_t gfp) { @@ -476,13 +478,13 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, } /* We don't want any race condition at early drop stage */ - atomic_inc(&nf_conntrack_count); + atomic_inc(&net->ct.count); if (nf_conntrack_max && - unlikely(atomic_read(&nf_conntrack_count) > nf_conntrack_max)) { + unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { unsigned int hash = hash_conntrack(orig); - if (!early_drop(hash)) { - atomic_dec(&nf_conntrack_count); + if (!early_drop(net, hash)) { + atomic_dec(&net->ct.count); if (net_ratelimit()) printk(KERN_WARNING "nf_conntrack: table full, dropping" @@ -494,7 +496,7 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, ct = kmem_cache_zalloc(nf_conntrack_cachep, gfp); if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); - atomic_dec(&nf_conntrack_count); + atomic_dec(&net->ct.count); return ERR_PTR(-ENOMEM); } @@ -503,6 +505,9 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, ct->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; /* Don't set timer yet: wait for confirmation */ setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); +#ifdef CONFIG_NET_NS + ct->ct_net = net; +#endif INIT_RCU_HEAD(&ct->rcu); return ct; @@ -512,10 +517,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alloc); static void nf_conntrack_free_rcu(struct rcu_head *head) { struct nf_conn *ct = container_of(head, struct nf_conn, rcu); + struct net *net = nf_ct_net(ct); nf_ct_ext_free(ct); kmem_cache_free(nf_conntrack_cachep, ct); - atomic_dec(&nf_conntrack_count); + atomic_dec(&net->ct.count); } void nf_conntrack_free(struct nf_conn *ct) @@ -528,7 +534,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); /* Allocate a new conntrack: we return -ENOMEM if classification failed due to stress. Otherwise it really is unclassifiable. */ static struct nf_conntrack_tuple_hash * -init_conntrack(const struct nf_conntrack_tuple *tuple, +init_conntrack(struct net *net, + const struct nf_conntrack_tuple *tuple, struct nf_conntrack_l3proto *l3proto, struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, @@ -544,7 +551,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, return NULL; } - ct = nf_conntrack_alloc(tuple, &repl_tuple, GFP_ATOMIC); + ct = nf_conntrack_alloc(net, tuple, &repl_tuple, GFP_ATOMIC); if (ct == NULL || IS_ERR(ct)) { pr_debug("Can't allocate conntrack.\n"); return (struct nf_conntrack_tuple_hash *)ct; @@ -559,7 +566,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, nf_ct_acct_ext_add(ct, GFP_ATOMIC); spin_lock_bh(&nf_conntrack_lock); - exp = nf_ct_find_expectation(tuple); + exp = nf_ct_find_expectation(net, tuple); if (exp) { pr_debug("conntrack: expectation arrives ct=%p exp=%p\n", ct, exp); @@ -579,7 +586,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, ct->secmark = exp->master->secmark; #endif nf_conntrack_get(&ct->master->ct_general); - NF_CT_STAT_INC(expect_new); + NF_CT_STAT_INC(net, expect_new); } else { struct nf_conntrack_helper *helper; @@ -589,11 +596,12 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, if (help) rcu_assign_pointer(help->helper, helper); } - NF_CT_STAT_INC(new); + NF_CT_STAT_INC(net, new); } /* Overload tuple linked list to put us in unconfirmed list. */ - hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, &unconfirmed); + hlist_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnode, + &net->ct.unconfirmed); spin_unlock_bh(&nf_conntrack_lock); @@ -608,7 +616,8 @@ init_conntrack(const struct nf_conntrack_tuple *tuple, /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ static inline struct nf_conn * -resolve_normal_ct(struct sk_buff *skb, +resolve_normal_ct(struct net *net, + struct sk_buff *skb, unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, @@ -629,9 +638,9 @@ resolve_normal_ct(struct sk_buff *skb, } /* look for tuple match */ - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(net, &tuple); if (!h) { - h = init_conntrack(&tuple, l3proto, l4proto, skb, dataoff); + h = init_conntrack(net, &tuple, l3proto, l4proto, skb, dataoff); if (!h) return NULL; if (IS_ERR(h)) @@ -665,7 +674,8 @@ resolve_normal_ct(struct sk_buff *skb, } unsigned int -nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) +nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, + struct sk_buff *skb) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -678,44 +688,46 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) /* Previously seen (loopback or untracked)? Ignore. */ if (skb->nfct) { - NF_CT_STAT_INC_ATOMIC(ignore); + NF_CT_STAT_INC_ATOMIC(net, ignore); return NF_ACCEPT; } /* rcu_read_lock()ed by nf_hook_slow */ - l3proto = __nf_ct_l3proto_find((u_int16_t)pf); + l3proto = __nf_ct_l3proto_find(pf); ret = l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, &protonum); if (ret <= 0) { pr_debug("not prepared to track yet or error occured\n"); - NF_CT_STAT_INC_ATOMIC(error); - NF_CT_STAT_INC_ATOMIC(invalid); + NF_CT_STAT_INC_ATOMIC(net, error); + NF_CT_STAT_INC_ATOMIC(net, invalid); return -ret; } - l4proto = __nf_ct_l4proto_find((u_int16_t)pf, protonum); + l4proto = __nf_ct_l4proto_find(pf, protonum); /* It may be an special packet, error, unclean... * inverse of the return code tells to the netfilter * core what to do with the packet. */ - if (l4proto->error != NULL && - (ret = l4proto->error(skb, dataoff, &ctinfo, pf, hooknum)) <= 0) { - NF_CT_STAT_INC_ATOMIC(error); - NF_CT_STAT_INC_ATOMIC(invalid); - return -ret; + if (l4proto->error != NULL) { + ret = l4proto->error(net, skb, dataoff, &ctinfo, pf, hooknum); + if (ret <= 0) { + NF_CT_STAT_INC_ATOMIC(net, error); + NF_CT_STAT_INC_ATOMIC(net, invalid); + return -ret; + } } - ct = resolve_normal_ct(skb, dataoff, pf, protonum, l3proto, l4proto, - &set_reply, &ctinfo); + ct = resolve_normal_ct(net, skb, dataoff, pf, protonum, + l3proto, l4proto, &set_reply, &ctinfo); if (!ct) { /* Not valid part of a connection */ - NF_CT_STAT_INC_ATOMIC(invalid); + NF_CT_STAT_INC_ATOMIC(net, invalid); return NF_ACCEPT; } if (IS_ERR(ct)) { /* Too stressed to deal. */ - NF_CT_STAT_INC_ATOMIC(drop); + NF_CT_STAT_INC_ATOMIC(net, drop); return NF_DROP; } @@ -728,12 +740,12 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff *skb) pr_debug("nf_conntrack_in: Can't track with proto module\n"); nf_conntrack_put(skb->nfct); skb->nfct = NULL; - NF_CT_STAT_INC_ATOMIC(invalid); + NF_CT_STAT_INC_ATOMIC(net, invalid); return -ret; } if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); return ret; } @@ -846,7 +858,7 @@ acct: /* must be unlocked when calling event cache */ if (event) - nf_conntrack_event_cache(event, skb); + nf_conntrack_event_cache(event, ct); } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); @@ -938,7 +950,7 @@ static void nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) /* Bring out ya dead! */ static struct nf_conn * -get_next_corpse(int (*iter)(struct nf_conn *i, void *data), +get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data, unsigned int *bucket) { struct nf_conntrack_tuple_hash *h; @@ -947,13 +959,13 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), spin_lock_bh(&nf_conntrack_lock); for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { - hlist_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnode) { + hlist_for_each_entry(h, n, &net->ct.hash[*bucket], hnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) goto found; } } - hlist_for_each_entry(h, n, &unconfirmed, hnode) { + hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) set_bit(IPS_DYING_BIT, &ct->status); @@ -966,13 +978,14 @@ found: return ct; } -void -nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data) +void nf_ct_iterate_cleanup(struct net *net, + int (*iter)(struct nf_conn *i, void *data), + void *data) { struct nf_conn *ct; unsigned int bucket = 0; - while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { + while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { /* Time to push up daises... */ if (del_timer(&ct->timeout)) death_by_timeout((unsigned long)ct); @@ -998,27 +1011,26 @@ void nf_ct_free_hashtable(struct hlist_head *hash, int vmalloced, unsigned int s } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush(void) +void nf_conntrack_flush(struct net *net) { - nf_ct_iterate_cleanup(kill_all, NULL); + nf_ct_iterate_cleanup(net, kill_all, NULL); } EXPORT_SYMBOL_GPL(nf_conntrack_flush); -/* Mishearing the voices in his head, our hero wonders how he's - supposed to kill the mall. */ -void nf_conntrack_cleanup(void) +static void nf_conntrack_cleanup_init_net(void) { - rcu_assign_pointer(ip_ct_attach, NULL); - - /* This makes sure all current packets have passed through - netfilter framework. Roll on, two-stage module - delete... */ - synchronize_net(); + nf_conntrack_helper_fini(); + nf_conntrack_proto_fini(); + kmem_cache_destroy(nf_conntrack_cachep); +} - nf_ct_event_cache_flush(); +static void nf_conntrack_cleanup_net(struct net *net) +{ + nf_ct_event_cache_flush(net); + nf_conntrack_ecache_fini(net); i_see_dead_people: - nf_conntrack_flush(); - if (atomic_read(&nf_conntrack_count) != 0) { + nf_conntrack_flush(net); + if (atomic_read(&net->ct.count) != 0) { schedule(); goto i_see_dead_people; } @@ -1026,16 +1038,31 @@ void nf_conntrack_cleanup(void) while (atomic_read(&nf_conntrack_untracked.ct_general.use) > 1) schedule(); - rcu_assign_pointer(nf_ct_destroy, NULL); - - kmem_cache_destroy(nf_conntrack_cachep); - nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc, + nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); + nf_conntrack_acct_fini(net); + nf_conntrack_expect_fini(net); + free_percpu(net->ct.stat); +} - nf_conntrack_acct_fini(); - nf_conntrack_expect_fini(); - nf_conntrack_helper_fini(); - nf_conntrack_proto_fini(); +/* Mishearing the voices in his head, our hero wonders how he's + supposed to kill the mall. */ +void nf_conntrack_cleanup(struct net *net) +{ + if (net_eq(net, &init_net)) + rcu_assign_pointer(ip_ct_attach, NULL); + + /* This makes sure all current packets have passed through + netfilter framework. Roll on, two-stage module + delete... */ + synchronize_net(); + + nf_conntrack_cleanup_net(net); + + if (net_eq(net, &init_net)) { + rcu_assign_pointer(nf_ct_destroy, NULL); + nf_conntrack_cleanup_init_net(); + } } struct hlist_head *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced) @@ -1094,8 +1121,8 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) */ spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_conntrack_htable_size; i++) { - while (!hlist_empty(&nf_conntrack_hash[i])) { - h = hlist_entry(nf_conntrack_hash[i].first, + while (!hlist_empty(&init_net.ct.hash[i])) { + h = hlist_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnode); hlist_del_rcu(&h->hnode); bucket = __hash_conntrack(&h->tuple, hashsize, rnd); @@ -1103,12 +1130,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) } } old_size = nf_conntrack_htable_size; - old_vmalloced = nf_conntrack_vmalloc; - old_hash = nf_conntrack_hash; + old_vmalloced = init_net.ct.hash_vmalloc; + old_hash = init_net.ct.hash; nf_conntrack_htable_size = hashsize; - nf_conntrack_vmalloc = vmalloced; - nf_conntrack_hash = hash; + init_net.ct.hash_vmalloc = vmalloced; + init_net.ct.hash = hash; nf_conntrack_hash_rnd = rnd; spin_unlock_bh(&nf_conntrack_lock); @@ -1120,7 +1147,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize); module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, &nf_conntrack_htable_size, 0600); -int __init nf_conntrack_init(void) +static int nf_conntrack_init_init_net(void) { int max_factor = 8; int ret; @@ -1142,13 +1169,6 @@ int __init nf_conntrack_init(void) * entries. */ max_factor = 4; } - nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, - &nf_conntrack_vmalloc); - if (!nf_conntrack_hash) { - printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); - goto err_out; - } - nf_conntrack_max = max_factor * nf_conntrack_htable_size; printk("nf_conntrack version %s (%u buckets, %d max)\n", @@ -1160,48 +1180,103 @@ int __init nf_conntrack_init(void) 0, 0, NULL); if (!nf_conntrack_cachep) { printk(KERN_ERR "Unable to create nf_conn slab cache\n"); - goto err_free_hash; + ret = -ENOMEM; + goto err_cache; } ret = nf_conntrack_proto_init(); if (ret < 0) - goto err_free_conntrack_slab; - - ret = nf_conntrack_expect_init(); - if (ret < 0) - goto out_fini_proto; + goto err_proto; ret = nf_conntrack_helper_init(); if (ret < 0) - goto out_fini_expect; + goto err_helper; - ret = nf_conntrack_acct_init(); - if (ret < 0) - goto out_fini_helper; + return 0; + +err_helper: + nf_conntrack_proto_fini(); +err_proto: + kmem_cache_destroy(nf_conntrack_cachep); +err_cache: + return ret; +} - /* For use by REJECT target */ - rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); - rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); +static int nf_conntrack_init_net(struct net *net) +{ + int ret; + + atomic_set(&net->ct.count, 0); + INIT_HLIST_HEAD(&net->ct.unconfirmed); + net->ct.stat = alloc_percpu(struct ip_conntrack_stat); + if (!net->ct.stat) { + ret = -ENOMEM; + goto err_stat; + } + ret = nf_conntrack_ecache_init(net); + if (ret < 0) + goto err_ecache; + net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, + &net->ct.hash_vmalloc); + if (!net->ct.hash) { + ret = -ENOMEM; + printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); + goto err_hash; + } + ret = nf_conntrack_expect_init(net); + if (ret < 0) + goto err_expect; + ret = nf_conntrack_acct_init(net); + if (ret < 0) + goto err_acct; /* Set up fake conntrack: - to never be deleted, not in any hashes */ +#ifdef CONFIG_NET_NS + nf_conntrack_untracked.ct_net = &init_net; +#endif atomic_set(&nf_conntrack_untracked.ct_general.use, 1); /* - and look it like as a confirmed connection */ set_bit(IPS_CONFIRMED_BIT, &nf_conntrack_untracked.status); - return ret; + return 0; -out_fini_helper: - nf_conntrack_helper_fini(); -out_fini_expect: - nf_conntrack_expect_fini(); -out_fini_proto: - nf_conntrack_proto_fini(); -err_free_conntrack_slab: - kmem_cache_destroy(nf_conntrack_cachep); -err_free_hash: - nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_vmalloc, +err_acct: + nf_conntrack_expect_fini(net); +err_expect: + nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); -err_out: - return -ENOMEM; +err_hash: + nf_conntrack_ecache_fini(net); +err_ecache: + free_percpu(net->ct.stat); +err_stat: + return ret; +} + +int nf_conntrack_init(struct net *net) +{ + int ret; + + if (net_eq(net, &init_net)) { + ret = nf_conntrack_init_init_net(); + if (ret < 0) + goto out_init_net; + } + ret = nf_conntrack_init_net(net); + if (ret < 0) + goto out_net; + + if (net_eq(net, &init_net)) { + /* For use by REJECT target */ + rcu_assign_pointer(ip_ct_attach, nf_conntrack_attach); + rcu_assign_pointer(nf_ct_destroy, destroy_conntrack); + } + return 0; + +out_net: + if (net_eq(net, &init_net)) + nf_conntrack_cleanup_init_net(); +out_init_net: + return ret; } diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 83c41ac3505b..a5f5e2e65d13 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -29,9 +29,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_chain); ATOMIC_NOTIFIER_HEAD(nf_ct_expect_chain); EXPORT_SYMBOL_GPL(nf_ct_expect_chain); -DEFINE_PER_CPU(struct nf_conntrack_ecache, nf_conntrack_ecache); -EXPORT_PER_CPU_SYMBOL_GPL(nf_conntrack_ecache); - /* deliver cached events and clear cache entry - must be called with locally * disabled softirqs */ static inline void @@ -51,10 +48,11 @@ __nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache) * by code prior to async packet handling for freeing the skb */ void nf_ct_deliver_cached_events(const struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *ecache; local_bh_disable(); - ecache = &__get_cpu_var(nf_conntrack_ecache); + ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id()); if (ecache->ct == ct) __nf_ct_deliver_cached_events(ecache); local_bh_enable(); @@ -64,10 +62,11 @@ EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); /* Deliver cached events for old pending events, if current conntrack != old */ void __nf_ct_event_cache_init(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_ecache *ecache; /* take care of delivering potentially old events */ - ecache = &__get_cpu_var(nf_conntrack_ecache); + ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id()); BUG_ON(ecache->ct == ct); if (ecache->ct) __nf_ct_deliver_cached_events(ecache); @@ -79,18 +78,31 @@ EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init); /* flush the event cache - touches other CPU's data and must not be called * while packets are still passing through the code */ -void nf_ct_event_cache_flush(void) +void nf_ct_event_cache_flush(struct net *net) { struct nf_conntrack_ecache *ecache; int cpu; for_each_possible_cpu(cpu) { - ecache = &per_cpu(nf_conntrack_ecache, cpu); + ecache = per_cpu_ptr(net->ct.ecache, cpu); if (ecache->ct) nf_ct_put(ecache->ct); } } +int nf_conntrack_ecache_init(struct net *net) +{ + net->ct.ecache = alloc_percpu(struct nf_conntrack_ecache); + if (!net->ct.ecache) + return -ENOMEM; + return 0; +} + +void nf_conntrack_ecache_fini(struct net *net) +{ + free_percpu(net->ct.ecache); +} + int nf_conntrack_register_notifier(struct notifier_block *nb) { return atomic_notifier_chain_register(&nf_conntrack_chain, nb); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index e8f0dead267f..37a703bc3b8e 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -28,17 +28,12 @@ #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_tuple.h> -struct hlist_head *nf_ct_expect_hash __read_mostly; -EXPORT_SYMBOL_GPL(nf_ct_expect_hash); - unsigned int nf_ct_expect_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_expect_hsize); static unsigned int nf_ct_expect_hash_rnd __read_mostly; -static unsigned int nf_ct_expect_count; unsigned int nf_ct_expect_max __read_mostly; static int nf_ct_expect_hash_rnd_initted __read_mostly; -static int nf_ct_expect_vmalloc; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; @@ -46,18 +41,19 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly; void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); + struct net *net = nf_ct_exp_net(exp); NF_CT_ASSERT(master_help); NF_CT_ASSERT(!timer_pending(&exp->timeout)); hlist_del_rcu(&exp->hnode); - nf_ct_expect_count--; + net->ct.expect_count--; hlist_del(&exp->lnode); master_help->expecting[exp->class]--; nf_ct_expect_put(exp); - NF_CT_STAT_INC(expect_delete); + NF_CT_STAT_INC(net, expect_delete); } EXPORT_SYMBOL_GPL(nf_ct_unlink_expect); @@ -87,17 +83,17 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple } struct nf_conntrack_expect * -__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple) +__nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; struct hlist_node *n; unsigned int h; - if (!nf_ct_expect_count) + if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry_rcu(i, n, &nf_ct_expect_hash[h], hnode) { + hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) { if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) return i; } @@ -107,12 +103,12 @@ EXPORT_SYMBOL_GPL(__nf_ct_expect_find); /* Just find a expectation corresponding to a tuple. */ struct nf_conntrack_expect * -nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple) +nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i; rcu_read_lock(); - i = __nf_ct_expect_find(tuple); + i = __nf_ct_expect_find(net, tuple); if (i && !atomic_inc_not_zero(&i->use)) i = NULL; rcu_read_unlock(); @@ -124,17 +120,17 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get); /* If an expectation for this connection is found, it gets delete from * global list then returned. */ struct nf_conntrack_expect * -nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple) +nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_expect *i, *exp = NULL; struct hlist_node *n; unsigned int h; - if (!nf_ct_expect_count) + if (!net->ct.expect_count) return NULL; h = nf_ct_expect_dst_hash(tuple); - hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { + hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { if (!(i->flags & NF_CT_EXPECT_INACTIVE) && nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { exp = i; @@ -241,7 +237,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me) EXPORT_SYMBOL_GPL(nf_ct_expect_alloc); void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class, - int family, + u_int8_t family, const union nf_inet_addr *saddr, const union nf_inet_addr *daddr, u_int8_t proto, const __be16 *src, const __be16 *dst) @@ -311,6 +307,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_put); static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); + struct net *net = nf_ct_exp_net(exp); const struct nf_conntrack_expect_policy *p; unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); @@ -319,8 +316,8 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) hlist_add_head(&exp->lnode, &master_help->expectations); master_help->expecting[exp->class]++; - hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]); - nf_ct_expect_count++; + hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]); + net->ct.expect_count++; setup_timer(&exp->timeout, nf_ct_expectation_timed_out, (unsigned long)exp); @@ -329,7 +326,7 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) add_timer(&exp->timeout); atomic_inc(&exp->use); - NF_CT_STAT_INC(expect_create); + NF_CT_STAT_INC(net, expect_create); } /* Race with expectations being used means we could have none to find; OK. */ @@ -371,6 +368,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) struct nf_conntrack_expect *i; struct nf_conn *master = expect->master; struct nf_conn_help *master_help = nfct_help(master); + struct net *net = nf_ct_exp_net(expect); struct hlist_node *n; unsigned int h; int ret; @@ -383,7 +381,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) goto out; } h = nf_ct_expect_dst_hash(&expect->tuple); - hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) { + hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) { if (expect_matches(i, expect)) { /* Refresh timer: if it's dying, ignore.. */ if (refresh_timer(i)) { @@ -406,7 +404,7 @@ int nf_ct_expect_related(struct nf_conntrack_expect *expect) } } - if (nf_ct_expect_count >= nf_ct_expect_max) { + if (net->ct.expect_count >= nf_ct_expect_max) { if (net_ratelimit()) printk(KERN_WARNING "nf_conntrack: expectation table full\n"); @@ -425,16 +423,18 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_related); #ifdef CONFIG_PROC_FS struct ct_expect_iter_state { + struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *ct_expect_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + n = rcu_dereference(net->ct.expect_hash[st->bucket].first); if (n) return n; } @@ -444,13 +444,14 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq) static struct hlist_node *ct_expect_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = seq_file_net(seq); struct ct_expect_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_ct_expect_hsize) return NULL; - head = rcu_dereference(nf_ct_expect_hash[st->bucket].first); + head = rcu_dereference(net->ct.expect_hash[st->bucket].first); } return head; } @@ -524,7 +525,7 @@ static const struct seq_operations exp_seq_ops = { static int exp_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &exp_seq_ops, + return seq_open_net(inode, file, &exp_seq_ops, sizeof(struct ct_expect_iter_state)); } @@ -533,72 +534,79 @@ static const struct file_operations exp_file_ops = { .open = exp_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; #endif /* CONFIG_PROC_FS */ -static int __init exp_proc_init(void) +static int exp_proc_init(struct net *net) { #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc; - proc = proc_net_fops_create(&init_net, "nf_conntrack_expect", 0440, &exp_file_ops); + proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops); if (!proc) return -ENOMEM; #endif /* CONFIG_PROC_FS */ return 0; } -static void exp_proc_remove(void) +static void exp_proc_remove(struct net *net) { #ifdef CONFIG_PROC_FS - proc_net_remove(&init_net, "nf_conntrack_expect"); + proc_net_remove(net, "nf_conntrack_expect"); #endif /* CONFIG_PROC_FS */ } module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600); -int __init nf_conntrack_expect_init(void) +int nf_conntrack_expect_init(struct net *net) { int err = -ENOMEM; - if (!nf_ct_expect_hsize) { - nf_ct_expect_hsize = nf_conntrack_htable_size / 256; - if (!nf_ct_expect_hsize) - nf_ct_expect_hsize = 1; + if (net_eq(net, &init_net)) { + if (!nf_ct_expect_hsize) { + nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + if (!nf_ct_expect_hsize) + nf_ct_expect_hsize = 1; + } + nf_ct_expect_max = nf_ct_expect_hsize * 4; } - nf_ct_expect_max = nf_ct_expect_hsize * 4; - nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, - &nf_ct_expect_vmalloc); - if (nf_ct_expect_hash == NULL) + net->ct.expect_count = 0; + net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, + &net->ct.expect_vmalloc); + if (net->ct.expect_hash == NULL) goto err1; - nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", + if (net_eq(net, &init_net)) { + nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect", sizeof(struct nf_conntrack_expect), 0, 0, NULL); - if (!nf_ct_expect_cachep) - goto err2; + if (!nf_ct_expect_cachep) + goto err2; + } - err = exp_proc_init(); + err = exp_proc_init(net); if (err < 0) goto err3; return 0; err3: - kmem_cache_destroy(nf_ct_expect_cachep); + if (net_eq(net, &init_net)) + kmem_cache_destroy(nf_ct_expect_cachep); err2: - nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, + nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, nf_ct_expect_hsize); err1: return err; } -void nf_conntrack_expect_fini(void) +void nf_conntrack_expect_fini(struct net *net) { - exp_proc_remove(); - kmem_cache_destroy(nf_ct_expect_cachep); - nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc, + exp_proc_remove(net); + if (net_eq(net, &init_net)) + kmem_cache_destroy(nf_ct_expect_cachep); + nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc, nf_ct_expect_hsize); } diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index bb20672fe036..4f7107107e99 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -318,7 +318,8 @@ static int find_nl_seq(u32 seq, const struct nf_ct_ftp_master *info, int dir) } /* We don't update if it's older than what we have. */ -static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir, +static void update_nl_seq(struct nf_conn *ct, u32 nl_seq, + struct nf_ct_ftp_master *info, int dir, struct sk_buff *skb) { unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; @@ -336,11 +337,11 @@ static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir, if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; - nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct); } else if (oldest != NUM_SEQ_TO_REMEMBER && after(nl_seq, info->seq_aft_nl[dir][oldest])) { info->seq_aft_nl[dir][oldest] = nl_seq; - nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, ct); } } @@ -509,7 +510,7 @@ out_update_nl: /* Now if this ends in \n, update ftp info. Seq may have been * adjusted by NAT code. */ if (ends_in_nl) - update_nl_seq(seq, ct_ftp_info, dir, skb); + update_nl_seq(ct, seq, ct_ftp_info, dir, skb); out: spin_unlock_bh(&nf_ftp_lock); return ret; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 2f83c158934d..c1504f71cdff 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -709,7 +709,8 @@ static int expect_h245(struct sk_buff *skb, struct nf_conn *ct, /* If the calling party is on the same side of the forward-to party, * we don't need to track the second call */ static int callforward_do_filter(const union nf_inet_addr *src, - const union nf_inet_addr *dst, int family) + const union nf_inet_addr *dst, + u_int8_t family) { const struct nf_afinfo *afinfo; struct flowi fl1, fl2; @@ -1209,6 +1210,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, union nf_inet_addr *addr, __be16 port) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; @@ -1218,7 +1220,7 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct, tuple.dst.u.tcp.port = port; tuple.dst.protonum = IPPROTO_TCP; - exp = __nf_ct_expect_find(&tuple); + exp = __nf_ct_expect_find(net, &tuple); if (exp && exp->master == ct) return exp; return NULL; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 8e0b4c8f62a8..c39b6a994133 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -21,6 +21,7 @@ #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/rculist.h> +#include <linux/rtnetlink.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l3proto.h> @@ -123,29 +124,18 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) } EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); -void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) +static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, + struct net *net) { struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; const struct hlist_node *n, *next; unsigned int i; - mutex_lock(&nf_ct_helper_mutex); - hlist_del_rcu(&me->hnode); - nf_ct_helper_count--; - mutex_unlock(&nf_ct_helper_mutex); - - /* Make sure every nothing is still using the helper unless its a - * connection in the hash. - */ - synchronize_rcu(); - - spin_lock_bh(&nf_conntrack_lock); - /* Get rid of expectations */ for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &nf_ct_expect_hash[i], hnode) { + &net->ct.expect_hash[i], hnode) { struct nf_conn_help *help = nfct_help(exp->master); if ((help->helper == me || exp->helper == me) && del_timer(&exp->timeout)) { @@ -156,13 +146,34 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) } /* Get rid of expecteds, set helpers to NULL. */ - hlist_for_each_entry(h, n, &unconfirmed, hnode) + hlist_for_each_entry(h, n, &net->ct.unconfirmed, hnode) unhelp(h, me); for (i = 0; i < nf_conntrack_htable_size; i++) { - hlist_for_each_entry(h, n, &nf_conntrack_hash[i], hnode) + hlist_for_each_entry(h, n, &net->ct.hash[i], hnode) unhelp(h, me); } +} + +void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) +{ + struct net *net; + + mutex_lock(&nf_ct_helper_mutex); + hlist_del_rcu(&me->hnode); + nf_ct_helper_count--; + mutex_unlock(&nf_ct_helper_mutex); + + /* Make sure every nothing is still using the helper unless its a + * connection in the hash. + */ + synchronize_rcu(); + + rtnl_lock(); + spin_lock_bh(&nf_conntrack_lock); + for_each_net(net) + __nf_conntrack_helper_unregister(me, net); spin_unlock_bh(&nf_conntrack_lock); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a8752031adcb..a040d46f85d6 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -549,7 +549,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conn *)cb->args[1]; for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: - hlist_for_each_entry_rcu(h, n, &nf_conntrack_hash[cb->args[0]], + hlist_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], hnode) { if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; @@ -689,71 +689,6 @@ ctnetlink_parse_tuple(struct nlattr *cda[], struct nf_conntrack_tuple *tuple, return 0; } -#ifdef CONFIG_NF_NAT_NEEDED -static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { - [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, - [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, -}; - -static int nfnetlink_parse_nat_proto(struct nlattr *attr, - const struct nf_conn *ct, - struct nf_nat_range *range) -{ - struct nlattr *tb[CTA_PROTONAT_MAX+1]; - const struct nf_nat_protocol *npt; - int err; - - err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy); - if (err < 0) - return err; - - npt = nf_nat_proto_find_get(nf_ct_protonum(ct)); - if (npt->nlattr_to_range) - err = npt->nlattr_to_range(tb, range); - nf_nat_proto_put(npt); - return err; -} - -static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { - [CTA_NAT_MINIP] = { .type = NLA_U32 }, - [CTA_NAT_MAXIP] = { .type = NLA_U32 }, -}; - -static inline int -nfnetlink_parse_nat(struct nlattr *nat, - const struct nf_conn *ct, struct nf_nat_range *range) -{ - struct nlattr *tb[CTA_NAT_MAX+1]; - int err; - - memset(range, 0, sizeof(*range)); - - err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy); - if (err < 0) - return err; - - if (tb[CTA_NAT_MINIP]) - range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]); - - if (!tb[CTA_NAT_MAXIP]) - range->max_ip = range->min_ip; - else - range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]); - - if (range->min_ip) - range->flags |= IP_NAT_RANGE_MAP_IPS; - - if (!tb[CTA_NAT_PROTO]) - return 0; - - err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range); - if (err < 0) - return err; - - return 0; -} -#endif - static inline int ctnetlink_parse_help(struct nlattr *attr, char **helper_name) { @@ -794,14 +729,14 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3); else { /* Flush the whole table */ - nf_conntrack_flush(); + nf_conntrack_flush(&init_net); return 0; } if (err < 0) return err; - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(&init_net, &tuple); if (!h) return -ENOENT; @@ -847,7 +782,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - h = nf_conntrack_find_get(&tuple); + h = nf_conntrack_find_get(&init_net, &tuple); if (!h) return -ENOENT; @@ -878,6 +813,36 @@ out: return err; } +#ifdef CONFIG_NF_NAT_NEEDED +static int +ctnetlink_parse_nat_setup(struct nf_conn *ct, + enum nf_nat_manip_type manip, + struct nlattr *attr) +{ + typeof(nfnetlink_parse_nat_setup_hook) parse_nat_setup; + + parse_nat_setup = rcu_dereference(nfnetlink_parse_nat_setup_hook); + if (!parse_nat_setup) { +#ifdef CONFIG_MODULES + rcu_read_unlock(); + nfnl_unlock(); + if (request_module("nf-nat-ipv4") < 0) { + nfnl_lock(); + rcu_read_lock(); + return -EOPNOTSUPP; + } + nfnl_lock(); + rcu_read_lock(); + if (nfnetlink_parse_nat_setup_hook) + return -EAGAIN; +#endif + return -EOPNOTSUPP; + } + + return parse_nat_setup(ct, manip, attr); +} +#endif + static int ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) { @@ -897,31 +862,6 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) /* ASSURED bit can only be set */ return -EBUSY; - if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { -#ifndef CONFIG_NF_NAT_NEEDED - return -EOPNOTSUPP; -#else - struct nf_nat_range range; - - if (cda[CTA_NAT_DST]) { - if (nfnetlink_parse_nat(cda[CTA_NAT_DST], ct, - &range) < 0) - return -EINVAL; - if (nf_nat_initialized(ct, IP_NAT_MANIP_DST)) - return -EEXIST; - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); - } - if (cda[CTA_NAT_SRC]) { - if (nfnetlink_parse_nat(cda[CTA_NAT_SRC], ct, - &range) < 0) - return -EINVAL; - if (nf_nat_initialized(ct, IP_NAT_MANIP_SRC)) - return -EEXIST; - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); - } -#endif - } - /* Be careful here, modifying NAT bits can screw up things, * so don't let users modify them directly if they don't pass * nf_nat_range. */ @@ -929,6 +869,31 @@ ctnetlink_change_status(struct nf_conn *ct, struct nlattr *cda[]) return 0; } +static int +ctnetlink_change_nat(struct nf_conn *ct, struct nlattr *cda[]) +{ +#ifdef CONFIG_NF_NAT_NEEDED + int ret; + + if (cda[CTA_NAT_DST]) { + ret = ctnetlink_parse_nat_setup(ct, + IP_NAT_MANIP_DST, + cda[CTA_NAT_DST]); + if (ret < 0) + return ret; + } + if (cda[CTA_NAT_SRC]) { + ret = ctnetlink_parse_nat_setup(ct, + IP_NAT_MANIP_SRC, + cda[CTA_NAT_SRC]); + if (ret < 0) + return ret; + } + return 0; +#else + return -EOPNOTSUPP; +#endif +} static inline int ctnetlink_change_helper(struct nf_conn *ct, struct nlattr *cda[]) @@ -1125,7 +1090,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conn_help *help; struct nf_conntrack_helper *helper; - ct = nf_conntrack_alloc(otuple, rtuple, GFP_KERNEL); + ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_KERNEL); if (ct == NULL || IS_ERR(ct)) return -ENOMEM; @@ -1157,6 +1122,14 @@ ctnetlink_create_conntrack(struct nlattr *cda[], } } + if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { + err = ctnetlink_change_nat(ct, cda); + if (err < 0) { + rcu_read_unlock(); + goto err; + } + } + if (cda[CTA_PROTOINFO]) { err = ctnetlink_change_protoinfo(ct, cda); if (err < 0) { @@ -1213,9 +1186,9 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); if (cda[CTA_TUPLE_ORIG]) - h = __nf_conntrack_find(&otuple); + h = __nf_conntrack_find(&init_net, &otuple); else if (cda[CTA_TUPLE_REPLY]) - h = __nf_conntrack_find(&rtuple); + h = __nf_conntrack_find(&init_net, &rtuple); if (h == NULL) { struct nf_conntrack_tuple master; @@ -1230,7 +1203,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (err < 0) goto out_unlock; - master_h = __nf_conntrack_find(&master); + master_h = __nf_conntrack_find(&init_net, &master); if (master_h == NULL) { err = -ENOENT; goto out_unlock; @@ -1458,6 +1431,7 @@ static int ctnetlink_exp_done(struct netlink_callback *cb) static int ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) { + struct net *net = &init_net; struct nf_conntrack_expect *exp, *last; struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); struct hlist_node *n; @@ -1467,7 +1441,7 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) last = (struct nf_conntrack_expect *)cb->args[1]; for (; cb->args[0] < nf_ct_expect_hsize; cb->args[0]++) { restart: - hlist_for_each_entry(exp, n, &nf_ct_expect_hash[cb->args[0]], + hlist_for_each_entry(exp, n, &net->ct.expect_hash[cb->args[0]], hnode) { if (l3proto && exp->tuple.src.l3num != l3proto) continue; @@ -1529,7 +1503,7 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - exp = nf_ct_expect_find_get(&tuple); + exp = nf_ct_expect_find_get(&init_net, &tuple); if (!exp) return -ENOENT; @@ -1583,7 +1557,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, return err; /* bump usage count to 2 */ - exp = nf_ct_expect_find_get(&tuple); + exp = nf_ct_expect_find_get(&init_net, &tuple); if (!exp) return -ENOENT; @@ -1613,7 +1587,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, } for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &nf_ct_expect_hash[i], + &init_net.ct.expect_hash[i], hnode) { m_help = nfct_help(exp->master); if (m_help->helper == h @@ -1629,7 +1603,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, spin_lock_bh(&nf_conntrack_lock); for (i = 0; i < nf_ct_expect_hsize; i++) { hlist_for_each_entry_safe(exp, n, next, - &nf_ct_expect_hash[i], + &init_net.ct.expect_hash[i], hnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect(exp); @@ -1670,7 +1644,7 @@ ctnetlink_create_expect(struct nlattr *cda[], u_int8_t u3) return err; /* Look for master conntrack of this expectation */ - h = nf_conntrack_find_get(&master_tuple); + h = nf_conntrack_find_get(&init_net, &master_tuple); if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); @@ -1724,7 +1698,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, return err; spin_lock_bh(&nf_conntrack_lock); - exp = __nf_ct_expect_find(&tuple); + exp = __nf_ct_expect_find(&init_net, &tuple); if (!exp) { spin_unlock_bh(&nf_conntrack_lock); diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 97e54b0e43a3..1bc3001d1827 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -65,7 +65,7 @@ void struct nf_conntrack_expect *exp) __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_pptp_hook_expectfn); -#ifdef DEBUG +#if defined(DEBUG) || defined(CONFIG_DYNAMIC_PRINTK_DEBUG) /* PptpControlMessageType names */ const char *const pptp_msg_name[] = { "UNKNOWN_MESSAGE", @@ -98,6 +98,7 @@ EXPORT_SYMBOL(pptp_msg_name); static void pptp_expectfn(struct nf_conn *ct, struct nf_conntrack_expect *exp) { + struct net *net = nf_ct_net(ct); typeof(nf_nat_pptp_hook_expectfn) nf_nat_pptp_expectfn; pr_debug("increasing timeouts\n"); @@ -121,7 +122,7 @@ static void pptp_expectfn(struct nf_conn *ct, pr_debug("trying to unexpect other dir: "); nf_ct_dump_tuple(&inv_t); - exp_other = nf_ct_expect_find_get(&inv_t); + exp_other = nf_ct_expect_find_get(net, &inv_t); if (exp_other) { /* delete other expectation. */ pr_debug("found\n"); @@ -134,7 +135,8 @@ static void pptp_expectfn(struct nf_conn *ct, rcu_read_unlock(); } -static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) +static int destroy_sibling_or_exp(struct net *net, + const struct nf_conntrack_tuple *t) { const struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp; @@ -143,7 +145,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) pr_debug("trying to timeout ct or exp for tuple "); nf_ct_dump_tuple(t); - h = nf_conntrack_find_get(t); + h = nf_conntrack_find_get(net, t); if (h) { sibling = nf_ct_tuplehash_to_ctrack(h); pr_debug("setting timeout of conntrack %p to 0\n", sibling); @@ -154,7 +156,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) nf_ct_put(sibling); return 1; } else { - exp = nf_ct_expect_find_get(t); + exp = nf_ct_expect_find_get(net, t); if (exp) { pr_debug("unexpect_related of expect %p\n", exp); nf_ct_unexpect_related(exp); @@ -168,6 +170,7 @@ static int destroy_sibling_or_exp(const struct nf_conntrack_tuple *t) /* timeout GRE data connections */ static void pptp_destroy_siblings(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); const struct nf_conn_help *help = nfct_help(ct); struct nf_conntrack_tuple t; @@ -178,7 +181,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pns_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pac_call_id; - if (!destroy_sibling_or_exp(&t)) + if (!destroy_sibling_or_exp(net, &t)) pr_debug("failed to timeout original pns->pac ct/exp\n"); /* try reply (pac->pns) tuple */ @@ -186,7 +189,7 @@ static void pptp_destroy_siblings(struct nf_conn *ct) t.dst.protonum = IPPROTO_GRE; t.src.u.gre.key = help->help.ct_pptp_info.pac_call_id; t.dst.u.gre.key = help->help.ct_pptp_info.pns_call_id; - if (!destroy_sibling_or_exp(&t)) + if (!destroy_sibling_or_exp(net, &t)) pr_debug("failed to timeout reply pac->pns ct/exp\n"); } @@ -594,15 +597,32 @@ static struct nf_conntrack_helper pptp __read_mostly = { .expect_policy = &pptp_exp_policy, }; +static void nf_conntrack_pptp_net_exit(struct net *net) +{ + nf_ct_gre_keymap_flush(net); +} + +static struct pernet_operations nf_conntrack_pptp_net_ops = { + .exit = nf_conntrack_pptp_net_exit, +}; + static int __init nf_conntrack_pptp_init(void) { - return nf_conntrack_helper_register(&pptp); + int rv; + + rv = nf_conntrack_helper_register(&pptp); + if (rv < 0) + return rv; + rv = register_pernet_subsys(&nf_conntrack_pptp_net_ops); + if (rv < 0) + nf_conntrack_helper_unregister(&pptp); + return rv; } static void __exit nf_conntrack_pptp_fini(void) { nf_conntrack_helper_unregister(&pptp); - nf_ct_gre_keymap_flush(); + unregister_pernet_subsys(&nf_conntrack_pptp_net_ops); } module_init(nf_conntrack_pptp_init); diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index a49fc932629b..592d73344d46 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -22,6 +22,7 @@ #include <linux/notifier.h> #include <linux/kernel.h> #include <linux/netdevice.h> +#include <linux/rtnetlink.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l3proto.h> @@ -207,6 +208,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_register); void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) { + struct net *net; + BUG_ON(proto->l3proto >= AF_MAX); mutex_lock(&nf_ct_proto_mutex); @@ -219,7 +222,10 @@ void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(kill_l3proto, proto); + rtnl_lock(); + for_each_net(net) + nf_ct_iterate_cleanup(net, kill_l3proto, proto); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_unregister); @@ -316,6 +322,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_register); void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) { + struct net *net; + BUG_ON(l4proto->l3proto >= PF_MAX); mutex_lock(&nf_ct_proto_mutex); @@ -328,7 +336,10 @@ void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *l4proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ - nf_ct_iterate_cleanup(kill_l4proto, l4proto); + rtnl_lock(); + for_each_net(net) + nf_ct_iterate_cleanup(net, kill_l4proto, l4proto); + rtnl_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_unregister); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index e7866dd3cde6..8fcf1762fabf 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -418,6 +418,7 @@ static bool dccp_invert_tuple(struct nf_conntrack_tuple *inv, static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { + struct net *net = nf_ct_net(ct); struct dccp_hdr _dh, *dh; const char *msg; u_int8_t state; @@ -445,7 +446,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; out_invalid: - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, msg); return false; } @@ -461,8 +462,9 @@ static u64 dccp_ack_seq(const struct dccp_hdr *dh) static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, unsigned int hooknum) + u_int8_t pf, unsigned int hooknum) { + struct net *net = nf_ct_net(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; u_int8_t type, old_state, new_state; @@ -524,13 +526,13 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.last_pkt = type; write_unlock_bh(&dccp_lock); - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid packet ignored "); return NF_ACCEPT; case CT_DCCP_INVALID: write_unlock_bh(&dccp_lock); - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_dccp: invalid state transition "); return -NF_ACCEPT; @@ -545,9 +547,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, return NF_ACCEPT; } -static int dccp_error(struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, int pf, - unsigned int hooknum) +static int dccp_error(struct net *net, struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, + u_int8_t pf, unsigned int hooknum) { struct dccp_hdr _dh, *dh; unsigned int dccp_len = skb->len - dataoff; @@ -575,7 +577,7 @@ static int dccp_error(struct sk_buff *skb, unsigned int dataoff, } } - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_DCCP, pf)) { msg = "nf_ct_dccp: bad checksum "; @@ -590,7 +592,7 @@ static int dccp_error(struct sk_buff *skb, unsigned int dataoff, return NF_ACCEPT; out_invalid: - if (LOG_INVALID(IPPROTO_DCCP)) + if (LOG_INVALID(net, IPPROTO_DCCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, msg); return -NF_ACCEPT; } diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index e31b0e7bd0b1..dbe680af85d2 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -45,7 +45,7 @@ static int packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_generic_timeout); diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 9bd03967fea4..4ab62ad85dd4 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -29,8 +29,11 @@ #include <linux/list.h> #include <linux/seq_file.h> #include <linux/in.h> +#include <linux/netdevice.h> #include <linux/skbuff.h> - +#include <net/dst.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_core.h> @@ -40,19 +43,23 @@ #define GRE_TIMEOUT (30 * HZ) #define GRE_STREAM_TIMEOUT (180 * HZ) -static DEFINE_RWLOCK(nf_ct_gre_lock); -static LIST_HEAD(gre_keymap_list); +static int proto_gre_net_id; +struct netns_proto_gre { + rwlock_t keymap_lock; + struct list_head keymap_list; +}; -void nf_ct_gre_keymap_flush(void) +void nf_ct_gre_keymap_flush(struct net *net) { + struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id); struct nf_ct_gre_keymap *km, *tmp; - write_lock_bh(&nf_ct_gre_lock); - list_for_each_entry_safe(km, tmp, &gre_keymap_list, list) { + write_lock_bh(&net_gre->keymap_lock); + list_for_each_entry_safe(km, tmp, &net_gre->keymap_list, list) { list_del(&km->list); kfree(km); } - write_unlock_bh(&nf_ct_gre_lock); + write_unlock_bh(&net_gre->keymap_lock); } EXPORT_SYMBOL(nf_ct_gre_keymap_flush); @@ -67,19 +74,20 @@ static inline int gre_key_cmpfn(const struct nf_ct_gre_keymap *km, } /* look up the source key for a given tuple */ -static __be16 gre_keymap_lookup(struct nf_conntrack_tuple *t) +static __be16 gre_keymap_lookup(struct net *net, struct nf_conntrack_tuple *t) { + struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id); struct nf_ct_gre_keymap *km; __be16 key = 0; - read_lock_bh(&nf_ct_gre_lock); - list_for_each_entry(km, &gre_keymap_list, list) { + read_lock_bh(&net_gre->keymap_lock); + list_for_each_entry(km, &net_gre->keymap_list, list) { if (gre_key_cmpfn(km, t)) { key = km->tuple.src.u.gre.key; break; } } - read_unlock_bh(&nf_ct_gre_lock); + read_unlock_bh(&net_gre->keymap_lock); pr_debug("lookup src key 0x%x for ", key); nf_ct_dump_tuple(t); @@ -91,20 +99,22 @@ static __be16 gre_keymap_lookup(struct nf_conntrack_tuple *t) int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, struct nf_conntrack_tuple *t) { + struct net *net = nf_ct_net(ct); + struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id); struct nf_conn_help *help = nfct_help(ct); struct nf_ct_gre_keymap **kmp, *km; kmp = &help->help.ct_pptp_info.keymap[dir]; if (*kmp) { /* check whether it's a retransmission */ - read_lock_bh(&nf_ct_gre_lock); - list_for_each_entry(km, &gre_keymap_list, list) { + read_lock_bh(&net_gre->keymap_lock); + list_for_each_entry(km, &net_gre->keymap_list, list) { if (gre_key_cmpfn(km, t) && km == *kmp) { - read_unlock_bh(&nf_ct_gre_lock); + read_unlock_bh(&net_gre->keymap_lock); return 0; } } - read_unlock_bh(&nf_ct_gre_lock); + read_unlock_bh(&net_gre->keymap_lock); pr_debug("trying to override keymap_%s for ct %p\n", dir == IP_CT_DIR_REPLY ? "reply" : "orig", ct); return -EEXIST; @@ -119,9 +129,9 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir, pr_debug("adding new entry %p: ", km); nf_ct_dump_tuple(&km->tuple); - write_lock_bh(&nf_ct_gre_lock); - list_add_tail(&km->list, &gre_keymap_list); - write_unlock_bh(&nf_ct_gre_lock); + write_lock_bh(&net_gre->keymap_lock); + list_add_tail(&km->list, &net_gre->keymap_list); + write_unlock_bh(&net_gre->keymap_lock); return 0; } @@ -130,12 +140,14 @@ EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_add); /* destroy the keymap entries associated with specified master ct */ void nf_ct_gre_keymap_destroy(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); + struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id); struct nf_conn_help *help = nfct_help(ct); enum ip_conntrack_dir dir; pr_debug("entering for ct %p\n", ct); - write_lock_bh(&nf_ct_gre_lock); + write_lock_bh(&net_gre->keymap_lock); for (dir = IP_CT_DIR_ORIGINAL; dir < IP_CT_DIR_MAX; dir++) { if (help->help.ct_pptp_info.keymap[dir]) { pr_debug("removing %p from list\n", @@ -145,7 +157,7 @@ void nf_ct_gre_keymap_destroy(struct nf_conn *ct) help->help.ct_pptp_info.keymap[dir] = NULL; } } - write_unlock_bh(&nf_ct_gre_lock); + write_unlock_bh(&net_gre->keymap_lock); } EXPORT_SYMBOL_GPL(nf_ct_gre_keymap_destroy); @@ -164,6 +176,7 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple, static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) { + struct net *net = dev_net(skb->dev ? skb->dev : skb->dst->dev); const struct gre_hdr_pptp *pgrehdr; struct gre_hdr_pptp _pgrehdr; __be16 srckey; @@ -190,7 +203,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, } tuple->dst.u.gre.key = pgrehdr->call_id; - srckey = gre_keymap_lookup(tuple); + srckey = gre_keymap_lookup(net, tuple); tuple->src.u.gre.key = srckey; return true; @@ -219,7 +232,7 @@ static int gre_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* If we've seen traffic both ways, this is a GRE connection. @@ -229,7 +242,7 @@ static int gre_packet(struct nf_conn *ct, ct->proto.gre.stream_timeout); /* Also, more likely to be important, and not a probe. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, ct->proto.gre.timeout); @@ -285,15 +298,53 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { #endif }; +static int proto_gre_net_init(struct net *net) +{ + struct netns_proto_gre *net_gre; + int rv; + + net_gre = kmalloc(sizeof(struct netns_proto_gre), GFP_KERNEL); + if (!net_gre) + return -ENOMEM; + rwlock_init(&net_gre->keymap_lock); + INIT_LIST_HEAD(&net_gre->keymap_list); + + rv = net_assign_generic(net, proto_gre_net_id, net_gre); + if (rv < 0) + kfree(net_gre); + return rv; +} + +static void proto_gre_net_exit(struct net *net) +{ + struct netns_proto_gre *net_gre = net_generic(net, proto_gre_net_id); + + nf_ct_gre_keymap_flush(net); + kfree(net_gre); +} + +static struct pernet_operations proto_gre_net_ops = { + .init = proto_gre_net_init, + .exit = proto_gre_net_exit, +}; + static int __init nf_ct_proto_gre_init(void) { - return nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4); + int rv; + + rv = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_gre4); + if (rv < 0) + return rv; + rv = register_pernet_gen_subsys(&proto_gre_net_id, &proto_gre_net_ops); + if (rv < 0) + nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); + return rv; } static void nf_ct_proto_gre_fini(void) { nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_gre4); - nf_ct_gre_keymap_flush(); + unregister_pernet_gen_subsys(proto_gre_net_id, &proto_gre_net_ops); } module_init(nf_ct_proto_gre_init); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 30aa5b94a771..ae8c2609e230 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -287,7 +287,7 @@ static int sctp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { enum sctp_conntrack new_state, old_state; @@ -369,7 +369,7 @@ static int sctp_packet(struct nf_conn *ct, ct->proto.sctp.state = new_state; if (old_state != new_state) - nf_conntrack_event_cache(IPCT_PROTOINFO, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); } write_unlock_bh(&sctp_lock); @@ -380,7 +380,7 @@ static int sctp_packet(struct nf_conn *ct, new_state == SCTP_CONNTRACK_ESTABLISHED) { pr_debug("Setting assured bit\n"); set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } return NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 6f61261888ef..f947ec41e391 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -486,8 +486,9 @@ static bool tcp_in_window(const struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *tcph, - int pf) + u_int8_t pf) { + struct net *net = nf_ct_net(ct); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple; @@ -668,7 +669,7 @@ static bool tcp_in_window(const struct nf_conn *ct, if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || nf_ct_tcp_be_liberal) res = true; - if (!res && LOG_INVALID(IPPROTO_TCP)) + if (!res && LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: %s ", before(seq, sender->td_maxend + 1) ? @@ -746,10 +747,11 @@ static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_ACK|TH_URG) + 1] = }; /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ -static int tcp_error(struct sk_buff *skb, +static int tcp_error(struct net *net, + struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { const struct tcphdr *th; @@ -760,7 +762,7 @@ static int tcp_error(struct sk_buff *skb, /* Smaller that minimal TCP header? */ th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: short packet "); return -NF_ACCEPT; @@ -768,7 +770,7 @@ static int tcp_error(struct sk_buff *skb, /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: truncated/malformed packet "); return -NF_ACCEPT; @@ -779,9 +781,9 @@ static int tcp_error(struct sk_buff *skb, * because the checksum is assumed to be correct. */ /* FIXME: Source route IP option packets --RR */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: bad TCP checksum "); return -NF_ACCEPT; @@ -790,7 +792,7 @@ static int tcp_error(struct sk_buff *skb, /* Check TCP flags. */ tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR|TH_PUSH)); if (!tcp_valid_flags[tcpflags]) { - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid TCP flag combination "); return -NF_ACCEPT; @@ -804,9 +806,10 @@ static int tcp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; enum ip_conntrack_dir dir; @@ -885,7 +888,7 @@ static int tcp_packet(struct nf_conn *ct, * thus initiate a clean new session. */ write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: killing out of sync session "); nf_ct_kill(ct); @@ -898,7 +901,7 @@ static int tcp_packet(struct nf_conn *ct, segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th); write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid packet ignored "); return NF_ACCEPT; @@ -907,7 +910,7 @@ static int tcp_packet(struct nf_conn *ct, pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) + if (LOG_INVALID(net, IPPROTO_TCP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_tcp: invalid state "); return -NF_ACCEPT; @@ -968,9 +971,9 @@ static int tcp_packet(struct nf_conn *ct, timeout = tcp_timeouts[new_state]; write_unlock_bh(&tcp_lock); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct); if (new_state != old_state) - nf_conntrack_event_cache(IPCT_PROTOINFO, skb); + nf_conntrack_event_cache(IPCT_PROTOINFO, ct); if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { /* If only reply is a RST, we can consider ourselves not to @@ -989,7 +992,7 @@ static int tcp_packet(struct nf_conn *ct, after SYN_RECV or a valid answer for a picked up connection. */ set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } nf_ct_refresh_acct(ct, ctinfo, skb, timeout); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 8b21762e65de..7c2ca48698be 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -66,7 +66,7 @@ static int udp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* If we've seen traffic both ways, this is some kind of UDP @@ -75,7 +75,7 @@ static int udp_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udp_timeout); @@ -89,9 +89,9 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udp_error(struct sk_buff *skb, unsigned int dataoff, +static int udp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info *ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { unsigned int udplen = skb->len - dataoff; @@ -101,7 +101,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff, /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { - if (LOG_INVALID(IPPROTO_UDP)) + if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: short packet "); return -NF_ACCEPT; @@ -109,7 +109,7 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff, /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { - if (LOG_INVALID(IPPROTO_UDP)) + if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: truncated/malformed packet "); return -NF_ACCEPT; @@ -123,9 +123,9 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff, * We skip checking packets on the outgoing path * because the checksum is assumed to be correct. * FIXME: Source route IP option packets --RR */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { - if (LOG_INVALID(IPPROTO_UDP)) + if (LOG_INVALID(net, IPPROTO_UDP)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udp: bad UDP checksum "); return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 1fa62f3c24f1..d22d839e4f94 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -65,7 +65,7 @@ static int udplite_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { /* If we've seen traffic both ways, this is some kind of UDP @@ -75,7 +75,7 @@ static int udplite_packet(struct nf_conn *ct, nf_ct_udplite_timeout_stream); /* Also, more likely to be important, and not a probe */ if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_STATUS, skb); + nf_conntrack_event_cache(IPCT_STATUS, ct); } else nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_udplite_timeout); @@ -89,9 +89,11 @@ static bool udplite_new(struct nf_conn *ct, const struct sk_buff *skb, return true; } -static int udplite_error(struct sk_buff *skb, unsigned int dataoff, +static int udplite_error(struct net *net, + struct sk_buff *skb, + unsigned int dataoff, enum ip_conntrack_info *ctinfo, - int pf, + u_int8_t pf, unsigned int hooknum) { unsigned int udplen = skb->len - dataoff; @@ -102,7 +104,7 @@ static int udplite_error(struct sk_buff *skb, unsigned int dataoff, /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: short packet "); return -NF_ACCEPT; @@ -112,7 +114,7 @@ static int udplite_error(struct sk_buff *skb, unsigned int dataoff, if (cscov == 0) cscov = udplen; else if (cscov < sizeof(*hdr) || cscov > udplen) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: invalid checksum coverage "); return -NF_ACCEPT; @@ -120,17 +122,17 @@ static int udplite_error(struct sk_buff *skb, unsigned int dataoff, /* UDPLITE mandates checksums */ if (!hdr->check) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: checksum missing "); return -NF_ACCEPT; } /* Checksum invalid? Ignore. */ - if (nf_conntrack_checksum && hooknum == NF_INET_PRE_ROUTING && + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, pf)) { - if (LOG_INVALID(IPPROTO_UDPLITE)) + if (LOG_INVALID(net, IPPROTO_UDPLITE)) nf_log_packet(pf, 0, skb, NULL, NULL, NULL, "nf_ct_udplite: bad UDPLite checksum "); return -NF_ACCEPT; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 1fa306be60fb..6813f1c8863f 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -736,6 +736,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, struct nf_conntrack_expect *exp, *rtp_exp, *rtcp_exp; enum ip_conntrack_info ctinfo; struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct net *net = nf_ct_net(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); union nf_inet_addr *saddr; struct nf_conntrack_tuple tuple; @@ -775,7 +776,7 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, rcu_read_lock(); do { - exp = __nf_ct_expect_find(&tuple); + exp = __nf_ct_expect_find(net, &tuple); if (!exp || exp->master == ct || nfct_help(exp->master)->helper != nfct_help(ct)->helper || diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 8509db14670b..98106d4e89f0 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -40,18 +40,20 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, EXPORT_SYMBOL_GPL(print_tuple); struct ct_iter_state { + struct seq_net_private p; unsigned int bucket; }; static struct hlist_node *ct_get_first(struct seq_file *seq) { + struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; struct hlist_node *n; for (st->bucket = 0; st->bucket < nf_conntrack_htable_size; st->bucket++) { - n = rcu_dereference(nf_conntrack_hash[st->bucket].first); + n = rcu_dereference(net->ct.hash[st->bucket].first); if (n) return n; } @@ -61,13 +63,14 @@ static struct hlist_node *ct_get_first(struct seq_file *seq) static struct hlist_node *ct_get_next(struct seq_file *seq, struct hlist_node *head) { + struct net *net = seq_file_net(seq); struct ct_iter_state *st = seq->private; head = rcu_dereference(head->next); while (head == NULL) { if (++st->bucket >= nf_conntrack_htable_size) return NULL; - head = rcu_dereference(nf_conntrack_hash[st->bucket].first); + head = rcu_dereference(net->ct.hash[st->bucket].first); } return head; } @@ -177,7 +180,7 @@ static const struct seq_operations ct_seq_ops = { static int ct_open(struct inode *inode, struct file *file) { - return seq_open_private(file, &ct_seq_ops, + return seq_open_net(inode, file, &ct_seq_ops, sizeof(struct ct_iter_state)); } @@ -186,11 +189,12 @@ static const struct file_operations ct_file_ops = { .open = ct_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, + .release = seq_release_net, }; static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) { + struct net *net = seq_file_net(seq); int cpu; if (*pos == 0) @@ -200,7 +204,7 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) if (!cpu_possible(cpu)) continue; *pos = cpu + 1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(net->ct.stat, cpu); } return NULL; @@ -208,13 +212,14 @@ static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) { + struct net *net = seq_file_net(seq); int cpu; for (cpu = *pos; cpu < NR_CPUS; ++cpu) { if (!cpu_possible(cpu)) continue; *pos = cpu + 1; - return &per_cpu(nf_conntrack_stat, cpu); + return per_cpu_ptr(net->ct.stat, cpu); } return NULL; @@ -226,7 +231,8 @@ static void ct_cpu_seq_stop(struct seq_file *seq, void *v) static int ct_cpu_seq_show(struct seq_file *seq, void *v) { - unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); + struct net *net = seq_file_net(seq); + unsigned int nr_conntracks = atomic_read(&net->ct.count); const struct ip_conntrack_stat *st = v; if (v == SEQ_START_TOKEN) { @@ -266,7 +272,8 @@ static const struct seq_operations ct_cpu_seq_ops = { static int ct_cpu_seq_open(struct inode *inode, struct file *file) { - return seq_open(file, &ct_cpu_seq_ops); + return seq_open_net(inode, file, &ct_cpu_seq_ops, + sizeof(struct seq_net_private)); } static const struct file_operations ct_cpu_seq_fops = { @@ -274,56 +281,52 @@ static const struct file_operations ct_cpu_seq_fops = { .open = ct_cpu_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release, + .release = seq_release_net, }; -static int nf_conntrack_standalone_init_proc(void) +static int nf_conntrack_standalone_init_proc(struct net *net) { struct proc_dir_entry *pde; - pde = proc_net_fops_create(&init_net, "nf_conntrack", 0440, &ct_file_ops); + pde = proc_net_fops_create(net, "nf_conntrack", 0440, &ct_file_ops); if (!pde) goto out_nf_conntrack; - pde = proc_create("nf_conntrack", S_IRUGO, init_net.proc_net_stat, + pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat, &ct_cpu_seq_fops); if (!pde) goto out_stat_nf_conntrack; return 0; out_stat_nf_conntrack: - proc_net_remove(&init_net, "nf_conntrack"); + proc_net_remove(net, "nf_conntrack"); out_nf_conntrack: return -ENOMEM; } -static void nf_conntrack_standalone_fini_proc(void) +static void nf_conntrack_standalone_fini_proc(struct net *net) { - remove_proc_entry("nf_conntrack", init_net.proc_net_stat); - proc_net_remove(&init_net, "nf_conntrack"); + remove_proc_entry("nf_conntrack", net->proc_net_stat); + proc_net_remove(net, "nf_conntrack"); } #else -static int nf_conntrack_standalone_init_proc(void) +static int nf_conntrack_standalone_init_proc(struct net *net) { return 0; } -static void nf_conntrack_standalone_fini_proc(void) +static void nf_conntrack_standalone_fini_proc(struct net *net) { } #endif /* CONFIG_PROC_FS */ /* Sysctl support */ -int nf_conntrack_checksum __read_mostly = 1; -EXPORT_SYMBOL_GPL(nf_conntrack_checksum); - #ifdef CONFIG_SYSCTL /* Log invalid packets of a given protocol */ static int log_invalid_proto_min = 0; static int log_invalid_proto_max = 255; -static struct ctl_table_header *nf_ct_sysctl_header; static struct ctl_table_header *nf_ct_netfilter_header; static ctl_table nf_ct_sysctl_table[] = { @@ -338,7 +341,7 @@ static ctl_table nf_ct_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_COUNT, .procname = "nf_conntrack_count", - .data = &nf_conntrack_count, + .data = &init_net.ct.count, .maxlen = sizeof(int), .mode = 0444, .proc_handler = &proc_dointvec, @@ -354,7 +357,7 @@ static ctl_table nf_ct_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_CHECKSUM, .procname = "nf_conntrack_checksum", - .data = &nf_conntrack_checksum, + .data = &init_net.ct.sysctl_checksum, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec, @@ -362,7 +365,7 @@ static ctl_table nf_ct_sysctl_table[] = { { .ctl_name = NET_NF_CONNTRACK_LOG_INVALID, .procname = "nf_conntrack_log_invalid", - .data = &nf_ct_log_invalid, + .data = &init_net.ct.sysctl_log_invalid, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = &proc_dointvec_minmax, @@ -400,74 +403,109 @@ static struct ctl_path nf_ct_path[] = { { } }; -EXPORT_SYMBOL_GPL(nf_ct_log_invalid); - -static int nf_conntrack_standalone_init_sysctl(void) +static int nf_conntrack_standalone_init_sysctl(struct net *net) { - nf_ct_netfilter_header = - register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table); - if (!nf_ct_netfilter_header) - goto out; - - nf_ct_sysctl_header = - register_sysctl_paths(nf_net_netfilter_sysctl_path, - nf_ct_sysctl_table); - if (!nf_ct_sysctl_header) + struct ctl_table *table; + + if (net_eq(net, &init_net)) { + nf_ct_netfilter_header = + register_sysctl_paths(nf_ct_path, nf_ct_netfilter_table); + if (!nf_ct_netfilter_header) + goto out; + } + + table = kmemdup(nf_ct_sysctl_table, sizeof(nf_ct_sysctl_table), + GFP_KERNEL); + if (!table) + goto out_kmemdup; + + table[1].data = &net->ct.count; + table[3].data = &net->ct.sysctl_checksum; + table[4].data = &net->ct.sysctl_log_invalid; + + net->ct.sysctl_header = register_net_sysctl_table(net, + nf_net_netfilter_sysctl_path, table); + if (!net->ct.sysctl_header) goto out_unregister_netfilter; return 0; out_unregister_netfilter: - unregister_sysctl_table(nf_ct_netfilter_header); + kfree(table); +out_kmemdup: + if (net_eq(net, &init_net)) + unregister_sysctl_table(nf_ct_netfilter_header); out: printk("nf_conntrack: can't register to sysctl.\n"); return -ENOMEM; } -static void nf_conntrack_standalone_fini_sysctl(void) +static void nf_conntrack_standalone_fini_sysctl(struct net *net) { - unregister_sysctl_table(nf_ct_netfilter_header); - unregister_sysctl_table(nf_ct_sysctl_header); + struct ctl_table *table; + + if (net_eq(net, &init_net)) + unregister_sysctl_table(nf_ct_netfilter_header); + table = net->ct.sysctl_header->ctl_table_arg; + unregister_net_sysctl_table(net->ct.sysctl_header); + kfree(table); } #else -static int nf_conntrack_standalone_init_sysctl(void) +static int nf_conntrack_standalone_init_sysctl(struct net *net) { return 0; } -static void nf_conntrack_standalone_fini_sysctl(void) +static void nf_conntrack_standalone_fini_sysctl(struct net *net) { } #endif /* CONFIG_SYSCTL */ -static int __init nf_conntrack_standalone_init(void) +static int nf_conntrack_net_init(struct net *net) { int ret; - ret = nf_conntrack_init(); + ret = nf_conntrack_init(net); if (ret < 0) - goto out; - ret = nf_conntrack_standalone_init_proc(); + goto out_init; + ret = nf_conntrack_standalone_init_proc(net); if (ret < 0) goto out_proc; - ret = nf_conntrack_standalone_init_sysctl(); + net->ct.sysctl_checksum = 1; + net->ct.sysctl_log_invalid = 0; + ret = nf_conntrack_standalone_init_sysctl(net); if (ret < 0) goto out_sysctl; return 0; out_sysctl: - nf_conntrack_standalone_fini_proc(); + nf_conntrack_standalone_fini_proc(net); out_proc: - nf_conntrack_cleanup(); -out: + nf_conntrack_cleanup(net); +out_init: return ret; } +static void nf_conntrack_net_exit(struct net *net) +{ + nf_conntrack_standalone_fini_sysctl(net); + nf_conntrack_standalone_fini_proc(net); + nf_conntrack_cleanup(net); +} + +static struct pernet_operations nf_conntrack_net_ops = { + .init = nf_conntrack_net_init, + .exit = nf_conntrack_net_exit, +}; + +static int __init nf_conntrack_standalone_init(void) +{ + return register_pernet_subsys(&nf_conntrack_net_ops); +} + static void __exit nf_conntrack_standalone_fini(void) { - nf_conntrack_standalone_fini_sysctl(); - nf_conntrack_standalone_fini_proc(); - nf_conntrack_cleanup(); + unregister_pernet_subsys(&nf_conntrack_net_ops); } module_init(nf_conntrack_standalone_init); diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 196269c1e586..bf6609978af7 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -15,7 +15,7 @@ /* core.c */ extern unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, - int hook, + unsigned int hook, const struct net_device *indev, const struct net_device *outdev, struct list_head **i, @@ -25,7 +25,7 @@ extern unsigned int nf_iterate(struct list_head *head, /* nf_queue.c */ extern int nf_queue(struct sk_buff *skb, struct list_head *elem, - int pf, unsigned int hook, + u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index 9fda6ee95a31..fa8ae5d2659c 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -15,16 +15,16 @@ #define NF_LOG_PREFIXLEN 128 -static const struct nf_logger *nf_loggers[NPROTO] __read_mostly; +static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); /* return EBUSY if somebody else is registered, EEXIST if the same logger * is registred, 0 on success. */ -int nf_log_register(int pf, const struct nf_logger *logger) +int nf_log_register(u_int8_t pf, const struct nf_logger *logger) { int ret; - if (pf >= NPROTO) + if (pf >= ARRAY_SIZE(nf_loggers)) return -EINVAL; /* Any setup of logging members must be done before @@ -45,9 +45,9 @@ int nf_log_register(int pf, const struct nf_logger *logger) } EXPORT_SYMBOL(nf_log_register); -void nf_log_unregister_pf(int pf) +void nf_log_unregister_pf(u_int8_t pf) { - if (pf >= NPROTO) + if (pf >= ARRAY_SIZE(nf_loggers)) return; mutex_lock(&nf_log_mutex); rcu_assign_pointer(nf_loggers[pf], NULL); @@ -63,7 +63,7 @@ void nf_log_unregister(const struct nf_logger *logger) int i; mutex_lock(&nf_log_mutex); - for (i = 0; i < NPROTO; i++) { + for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) { if (nf_loggers[i] == logger) rcu_assign_pointer(nf_loggers[i], NULL); } @@ -73,7 +73,7 @@ void nf_log_unregister(const struct nf_logger *logger) } EXPORT_SYMBOL(nf_log_unregister); -void nf_log_packet(int pf, +void nf_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -103,7 +103,7 @@ static void *seq_start(struct seq_file *seq, loff_t *pos) { rcu_read_lock(); - if (*pos >= NPROTO) + if (*pos >= ARRAY_SIZE(nf_loggers)) return NULL; return pos; @@ -113,7 +113,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) { (*pos)++; - if (*pos >= NPROTO) + if (*pos >= ARRAY_SIZE(nf_loggers)) return NULL; return pos; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 582ec3efc8a5..4f2310c93e01 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -16,17 +16,17 @@ * long term mutex. The handler must provide an an outfn() to accept packets * for queueing and must reinject all packets it receives, no matter what. */ -static const struct nf_queue_handler *queue_handler[NPROTO]; +static const struct nf_queue_handler *queue_handler[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(queue_handler_mutex); /* return EBUSY when somebody else is registered, return EEXIST if the * same handler is registered, return 0 in case of success. */ -int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh) +int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) { int ret; - if (pf >= NPROTO) + if (pf >= ARRAY_SIZE(queue_handler)) return -EINVAL; mutex_lock(&queue_handler_mutex); @@ -45,9 +45,9 @@ int nf_register_queue_handler(int pf, const struct nf_queue_handler *qh) EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ -int nf_unregister_queue_handler(int pf, const struct nf_queue_handler *qh) +int nf_unregister_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh) { - if (pf >= NPROTO) + if (pf >= ARRAY_SIZE(queue_handler)) return -EINVAL; mutex_lock(&queue_handler_mutex); @@ -67,10 +67,10 @@ EXPORT_SYMBOL(nf_unregister_queue_handler); void nf_unregister_queue_handlers(const struct nf_queue_handler *qh) { - int pf; + u_int8_t pf; mutex_lock(&queue_handler_mutex); - for (pf = 0; pf < NPROTO; pf++) { + for (pf = 0; pf < ARRAY_SIZE(queue_handler); pf++) { if (queue_handler[pf] == qh) rcu_assign_pointer(queue_handler[pf], NULL); } @@ -107,7 +107,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry) */ static int __nf_queue(struct sk_buff *skb, struct list_head *elem, - int pf, unsigned int hook, + u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), @@ -191,7 +191,7 @@ err: int nf_queue(struct sk_buff *skb, struct list_head *elem, - int pf, unsigned int hook, + u_int8_t pf, unsigned int hook, struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sk_buff *), @@ -285,7 +285,7 @@ EXPORT_SYMBOL(nf_reinject); #ifdef CONFIG_PROC_FS static void *seq_start(struct seq_file *seq, loff_t *pos) { - if (*pos >= NPROTO) + if (*pos >= ARRAY_SIZE(queue_handler)) return NULL; return pos; @@ -295,7 +295,7 @@ static void *seq_next(struct seq_file *s, void *v, loff_t *pos) { (*pos)++; - if (*pos >= NPROTO) + if (*pos >= ARRAY_SIZE(queue_handler)) return NULL; return pos; diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index 01489681fa96..8ab829f86574 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -60,14 +60,11 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg) } EXPORT_SYMBOL(nf_unregister_sockopt); -static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, int pf, +static struct nf_sockopt_ops *nf_sockopt_find(struct sock *sk, u_int8_t pf, int val, int get) { struct nf_sockopt_ops *ops; - if (!net_eq(sock_net(sk), &init_net)) - return ERR_PTR(-ENOPROTOOPT); - if (mutex_lock_interruptible(&nf_sockopt_mutex) != 0) return ERR_PTR(-EINTR); @@ -96,7 +93,7 @@ out: } /* Call get/setsockopt() */ -static int nf_sockopt(struct sock *sk, int pf, int val, +static int nf_sockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int *len, int get) { struct nf_sockopt_ops *ops; @@ -115,21 +112,22 @@ static int nf_sockopt(struct sock *sk, int pf, int val, return ret; } -int nf_setsockopt(struct sock *sk, int pf, int val, char __user *opt, +int nf_setsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int len) { return nf_sockopt(sk, pf, val, opt, &len, 0); } EXPORT_SYMBOL(nf_setsockopt); -int nf_getsockopt(struct sock *sk, int pf, int val, char __user *opt, int *len) +int nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, + int *len) { return nf_sockopt(sk, pf, val, opt, len, 1); } EXPORT_SYMBOL(nf_getsockopt); #ifdef CONFIG_COMPAT -static int compat_nf_sockopt(struct sock *sk, int pf, int val, +static int compat_nf_sockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int *len, int get) { struct nf_sockopt_ops *ops; @@ -155,14 +153,14 @@ static int compat_nf_sockopt(struct sock *sk, int pf, int val, return ret; } -int compat_nf_setsockopt(struct sock *sk, int pf, +int compat_nf_setsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int len) { return compat_nf_sockopt(sk, pf, val, opt, &len, 0); } EXPORT_SYMBOL(compat_nf_setsockopt); -int compat_nf_getsockopt(struct sock *sk, int pf, +int compat_nf_getsockopt(struct sock *sk, u_int8_t pf, int val, char __user *opt, int *len) { return compat_nf_sockopt(sk, pf, val, opt, len, 1); diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c new file mode 100644 index 000000000000..cdc97f3105a3 --- /dev/null +++ b/net/netfilter/nf_tproxy_core.c @@ -0,0 +1,95 @@ +/* + * Transparent proxy support for Linux/iptables + * + * Copyright (c) 2006-2007 BalaBit IT Ltd. + * Author: Balazs Scheidler, Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/module.h> + +#include <linux/net.h> +#include <linux/if.h> +#include <linux/netdevice.h> +#include <net/udp.h> +#include <net/netfilter/nf_tproxy_core.h> + +struct sock * +nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, bool listening_only) +{ + struct sock *sk; + + /* look up socket */ + switch (protocol) { + case IPPROTO_TCP: + if (listening_only) + sk = __inet_lookup_listener(net, &tcp_hashinfo, + daddr, ntohs(dport), + in->ifindex); + else + sk = __inet_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + case IPPROTO_UDP: + sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, listener only: %d, sock %p\n", + protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), listening_only, sk); + + return sk; +} +EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4); + +static void +nf_tproxy_destructor(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + skb->sk = NULL; + skb->destructor = NULL; + + if (sk) + nf_tproxy_put_sock(sk); +} + +/* consumes sk */ +int +nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) +{ + if (inet_sk(sk)->transparent) { + skb->sk = sk; + skb->destructor = nf_tproxy_destructor; + return 1; + } else + nf_tproxy_put_sock(sk); + + return 0; +} +EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock); + +static int __init nf_tproxy_init(void) +{ + pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n"); + pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n"); + return 0; +} + +module_init(nf_tproxy_init); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs"); +MODULE_DESCRIPTION("Transparent proxy support core routines"); diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index b75c9c4a995d..9c0ba17a1ddb 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -44,15 +44,17 @@ static struct sock *nfnl = NULL; static const struct nfnetlink_subsystem *subsys_table[NFNL_SUBSYS_COUNT]; static DEFINE_MUTEX(nfnl_mutex); -static inline void nfnl_lock(void) +void nfnl_lock(void) { mutex_lock(&nfnl_mutex); } +EXPORT_SYMBOL_GPL(nfnl_lock); -static inline void nfnl_unlock(void) +void nfnl_unlock(void) { mutex_unlock(&nfnl_mutex); } +EXPORT_SYMBOL_GPL(nfnl_unlock); int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n) { @@ -132,9 +134,10 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return 0; type = nlh->nlmsg_type; +replay: ss = nfnetlink_get_subsys(type); if (!ss) { -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES nfnl_unlock(); request_module("nfnetlink-subsys-%d", NFNL_SUBSYS_ID(type)); nfnl_lock(); @@ -165,7 +168,10 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } else return -EINVAL; - return nc->call(nfnl, skb, nlh, cda); + err = nc->call(nfnl, skb, nlh, cda); + if (err == -EAGAIN) + goto replay; + return err; } } diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 9a35b57ab76d..41e0105d3828 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -359,7 +359,7 @@ static inline int __build_packet_message(struct nfulnl_instance *inst, const struct sk_buff *skb, unsigned int data_len, - unsigned int pf, + u_int8_t pf, unsigned int hooknum, const struct net_device *indev, const struct net_device *outdev, @@ -534,7 +534,7 @@ static struct nf_loginfo default_loginfo = { /* log handler for internal netfilter logging api */ static void -nfulnl_log_packet(unsigned int pf, +nfulnl_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 5d75cd86ebb3..89837a4eef76 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -30,7 +30,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("[ip,ip6,arp]_tables backend module"); +MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) @@ -58,17 +58,20 @@ static struct xt_af *xt; #define duprintf(format, args...) #endif -static const char *const xt_prefix[NPROTO] = { - [AF_INET] = "ip", - [AF_INET6] = "ip6", - [NF_ARP] = "arp", +static const char *const xt_prefix[NFPROTO_NUMPROTO] = { + [NFPROTO_UNSPEC] = "x", + [NFPROTO_IPV4] = "ip", + [NFPROTO_ARP] = "arp", + [NFPROTO_BRIDGE] = "eb", + [NFPROTO_IPV6] = "ip6", }; /* Registration hooks for targets. */ int xt_register_target(struct xt_target *target) { - int ret, af = target->family; + u_int8_t af = target->family; + int ret; ret = mutex_lock_interruptible(&xt[af].mutex); if (ret != 0) @@ -82,7 +85,7 @@ EXPORT_SYMBOL(xt_register_target); void xt_unregister_target(struct xt_target *target) { - int af = target->family; + u_int8_t af = target->family; mutex_lock(&xt[af].mutex); list_del(&target->list); @@ -123,7 +126,8 @@ EXPORT_SYMBOL(xt_unregister_targets); int xt_register_match(struct xt_match *match) { - int ret, af = match->family; + u_int8_t af = match->family; + int ret; ret = mutex_lock_interruptible(&xt[af].mutex); if (ret != 0) @@ -139,7 +143,7 @@ EXPORT_SYMBOL(xt_register_match); void xt_unregister_match(struct xt_match *match) { - int af = match->family; + u_int8_t af = match->family; mutex_lock(&xt[af].mutex); list_del(&match->list); @@ -185,7 +189,7 @@ EXPORT_SYMBOL(xt_unregister_matches); */ /* Find match, grabs ref. Returns ERR_PTR() on error. */ -struct xt_match *xt_find_match(int af, const char *name, u8 revision) +struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) { struct xt_match *m; int err = 0; @@ -205,12 +209,17 @@ struct xt_match *xt_find_match(int af, const char *name, u8 revision) } } mutex_unlock(&xt[af].mutex); + + if (af != NFPROTO_UNSPEC) + /* Try searching again in the family-independent list */ + return xt_find_match(NFPROTO_UNSPEC, name, revision); + return ERR_PTR(err); } EXPORT_SYMBOL(xt_find_match); /* Find target, grabs ref. Returns ERR_PTR() on error. */ -struct xt_target *xt_find_target(int af, const char *name, u8 revision) +struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) { struct xt_target *t; int err = 0; @@ -230,11 +239,16 @@ struct xt_target *xt_find_target(int af, const char *name, u8 revision) } } mutex_unlock(&xt[af].mutex); + + if (af != NFPROTO_UNSPEC) + /* Try searching again in the family-independent list */ + return xt_find_target(NFPROTO_UNSPEC, name, revision); + return ERR_PTR(err); } EXPORT_SYMBOL(xt_find_target); -struct xt_target *xt_request_find_target(int af, const char *name, u8 revision) +struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision) { struct xt_target *target; @@ -246,7 +260,7 @@ struct xt_target *xt_request_find_target(int af, const char *name, u8 revision) } EXPORT_SYMBOL_GPL(xt_request_find_target); -static int match_revfn(int af, const char *name, u8 revision, int *bestp) +static int match_revfn(u8 af, const char *name, u8 revision, int *bestp) { const struct xt_match *m; int have_rev = 0; @@ -262,7 +276,7 @@ static int match_revfn(int af, const char *name, u8 revision, int *bestp) return have_rev; } -static int target_revfn(int af, const char *name, u8 revision, int *bestp) +static int target_revfn(u8 af, const char *name, u8 revision, int *bestp) { const struct xt_target *t; int have_rev = 0; @@ -279,7 +293,7 @@ static int target_revfn(int af, const char *name, u8 revision, int *bestp) } /* Returns true or false (if no such extension at all) */ -int xt_find_revision(int af, const char *name, u8 revision, int target, +int xt_find_revision(u8 af, const char *name, u8 revision, int target, int *err) { int have_rev, best = -1; @@ -307,37 +321,47 @@ int xt_find_revision(int af, const char *name, u8 revision, int target, } EXPORT_SYMBOL_GPL(xt_find_revision); -int xt_check_match(const struct xt_match *match, unsigned short family, - unsigned int size, const char *table, unsigned int hook_mask, - unsigned short proto, int inv_proto) +int xt_check_match(struct xt_mtchk_param *par, + unsigned int size, u_int8_t proto, bool inv_proto) { - if (XT_ALIGN(match->matchsize) != size) { + if (XT_ALIGN(par->match->matchsize) != size && + par->match->matchsize != -1) { + /* + * ebt_among is exempt from centralized matchsize checking + * because it uses a dynamic-size data set. + */ printk("%s_tables: %s match: invalid size %Zu != %u\n", - xt_prefix[family], match->name, - XT_ALIGN(match->matchsize), size); + xt_prefix[par->family], par->match->name, + XT_ALIGN(par->match->matchsize), size); return -EINVAL; } - if (match->table && strcmp(match->table, table)) { + if (par->match->table != NULL && + strcmp(par->match->table, par->table) != 0) { printk("%s_tables: %s match: only valid in %s table, not %s\n", - xt_prefix[family], match->name, match->table, table); + xt_prefix[par->family], par->match->name, + par->match->table, par->table); return -EINVAL; } - if (match->hooks && (hook_mask & ~match->hooks) != 0) { - printk("%s_tables: %s match: bad hook_mask %u/%u\n", - xt_prefix[family], match->name, hook_mask, match->hooks); + if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) { + printk("%s_tables: %s match: bad hook_mask %#x/%#x\n", + xt_prefix[par->family], par->match->name, + par->hook_mask, par->match->hooks); return -EINVAL; } - if (match->proto && (match->proto != proto || inv_proto)) { + if (par->match->proto && (par->match->proto != proto || inv_proto)) { printk("%s_tables: %s match: only valid for protocol %u\n", - xt_prefix[family], match->name, match->proto); + xt_prefix[par->family], par->match->name, + par->match->proto); return -EINVAL; } + if (par->match->checkentry != NULL && !par->match->checkentry(par)) + return -EINVAL; return 0; } EXPORT_SYMBOL_GPL(xt_check_match); #ifdef CONFIG_COMPAT -int xt_compat_add_offset(int af, unsigned int offset, short delta) +int xt_compat_add_offset(u_int8_t af, unsigned int offset, short delta) { struct compat_delta *tmp; @@ -359,7 +383,7 @@ int xt_compat_add_offset(int af, unsigned int offset, short delta) } EXPORT_SYMBOL_GPL(xt_compat_add_offset); -void xt_compat_flush_offsets(int af) +void xt_compat_flush_offsets(u_int8_t af) { struct compat_delta *tmp, *next; @@ -373,7 +397,7 @@ void xt_compat_flush_offsets(int af) } EXPORT_SYMBOL_GPL(xt_compat_flush_offsets); -short xt_compat_calc_jump(int af, unsigned int offset) +short xt_compat_calc_jump(u_int8_t af, unsigned int offset) { struct compat_delta *tmp; short delta; @@ -448,32 +472,36 @@ int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr, EXPORT_SYMBOL_GPL(xt_compat_match_to_user); #endif /* CONFIG_COMPAT */ -int xt_check_target(const struct xt_target *target, unsigned short family, - unsigned int size, const char *table, unsigned int hook_mask, - unsigned short proto, int inv_proto) +int xt_check_target(struct xt_tgchk_param *par, + unsigned int size, u_int8_t proto, bool inv_proto) { - if (XT_ALIGN(target->targetsize) != size) { + if (XT_ALIGN(par->target->targetsize) != size) { printk("%s_tables: %s target: invalid size %Zu != %u\n", - xt_prefix[family], target->name, - XT_ALIGN(target->targetsize), size); + xt_prefix[par->family], par->target->name, + XT_ALIGN(par->target->targetsize), size); return -EINVAL; } - if (target->table && strcmp(target->table, table)) { + if (par->target->table != NULL && + strcmp(par->target->table, par->table) != 0) { printk("%s_tables: %s target: only valid in %s table, not %s\n", - xt_prefix[family], target->name, target->table, table); + xt_prefix[par->family], par->target->name, + par->target->table, par->table); return -EINVAL; } - if (target->hooks && (hook_mask & ~target->hooks) != 0) { - printk("%s_tables: %s target: bad hook_mask %u/%u\n", - xt_prefix[family], target->name, hook_mask, - target->hooks); + if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) { + printk("%s_tables: %s target: bad hook_mask %#x/%#x\n", + xt_prefix[par->family], par->target->name, + par->hook_mask, par->target->hooks); return -EINVAL; } - if (target->proto && (target->proto != proto || inv_proto)) { + if (par->target->proto && (par->target->proto != proto || inv_proto)) { printk("%s_tables: %s target: only valid for protocol %u\n", - xt_prefix[family], target->name, target->proto); + xt_prefix[par->family], par->target->name, + par->target->proto); return -EINVAL; } + if (par->target->checkentry != NULL && !par->target->checkentry(par)) + return -EINVAL; return 0; } EXPORT_SYMBOL_GPL(xt_check_target); @@ -590,7 +618,8 @@ void xt_free_table_info(struct xt_table_info *info) EXPORT_SYMBOL(xt_free_table_info); /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ -struct xt_table *xt_find_table_lock(struct net *net, int af, const char *name) +struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, + const char *name) { struct xt_table *t; @@ -612,13 +641,13 @@ void xt_table_unlock(struct xt_table *table) EXPORT_SYMBOL_GPL(xt_table_unlock); #ifdef CONFIG_COMPAT -void xt_compat_lock(int af) +void xt_compat_lock(u_int8_t af) { mutex_lock(&xt[af].compat_mutex); } EXPORT_SYMBOL_GPL(xt_compat_lock); -void xt_compat_unlock(int af) +void xt_compat_unlock(u_int8_t af) { mutex_unlock(&xt[af].compat_mutex); } @@ -722,13 +751,13 @@ EXPORT_SYMBOL_GPL(xt_unregister_table); #ifdef CONFIG_PROC_FS struct xt_names_priv { struct seq_net_private p; - int af; + u_int8_t af; }; static void *xt_table_seq_start(struct seq_file *seq, loff_t *pos) { struct xt_names_priv *priv = seq->private; struct net *net = seq_file_net(seq); - int af = priv->af; + u_int8_t af = priv->af; mutex_lock(&xt[af].mutex); return seq_list_start(&net->xt.tables[af], *pos); @@ -738,7 +767,7 @@ static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct xt_names_priv *priv = seq->private; struct net *net = seq_file_net(seq); - int af = priv->af; + u_int8_t af = priv->af; return seq_list_next(v, &net->xt.tables[af], pos); } @@ -746,7 +775,7 @@ static void *xt_table_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void xt_table_seq_stop(struct seq_file *seq, void *v) { struct xt_names_priv *priv = seq->private; - int af = priv->af; + u_int8_t af = priv->af; mutex_unlock(&xt[af].mutex); } @@ -922,14 +951,14 @@ static const struct file_operations xt_target_ops = { #endif /* CONFIG_PROC_FS */ -int xt_proto_init(struct net *net, int af) +int xt_proto_init(struct net *net, u_int8_t af) { #ifdef CONFIG_PROC_FS char buf[XT_FUNCTION_MAXNAMELEN]; struct proc_dir_entry *proc; #endif - if (af >= NPROTO) + if (af >= ARRAY_SIZE(xt_prefix)) return -EINVAL; @@ -974,7 +1003,7 @@ out: } EXPORT_SYMBOL_GPL(xt_proto_init); -void xt_proto_fini(struct net *net, int af) +void xt_proto_fini(struct net *net, u_int8_t af) { #ifdef CONFIG_PROC_FS char buf[XT_FUNCTION_MAXNAMELEN]; @@ -998,7 +1027,7 @@ static int __net_init xt_net_init(struct net *net) { int i; - for (i = 0; i < NPROTO; i++) + for (i = 0; i < NFPROTO_NUMPROTO; i++) INIT_LIST_HEAD(&net->xt.tables[i]); return 0; } @@ -1011,11 +1040,11 @@ static int __init xt_init(void) { int i, rv; - xt = kmalloc(sizeof(struct xt_af) * NPROTO, GFP_KERNEL); + xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL); if (!xt) return -ENOMEM; - for (i = 0; i < NPROTO; i++) { + for (i = 0; i < NFPROTO_NUMPROTO; i++) { mutex_init(&xt[i].mutex); #ifdef CONFIG_COMPAT mutex_init(&xt[i].compat_mutex); diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c index 77a52bf83225..011bc80dd2a1 100644 --- a/net/netfilter/xt_CLASSIFY.c +++ b/net/netfilter/xt_CLASSIFY.c @@ -27,50 +27,34 @@ MODULE_ALIAS("ipt_CLASSIFY"); MODULE_ALIAS("ip6t_CLASSIFY"); static unsigned int -classify_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +classify_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_classify_target_info *clinfo = targinfo; + const struct xt_classify_target_info *clinfo = par->targinfo; skb->priority = clinfo->priority; return XT_CONTINUE; } -static struct xt_target classify_tg_reg[] __read_mostly = { - { - .family = AF_INET, - .name = "CLASSIFY", - .target = classify_tg, - .targetsize = sizeof(struct xt_classify_target_info), - .table = "mangle", - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_FORWARD) | - (1 << NF_INET_POST_ROUTING), - .me = THIS_MODULE, - }, - { - .name = "CLASSIFY", - .family = AF_INET6, - .target = classify_tg, - .targetsize = sizeof(struct xt_classify_target_info), - .table = "mangle", - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_FORWARD) | - (1 << NF_INET_POST_ROUTING), - .me = THIS_MODULE, - }, +static struct xt_target classify_tg_reg __read_mostly = { + .name = "CLASSIFY", + .revision = 0, + .family = NFPROTO_UNSPEC, + .table = "mangle", + .hooks = (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | + (1 << NF_INET_POST_ROUTING), + .target = classify_tg, + .targetsize = sizeof(struct xt_classify_target_info), + .me = THIS_MODULE, }; static int __init classify_tg_init(void) { - return xt_register_targets(classify_tg_reg, - ARRAY_SIZE(classify_tg_reg)); + return xt_register_target(&classify_tg_reg); } static void __exit classify_tg_exit(void) { - xt_unregister_targets(classify_tg_reg, ARRAY_SIZE(classify_tg_reg)); + xt_unregister_target(&classify_tg_reg); } module_init(classify_tg_init); diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c index 5fecfb4794b1..d6e5ab463277 100644 --- a/net/netfilter/xt_CONNMARK.c +++ b/net/netfilter/xt_CONNMARK.c @@ -36,11 +36,9 @@ MODULE_ALIAS("ip6t_CONNMARK"); #include <net/netfilter/nf_conntrack_ecache.h> static unsigned int -connmark_tg_v0(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +connmark_tg_v0(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_connmark_target_info *markinfo = targinfo; + const struct xt_connmark_target_info *markinfo = par->targinfo; struct nf_conn *ct; enum ip_conntrack_info ctinfo; u_int32_t diff; @@ -54,7 +52,7 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in, newmark = (ct->mark & ~markinfo->mask) | markinfo->mark; if (newmark != ct->mark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_SAVE: @@ -62,7 +60,7 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in, (skb->mark & markinfo->mask); if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: @@ -77,11 +75,9 @@ connmark_tg_v0(struct sk_buff *skb, const struct net_device *in, } static unsigned int -connmark_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +connmark_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_connmark_tginfo1 *info = targinfo; + const struct xt_connmark_tginfo1 *info = par->targinfo; enum ip_conntrack_info ctinfo; struct nf_conn *ct; u_int32_t newmark; @@ -95,7 +91,7 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in, newmark = (ct->mark & ~info->ctmask) ^ info->ctmark; if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_SAVE: @@ -103,7 +99,7 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in, (skb->mark & info->nfmask); if (ct->mark != newmark) { ct->mark = newmark; - nf_conntrack_event_cache(IPCT_MARK, skb); + nf_conntrack_event_cache(IPCT_MARK, ct); } break; case XT_CONNMARK_RESTORE: @@ -116,18 +112,15 @@ connmark_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -connmark_tg_check_v0(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool connmark_tg_check_v0(const struct xt_tgchk_param *par) { - const struct xt_connmark_target_info *matchinfo = targinfo; + const struct xt_connmark_target_info *matchinfo = par->targinfo; if (matchinfo->mode == XT_CONNMARK_RESTORE) { - if (strcmp(tablename, "mangle") != 0) { + if (strcmp(par->table, "mangle") != 0) { printk(KERN_WARNING "CONNMARK: restore can only be " "called from \"mangle\" table, not \"%s\"\n", - tablename); + par->table); return false; } } @@ -135,31 +128,27 @@ connmark_tg_check_v0(const char *tablename, const void *entry, printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); return false; } - if (nf_ct_l3proto_try_module_get(target->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", target->family); + "proto=%u\n", par->family); return false; } return true; } -static bool -connmark_tg_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool connmark_tg_check(const struct xt_tgchk_param *par) { - if (nf_ct_l3proto_try_module_get(target->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "cannot load conntrack support for " - "proto=%u\n", target->family); + "proto=%u\n", par->family); return false; } return true; } -static void -connmark_tg_destroy(const struct xt_target *target, void *targinfo) +static void connmark_tg_destroy(const struct xt_tgdtor_param *par) { - nf_ct_l3proto_module_put(target->family); + nf_ct_l3proto_module_put(par->family); } #ifdef CONFIG_COMPAT @@ -197,7 +186,7 @@ static struct xt_target connmark_tg_reg[] __read_mostly = { { .name = "CONNMARK", .revision = 0, - .family = AF_INET, + .family = NFPROTO_UNSPEC, .checkentry = connmark_tg_check_v0, .destroy = connmark_tg_destroy, .target = connmark_tg_v0, @@ -210,34 +199,9 @@ static struct xt_target connmark_tg_reg[] __read_mostly = { .me = THIS_MODULE }, { - .name = "CONNMARK", - .revision = 0, - .family = AF_INET6, - .checkentry = connmark_tg_check_v0, - .destroy = connmark_tg_destroy, - .target = connmark_tg_v0, - .targetsize = sizeof(struct xt_connmark_target_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_connmark_target_info), - .compat_from_user = connmark_tg_compat_from_user_v0, - .compat_to_user = connmark_tg_compat_to_user_v0, -#endif - .me = THIS_MODULE - }, - { - .name = "CONNMARK", - .revision = 1, - .family = AF_INET, - .checkentry = connmark_tg_check, - .target = connmark_tg, - .targetsize = sizeof(struct xt_connmark_tginfo1), - .destroy = connmark_tg_destroy, - .me = THIS_MODULE, - }, - { .name = "CONNMARK", .revision = 1, - .family = AF_INET6, + .family = NFPROTO_UNSPEC, .checkentry = connmark_tg_check, .target = connmark_tg, .targetsize = sizeof(struct xt_connmark_tginfo1), diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 76ca1f2421eb..b54c3756fdc3 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -43,7 +43,7 @@ static void secmark_save(const struct sk_buff *skb) ct = nf_ct_get(skb, &ctinfo); if (ct && !ct->secmark) { ct->secmark = skb->secmark; - nf_conntrack_event_cache(IPCT_SECMARK, skb); + nf_conntrack_event_cache(IPCT_SECMARK, ct); } } } @@ -65,11 +65,9 @@ static void secmark_restore(struct sk_buff *skb) } static unsigned int -connsecmark_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_connsecmark_target_info *info = targinfo; + const struct xt_connsecmark_target_info *info = par->targinfo; switch (info->mode) { case CONNSECMARK_SAVE: @@ -87,16 +85,14 @@ connsecmark_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -connsecmark_tg_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool connsecmark_tg_check(const struct xt_tgchk_param *par) { - const struct xt_connsecmark_target_info *info = targinfo; + const struct xt_connsecmark_target_info *info = par->targinfo; - if (strcmp(tablename, "mangle") && strcmp(tablename, "security")) { + if (strcmp(par->table, "mangle") != 0 && + strcmp(par->table, "security") != 0) { printk(KERN_INFO PFX "target only valid in the \'mangle\' " - "or \'security\' tables, not \'%s\'.\n", tablename); + "or \'security\' tables, not \'%s\'.\n", par->table); return false; } @@ -110,51 +106,38 @@ connsecmark_tg_check(const char *tablename, const void *entry, return false; } - if (nf_ct_l3proto_try_module_get(target->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", target->family); + "proto=%u\n", par->family); return false; } return true; } -static void -connsecmark_tg_destroy(const struct xt_target *target, void *targinfo) +static void connsecmark_tg_destroy(const struct xt_tgdtor_param *par) { - nf_ct_l3proto_module_put(target->family); + nf_ct_l3proto_module_put(par->family); } -static struct xt_target connsecmark_tg_reg[] __read_mostly = { - { - .name = "CONNSECMARK", - .family = AF_INET, - .checkentry = connsecmark_tg_check, - .destroy = connsecmark_tg_destroy, - .target = connsecmark_tg, - .targetsize = sizeof(struct xt_connsecmark_target_info), - .me = THIS_MODULE, - }, - { - .name = "CONNSECMARK", - .family = AF_INET6, - .checkentry = connsecmark_tg_check, - .destroy = connsecmark_tg_destroy, - .target = connsecmark_tg, - .targetsize = sizeof(struct xt_connsecmark_target_info), - .me = THIS_MODULE, - }, +static struct xt_target connsecmark_tg_reg __read_mostly = { + .name = "CONNSECMARK", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = connsecmark_tg_check, + .destroy = connsecmark_tg_destroy, + .target = connsecmark_tg, + .targetsize = sizeof(struct xt_connsecmark_target_info), + .me = THIS_MODULE, }; static int __init connsecmark_tg_init(void) { - return xt_register_targets(connsecmark_tg_reg, - ARRAY_SIZE(connsecmark_tg_reg)); + return xt_register_target(&connsecmark_tg_reg); } static void __exit connsecmark_tg_exit(void) { - xt_unregister_targets(connsecmark_tg_reg, - ARRAY_SIZE(connsecmark_tg_reg)); + xt_unregister_target(&connsecmark_tg_reg); } module_init(connsecmark_tg_init); diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 97efd74c04fe..6a347e768f86 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -29,11 +29,9 @@ MODULE_ALIAS("ipt_TOS"); MODULE_ALIAS("ip6t_TOS"); static unsigned int -dscp_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +dscp_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_DSCP_info *dinfo = targinfo; + const struct xt_DSCP_info *dinfo = par->targinfo; u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; if (dscp != dinfo->dscp) { @@ -48,11 +46,9 @@ dscp_tg(struct sk_buff *skb, const struct net_device *in, } static unsigned int -dscp_tg6(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +dscp_tg6(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_DSCP_info *dinfo = targinfo; + const struct xt_DSCP_info *dinfo = par->targinfo; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; if (dscp != dinfo->dscp) { @@ -65,26 +61,21 @@ dscp_tg6(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -dscp_tg_check(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool dscp_tg_check(const struct xt_tgchk_param *par) { - const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp; + const struct xt_DSCP_info *info = par->targinfo; - if (dscp > XT_DSCP_MAX) { - printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); + if (info->dscp > XT_DSCP_MAX) { + printk(KERN_WARNING "DSCP: dscp %x out of range\n", info->dscp); return false; } return true; } static unsigned int -tos_tg_v0(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +tos_tg_v0(struct sk_buff *skb, const struct xt_target_param *par) { - const struct ipt_tos_target_info *info = targinfo; + const struct ipt_tos_target_info *info = par->targinfo; struct iphdr *iph = ip_hdr(skb); u_int8_t oldtos; @@ -101,12 +92,10 @@ tos_tg_v0(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static bool -tos_tg_check_v0(const char *tablename, const void *e_void, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool tos_tg_check_v0(const struct xt_tgchk_param *par) { - const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos; + const struct ipt_tos_target_info *info = par->targinfo; + const uint8_t tos = info->tos; if (tos != IPTOS_LOWDELAY && tos != IPTOS_THROUGHPUT && tos != IPTOS_RELIABILITY && tos != IPTOS_MINCOST && @@ -119,11 +108,9 @@ tos_tg_check_v0(const char *tablename, const void *e_void, } static unsigned int -tos_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +tos_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_tos_target_info *info = targinfo; + const struct xt_tos_target_info *info = par->targinfo; struct iphdr *iph = ip_hdr(skb); u_int8_t orig, nv; @@ -141,11 +128,9 @@ tos_tg(struct sk_buff *skb, const struct net_device *in, } static unsigned int -tos_tg6(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +tos_tg6(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_tos_target_info *info = targinfo; + const struct xt_tos_target_info *info = par->targinfo; struct ipv6hdr *iph = ipv6_hdr(skb); u_int8_t orig, nv; @@ -165,7 +150,7 @@ tos_tg6(struct sk_buff *skb, const struct net_device *in, static struct xt_target dscp_tg_reg[] __read_mostly = { { .name = "DSCP", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = dscp_tg_check, .target = dscp_tg, .targetsize = sizeof(struct xt_DSCP_info), @@ -174,7 +159,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = { }, { .name = "DSCP", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = dscp_tg_check, .target = dscp_tg6, .targetsize = sizeof(struct xt_DSCP_info), @@ -184,7 +169,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = { { .name = "TOS", .revision = 0, - .family = AF_INET, + .family = NFPROTO_IPV4, .table = "mangle", .target = tos_tg_v0, .targetsize = sizeof(struct ipt_tos_target_info), @@ -194,7 +179,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = { { .name = "TOS", .revision = 1, - .family = AF_INET, + .family = NFPROTO_IPV4, .table = "mangle", .target = tos_tg, .targetsize = sizeof(struct xt_tos_target_info), @@ -203,7 +188,7 @@ static struct xt_target dscp_tg_reg[] __read_mostly = { { .name = "TOS", .revision = 1, - .family = AF_INET6, + .family = NFPROTO_IPV6, .table = "mangle", .target = tos_tg6, .targetsize = sizeof(struct xt_tos_target_info), diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c index f9ce20b58981..67574bcfb8ac 100644 --- a/net/netfilter/xt_MARK.c +++ b/net/netfilter/xt_MARK.c @@ -25,22 +25,18 @@ MODULE_ALIAS("ipt_MARK"); MODULE_ALIAS("ip6t_MARK"); static unsigned int -mark_tg_v0(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +mark_tg_v0(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_mark_target_info *markinfo = targinfo; + const struct xt_mark_target_info *markinfo = par->targinfo; skb->mark = markinfo->mark; return XT_CONTINUE; } static unsigned int -mark_tg_v1(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +mark_tg_v1(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_mark_target_info_v1 *markinfo = targinfo; + const struct xt_mark_target_info_v1 *markinfo = par->targinfo; int mark = 0; switch (markinfo->mode) { @@ -62,22 +58,17 @@ mark_tg_v1(struct sk_buff *skb, const struct net_device *in, } static unsigned int -mark_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +mark_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_mark_tginfo2 *info = targinfo; + const struct xt_mark_tginfo2 *info = par->targinfo; skb->mark = (skb->mark & ~info->mask) ^ info->mark; return XT_CONTINUE; } -static bool -mark_tg_check_v0(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool mark_tg_check_v0(const struct xt_tgchk_param *par) { - const struct xt_mark_target_info *markinfo = targinfo; + const struct xt_mark_target_info *markinfo = par->targinfo; if (markinfo->mark > 0xffffffff) { printk(KERN_WARNING "MARK: Only supports 32bit wide mark\n"); @@ -86,12 +77,9 @@ mark_tg_check_v0(const char *tablename, const void *entry, return true; } -static bool -mark_tg_check_v1(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool mark_tg_check_v1(const struct xt_tgchk_param *par) { - const struct xt_mark_target_info_v1 *markinfo = targinfo; + const struct xt_mark_target_info_v1 *markinfo = par->targinfo; if (markinfo->mode != XT_MARK_SET && markinfo->mode != XT_MARK_AND @@ -161,7 +149,7 @@ static int mark_tg_compat_to_user_v1(void __user *dst, void *src) static struct xt_target mark_tg_reg[] __read_mostly = { { .name = "MARK", - .family = AF_INET, + .family = NFPROTO_UNSPEC, .revision = 0, .checkentry = mark_tg_check_v0, .target = mark_tg_v0, @@ -176,7 +164,7 @@ static struct xt_target mark_tg_reg[] __read_mostly = { }, { .name = "MARK", - .family = AF_INET, + .family = NFPROTO_UNSPEC, .revision = 1, .checkentry = mark_tg_check_v1, .target = mark_tg_v1, @@ -190,47 +178,9 @@ static struct xt_target mark_tg_reg[] __read_mostly = { .me = THIS_MODULE, }, { - .name = "MARK", - .family = AF_INET6, - .revision = 0, - .checkentry = mark_tg_check_v0, - .target = mark_tg_v0, - .targetsize = sizeof(struct xt_mark_target_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_mark_target_info), - .compat_from_user = mark_tg_compat_from_user_v0, - .compat_to_user = mark_tg_compat_to_user_v0, -#endif - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "MARK", - .family = AF_INET6, - .revision = 1, - .checkentry = mark_tg_check_v1, - .target = mark_tg_v1, - .targetsize = sizeof(struct xt_mark_target_info_v1), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_mark_target_info_v1), - .compat_from_user = mark_tg_compat_from_user_v1, - .compat_to_user = mark_tg_compat_to_user_v1, -#endif - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "MARK", - .revision = 2, - .family = AF_INET, - .target = mark_tg, - .targetsize = sizeof(struct xt_mark_tginfo2), - .me = THIS_MODULE, - }, - { .name = "MARK", .revision = 2, - .family = AF_INET6, + .family = NFPROTO_UNSPEC, .target = mark_tg, .targetsize = sizeof(struct xt_mark_tginfo2), .me = THIS_MODULE, diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c index 19ae8efae655..50e3a52d3b31 100644 --- a/net/netfilter/xt_NFLOG.c +++ b/net/netfilter/xt_NFLOG.c @@ -21,11 +21,9 @@ MODULE_ALIAS("ipt_NFLOG"); MODULE_ALIAS("ip6t_NFLOG"); static unsigned int -nflog_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +nflog_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_nflog_info *info = targinfo; + const struct xt_nflog_info *info = par->targinfo; struct nf_loginfo li; li.type = NF_LOG_TYPE_ULOG; @@ -33,17 +31,14 @@ nflog_tg(struct sk_buff *skb, const struct net_device *in, li.u.ulog.group = info->group; li.u.ulog.qthreshold = info->threshold; - nf_log_packet(target->family, hooknum, skb, in, out, &li, - "%s", info->prefix); + nf_log_packet(par->family, par->hooknum, skb, par->in, + par->out, &li, "%s", info->prefix); return XT_CONTINUE; } -static bool -nflog_tg_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targetinfo, - unsigned int hookmask) +static bool nflog_tg_check(const struct xt_tgchk_param *par) { - const struct xt_nflog_info *info = targetinfo; + const struct xt_nflog_info *info = par->targinfo; if (info->flags & ~XT_NFLOG_MASK) return false; @@ -52,33 +47,24 @@ nflog_tg_check(const char *tablename, const void *entry, return true; } -static struct xt_target nflog_tg_reg[] __read_mostly = { - { - .name = "NFLOG", - .family = AF_INET, - .checkentry = nflog_tg_check, - .target = nflog_tg, - .targetsize = sizeof(struct xt_nflog_info), - .me = THIS_MODULE, - }, - { - .name = "NFLOG", - .family = AF_INET6, - .checkentry = nflog_tg_check, - .target = nflog_tg, - .targetsize = sizeof(struct xt_nflog_info), - .me = THIS_MODULE, - }, +static struct xt_target nflog_tg_reg __read_mostly = { + .name = "NFLOG", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = nflog_tg_check, + .target = nflog_tg, + .targetsize = sizeof(struct xt_nflog_info), + .me = THIS_MODULE, }; static int __init nflog_tg_init(void) { - return xt_register_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg)); + return xt_register_target(&nflog_tg_reg); } static void __exit nflog_tg_exit(void) { - xt_unregister_targets(nflog_tg_reg, ARRAY_SIZE(nflog_tg_reg)); + xt_unregister_target(&nflog_tg_reg); } module_init(nflog_tg_init); diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index beb24d19a56f..f9977b3311f7 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -24,11 +24,9 @@ MODULE_ALIAS("ip6t_NFQUEUE"); MODULE_ALIAS("arpt_NFQUEUE"); static unsigned int -nfqueue_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_NFQ_info *tinfo = targinfo; + const struct xt_NFQ_info *tinfo = par->targinfo; return NF_QUEUE_NR(tinfo->queuenum); } @@ -36,21 +34,21 @@ nfqueue_tg(struct sk_buff *skb, const struct net_device *in, static struct xt_target nfqueue_tg_reg[] __read_mostly = { { .name = "NFQUEUE", - .family = AF_INET, + .family = NFPROTO_IPV4, .target = nfqueue_tg, .targetsize = sizeof(struct xt_NFQ_info), .me = THIS_MODULE, }, { .name = "NFQUEUE", - .family = AF_INET6, + .family = NFPROTO_IPV6, .target = nfqueue_tg, .targetsize = sizeof(struct xt_NFQ_info), .me = THIS_MODULE, }, { .name = "NFQUEUE", - .family = NF_ARP, + .family = NFPROTO_ARP, .target = nfqueue_tg, .targetsize = sizeof(struct xt_NFQ_info), .me = THIS_MODULE, diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c index 6c9de611eb8d..e7a0a54fd4ea 100644 --- a/net/netfilter/xt_NOTRACK.c +++ b/net/netfilter/xt_NOTRACK.c @@ -13,9 +13,7 @@ MODULE_ALIAS("ipt_NOTRACK"); MODULE_ALIAS("ip6t_NOTRACK"); static unsigned int -notrack_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +notrack_tg(struct sk_buff *skb, const struct xt_target_param *par) { /* Previously seen (loopback)? Ignore. */ if (skb->nfct != NULL) @@ -32,31 +30,23 @@ notrack_tg(struct sk_buff *skb, const struct net_device *in, return XT_CONTINUE; } -static struct xt_target notrack_tg_reg[] __read_mostly = { - { - .name = "NOTRACK", - .family = AF_INET, - .target = notrack_tg, - .table = "raw", - .me = THIS_MODULE, - }, - { - .name = "NOTRACK", - .family = AF_INET6, - .target = notrack_tg, - .table = "raw", - .me = THIS_MODULE, - }, +static struct xt_target notrack_tg_reg __read_mostly = { + .name = "NOTRACK", + .revision = 0, + .family = NFPROTO_UNSPEC, + .target = notrack_tg, + .table = "raw", + .me = THIS_MODULE, }; static int __init notrack_tg_init(void) { - return xt_register_targets(notrack_tg_reg, ARRAY_SIZE(notrack_tg_reg)); + return xt_register_target(¬rack_tg_reg); } static void __exit notrack_tg_exit(void) { - xt_unregister_targets(notrack_tg_reg, ARRAY_SIZE(notrack_tg_reg)); + xt_unregister_target(¬rack_tg_reg); } module_init(notrack_tg_init); diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 64d6ad380293..43f5676b1af4 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -71,14 +71,9 @@ void xt_rateest_put(struct xt_rateest *est) EXPORT_SYMBOL_GPL(xt_rateest_put); static unsigned int -xt_rateest_tg(struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) +xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par) { - const struct xt_rateest_target_info *info = targinfo; + const struct xt_rateest_target_info *info = par->targinfo; struct gnet_stats_basic *stats = &info->est->bstats; spin_lock_bh(&info->est->lock); @@ -89,14 +84,9 @@ xt_rateest_tg(struct sk_buff *skb, return XT_CONTINUE; } -static bool -xt_rateest_tg_checkentry(const char *tablename, - const void *entry, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) +static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) { - struct xt_rateest_target_info *info = targinfo; + struct xt_rateest_target_info *info = par->targinfo; struct xt_rateest *est; struct { struct nlattr opt; @@ -149,33 +139,22 @@ err1: return false; } -static void xt_rateest_tg_destroy(const struct xt_target *target, - void *targinfo) +static void xt_rateest_tg_destroy(const struct xt_tgdtor_param *par) { - struct xt_rateest_target_info *info = targinfo; + struct xt_rateest_target_info *info = par->targinfo; xt_rateest_put(info->est); } -static struct xt_target xt_rateest_target[] __read_mostly = { - { - .family = AF_INET, - .name = "RATEEST", - .target = xt_rateest_tg, - .checkentry = xt_rateest_tg_checkentry, - .destroy = xt_rateest_tg_destroy, - .targetsize = sizeof(struct xt_rateest_target_info), - .me = THIS_MODULE, - }, - { - .family = AF_INET6, - .name = "RATEEST", - .target = xt_rateest_tg, - .checkentry = xt_rateest_tg_checkentry, - .destroy = xt_rateest_tg_destroy, - .targetsize = sizeof(struct xt_rateest_target_info), - .me = THIS_MODULE, - }, +static struct xt_target xt_rateest_tg_reg __read_mostly = { + .name = "RATEEST", + .revision = 0, + .family = NFPROTO_UNSPEC, + .target = xt_rateest_tg, + .checkentry = xt_rateest_tg_checkentry, + .destroy = xt_rateest_tg_destroy, + .targetsize = sizeof(struct xt_rateest_target_info), + .me = THIS_MODULE, }; static int __init xt_rateest_tg_init(void) @@ -186,13 +165,12 @@ static int __init xt_rateest_tg_init(void) INIT_HLIST_HEAD(&rateest_hash[i]); get_random_bytes(&jhash_rnd, sizeof(jhash_rnd)); - return xt_register_targets(xt_rateest_target, - ARRAY_SIZE(xt_rateest_target)); + return xt_register_target(&xt_rateest_tg_reg); } static void __exit xt_rateest_tg_fini(void) { - xt_unregister_targets(xt_rateest_target, ARRAY_SIZE(xt_rateest_target)); + xt_unregister_target(&xt_rateest_tg_reg); } diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 94f87ee7552b..7a6f9e6f5dfa 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -29,12 +29,10 @@ MODULE_ALIAS("ip6t_SECMARK"); static u8 mode; static unsigned int -secmark_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +secmark_tg(struct sk_buff *skb, const struct xt_target_param *par) { u32 secmark = 0; - const struct xt_secmark_target_info *info = targinfo; + const struct xt_secmark_target_info *info = par->targinfo; BUG_ON(info->mode != mode); @@ -82,16 +80,14 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info) return true; } -static bool -secmark_tg_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool secmark_tg_check(const struct xt_tgchk_param *par) { - struct xt_secmark_target_info *info = targinfo; + struct xt_secmark_target_info *info = par->targinfo; - if (strcmp(tablename, "mangle") && strcmp(tablename, "security")) { + if (strcmp(par->table, "mangle") != 0 && + strcmp(par->table, "security") != 0) { printk(KERN_INFO PFX "target only valid in the \'mangle\' " - "or \'security\' tables, not \'%s\'.\n", tablename); + "or \'security\' tables, not \'%s\'.\n", par->table); return false; } @@ -117,7 +113,7 @@ secmark_tg_check(const char *tablename, const void *entry, return true; } -static void secmark_tg_destroy(const struct xt_target *target, void *targinfo) +static void secmark_tg_destroy(const struct xt_tgdtor_param *par) { switch (mode) { case SECMARK_MODE_SEL: @@ -125,35 +121,25 @@ static void secmark_tg_destroy(const struct xt_target *target, void *targinfo) } } -static struct xt_target secmark_tg_reg[] __read_mostly = { - { - .name = "SECMARK", - .family = AF_INET, - .checkentry = secmark_tg_check, - .destroy = secmark_tg_destroy, - .target = secmark_tg, - .targetsize = sizeof(struct xt_secmark_target_info), - .me = THIS_MODULE, - }, - { - .name = "SECMARK", - .family = AF_INET6, - .checkentry = secmark_tg_check, - .destroy = secmark_tg_destroy, - .target = secmark_tg, - .targetsize = sizeof(struct xt_secmark_target_info), - .me = THIS_MODULE, - }, +static struct xt_target secmark_tg_reg __read_mostly = { + .name = "SECMARK", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = secmark_tg_check, + .destroy = secmark_tg_destroy, + .target = secmark_tg, + .targetsize = sizeof(struct xt_secmark_target_info), + .me = THIS_MODULE, }; static int __init secmark_tg_init(void) { - return xt_register_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg)); + return xt_register_target(&secmark_tg_reg); } static void __exit secmark_tg_exit(void) { - xt_unregister_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg)); + xt_unregister_target(&secmark_tg_reg); } module_init(secmark_tg_init); diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index beb5094703cb..4f3b1f808795 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -174,15 +174,13 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb, } static unsigned int -tcpmss_tg4(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +tcpmss_tg4(struct sk_buff *skb, const struct xt_target_param *par) { struct iphdr *iph = ip_hdr(skb); __be16 newlen; int ret; - ret = tcpmss_mangle_packet(skb, targinfo, + ret = tcpmss_mangle_packet(skb, par->targinfo, tcpmss_reverse_mtu(skb, PF_INET), iph->ihl * 4, sizeof(*iph) + sizeof(struct tcphdr)); @@ -199,9 +197,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct net_device *in, #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) static unsigned int -tcpmss_tg6(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +tcpmss_tg6(struct sk_buff *skb, const struct xt_target_param *par) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); u8 nexthdr; @@ -212,7 +208,7 @@ tcpmss_tg6(struct sk_buff *skb, const struct net_device *in, tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr); if (tcphoff < 0) return NF_DROP; - ret = tcpmss_mangle_packet(skb, targinfo, + ret = tcpmss_mangle_packet(skb, par->targinfo, tcpmss_reverse_mtu(skb, PF_INET6), tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); @@ -241,16 +237,13 @@ static inline bool find_syn_match(const struct xt_entry_match *m) return false; } -static bool -tcpmss_tg4_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool tcpmss_tg4_check(const struct xt_tgchk_param *par) { - const struct xt_tcpmss_info *info = targinfo; - const struct ipt_entry *e = entry; + const struct xt_tcpmss_info *info = par->targinfo; + const struct ipt_entry *e = par->entryinfo; if (info->mss == XT_TCPMSS_CLAMP_PMTU && - (hook_mask & ~((1 << NF_INET_FORWARD) | + (par->hook_mask & ~((1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING))) != 0) { printk("xt_TCPMSS: path-MTU clamping only supported in " @@ -264,16 +257,13 @@ tcpmss_tg4_check(const char *tablename, const void *entry, } #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) -static bool -tcpmss_tg6_check(const char *tablename, const void *entry, - const struct xt_target *target, void *targinfo, - unsigned int hook_mask) +static bool tcpmss_tg6_check(const struct xt_tgchk_param *par) { - const struct xt_tcpmss_info *info = targinfo; - const struct ip6t_entry *e = entry; + const struct xt_tcpmss_info *info = par->targinfo; + const struct ip6t_entry *e = par->entryinfo; if (info->mss == XT_TCPMSS_CLAMP_PMTU && - (hook_mask & ~((1 << NF_INET_FORWARD) | + (par->hook_mask & ~((1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING))) != 0) { printk("xt_TCPMSS: path-MTU clamping only supported in " @@ -289,7 +279,7 @@ tcpmss_tg6_check(const char *tablename, const void *entry, static struct xt_target tcpmss_tg_reg[] __read_mostly = { { - .family = AF_INET, + .family = NFPROTO_IPV4, .name = "TCPMSS", .checkentry = tcpmss_tg4_check, .target = tcpmss_tg4, @@ -299,7 +289,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = { }, #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) { - .family = AF_INET6, + .family = NFPROTO_IPV6, .name = "TCPMSS", .checkentry = tcpmss_tg6_check, .target = tcpmss_tg6, diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index 9685b6fcbc81..9dd8c8ef63eb 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -75,19 +75,15 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb, } static unsigned int -tcpoptstrip_tg4(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_target_param *par) { - return tcpoptstrip_mangle_packet(skb, targinfo, ip_hdrlen(skb), + return tcpoptstrip_mangle_packet(skb, par->targinfo, ip_hdrlen(skb), sizeof(struct iphdr) + sizeof(struct tcphdr)); } #if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE) static unsigned int -tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_target_param *par) { struct ipv6hdr *ipv6h = ipv6_hdr(skb); int tcphoff; @@ -98,7 +94,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in, if (tcphoff < 0) return NF_DROP; - return tcpoptstrip_mangle_packet(skb, targinfo, tcphoff, + return tcpoptstrip_mangle_packet(skb, par->targinfo, tcphoff, sizeof(*ipv6h) + sizeof(struct tcphdr)); } #endif @@ -106,7 +102,7 @@ tcpoptstrip_tg6(struct sk_buff *skb, const struct net_device *in, static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = { { .name = "TCPOPTSTRIP", - .family = AF_INET, + .family = NFPROTO_IPV4, .table = "mangle", .proto = IPPROTO_TCP, .target = tcpoptstrip_tg4, @@ -116,7 +112,7 @@ static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = { #if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE) { .name = "TCPOPTSTRIP", - .family = AF_INET6, + .family = NFPROTO_IPV6, .table = "mangle", .proto = IPPROTO_TCP, .target = tcpoptstrip_tg6, diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c new file mode 100644 index 000000000000..1340c2fa3621 --- /dev/null +++ b/net/netfilter/xt_TPROXY.c @@ -0,0 +1,102 @@ +/* + * Transparent proxy support for Linux/iptables + * + * Copyright (c) 2006-2007 BalaBit IT Ltd. + * Author: Balazs Scheidler, Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <net/checksum.h> +#include <net/udp.h> +#include <net/inet_sock.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/xt_TPROXY.h> + +#include <net/netfilter/ipv4/nf_defrag_ipv4.h> +#include <net/netfilter/nf_tproxy_core.h> + +static unsigned int +tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct iphdr *iph = ip_hdr(skb); + const struct xt_tproxy_target_info *tgi = par->targinfo; + struct udphdr _hdr, *hp; + struct sock *sk; + + hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); + if (hp == NULL) + return NF_DROP; + + sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, + iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr, + hp->source, tgi->lport ? tgi->lport : hp->dest, + par->in, true); + + /* NOTE: assign_sock consumes our sk reference */ + if (sk && nf_tproxy_assign_sock(skb, sk)) { + /* This should be in a separate target, but we don't do multiple + targets on the same rule yet */ + skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; + + pr_debug("redirecting: proto %u %08x:%u -> %08x:%u, mark: %x\n", + iph->protocol, ntohl(iph->daddr), ntohs(hp->dest), + ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark); + return NF_ACCEPT; + } + + pr_debug("no socket, dropping: proto %u %08x:%u -> %08x:%u, mark: %x\n", + iph->protocol, ntohl(iph->daddr), ntohs(hp->dest), + ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark); + return NF_DROP; +} + +static bool tproxy_tg_check(const struct xt_tgchk_param *par) +{ + const struct ipt_ip *i = par->entryinfo; + + if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) + && !(i->invflags & IPT_INV_PROTO)) + return true; + + pr_info("xt_TPROXY: Can be used only in combination with " + "either -p tcp or -p udp\n"); + return false; +} + +static struct xt_target tproxy_tg_reg __read_mostly = { + .name = "TPROXY", + .family = AF_INET, + .table = "mangle", + .target = tproxy_tg, + .targetsize = sizeof(struct xt_tproxy_target_info), + .checkentry = tproxy_tg_check, + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, +}; + +static int __init tproxy_tg_init(void) +{ + nf_defrag_ipv4_enable(); + return xt_register_target(&tproxy_tg_reg); +} + +static void __exit tproxy_tg_exit(void) +{ + xt_unregister_target(&tproxy_tg_reg); +} + +module_init(tproxy_tg_init); +module_exit(tproxy_tg_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs"); +MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module."); +MODULE_ALIAS("ipt_TPROXY"); diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c index 30dab79a3438..fbb04b86c46b 100644 --- a/net/netfilter/xt_TRACE.c +++ b/net/netfilter/xt_TRACE.c @@ -11,39 +11,29 @@ MODULE_ALIAS("ipt_TRACE"); MODULE_ALIAS("ip6t_TRACE"); static unsigned int -trace_tg(struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const struct xt_target *target, const void *targinfo) +trace_tg(struct sk_buff *skb, const struct xt_target_param *par) { skb->nf_trace = 1; return XT_CONTINUE; } -static struct xt_target trace_tg_reg[] __read_mostly = { - { - .name = "TRACE", - .family = AF_INET, - .target = trace_tg, - .table = "raw", - .me = THIS_MODULE, - }, - { - .name = "TRACE", - .family = AF_INET6, - .target = trace_tg, - .table = "raw", - .me = THIS_MODULE, - }, +static struct xt_target trace_tg_reg __read_mostly = { + .name = "TRACE", + .revision = 0, + .family = NFPROTO_UNSPEC, + .table = "raw", + .target = trace_tg, + .me = THIS_MODULE, }; static int __init trace_tg_init(void) { - return xt_register_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg)); + return xt_register_target(&trace_tg_reg); } static void __exit trace_tg_exit(void) { - xt_unregister_targets(trace_tg_reg, ARRAY_SIZE(trace_tg_reg)); + xt_unregister_target(&trace_tg_reg); } module_init(trace_tg_init); diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c index 89f47364e848..e82179832acd 100644 --- a/net/netfilter/xt_comment.c +++ b/net/netfilter/xt_comment.c @@ -16,40 +16,29 @@ MODULE_ALIAS("ipt_comment"); MODULE_ALIAS("ip6t_comment"); static bool -comment_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protooff, - bool *hotdrop) +comment_mt(const struct sk_buff *skb, const struct xt_match_param *par) { /* We always match */ return true; } -static struct xt_match comment_mt_reg[] __read_mostly = { - { - .name = "comment", - .family = AF_INET, - .match = comment_mt, - .matchsize = sizeof(struct xt_comment_info), - .me = THIS_MODULE - }, - { - .name = "comment", - .family = AF_INET6, - .match = comment_mt, - .matchsize = sizeof(struct xt_comment_info), - .me = THIS_MODULE - }, +static struct xt_match comment_mt_reg __read_mostly = { + .name = "comment", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = comment_mt, + .matchsize = sizeof(struct xt_comment_info), + .me = THIS_MODULE, }; static int __init comment_mt_init(void) { - return xt_register_matches(comment_mt_reg, ARRAY_SIZE(comment_mt_reg)); + return xt_register_match(&comment_mt_reg); } static void __exit comment_mt_exit(void) { - xt_unregister_matches(comment_mt_reg, ARRAY_SIZE(comment_mt_reg)); + xt_unregister_match(&comment_mt_reg); } module_init(comment_mt_init); diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 3e39c4fe1931..955e6598a7f0 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -17,12 +17,9 @@ MODULE_ALIAS("ipt_connbytes"); MODULE_ALIAS("ip6t_connbytes"); static bool -connbytes_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_connbytes_info *sinfo = matchinfo; + const struct xt_connbytes_info *sinfo = par->matchinfo; const struct nf_conn *ct; enum ip_conntrack_info ctinfo; u_int64_t what = 0; /* initialize to make gcc happy */ @@ -95,12 +92,9 @@ connbytes_mt(const struct sk_buff *skb, const struct net_device *in, return what >= sinfo->count.from; } -static bool -connbytes_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool connbytes_mt_check(const struct xt_mtchk_param *par) { - const struct xt_connbytes_info *sinfo = matchinfo; + const struct xt_connbytes_info *sinfo = par->matchinfo; if (sinfo->what != XT_CONNBYTES_PKTS && sinfo->what != XT_CONNBYTES_BYTES && @@ -112,51 +106,39 @@ connbytes_mt_check(const char *tablename, const void *ip, sinfo->direction != XT_CONNBYTES_DIR_BOTH) return false; - if (nf_ct_l3proto_try_module_get(match->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", match->family); + "proto=%u\n", par->family); return false; } return true; } -static void -connbytes_mt_destroy(const struct xt_match *match, void *matchinfo) +static void connbytes_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(match->family); + nf_ct_l3proto_module_put(par->family); } -static struct xt_match connbytes_mt_reg[] __read_mostly = { - { - .name = "connbytes", - .family = AF_INET, - .checkentry = connbytes_mt_check, - .match = connbytes_mt, - .destroy = connbytes_mt_destroy, - .matchsize = sizeof(struct xt_connbytes_info), - .me = THIS_MODULE - }, - { - .name = "connbytes", - .family = AF_INET6, - .checkentry = connbytes_mt_check, - .match = connbytes_mt, - .destroy = connbytes_mt_destroy, - .matchsize = sizeof(struct xt_connbytes_info), - .me = THIS_MODULE - }, +static struct xt_match connbytes_mt_reg __read_mostly = { + .name = "connbytes", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = connbytes_mt_check, + .match = connbytes_mt, + .destroy = connbytes_mt_destroy, + .matchsize = sizeof(struct xt_connbytes_info), + .me = THIS_MODULE, }; static int __init connbytes_mt_init(void) { - return xt_register_matches(connbytes_mt_reg, - ARRAY_SIZE(connbytes_mt_reg)); + return xt_register_match(&connbytes_mt_reg); } static void __exit connbytes_mt_exit(void) { - xt_unregister_matches(connbytes_mt_reg, ARRAY_SIZE(connbytes_mt_reg)); + xt_unregister_match(&connbytes_mt_reg); } module_init(connbytes_mt_init); diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 70907f6baac3..7f404cc64c83 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -82,9 +82,9 @@ static inline bool already_closed(const struct nf_conn *conn) static inline unsigned int same_source_net(const union nf_inet_addr *addr, const union nf_inet_addr *mask, - const union nf_inet_addr *u3, unsigned int family) + const union nf_inet_addr *u3, u_int8_t family) { - if (family == AF_INET) { + if (family == NFPROTO_IPV4) { return (addr->ip & mask->ip) == (u3->ip & mask->ip); } else { union nf_inet_addr lh, rh; @@ -114,7 +114,7 @@ static int count_them(struct xt_connlimit_data *data, int matches = 0; - if (match->family == AF_INET6) + if (match->family == NFPROTO_IPV6) hash = &data->iphash[connlimit_iphash6(addr, mask)]; else hash = &data->iphash[connlimit_iphash(addr->ip & mask->ip)]; @@ -123,7 +123,7 @@ static int count_them(struct xt_connlimit_data *data, /* check the saved connections */ list_for_each_entry_safe(conn, tmp, hash, list) { - found = __nf_conntrack_find(&conn->tuple); + found = __nf_conntrack_find(&init_net, &conn->tuple); found_ct = NULL; if (found != NULL) @@ -178,12 +178,9 @@ static int count_them(struct xt_connlimit_data *data, } static bool -connlimit_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_connlimit_info *info = matchinfo; + const struct xt_connlimit_info *info = par->matchinfo; union nf_inet_addr addr; struct nf_conntrack_tuple tuple; const struct nf_conntrack_tuple *tuple_ptr = &tuple; @@ -195,10 +192,10 @@ connlimit_mt(const struct sk_buff *skb, const struct net_device *in, if (ct != NULL) tuple_ptr = &ct->tuplehash[0].tuple; else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), - match->family, &tuple)) + par->family, &tuple)) goto hotdrop; - if (match->family == AF_INET6) { + if (par->family == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); memcpy(&addr.ip6, &iph->saddr, sizeof(iph->saddr)); } else { @@ -208,40 +205,37 @@ connlimit_mt(const struct sk_buff *skb, const struct net_device *in, spin_lock_bh(&info->data->lock); connections = count_them(info->data, tuple_ptr, &addr, - &info->mask, match); + &info->mask, par->match); spin_unlock_bh(&info->data->lock); if (connections < 0) { /* kmalloc failed, drop it entirely */ - *hotdrop = true; + *par->hotdrop = true; return false; } return (connections > info->limit) ^ info->inverse; hotdrop: - *hotdrop = true; + *par->hotdrop = true; return false; } -static bool -connlimit_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool connlimit_mt_check(const struct xt_mtchk_param *par) { - struct xt_connlimit_info *info = matchinfo; + struct xt_connlimit_info *info = par->matchinfo; unsigned int i; - if (nf_ct_l3proto_try_module_get(match->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "cannot load conntrack support for " - "address family %u\n", match->family); + "address family %u\n", par->family); return false; } /* init private data */ info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL); if (info->data == NULL) { - nf_ct_l3proto_module_put(match->family); + nf_ct_l3proto_module_put(par->family); return false; } @@ -252,16 +246,15 @@ connlimit_mt_check(const char *tablename, const void *ip, return true; } -static void -connlimit_mt_destroy(const struct xt_match *match, void *matchinfo) +static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) { - const struct xt_connlimit_info *info = matchinfo; + const struct xt_connlimit_info *info = par->matchinfo; struct xt_connlimit_conn *conn; struct xt_connlimit_conn *tmp; struct list_head *hash = info->data->iphash; unsigned int i; - nf_ct_l3proto_module_put(match->family); + nf_ct_l3proto_module_put(par->family); for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) { list_for_each_entry_safe(conn, tmp, &hash[i], list) { @@ -273,41 +266,30 @@ connlimit_mt_destroy(const struct xt_match *match, void *matchinfo) kfree(info->data); } -static struct xt_match connlimit_mt_reg[] __read_mostly = { - { - .name = "connlimit", - .family = AF_INET, - .checkentry = connlimit_mt_check, - .match = connlimit_mt, - .matchsize = sizeof(struct xt_connlimit_info), - .destroy = connlimit_mt_destroy, - .me = THIS_MODULE, - }, - { - .name = "connlimit", - .family = AF_INET6, - .checkentry = connlimit_mt_check, - .match = connlimit_mt, - .matchsize = sizeof(struct xt_connlimit_info), - .destroy = connlimit_mt_destroy, - .me = THIS_MODULE, - }, +static struct xt_match connlimit_mt_reg __read_mostly = { + .name = "connlimit", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = connlimit_mt_check, + .match = connlimit_mt, + .matchsize = sizeof(struct xt_connlimit_info), + .destroy = connlimit_mt_destroy, + .me = THIS_MODULE, }; static int __init connlimit_mt_init(void) { - return xt_register_matches(connlimit_mt_reg, - ARRAY_SIZE(connlimit_mt_reg)); + return xt_register_match(&connlimit_mt_reg); } static void __exit connlimit_mt_exit(void) { - xt_unregister_matches(connlimit_mt_reg, ARRAY_SIZE(connlimit_mt_reg)); + xt_unregister_match(&connlimit_mt_reg); } module_init(connlimit_mt_init); module_exit(connlimit_mt_exit); -MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); +MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>"); MODULE_DESCRIPTION("Xtables: Number of connections matching"); MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_connlimit"); diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index aaa1b96691f9..86cacab7a4a3 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -34,12 +34,9 @@ MODULE_ALIAS("ipt_connmark"); MODULE_ALIAS("ip6t_connmark"); static bool -connmark_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_connmark_mtinfo1 *info = matchinfo; + const struct xt_connmark_mtinfo1 *info = par->matchinfo; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; @@ -51,12 +48,9 @@ connmark_mt(const struct sk_buff *skb, const struct net_device *in, } static bool -connmark_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +connmark_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_connmark_info *info = matchinfo; + const struct xt_connmark_info *info = par->matchinfo; const struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -67,42 +61,35 @@ connmark_mt_v0(const struct sk_buff *skb, const struct net_device *in, return ((ct->mark & info->mask) == info->mark) ^ info->invert; } -static bool -connmark_mt_check_v0(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool connmark_mt_check_v0(const struct xt_mtchk_param *par) { - const struct xt_connmark_info *cm = matchinfo; + const struct xt_connmark_info *cm = par->matchinfo; if (cm->mark > 0xffffffff || cm->mask > 0xffffffff) { printk(KERN_WARNING "connmark: only support 32bit mark\n"); return false; } - if (nf_ct_l3proto_try_module_get(match->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", match->family); + "proto=%u\n", par->family); return false; } return true; } -static bool -connmark_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool connmark_mt_check(const struct xt_mtchk_param *par) { - if (nf_ct_l3proto_try_module_get(match->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "cannot load conntrack support for " - "proto=%u\n", match->family); + "proto=%u\n", par->family); return false; } return true; } -static void -connmark_mt_destroy(const struct xt_match *match, void *matchinfo) +static void connmark_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(match->family); + nf_ct_l3proto_module_put(par->family); } #ifdef CONFIG_COMPAT @@ -140,7 +127,7 @@ static struct xt_match connmark_mt_reg[] __read_mostly = { { .name = "connmark", .revision = 0, - .family = AF_INET, + .family = NFPROTO_UNSPEC, .checkentry = connmark_mt_check_v0, .match = connmark_mt_v0, .destroy = connmark_mt_destroy, @@ -153,34 +140,9 @@ static struct xt_match connmark_mt_reg[] __read_mostly = { .me = THIS_MODULE }, { - .name = "connmark", - .revision = 0, - .family = AF_INET6, - .checkentry = connmark_mt_check_v0, - .match = connmark_mt_v0, - .destroy = connmark_mt_destroy, - .matchsize = sizeof(struct xt_connmark_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_connmark_info), - .compat_from_user = connmark_mt_compat_from_user_v0, - .compat_to_user = connmark_mt_compat_to_user_v0, -#endif - .me = THIS_MODULE - }, - { - .name = "connmark", - .revision = 1, - .family = AF_INET, - .checkentry = connmark_mt_check, - .match = connmark_mt, - .matchsize = sizeof(struct xt_connmark_mtinfo1), - .destroy = connmark_mt_destroy, - .me = THIS_MODULE, - }, - { .name = "connmark", .revision = 1, - .family = AF_INET6, + .family = NFPROTO_UNSPEC, .checkentry = connmark_mt_check, .match = connmark_mt, .matchsize = sizeof(struct xt_connmark_mtinfo1), diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index d61412f58ef7..0b7139f3dd78 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -25,12 +25,9 @@ MODULE_ALIAS("ipt_conntrack"); MODULE_ALIAS("ip6t_conntrack"); static bool -conntrack_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +conntrack_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_conntrack_info *sinfo = matchinfo; + const struct xt_conntrack_info *sinfo = par->matchinfo; const struct nf_conn *ct; enum ip_conntrack_info ctinfo; unsigned int statebit; @@ -121,9 +118,9 @@ conntrack_addrcmp(const union nf_inet_addr *kaddr, const union nf_inet_addr *uaddr, const union nf_inet_addr *umask, unsigned int l3proto) { - if (l3proto == AF_INET) + if (l3proto == NFPROTO_IPV4) return ((kaddr->ip ^ uaddr->ip) & umask->ip) == 0; - else if (l3proto == AF_INET6) + else if (l3proto == NFPROTO_IPV6) return ipv6_masked_addr_cmp(&kaddr->in6, &umask->in6, &uaddr->in6) == 0; else @@ -133,7 +130,7 @@ conntrack_addrcmp(const union nf_inet_addr *kaddr, static inline bool conntrack_mt_origsrc(const struct nf_conn *ct, const struct xt_conntrack_mtinfo1 *info, - unsigned int family) + u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, &info->origsrc_addr, &info->origsrc_mask, family); @@ -142,7 +139,7 @@ conntrack_mt_origsrc(const struct nf_conn *ct, static inline bool conntrack_mt_origdst(const struct nf_conn *ct, const struct xt_conntrack_mtinfo1 *info, - unsigned int family) + u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3, &info->origdst_addr, &info->origdst_mask, family); @@ -151,7 +148,7 @@ conntrack_mt_origdst(const struct nf_conn *ct, static inline bool conntrack_mt_replsrc(const struct nf_conn *ct, const struct xt_conntrack_mtinfo1 *info, - unsigned int family) + u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3, &info->replsrc_addr, &info->replsrc_mask, family); @@ -160,7 +157,7 @@ conntrack_mt_replsrc(const struct nf_conn *ct, static inline bool conntrack_mt_repldst(const struct nf_conn *ct, const struct xt_conntrack_mtinfo1 *info, - unsigned int family) + u_int8_t family) { return conntrack_addrcmp(&ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3, &info->repldst_addr, &info->repldst_mask, family); @@ -205,12 +202,9 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo1 *info, } static bool -conntrack_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_conntrack_mtinfo1 *info = matchinfo; + const struct xt_conntrack_mtinfo1 *info = par->matchinfo; enum ip_conntrack_info ctinfo; const struct nf_conn *ct; unsigned int statebit; @@ -244,22 +238,22 @@ conntrack_mt(const struct sk_buff *skb, const struct net_device *in, return false; if (info->match_flags & XT_CONNTRACK_ORIGSRC) - if (conntrack_mt_origsrc(ct, info, match->family) ^ + if (conntrack_mt_origsrc(ct, info, par->family) ^ !(info->invert_flags & XT_CONNTRACK_ORIGSRC)) return false; if (info->match_flags & XT_CONNTRACK_ORIGDST) - if (conntrack_mt_origdst(ct, info, match->family) ^ + if (conntrack_mt_origdst(ct, info, par->family) ^ !(info->invert_flags & XT_CONNTRACK_ORIGDST)) return false; if (info->match_flags & XT_CONNTRACK_REPLSRC) - if (conntrack_mt_replsrc(ct, info, match->family) ^ + if (conntrack_mt_replsrc(ct, info, par->family) ^ !(info->invert_flags & XT_CONNTRACK_REPLSRC)) return false; if (info->match_flags & XT_CONNTRACK_REPLDST) - if (conntrack_mt_repldst(ct, info, match->family) ^ + if (conntrack_mt_repldst(ct, info, par->family) ^ !(info->invert_flags & XT_CONNTRACK_REPLDST)) return false; @@ -284,23 +278,19 @@ conntrack_mt(const struct sk_buff *skb, const struct net_device *in, return true; } -static bool -conntrack_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool conntrack_mt_check(const struct xt_mtchk_param *par) { - if (nf_ct_l3proto_try_module_get(match->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", match->family); + "proto=%u\n", par->family); return false; } return true; } -static void -conntrack_mt_destroy(const struct xt_match *match, void *matchinfo) +static void conntrack_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(match->family); + nf_ct_l3proto_module_put(par->family); } #ifdef CONFIG_COMPAT @@ -356,7 +346,7 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = { { .name = "conntrack", .revision = 0, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = conntrack_mt_v0, .checkentry = conntrack_mt_check, .destroy = conntrack_mt_destroy, @@ -371,17 +361,7 @@ static struct xt_match conntrack_mt_reg[] __read_mostly = { { .name = "conntrack", .revision = 1, - .family = AF_INET, - .matchsize = sizeof(struct xt_conntrack_mtinfo1), - .match = conntrack_mt, - .checkentry = conntrack_mt_check, - .destroy = conntrack_mt_destroy, - .me = THIS_MODULE, - }, - { - .name = "conntrack", - .revision = 1, - .family = AF_INET6, + .family = NFPROTO_UNSPEC, .matchsize = sizeof(struct xt_conntrack_mtinfo1), .match = conntrack_mt, .checkentry = conntrack_mt_check, diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 8b6522186d9f..e5d3e8673287 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -93,20 +93,18 @@ match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff, } static bool -dccp_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_dccp_info *info = matchinfo; + const struct xt_dccp_info *info = par->matchinfo; const struct dccp_hdr *dh; struct dccp_hdr _dh; - if (offset) + if (par->fragoff != 0) return false; - dh = skb_header_pointer(skb, protoff, sizeof(_dh), &_dh); + dh = skb_header_pointer(skb, par->thoff, sizeof(_dh), &_dh); if (dh == NULL) { - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -118,17 +116,14 @@ dccp_mt(const struct sk_buff *skb, const struct net_device *in, XT_DCCP_DEST_PORTS, info->flags, info->invflags) && DCCHECK(match_types(dh, info->typemask), XT_DCCP_TYPE, info->flags, info->invflags) - && DCCHECK(match_option(info->option, skb, protoff, dh, - hotdrop), + && DCCHECK(match_option(info->option, skb, par->thoff, dh, + par->hotdrop), XT_DCCP_OPTION, info->flags, info->invflags); } -static bool -dccp_mt_check(const char *tablename, const void *inf, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool dccp_mt_check(const struct xt_mtchk_param *par) { - const struct xt_dccp_info *info = matchinfo; + const struct xt_dccp_info *info = par->matchinfo; return !(info->flags & ~XT_DCCP_VALID_FLAGS) && !(info->invflags & ~XT_DCCP_VALID_FLAGS) @@ -138,7 +133,7 @@ dccp_mt_check(const char *tablename, const void *inf, static struct xt_match dccp_mt_reg[] __read_mostly = { { .name = "dccp", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = dccp_mt_check, .match = dccp_mt, .matchsize = sizeof(struct xt_dccp_info), @@ -147,7 +142,7 @@ static struct xt_match dccp_mt_reg[] __read_mostly = { }, { .name = "dccp", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = dccp_mt_check, .match = dccp_mt, .matchsize = sizeof(struct xt_dccp_info), diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 26f4aab9c429..c3f8085460d7 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -26,61 +26,48 @@ MODULE_ALIAS("ipt_tos"); MODULE_ALIAS("ip6t_tos"); static bool -dscp_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +dscp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_dscp_info *info = matchinfo; + const struct xt_dscp_info *info = par->matchinfo; u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT; return (dscp == info->dscp) ^ !!info->invert; } static bool -dscp_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_dscp_info *info = matchinfo; + const struct xt_dscp_info *info = par->matchinfo; u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT; return (dscp == info->dscp) ^ !!info->invert; } -static bool -dscp_mt_check(const char *tablename, const void *info, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool dscp_mt_check(const struct xt_mtchk_param *par) { - const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp; + const struct xt_dscp_info *info = par->matchinfo; - if (dscp > XT_DSCP_MAX) { - printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp); + if (info->dscp > XT_DSCP_MAX) { + printk(KERN_ERR "xt_dscp: dscp %x out of range\n", info->dscp); return false; } return true; } -static bool tos_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, const void *matchinfo, - int offset, unsigned int protoff, bool *hotdrop) +static bool +tos_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_tos_info *info = matchinfo; + const struct ipt_tos_info *info = par->matchinfo; return (ip_hdr(skb)->tos == info->tos) ^ info->invert; } -static bool tos_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_tos_match_info *info = matchinfo; + const struct xt_tos_match_info *info = par->matchinfo; - if (match->family == AF_INET) + if (par->match->family == NFPROTO_IPV4) return ((ip_hdr(skb)->tos & info->tos_mask) == info->tos_value) ^ !!info->invert; else @@ -91,7 +78,7 @@ static bool tos_mt(const struct sk_buff *skb, const struct net_device *in, static struct xt_match dscp_mt_reg[] __read_mostly = { { .name = "dscp", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = dscp_mt_check, .match = dscp_mt, .matchsize = sizeof(struct xt_dscp_info), @@ -99,7 +86,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = { }, { .name = "dscp", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = dscp_mt_check, .match = dscp_mt6, .matchsize = sizeof(struct xt_dscp_info), @@ -108,7 +95,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = { { .name = "tos", .revision = 0, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = tos_mt_v0, .matchsize = sizeof(struct ipt_tos_info), .me = THIS_MODULE, @@ -116,7 +103,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = { { .name = "tos", .revision = 1, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = tos_mt, .matchsize = sizeof(struct xt_tos_match_info), .me = THIS_MODULE, @@ -124,7 +111,7 @@ static struct xt_match dscp_mt_reg[] __read_mostly = { { .name = "tos", .revision = 1, - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = tos_mt, .matchsize = sizeof(struct xt_tos_match_info), .me = THIS_MODULE, diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c index a133eb9b23e1..609439967c2c 100644 --- a/net/netfilter/xt_esp.c +++ b/net/netfilter/xt_esp.c @@ -42,26 +42,23 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) return r; } -static bool -esp_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { const struct ip_esp_hdr *eh; struct ip_esp_hdr _esp; - const struct xt_esp *espinfo = matchinfo; + const struct xt_esp *espinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - eh = skb_header_pointer(skb, protoff, sizeof(_esp), &_esp); + eh = skb_header_pointer(skb, par->thoff, sizeof(_esp), &_esp); if (eh == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("Dropping evil ESP tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -69,13 +66,9 @@ esp_mt(const struct sk_buff *skb, const struct net_device *in, !!(espinfo->invflags & XT_ESP_INV_SPI)); } -/* Called when user tries to insert an entry of this type. */ -static bool -esp_mt_check(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool esp_mt_check(const struct xt_mtchk_param *par) { - const struct xt_esp *espinfo = matchinfo; + const struct xt_esp *espinfo = par->matchinfo; if (espinfo->invflags & ~XT_ESP_INV_MASK) { duprintf("xt_esp: unknown flags %X\n", espinfo->invflags); @@ -88,7 +81,7 @@ esp_mt_check(const char *tablename, const void *ip_void, static struct xt_match esp_mt_reg[] __read_mostly = { { .name = "esp", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = esp_mt_check, .match = esp_mt, .matchsize = sizeof(struct xt_esp), @@ -97,7 +90,7 @@ static struct xt_match esp_mt_reg[] __read_mostly = { }, { .name = "esp", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = esp_mt_check, .match = esp_mt, .matchsize = sizeof(struct xt_esp), diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index d9418a267812..6fc4292d46e6 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -80,7 +80,7 @@ struct dsthash_ent { struct xt_hashlimit_htable { struct hlist_node node; /* global list of all htables */ atomic_t use; - int family; + u_int8_t family; struct hashlimit_cfg1 cfg; /* config */ @@ -185,7 +185,7 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) } static void htable_gc(unsigned long htlong); -static int htable_create_v0(struct xt_hashlimit_info *minfo, int family) +static int htable_create_v0(struct xt_hashlimit_info *minfo, u_int8_t family) { struct xt_hashlimit_htable *hinfo; unsigned int size; @@ -218,7 +218,7 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, int family) hinfo->cfg.gc_interval = minfo->cfg.gc_interval; hinfo->cfg.expire = minfo->cfg.expire; - if (family == AF_INET) + if (family == NFPROTO_IPV4) hinfo->cfg.srcmask = hinfo->cfg.dstmask = 32; else hinfo->cfg.srcmask = hinfo->cfg.dstmask = 128; @@ -237,11 +237,10 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, int family) hinfo->family = family; hinfo->rnd_initialized = 0; spin_lock_init(&hinfo->lock); - hinfo->pde = - proc_create_data(minfo->name, 0, - family == AF_INET ? hashlimit_procdir4 : - hashlimit_procdir6, - &dl_file_ops, hinfo); + hinfo->pde = proc_create_data(minfo->name, 0, + (family == NFPROTO_IPV4) ? + hashlimit_procdir4 : hashlimit_procdir6, + &dl_file_ops, hinfo); if (!hinfo->pde) { vfree(hinfo); return -1; @@ -258,8 +257,7 @@ static int htable_create_v0(struct xt_hashlimit_info *minfo, int family) return 0; } -static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, - unsigned int family) +static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, u_int8_t family) { struct xt_hashlimit_htable *hinfo; unsigned int size; @@ -301,11 +299,10 @@ static int htable_create(struct xt_hashlimit_mtinfo1 *minfo, hinfo->rnd_initialized = 0; spin_lock_init(&hinfo->lock); - hinfo->pde = - proc_create_data(minfo->name, 0, - family == AF_INET ? hashlimit_procdir4 : - hashlimit_procdir6, - &dl_file_ops, hinfo); + hinfo->pde = proc_create_data(minfo->name, 0, + (family == NFPROTO_IPV4) ? + hashlimit_procdir4 : hashlimit_procdir6, + &dl_file_ops, hinfo); if (hinfo->pde == NULL) { vfree(hinfo); return -1; @@ -371,14 +368,14 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo) /* remove proc entry */ remove_proc_entry(hinfo->pde->name, - hinfo->family == AF_INET ? hashlimit_procdir4 : + hinfo->family == NFPROTO_IPV4 ? hashlimit_procdir4 : hashlimit_procdir6); htable_selective_cleanup(hinfo, select_all); vfree(hinfo); } static struct xt_hashlimit_htable *htable_find_get(const char *name, - int family) + u_int8_t family) { struct xt_hashlimit_htable *hinfo; struct hlist_node *pos; @@ -502,7 +499,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, memset(dst, 0, sizeof(*dst)); switch (hinfo->family) { - case AF_INET: + case NFPROTO_IPV4: if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) dst->ip.dst = maskl(ip_hdr(skb)->daddr, hinfo->cfg.dstmask); @@ -516,7 +513,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, nexthdr = ip_hdr(skb)->protocol; break; #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) - case AF_INET6: + case NFPROTO_IPV6: if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) { memcpy(&dst->ip6.dst, &ipv6_hdr(skb)->daddr, sizeof(dst->ip6.dst)); @@ -566,19 +563,16 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, } static bool -hashlimit_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +hashlimit_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { const struct xt_hashlimit_info *r = - ((const struct xt_hashlimit_info *)matchinfo)->u.master; + ((const struct xt_hashlimit_info *)par->matchinfo)->u.master; struct xt_hashlimit_htable *hinfo = r->hinfo; unsigned long now = jiffies; struct dsthash_ent *dh; struct dsthash_dst dst; - if (hashlimit_init_dst(hinfo, &dst, skb, protoff) < 0) + if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) goto hotdrop; spin_lock_bh(&hinfo->lock); @@ -616,23 +610,20 @@ hashlimit_mt_v0(const struct sk_buff *skb, const struct net_device *in, return false; hotdrop: - *hotdrop = true; + *par->hotdrop = true; return false; } static bool -hashlimit_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_hashlimit_mtinfo1 *info = matchinfo; + const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; struct xt_hashlimit_htable *hinfo = info->hinfo; unsigned long now = jiffies; struct dsthash_ent *dh; struct dsthash_dst dst; - if (hashlimit_init_dst(hinfo, &dst, skb, protoff) < 0) + if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) goto hotdrop; spin_lock_bh(&hinfo->lock); @@ -669,16 +660,13 @@ hashlimit_mt(const struct sk_buff *skb, const struct net_device *in, return info->cfg.mode & XT_HASHLIMIT_INVERT; hotdrop: - *hotdrop = true; + *par->hotdrop = true; return false; } -static bool -hashlimit_mt_check_v0(const char *tablename, const void *inf, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) { - struct xt_hashlimit_info *r = matchinfo; + struct xt_hashlimit_info *r = par->matchinfo; /* Check for overflow. */ if (r->cfg.burst == 0 || @@ -707,8 +695,8 @@ hashlimit_mt_check_v0(const char *tablename, const void *inf, * the list of htable's in htable_create(), since then we would * create duplicate proc files. -HW */ mutex_lock(&hlimit_mutex); - r->hinfo = htable_find_get(r->name, match->family); - if (!r->hinfo && htable_create_v0(r, match->family) != 0) { + r->hinfo = htable_find_get(r->name, par->match->family); + if (!r->hinfo && htable_create_v0(r, par->match->family) != 0) { mutex_unlock(&hlimit_mutex); return false; } @@ -719,12 +707,9 @@ hashlimit_mt_check_v0(const char *tablename, const void *inf, return true; } -static bool -hashlimit_mt_check(const char *tablename, const void *inf, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool hashlimit_mt_check(const struct xt_mtchk_param *par) { - struct xt_hashlimit_mtinfo1 *info = matchinfo; + struct xt_hashlimit_mtinfo1 *info = par->matchinfo; /* Check for overflow. */ if (info->cfg.burst == 0 || @@ -738,7 +723,7 @@ hashlimit_mt_check(const char *tablename, const void *inf, return false; if (info->name[sizeof(info->name)-1] != '\0') return false; - if (match->family == AF_INET) { + if (par->match->family == NFPROTO_IPV4) { if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32) return false; } else { @@ -753,8 +738,8 @@ hashlimit_mt_check(const char *tablename, const void *inf, * the list of htable's in htable_create(), since then we would * create duplicate proc files. -HW */ mutex_lock(&hlimit_mutex); - info->hinfo = htable_find_get(info->name, match->family); - if (!info->hinfo && htable_create(info, match->family) != 0) { + info->hinfo = htable_find_get(info->name, par->match->family); + if (!info->hinfo && htable_create(info, par->match->family) != 0) { mutex_unlock(&hlimit_mutex); return false; } @@ -763,17 +748,16 @@ hashlimit_mt_check(const char *tablename, const void *inf, } static void -hashlimit_mt_destroy_v0(const struct xt_match *match, void *matchinfo) +hashlimit_mt_destroy_v0(const struct xt_mtdtor_param *par) { - const struct xt_hashlimit_info *r = matchinfo; + const struct xt_hashlimit_info *r = par->matchinfo; htable_put(r->hinfo); } -static void -hashlimit_mt_destroy(const struct xt_match *match, void *matchinfo) +static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par) { - const struct xt_hashlimit_mtinfo1 *info = matchinfo; + const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; htable_put(info->hinfo); } @@ -806,7 +790,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { { .name = "hashlimit", .revision = 0, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = hashlimit_mt_v0, .matchsize = sizeof(struct xt_hashlimit_info), #ifdef CONFIG_COMPAT @@ -821,7 +805,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { { .name = "hashlimit", .revision = 1, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = hashlimit_mt, .matchsize = sizeof(struct xt_hashlimit_mtinfo1), .checkentry = hashlimit_mt_check, @@ -831,7 +815,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) { .name = "hashlimit", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = hashlimit_mt_v0, .matchsize = sizeof(struct xt_hashlimit_info), #ifdef CONFIG_COMPAT @@ -846,7 +830,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { { .name = "hashlimit", .revision = 1, - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = hashlimit_mt, .matchsize = sizeof(struct xt_hashlimit_mtinfo1), .checkentry = hashlimit_mt_check, @@ -901,14 +885,14 @@ static void dl_seq_stop(struct seq_file *s, void *v) spin_unlock_bh(&htable->lock); } -static int dl_seq_real_show(struct dsthash_ent *ent, int family, +static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { /* recalculate to show accurate numbers */ rateinfo_recalc(ent, jiffies); switch (family) { - case AF_INET: + case NFPROTO_IPV4: return seq_printf(s, "%ld %u.%u.%u.%u:%u->" "%u.%u.%u.%u:%u %u %u %u\n", (long)(ent->expires - jiffies)/HZ, @@ -919,7 +903,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, int family, ent->rateinfo.credit, ent->rateinfo.credit_cap, ent->rateinfo.cost); #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) - case AF_INET6: + case NFPROTO_IPV6: return seq_printf(s, "%ld " NIP6_FMT ":%u->" NIP6_FMT ":%u %u %u %u\n", (long)(ent->expires - jiffies)/HZ, diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index dada2905d66e..64fc7f277221 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -24,12 +24,9 @@ MODULE_ALIAS("ip6t_helper"); static bool -helper_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +helper_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_helper_info *info = matchinfo; + const struct xt_helper_info *info = par->matchinfo; const struct nf_conn *ct; const struct nf_conn_help *master_help; const struct nf_conntrack_helper *helper; @@ -57,56 +54,43 @@ helper_mt(const struct sk_buff *skb, const struct net_device *in, return ret; } -static bool -helper_mt_check(const char *tablename, const void *inf, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool helper_mt_check(const struct xt_mtchk_param *par) { - struct xt_helper_info *info = matchinfo; + struct xt_helper_info *info = par->matchinfo; - if (nf_ct_l3proto_try_module_get(match->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", match->family); + "proto=%u\n", par->family); return false; } info->name[29] = '\0'; return true; } -static void helper_mt_destroy(const struct xt_match *match, void *matchinfo) +static void helper_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(match->family); + nf_ct_l3proto_module_put(par->family); } -static struct xt_match helper_mt_reg[] __read_mostly = { - { - .name = "helper", - .family = AF_INET, - .checkentry = helper_mt_check, - .match = helper_mt, - .destroy = helper_mt_destroy, - .matchsize = sizeof(struct xt_helper_info), - .me = THIS_MODULE, - }, - { - .name = "helper", - .family = AF_INET6, - .checkentry = helper_mt_check, - .match = helper_mt, - .destroy = helper_mt_destroy, - .matchsize = sizeof(struct xt_helper_info), - .me = THIS_MODULE, - }, +static struct xt_match helper_mt_reg __read_mostly = { + .name = "helper", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = helper_mt_check, + .match = helper_mt, + .destroy = helper_mt_destroy, + .matchsize = sizeof(struct xt_helper_info), + .me = THIS_MODULE, }; static int __init helper_mt_init(void) { - return xt_register_matches(helper_mt_reg, ARRAY_SIZE(helper_mt_reg)); + return xt_register_match(&helper_mt_reg); } static void __exit helper_mt_exit(void) { - xt_unregister_matches(helper_mt_reg, ARRAY_SIZE(helper_mt_reg)); + xt_unregister_match(&helper_mt_reg); } module_init(helper_mt_init); diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c index c63e9333c755..7ac54eab0b00 100644 --- a/net/netfilter/xt_iprange.c +++ b/net/netfilter/xt_iprange.c @@ -17,12 +17,9 @@ #include <linux/netfilter_ipv4/ipt_iprange.h> static bool -iprange_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +iprange_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_iprange_info *info = matchinfo; + const struct ipt_iprange_info *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); if (info->flags & IPRANGE_SRC) { @@ -55,19 +52,16 @@ iprange_mt_v0(const struct sk_buff *skb, const struct net_device *in, } static bool -iprange_mt4(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_iprange_mtinfo *info = matchinfo; + const struct xt_iprange_mtinfo *info = par->matchinfo; const struct iphdr *iph = ip_hdr(skb); bool m; if (info->flags & IPRANGE_SRC) { m = ntohl(iph->saddr) < ntohl(info->src_min.ip); m |= ntohl(iph->saddr) > ntohl(info->src_max.ip); - m ^= info->flags & IPRANGE_SRC_INV; + m ^= !!(info->flags & IPRANGE_SRC_INV); if (m) { pr_debug("src IP " NIPQUAD_FMT " NOT in range %s" NIPQUAD_FMT "-" NIPQUAD_FMT "\n", @@ -81,7 +75,7 @@ iprange_mt4(const struct sk_buff *skb, const struct net_device *in, if (info->flags & IPRANGE_DST) { m = ntohl(iph->daddr) < ntohl(info->dst_min.ip); m |= ntohl(iph->daddr) > ntohl(info->dst_max.ip); - m ^= info->flags & IPRANGE_DST_INV; + m ^= !!(info->flags & IPRANGE_DST_INV); if (m) { pr_debug("dst IP " NIPQUAD_FMT " NOT in range %s" NIPQUAD_FMT "-" NIPQUAD_FMT "\n", @@ -111,26 +105,23 @@ iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b) } static bool -iprange_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_iprange_mtinfo *info = matchinfo; + const struct xt_iprange_mtinfo *info = par->matchinfo; const struct ipv6hdr *iph = ipv6_hdr(skb); bool m; if (info->flags & IPRANGE_SRC) { m = iprange_ipv6_sub(&iph->saddr, &info->src_min.in6) < 0; m |= iprange_ipv6_sub(&iph->saddr, &info->src_max.in6) > 0; - m ^= info->flags & IPRANGE_SRC_INV; + m ^= !!(info->flags & IPRANGE_SRC_INV); if (m) return false; } if (info->flags & IPRANGE_DST) { m = iprange_ipv6_sub(&iph->daddr, &info->dst_min.in6) < 0; m |= iprange_ipv6_sub(&iph->daddr, &info->dst_max.in6) > 0; - m ^= info->flags & IPRANGE_DST_INV; + m ^= !!(info->flags & IPRANGE_DST_INV); if (m) return false; } @@ -141,7 +132,7 @@ static struct xt_match iprange_mt_reg[] __read_mostly = { { .name = "iprange", .revision = 0, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = iprange_mt_v0, .matchsize = sizeof(struct ipt_iprange_info), .me = THIS_MODULE, @@ -149,7 +140,7 @@ static struct xt_match iprange_mt_reg[] __read_mostly = { { .name = "iprange", .revision = 1, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = iprange_mt4, .matchsize = sizeof(struct xt_iprange_mtinfo), .me = THIS_MODULE, @@ -157,7 +148,7 @@ static struct xt_match iprange_mt_reg[] __read_mostly = { { .name = "iprange", .revision = 1, - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = iprange_mt6, .matchsize = sizeof(struct xt_iprange_mtinfo), .me = THIS_MODULE, diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c index b8640f972950..c4871ca6c86d 100644 --- a/net/netfilter/xt_length.c +++ b/net/netfilter/xt_length.c @@ -21,24 +21,18 @@ MODULE_ALIAS("ipt_length"); MODULE_ALIAS("ip6t_length"); static bool -length_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +length_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_length_info *info = matchinfo; + const struct xt_length_info *info = par->matchinfo; u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len); return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; } static bool -length_mt6(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +length_mt6(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_length_info *info = matchinfo; + const struct xt_length_info *info = par->matchinfo; const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr); @@ -48,14 +42,14 @@ length_mt6(const struct sk_buff *skb, const struct net_device *in, static struct xt_match length_mt_reg[] __read_mostly = { { .name = "length", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = length_mt, .matchsize = sizeof(struct xt_length_info), .me = THIS_MODULE, }, { .name = "length", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = length_mt6, .matchsize = sizeof(struct xt_length_info), .me = THIS_MODULE, diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index aad9ab8d2046..c908d69a5595 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -58,13 +58,10 @@ static DEFINE_SPINLOCK(limit_lock); #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) static bool -limit_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +limit_mt(const struct sk_buff *skb, const struct xt_match_param *par) { struct xt_rateinfo *r = - ((const struct xt_rateinfo *)matchinfo)->master; + ((const struct xt_rateinfo *)par->matchinfo)->master; unsigned long now = jiffies; spin_lock_bh(&limit_lock); @@ -95,12 +92,9 @@ user2credits(u_int32_t user) return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE; } -static bool -limit_mt_check(const char *tablename, const void *inf, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool limit_mt_check(const struct xt_mtchk_param *par) { - struct xt_rateinfo *r = matchinfo; + struct xt_rateinfo *r = par->matchinfo; /* Check for overflow. */ if (r->burst == 0 @@ -167,43 +161,29 @@ static int limit_mt_compat_to_user(void __user *dst, void *src) } #endif /* CONFIG_COMPAT */ -static struct xt_match limit_mt_reg[] __read_mostly = { - { - .name = "limit", - .family = AF_INET, - .checkentry = limit_mt_check, - .match = limit_mt, - .matchsize = sizeof(struct xt_rateinfo), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_rateinfo), - .compat_from_user = limit_mt_compat_from_user, - .compat_to_user = limit_mt_compat_to_user, -#endif - .me = THIS_MODULE, - }, - { - .name = "limit", - .family = AF_INET6, - .checkentry = limit_mt_check, - .match = limit_mt, - .matchsize = sizeof(struct xt_rateinfo), +static struct xt_match limit_mt_reg __read_mostly = { + .name = "limit", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = limit_mt, + .checkentry = limit_mt_check, + .matchsize = sizeof(struct xt_rateinfo), #ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_rateinfo), - .compat_from_user = limit_mt_compat_from_user, - .compat_to_user = limit_mt_compat_to_user, + .compatsize = sizeof(struct compat_xt_rateinfo), + .compat_from_user = limit_mt_compat_from_user, + .compat_to_user = limit_mt_compat_to_user, #endif - .me = THIS_MODULE, - }, + .me = THIS_MODULE, }; static int __init limit_mt_init(void) { - return xt_register_matches(limit_mt_reg, ARRAY_SIZE(limit_mt_reg)); + return xt_register_match(&limit_mt_reg); } static void __exit limit_mt_exit(void) { - xt_unregister_matches(limit_mt_reg, ARRAY_SIZE(limit_mt_reg)); + xt_unregister_match(&limit_mt_reg); } module_init(limit_mt_init); diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index b3e96a0ec176..c2007116ce5b 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -24,12 +24,9 @@ MODULE_DESCRIPTION("Xtables: MAC address match"); MODULE_ALIAS("ipt_mac"); MODULE_ALIAS("ip6t_mac"); -static bool -mac_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool mac_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_mac_info *info = matchinfo; + const struct xt_mac_info *info = par->matchinfo; /* Is mac pointer valid? */ return skb_mac_header(skb) >= skb->head && @@ -39,37 +36,25 @@ mac_mt(const struct sk_buff *skb, const struct net_device *in, ^ info->invert); } -static struct xt_match mac_mt_reg[] __read_mostly = { - { - .name = "mac", - .family = AF_INET, - .match = mac_mt, - .matchsize = sizeof(struct xt_mac_info), - .hooks = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_LOCAL_IN) | - (1 << NF_INET_FORWARD), - .me = THIS_MODULE, - }, - { - .name = "mac", - .family = AF_INET6, - .match = mac_mt, - .matchsize = sizeof(struct xt_mac_info), - .hooks = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_LOCAL_IN) | - (1 << NF_INET_FORWARD), - .me = THIS_MODULE, - }, +static struct xt_match mac_mt_reg __read_mostly = { + .name = "mac", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = mac_mt, + .matchsize = sizeof(struct xt_mac_info), + .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_FORWARD), + .me = THIS_MODULE, }; static int __init mac_mt_init(void) { - return xt_register_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg)); + return xt_register_match(&mac_mt_reg); } static void __exit mac_mt_exit(void) { - xt_unregister_matches(mac_mt_reg, ARRAY_SIZE(mac_mt_reg)); + xt_unregister_match(&mac_mt_reg); } module_init(mac_mt_init); diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c index 9f78f6120fbd..10b9e34bbc5b 100644 --- a/net/netfilter/xt_mark.c +++ b/net/netfilter/xt_mark.c @@ -23,32 +23,24 @@ MODULE_ALIAS("ipt_mark"); MODULE_ALIAS("ip6t_mark"); static bool -mark_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +mark_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_mark_info *info = matchinfo; + const struct xt_mark_info *info = par->matchinfo; return ((skb->mark & info->mask) == info->mark) ^ info->invert; } static bool -mark_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +mark_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_mark_mtinfo1 *info = matchinfo; + const struct xt_mark_mtinfo1 *info = par->matchinfo; return ((skb->mark & info->mask) == info->mark) ^ info->invert; } -static bool -mark_mt_check_v0(const char *tablename, const void *entry, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool mark_mt_check_v0(const struct xt_mtchk_param *par) { - const struct xt_mark_info *minfo = matchinfo; + const struct xt_mark_info *minfo = par->matchinfo; if (minfo->mark > 0xffffffff || minfo->mask > 0xffffffff) { printk(KERN_WARNING "mark: only supports 32bit mark\n"); @@ -92,7 +84,7 @@ static struct xt_match mark_mt_reg[] __read_mostly = { { .name = "mark", .revision = 0, - .family = AF_INET, + .family = NFPROTO_UNSPEC, .checkentry = mark_mt_check_v0, .match = mark_mt_v0, .matchsize = sizeof(struct xt_mark_info), @@ -104,31 +96,9 @@ static struct xt_match mark_mt_reg[] __read_mostly = { .me = THIS_MODULE, }, { - .name = "mark", - .revision = 0, - .family = AF_INET6, - .checkentry = mark_mt_check_v0, - .match = mark_mt_v0, - .matchsize = sizeof(struct xt_mark_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_mark_info), - .compat_from_user = mark_mt_compat_from_user_v0, - .compat_to_user = mark_mt_compat_to_user_v0, -#endif - .me = THIS_MODULE, - }, - { - .name = "mark", - .revision = 1, - .family = AF_INET, - .match = mark_mt, - .matchsize = sizeof(struct xt_mark_mtinfo1), - .me = THIS_MODULE, - }, - { .name = "mark", .revision = 1, - .family = AF_INET6, + .family = NFPROTO_UNSPEC, .match = mark_mt, .matchsize = sizeof(struct xt_mark_mtinfo1), .me = THIS_MODULE, diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c index fd88c489b70e..d06bb2dd3900 100644 --- a/net/netfilter/xt_multiport.c +++ b/net/netfilter/xt_multiport.c @@ -95,25 +95,22 @@ ports_match_v1(const struct xt_multiport_v1 *minfo, } static bool -multiport_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +multiport_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { const __be16 *pptr; __be16 _ports[2]; - const struct xt_multiport *multiinfo = matchinfo; + const struct xt_multiport *multiinfo = par->matchinfo; - if (offset) + if (par->fragoff != 0) return false; - pptr = skb_header_pointer(skb, protoff, sizeof(_ports), _ports); + pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports); if (pptr == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -122,25 +119,22 @@ multiport_mt_v0(const struct sk_buff *skb, const struct net_device *in, } static bool -multiport_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par) { const __be16 *pptr; __be16 _ports[2]; - const struct xt_multiport_v1 *multiinfo = matchinfo; + const struct xt_multiport_v1 *multiinfo = par->matchinfo; - if (offset) + if (par->fragoff != 0) return false; - pptr = skb_header_pointer(skb, protoff, sizeof(_ports), _ports); + pptr = skb_header_pointer(skb, par->thoff, sizeof(_ports), _ports); if (pptr == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("xt_multiport: Dropping evil offset=0 tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -164,50 +158,37 @@ check(u_int16_t proto, && count <= XT_MULTI_PORTS; } -/* Called when user tries to insert an entry of this type. */ -static bool -multiport_mt_check_v0(const char *tablename, const void *info, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool multiport_mt_check_v0(const struct xt_mtchk_param *par) { - const struct ipt_ip *ip = info; - const struct xt_multiport *multiinfo = matchinfo; + const struct ipt_ip *ip = par->entryinfo; + const struct xt_multiport *multiinfo = par->matchinfo; return check(ip->proto, ip->invflags, multiinfo->flags, multiinfo->count); } -static bool -multiport_mt_check(const char *tablename, const void *info, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool multiport_mt_check(const struct xt_mtchk_param *par) { - const struct ipt_ip *ip = info; - const struct xt_multiport_v1 *multiinfo = matchinfo; + const struct ipt_ip *ip = par->entryinfo; + const struct xt_multiport_v1 *multiinfo = par->matchinfo; return check(ip->proto, ip->invflags, multiinfo->flags, multiinfo->count); } -static bool -multiport_mt6_check_v0(const char *tablename, const void *info, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool multiport_mt6_check_v0(const struct xt_mtchk_param *par) { - const struct ip6t_ip6 *ip = info; - const struct xt_multiport *multiinfo = matchinfo; + const struct ip6t_ip6 *ip = par->entryinfo; + const struct xt_multiport *multiinfo = par->matchinfo; return check(ip->proto, ip->invflags, multiinfo->flags, multiinfo->count); } -static bool -multiport_mt6_check(const char *tablename, const void *info, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool multiport_mt6_check(const struct xt_mtchk_param *par) { - const struct ip6t_ip6 *ip = info; - const struct xt_multiport_v1 *multiinfo = matchinfo; + const struct ip6t_ip6 *ip = par->entryinfo; + const struct xt_multiport_v1 *multiinfo = par->matchinfo; return check(ip->proto, ip->invflags, multiinfo->flags, multiinfo->count); @@ -216,7 +197,7 @@ multiport_mt6_check(const char *tablename, const void *info, static struct xt_match multiport_mt_reg[] __read_mostly = { { .name = "multiport", - .family = AF_INET, + .family = NFPROTO_IPV4, .revision = 0, .checkentry = multiport_mt_check_v0, .match = multiport_mt_v0, @@ -225,7 +206,7 @@ static struct xt_match multiport_mt_reg[] __read_mostly = { }, { .name = "multiport", - .family = AF_INET, + .family = NFPROTO_IPV4, .revision = 1, .checkentry = multiport_mt_check, .match = multiport_mt, @@ -234,7 +215,7 @@ static struct xt_match multiport_mt_reg[] __read_mostly = { }, { .name = "multiport", - .family = AF_INET6, + .family = NFPROTO_IPV6, .revision = 0, .checkentry = multiport_mt6_check_v0, .match = multiport_mt_v0, @@ -243,7 +224,7 @@ static struct xt_match multiport_mt_reg[] __read_mostly = { }, { .name = "multiport", - .family = AF_INET6, + .family = NFPROTO_IPV6, .revision = 1, .checkentry = multiport_mt6_check, .match = multiport_mt, diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 9059c16144c3..f19ebd9b78f5 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -21,12 +21,9 @@ #include <linux/netfilter_ipv6/ip6t_owner.h> static bool -owner_mt_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +owner_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_owner_info *info = matchinfo; + const struct ipt_owner_info *info = par->matchinfo; const struct file *filp; if (skb->sk == NULL || skb->sk->sk_socket == NULL) @@ -50,12 +47,9 @@ owner_mt_v0(const struct sk_buff *skb, const struct net_device *in, } static bool -owner_mt6_v0(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +owner_mt6_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ip6t_owner_info *info = matchinfo; + const struct ip6t_owner_info *info = par->matchinfo; const struct file *filp; if (skb->sk == NULL || skb->sk->sk_socket == NULL) @@ -79,12 +73,9 @@ owner_mt6_v0(const struct sk_buff *skb, const struct net_device *in, } static bool -owner_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +owner_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_owner_match_info *info = matchinfo; + const struct xt_owner_match_info *info = par->matchinfo; const struct file *filp; if (skb->sk == NULL || skb->sk->sk_socket == NULL) @@ -116,12 +107,9 @@ owner_mt(const struct sk_buff *skb, const struct net_device *in, return true; } -static bool -owner_mt_check_v0(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool owner_mt_check_v0(const struct xt_mtchk_param *par) { - const struct ipt_owner_info *info = matchinfo; + const struct ipt_owner_info *info = par->matchinfo; if (info->match & (IPT_OWNER_PID | IPT_OWNER_SID | IPT_OWNER_COMM)) { printk(KERN_WARNING KBUILD_MODNAME @@ -133,12 +121,9 @@ owner_mt_check_v0(const char *tablename, const void *ip, return true; } -static bool -owner_mt6_check_v0(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool owner_mt6_check_v0(const struct xt_mtchk_param *par) { - const struct ip6t_owner_info *info = matchinfo; + const struct ip6t_owner_info *info = par->matchinfo; if (info->match & (IP6T_OWNER_PID | IP6T_OWNER_SID)) { printk(KERN_WARNING KBUILD_MODNAME @@ -153,7 +138,7 @@ static struct xt_match owner_mt_reg[] __read_mostly = { { .name = "owner", .revision = 0, - .family = AF_INET, + .family = NFPROTO_IPV4, .match = owner_mt_v0, .matchsize = sizeof(struct ipt_owner_info), .checkentry = owner_mt_check_v0, @@ -164,7 +149,7 @@ static struct xt_match owner_mt_reg[] __read_mostly = { { .name = "owner", .revision = 0, - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = owner_mt6_v0, .matchsize = sizeof(struct ip6t_owner_info), .checkentry = owner_mt6_check_v0, @@ -175,17 +160,7 @@ static struct xt_match owner_mt_reg[] __read_mostly = { { .name = "owner", .revision = 1, - .family = AF_INET, - .match = owner_mt, - .matchsize = sizeof(struct xt_owner_match_info), - .hooks = (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_POST_ROUTING), - .me = THIS_MODULE, - }, - { - .name = "owner", - .revision = 1, - .family = AF_INET6, + .family = NFPROTO_UNSPEC, .match = owner_mt, .matchsize = sizeof(struct xt_owner_match_info), .hooks = (1 << NF_INET_LOCAL_OUT) | diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 4ec1094bda92..1bcdfc12cf59 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -21,14 +21,11 @@ MODULE_ALIAS("ipt_physdev"); MODULE_ALIAS("ip6t_physdev"); static bool -physdev_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) { int i; static const char nulldevname[IFNAMSIZ]; - const struct xt_physdev_info *info = matchinfo; + const struct xt_physdev_info *info = par->matchinfo; bool ret; const char *indev, *outdev; const struct nf_bridge_info *nf_bridge; @@ -94,12 +91,9 @@ match_outdev: return ret ^ !(info->invert & XT_PHYSDEV_OP_OUT); } -static bool -physdev_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool physdev_mt_check(const struct xt_mtchk_param *par) { - const struct xt_physdev_info *info = matchinfo; + const struct xt_physdev_info *info = par->matchinfo; if (!(info->bitmask & XT_PHYSDEV_OP_MASK) || info->bitmask & ~XT_PHYSDEV_OP_MASK) @@ -107,44 +101,35 @@ physdev_mt_check(const char *tablename, const void *ip, if (info->bitmask & XT_PHYSDEV_OP_OUT && (!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) || info->invert & XT_PHYSDEV_OP_BRIDGED) && - hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | - (1 << NF_INET_POST_ROUTING))) { + par->hook_mask & ((1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) { printk(KERN_WARNING "physdev match: using --physdev-out in the " "OUTPUT, FORWARD and POSTROUTING chains for non-bridged " "traffic is not supported anymore.\n"); - if (hook_mask & (1 << NF_INET_LOCAL_OUT)) + if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) return false; } return true; } -static struct xt_match physdev_mt_reg[] __read_mostly = { - { - .name = "physdev", - .family = AF_INET, - .checkentry = physdev_mt_check, - .match = physdev_mt, - .matchsize = sizeof(struct xt_physdev_info), - .me = THIS_MODULE, - }, - { - .name = "physdev", - .family = AF_INET6, - .checkentry = physdev_mt_check, - .match = physdev_mt, - .matchsize = sizeof(struct xt_physdev_info), - .me = THIS_MODULE, - }, +static struct xt_match physdev_mt_reg __read_mostly = { + .name = "physdev", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = physdev_mt_check, + .match = physdev_mt, + .matchsize = sizeof(struct xt_physdev_info), + .me = THIS_MODULE, }; static int __init physdev_mt_init(void) { - return xt_register_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg)); + return xt_register_match(&physdev_mt_reg); } static void __exit physdev_mt_exit(void) { - xt_unregister_matches(physdev_mt_reg, ARRAY_SIZE(physdev_mt_reg)); + xt_unregister_match(&physdev_mt_reg); } module_init(physdev_mt_init); diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c index 7936f7e23254..69da1d3a1d85 100644 --- a/net/netfilter/xt_pkttype.c +++ b/net/netfilter/xt_pkttype.c @@ -23,20 +23,17 @@ MODULE_ALIAS("ipt_pkttype"); MODULE_ALIAS("ip6t_pkttype"); static bool -pkttype_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_pkttype_info *info = matchinfo; + const struct xt_pkttype_info *info = par->matchinfo; u_int8_t type; if (skb->pkt_type != PACKET_LOOPBACK) type = skb->pkt_type; - else if (match->family == AF_INET && + else if (par->family == NFPROTO_IPV4 && ipv4_is_multicast(ip_hdr(skb)->daddr)) type = PACKET_MULTICAST; - else if (match->family == AF_INET6 && + else if (par->family == NFPROTO_IPV6 && ipv6_hdr(skb)->daddr.s6_addr[0] == 0xFF) type = PACKET_MULTICAST; else @@ -45,31 +42,23 @@ pkttype_mt(const struct sk_buff *skb, const struct net_device *in, return (type == info->pkttype) ^ info->invert; } -static struct xt_match pkttype_mt_reg[] __read_mostly = { - { - .name = "pkttype", - .family = AF_INET, - .match = pkttype_mt, - .matchsize = sizeof(struct xt_pkttype_info), - .me = THIS_MODULE, - }, - { - .name = "pkttype", - .family = AF_INET6, - .match = pkttype_mt, - .matchsize = sizeof(struct xt_pkttype_info), - .me = THIS_MODULE, - }, +static struct xt_match pkttype_mt_reg __read_mostly = { + .name = "pkttype", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = pkttype_mt, + .matchsize = sizeof(struct xt_pkttype_info), + .me = THIS_MODULE, }; static int __init pkttype_mt_init(void) { - return xt_register_matches(pkttype_mt_reg, ARRAY_SIZE(pkttype_mt_reg)); + return xt_register_match(&pkttype_mt_reg); } static void __exit pkttype_mt_exit(void) { - xt_unregister_matches(pkttype_mt_reg, ARRAY_SIZE(pkttype_mt_reg)); + xt_unregister_match(&pkttype_mt_reg); } module_init(pkttype_mt_init); diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index d351582b2a3d..328bd20ddd25 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -26,9 +26,9 @@ xt_addr_cmp(const union nf_inet_addr *a1, const union nf_inet_addr *m, const union nf_inet_addr *a2, unsigned short family) { switch (family) { - case AF_INET: + case NFPROTO_IPV4: return ((a1->ip ^ a2->ip) & m->ip) == 0; - case AF_INET6: + case NFPROTO_IPV6: return ipv6_masked_addr_cmp(&a1->in6, &m->in6, &a2->in6) == 0; } return false; @@ -110,18 +110,15 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info, } static bool -policy_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +policy_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_policy_info *info = matchinfo; + const struct xt_policy_info *info = par->matchinfo; int ret; if (info->flags & XT_POLICY_MATCH_IN) - ret = match_policy_in(skb, info, match->family); + ret = match_policy_in(skb, info, par->match->family); else - ret = match_policy_out(skb, info, match->family); + ret = match_policy_out(skb, info, par->match->family); if (ret < 0) ret = info->flags & XT_POLICY_MATCH_NONE ? true : false; @@ -131,26 +128,23 @@ policy_mt(const struct sk_buff *skb, const struct net_device *in, return ret; } -static bool -policy_mt_check(const char *tablename, const void *ip_void, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool policy_mt_check(const struct xt_mtchk_param *par) { - const struct xt_policy_info *info = matchinfo; + const struct xt_policy_info *info = par->matchinfo; if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) { printk(KERN_ERR "xt_policy: neither incoming nor " "outgoing policy selected\n"); return false; } - if (hook_mask & (1 << NF_INET_PRE_ROUTING | 1 << NF_INET_LOCAL_IN) - && info->flags & XT_POLICY_MATCH_OUT) { + if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) { printk(KERN_ERR "xt_policy: output policy not valid in " "PRE_ROUTING and INPUT\n"); return false; } - if (hook_mask & (1 << NF_INET_POST_ROUTING | 1 << NF_INET_LOCAL_OUT) - && info->flags & XT_POLICY_MATCH_IN) { + if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) { printk(KERN_ERR "xt_policy: input policy not valid in " "POST_ROUTING and OUTPUT\n"); return false; @@ -165,7 +159,7 @@ policy_mt_check(const char *tablename, const void *ip_void, static struct xt_match policy_mt_reg[] __read_mostly = { { .name = "policy", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = policy_mt_check, .match = policy_mt, .matchsize = sizeof(struct xt_policy_info), @@ -173,7 +167,7 @@ static struct xt_match policy_mt_reg[] __read_mostly = { }, { .name = "policy", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = policy_mt_check, .match = policy_mt, .matchsize = sizeof(struct xt_policy_info), diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index 3b021d0c522a..c84fce5e0f3e 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -18,13 +18,10 @@ MODULE_ALIAS("ip6t_quota"); static DEFINE_SPINLOCK(quota_lock); static bool -quota_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +quota_mt(const struct sk_buff *skb, const struct xt_match_param *par) { struct xt_quota_info *q = - ((const struct xt_quota_info *)matchinfo)->master; + ((const struct xt_quota_info *)par->matchinfo)->master; bool ret = q->flags & XT_QUOTA_INVERT; spin_lock_bh("a_lock); @@ -40,12 +37,9 @@ quota_mt(const struct sk_buff *skb, const struct net_device *in, return ret; } -static bool -quota_mt_check(const char *tablename, const void *entry, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool quota_mt_check(const struct xt_mtchk_param *par) { - struct xt_quota_info *q = matchinfo; + struct xt_quota_info *q = par->matchinfo; if (q->flags & ~XT_QUOTA_MASK) return false; @@ -54,33 +48,24 @@ quota_mt_check(const char *tablename, const void *entry, return true; } -static struct xt_match quota_mt_reg[] __read_mostly = { - { - .name = "quota", - .family = AF_INET, - .checkentry = quota_mt_check, - .match = quota_mt, - .matchsize = sizeof(struct xt_quota_info), - .me = THIS_MODULE - }, - { - .name = "quota", - .family = AF_INET6, - .checkentry = quota_mt_check, - .match = quota_mt, - .matchsize = sizeof(struct xt_quota_info), - .me = THIS_MODULE - }, +static struct xt_match quota_mt_reg __read_mostly = { + .name = "quota", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = quota_mt, + .checkentry = quota_mt_check, + .matchsize = sizeof(struct xt_quota_info), + .me = THIS_MODULE, }; static int __init quota_mt_init(void) { - return xt_register_matches(quota_mt_reg, ARRAY_SIZE(quota_mt_reg)); + return xt_register_match("a_mt_reg); } static void __exit quota_mt_exit(void) { - xt_unregister_matches(quota_mt_reg, ARRAY_SIZE(quota_mt_reg)); + xt_unregister_match("a_mt_reg); } module_init(quota_mt_init); diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c index ebd84f1b4f62..220a1d588ee0 100644 --- a/net/netfilter/xt_rateest.c +++ b/net/netfilter/xt_rateest.c @@ -14,16 +14,10 @@ #include <net/netfilter/xt_rateest.h> -static bool xt_rateest_mt(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - bool *hotdrop) +static bool +xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_rateest_match_info *info = matchinfo; + const struct xt_rateest_match_info *info = par->matchinfo; struct gnet_stats_rate_est *r; u_int32_t bps1, bps2, pps1, pps2; bool ret = true; @@ -80,13 +74,9 @@ static bool xt_rateest_mt(const struct sk_buff *skb, return ret; } -static bool xt_rateest_mt_checkentry(const char *tablename, - const void *ip, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) +static bool xt_rateest_mt_checkentry(const struct xt_mtchk_param *par) { - struct xt_rateest_match_info *info = matchinfo; + struct xt_rateest_match_info *info = par->matchinfo; struct xt_rateest *est1, *est2; if (hweight32(info->flags & (XT_RATEEST_MATCH_ABS | @@ -127,46 +117,34 @@ err1: return false; } -static void xt_rateest_mt_destroy(const struct xt_match *match, - void *matchinfo) +static void xt_rateest_mt_destroy(const struct xt_mtdtor_param *par) { - struct xt_rateest_match_info *info = matchinfo; + struct xt_rateest_match_info *info = par->matchinfo; xt_rateest_put(info->est1); if (info->est2) xt_rateest_put(info->est2); } -static struct xt_match xt_rateest_match[] __read_mostly = { - { - .family = AF_INET, - .name = "rateest", - .match = xt_rateest_mt, - .checkentry = xt_rateest_mt_checkentry, - .destroy = xt_rateest_mt_destroy, - .matchsize = sizeof(struct xt_rateest_match_info), - .me = THIS_MODULE, - }, - { - .family = AF_INET6, - .name = "rateest", - .match = xt_rateest_mt, - .checkentry = xt_rateest_mt_checkentry, - .destroy = xt_rateest_mt_destroy, - .matchsize = sizeof(struct xt_rateest_match_info), - .me = THIS_MODULE, - }, +static struct xt_match xt_rateest_mt_reg __read_mostly = { + .name = "rateest", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = xt_rateest_mt, + .checkentry = xt_rateest_mt_checkentry, + .destroy = xt_rateest_mt_destroy, + .matchsize = sizeof(struct xt_rateest_match_info), + .me = THIS_MODULE, }; static int __init xt_rateest_mt_init(void) { - return xt_register_matches(xt_rateest_match, - ARRAY_SIZE(xt_rateest_match)); + return xt_register_match(&xt_rateest_mt_reg); } static void __exit xt_rateest_mt_fini(void) { - xt_unregister_matches(xt_rateest_match, ARRAY_SIZE(xt_rateest_match)); + xt_unregister_match(&xt_rateest_mt_reg); } MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c index 7df1627c536f..67419287bc7e 100644 --- a/net/netfilter/xt_realm.c +++ b/net/netfilter/xt_realm.c @@ -22,12 +22,9 @@ MODULE_DESCRIPTION("Xtables: Routing realm match"); MODULE_ALIAS("ipt_realm"); static bool -realm_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +realm_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_realm_info *info = matchinfo; + const struct xt_realm_info *info = par->matchinfo; const struct dst_entry *dst = skb->dst; return (info->id == (dst->tclassid & info->mask)) ^ info->invert; @@ -39,7 +36,7 @@ static struct xt_match realm_mt_reg __read_mostly = { .matchsize = sizeof(struct xt_realm_info), .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), - .family = AF_INET, + .family = NFPROTO_UNSPEC, .me = THIS_MODULE }; diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/netfilter/xt_recent.c index 3974d7cae5c0..280c471bcdf4 100644 --- a/net/ipv4/netfilter/ipt_recent.c +++ b/net/netfilter/xt_recent.c @@ -1,5 +1,6 @@ /* * Copyright (c) 2006 Patrick McHardy <kaber@trash.net> + * Copyright © CC Computer Consultants GmbH, 2007 - 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -13,6 +14,8 @@ */ #include <linux/init.h> #include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/module.h> #include <linux/moduleparam.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -27,11 +30,14 @@ #include <net/net_namespace.h> #include <linux/netfilter/x_tables.h> -#include <linux/netfilter_ipv4/ipt_recent.h> +#include <linux/netfilter/xt_recent.h> MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_AUTHOR("Jan Engelhardt <jengelh@computergmbh.de>"); MODULE_DESCRIPTION("Xtables: \"recently-seen\" host matching for IPv4"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_recent"); +MODULE_ALIAS("ip6t_recent"); static unsigned int ip_list_tot = 100; static unsigned int ip_pkt_list_tot = 20; @@ -48,14 +54,15 @@ module_param(ip_list_gid, uint, 0400); MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)"); MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); -MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files"); -MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files"); -MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files"); +MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/xt_recent/* files"); +MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/xt_recent/* files"); +MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/xt_recent/* files"); struct recent_entry { struct list_head list; struct list_head lru_list; - __be32 addr; + union nf_inet_addr addr; + u_int16_t family; u_int8_t ttl; u_int8_t index; u_int16_t nstamps; @@ -64,9 +71,9 @@ struct recent_entry { struct recent_table { struct list_head list; - char name[IPT_RECENT_NAME_LEN]; + char name[XT_RECENT_NAME_LEN]; #ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc; + struct proc_dir_entry *proc_old, *proc; #endif unsigned int refcnt; unsigned int entries; @@ -79,31 +86,53 @@ static DEFINE_SPINLOCK(recent_lock); static DEFINE_MUTEX(recent_mutex); #ifdef CONFIG_PROC_FS -static struct proc_dir_entry *proc_dir; -static const struct file_operations recent_fops; +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT +static struct proc_dir_entry *proc_old_dir; +#endif +static struct proc_dir_entry *recent_proc_dir; +static const struct file_operations recent_old_fops, recent_mt_fops; #endif static u_int32_t hash_rnd; -static int hash_rnd_initted; +static bool hash_rnd_initted; + +static unsigned int recent_entry_hash4(const union nf_inet_addr *addr) +{ + if (!hash_rnd_initted) { + get_random_bytes(&hash_rnd, sizeof(hash_rnd)); + hash_rnd_initted = true; + } + return jhash_1word((__force u32)addr->ip, hash_rnd) & + (ip_list_hash_size - 1); +} -static unsigned int recent_entry_hash(__be32 addr) +static unsigned int recent_entry_hash6(const union nf_inet_addr *addr) { if (!hash_rnd_initted) { - get_random_bytes(&hash_rnd, 4); - hash_rnd_initted = 1; + get_random_bytes(&hash_rnd, sizeof(hash_rnd)); + hash_rnd_initted = true; } - return jhash_1word((__force u32)addr, hash_rnd) & (ip_list_hash_size - 1); + return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), hash_rnd) & + (ip_list_hash_size - 1); } static struct recent_entry * -recent_entry_lookup(const struct recent_table *table, __be32 addr, u_int8_t ttl) +recent_entry_lookup(const struct recent_table *table, + const union nf_inet_addr *addrp, u_int16_t family, + u_int8_t ttl) { struct recent_entry *e; unsigned int h; - h = recent_entry_hash(addr); + if (family == NFPROTO_IPV4) + h = recent_entry_hash4(addrp); + else + h = recent_entry_hash6(addrp); + list_for_each_entry(e, &table->iphash[h], list) - if (e->addr == addr && (ttl == e->ttl || !ttl || !e->ttl)) + if (e->family == family && + memcmp(&e->addr, addrp, sizeof(e->addr)) == 0 && + (ttl == e->ttl || ttl == 0 || e->ttl == 0)) return e; return NULL; } @@ -117,7 +146,8 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e) } static struct recent_entry * -recent_entry_init(struct recent_table *t, __be32 addr, u_int8_t ttl) +recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr, + u_int16_t family, u_int8_t ttl) { struct recent_entry *e; @@ -129,12 +159,16 @@ recent_entry_init(struct recent_table *t, __be32 addr, u_int8_t ttl) GFP_ATOMIC); if (e == NULL) return NULL; - e->addr = addr; + memcpy(&e->addr, addr, sizeof(e->addr)); e->ttl = ttl; e->stamps[0] = jiffies; e->nstamps = 1; e->index = 1; - list_add_tail(&e->list, &t->iphash[recent_entry_hash(addr)]); + e->family = family; + if (family == NFPROTO_IPV4) + list_add_tail(&e->list, &t->iphash[recent_entry_hash4(addr)]); + else + list_add_tail(&e->list, &t->iphash[recent_entry_hash6(addr)]); list_add_tail(&e->lru_list, &t->lru_list); t->entries++; return e; @@ -170,48 +204,59 @@ static void recent_table_flush(struct recent_table *t) } static bool -recent_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +recent_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct ipt_recent_info *info = matchinfo; + const struct xt_recent_mtinfo *info = par->matchinfo; struct recent_table *t; struct recent_entry *e; - __be32 addr; + union nf_inet_addr addr = {}; u_int8_t ttl; bool ret = info->invert; - if (info->side == IPT_RECENT_DEST) - addr = ip_hdr(skb)->daddr; - else - addr = ip_hdr(skb)->saddr; + if (par->match->family == NFPROTO_IPV4) { + const struct iphdr *iph = ip_hdr(skb); + + if (info->side == XT_RECENT_DEST) + addr.ip = iph->daddr; + else + addr.ip = iph->saddr; + + ttl = iph->ttl; + } else { + const struct ipv6hdr *iph = ipv6_hdr(skb); + + if (info->side == XT_RECENT_DEST) + memcpy(&addr.in6, &iph->daddr, sizeof(addr.in6)); + else + memcpy(&addr.in6, &iph->saddr, sizeof(addr.in6)); + + ttl = iph->hop_limit; + } - ttl = ip_hdr(skb)->ttl; /* use TTL as seen before forwarding */ - if (out && !skb->sk) + if (par->out != NULL && skb->sk == NULL) ttl++; spin_lock_bh(&recent_lock); t = recent_table_lookup(info->name); - e = recent_entry_lookup(t, addr, - info->check_set & IPT_RECENT_TTL ? ttl : 0); + e = recent_entry_lookup(t, &addr, par->match->family, + (info->check_set & XT_RECENT_TTL) ? ttl : 0); if (e == NULL) { - if (!(info->check_set & IPT_RECENT_SET)) + if (!(info->check_set & XT_RECENT_SET)) goto out; - e = recent_entry_init(t, addr, ttl); + e = recent_entry_init(t, &addr, par->match->family, ttl); if (e == NULL) - *hotdrop = true; + *par->hotdrop = true; ret = !ret; goto out; } - if (info->check_set & IPT_RECENT_SET) + if (info->check_set & XT_RECENT_SET) ret = !ret; - else if (info->check_set & IPT_RECENT_REMOVE) { + else if (info->check_set & XT_RECENT_REMOVE) { recent_entry_remove(t, e); ret = !ret; - } else if (info->check_set & (IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) { + } else if (info->check_set & (XT_RECENT_CHECK | XT_RECENT_UPDATE)) { unsigned long time = jiffies - info->seconds * HZ; unsigned int i, hits = 0; @@ -225,8 +270,8 @@ recent_mt(const struct sk_buff *skb, const struct net_device *in, } } - if (info->check_set & IPT_RECENT_SET || - (info->check_set & IPT_RECENT_UPDATE && ret)) { + if (info->check_set & XT_RECENT_SET || + (info->check_set & XT_RECENT_UPDATE && ret)) { recent_entry_update(t, e); e->ttl = ttl; } @@ -235,27 +280,24 @@ out: return ret; } -static bool -recent_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool recent_mt_check(const struct xt_mtchk_param *par) { - const struct ipt_recent_info *info = matchinfo; + const struct xt_recent_mtinfo *info = par->matchinfo; struct recent_table *t; unsigned i; bool ret = false; if (hweight8(info->check_set & - (IPT_RECENT_SET | IPT_RECENT_REMOVE | - IPT_RECENT_CHECK | IPT_RECENT_UPDATE)) != 1) + (XT_RECENT_SET | XT_RECENT_REMOVE | + XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1) return false; - if ((info->check_set & (IPT_RECENT_SET | IPT_RECENT_REMOVE)) && + if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) && (info->seconds || info->hit_count)) return false; if (info->hit_count > ip_pkt_list_tot) return false; if (info->name[0] == '\0' || - strnlen(info->name, IPT_RECENT_NAME_LEN) == IPT_RECENT_NAME_LEN) + strnlen(info->name, XT_RECENT_NAME_LEN) == XT_RECENT_NAME_LEN) return false; mutex_lock(&recent_mutex); @@ -276,14 +318,25 @@ recent_mt_check(const char *tablename, const void *ip, for (i = 0; i < ip_list_hash_size; i++) INIT_LIST_HEAD(&t->iphash[i]); #ifdef CONFIG_PROC_FS - t->proc = proc_create(t->name, ip_list_perms, proc_dir, &recent_fops); + t->proc = proc_create_data(t->name, ip_list_perms, recent_proc_dir, + &recent_mt_fops, t); if (t->proc == NULL) { kfree(t); goto out; } +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT + t->proc_old = proc_create_data(t->name, ip_list_perms, proc_old_dir, + &recent_old_fops, t); + if (t->proc_old == NULL) { + remove_proc_entry(t->name, proc_old_dir); + kfree(t); + goto out; + } + t->proc_old->uid = ip_list_uid; + t->proc_old->gid = ip_list_gid; +#endif t->proc->uid = ip_list_uid; t->proc->gid = ip_list_gid; - t->proc->data = t; #endif spin_lock_bh(&recent_lock); list_add_tail(&t->list, &tables); @@ -294,9 +347,9 @@ out: return ret; } -static void recent_mt_destroy(const struct xt_match *match, void *matchinfo) +static void recent_mt_destroy(const struct xt_mtdtor_param *par) { - const struct ipt_recent_info *info = matchinfo; + const struct xt_recent_mtinfo *info = par->matchinfo; struct recent_table *t; mutex_lock(&recent_mutex); @@ -306,7 +359,10 @@ static void recent_mt_destroy(const struct xt_match *match, void *matchinfo) list_del(&t->list); spin_unlock_bh(&recent_lock); #ifdef CONFIG_PROC_FS - remove_proc_entry(t->name, proc_dir); +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT + remove_proc_entry(t->name, proc_old_dir); +#endif + remove_proc_entry(t->name, recent_proc_dir); #endif recent_table_flush(t); kfree(t); @@ -316,7 +372,7 @@ static void recent_mt_destroy(const struct xt_match *match, void *matchinfo) #ifdef CONFIG_PROC_FS struct recent_iter_state { - struct recent_table *table; + const struct recent_table *table; unsigned int bucket; }; @@ -341,8 +397,8 @@ static void *recent_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct recent_iter_state *st = seq->private; const struct recent_table *t = st->table; - struct recent_entry *e = v; - struct list_head *head = e->list.next; + const struct recent_entry *e = v; + const struct list_head *head = e->list.next; while (head == &t->iphash[st->bucket]) { if (++st->bucket >= ip_list_hash_size) @@ -365,8 +421,14 @@ static int recent_seq_show(struct seq_file *seq, void *v) unsigned int i; i = (e->index - 1) % ip_pkt_list_tot; - seq_printf(seq, "src=%u.%u.%u.%u ttl: %u last_seen: %lu oldest_pkt: %u", - NIPQUAD(e->addr), e->ttl, e->stamps[i], e->index); + if (e->family == NFPROTO_IPV4) + seq_printf(seq, "src=" NIPQUAD_FMT " ttl: %u last_seen: %lu " + "oldest_pkt: %u", NIPQUAD(e->addr.ip), e->ttl, + e->stamps[i], e->index); + else + seq_printf(seq, "src=" NIP6_FMT " ttl: %u last_seen: %lu " + "oldest_pkt: %u", NIP6(e->addr.in6), e->ttl, + e->stamps[i], e->index); for (i = 0; i < e->nstamps; i++) seq_printf(seq, "%s %lu", i ? "," : "", e->stamps[i]); seq_printf(seq, "\n"); @@ -393,8 +455,22 @@ static int recent_seq_open(struct inode *inode, struct file *file) return 0; } -static ssize_t recent_proc_write(struct file *file, const char __user *input, - size_t size, loff_t *loff) +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT +static int recent_old_seq_open(struct inode *inode, struct file *filp) +{ + static bool warned_of_old; + + if (unlikely(!warned_of_old)) { + printk(KERN_INFO KBUILD_MODNAME ": Use of /proc/net/ipt_recent" + " is deprecated; use /proc/net/xt_recent.\n"); + warned_of_old = true; + } + return recent_seq_open(inode, filp); +} + +static ssize_t recent_old_proc_write(struct file *file, + const char __user *input, + size_t size, loff_t *loff) { const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); struct recent_table *t = pde->data; @@ -407,6 +483,7 @@ static ssize_t recent_proc_write(struct file *file, const char __user *input, size = sizeof(buf); if (copy_from_user(buf, input, size)) return -EFAULT; + while (isspace(*c)) c++; @@ -434,10 +511,11 @@ static ssize_t recent_proc_write(struct file *file, const char __user *input, addr = in_aton(c); spin_lock_bh(&recent_lock); - e = recent_entry_lookup(t, addr, 0); + e = recent_entry_lookup(t, (const void *)&addr, NFPROTO_IPV4, 0); if (e == NULL) { if (add) - recent_entry_init(t, addr, 0); + recent_entry_init(t, (const void *)&addr, + NFPROTO_IPV4, 0); } else { if (add) recent_entry_update(t, e); @@ -448,23 +526,118 @@ static ssize_t recent_proc_write(struct file *file, const char __user *input, return size; } -static const struct file_operations recent_fops = { - .open = recent_seq_open, +static const struct file_operations recent_old_fops = { + .open = recent_old_seq_open, .read = seq_read, - .write = recent_proc_write, + .write = recent_old_proc_write, .release = seq_release_private, .owner = THIS_MODULE, }; +#endif + +static ssize_t +recent_mt_proc_write(struct file *file, const char __user *input, + size_t size, loff_t *loff) +{ + const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode); + struct recent_table *t = pde->data; + struct recent_entry *e; + char buf[sizeof("+b335:1d35:1e55:dead:c0de:1715:5afe:c0de")]; + const char *c = buf; + union nf_inet_addr addr; + u_int16_t family; + bool add, succ; + + if (size == 0) + return 0; + if (size > sizeof(buf)) + size = sizeof(buf); + if (copy_from_user(buf, input, size) != 0) + return -EFAULT; + + /* Strict protocol! */ + if (*loff != 0) + return -ESPIPE; + switch (*c) { + case '/': /* flush table */ + spin_lock_bh(&recent_lock); + recent_table_flush(t); + spin_unlock_bh(&recent_lock); + return size; + case '-': /* remove address */ + add = false; + break; + case '+': /* add address */ + add = true; + break; + default: + printk(KERN_INFO KBUILD_MODNAME ": Need +ip, -ip or /\n"); + return -EINVAL; + } + + ++c; + --size; + if (strnchr(c, size, ':') != NULL) { + family = NFPROTO_IPV6; + succ = in6_pton(c, size, (void *)&addr, '\n', NULL); + } else { + family = NFPROTO_IPV4; + succ = in4_pton(c, size, (void *)&addr, '\n', NULL); + } + + if (!succ) { + printk(KERN_INFO KBUILD_MODNAME ": illegal address written " + "to procfs\n"); + return -EINVAL; + } + + spin_lock_bh(&recent_lock); + e = recent_entry_lookup(t, &addr, family, 0); + if (e == NULL) { + if (add) + recent_entry_init(t, &addr, family, 0); + } else { + if (add) + recent_entry_update(t, e); + else + recent_entry_remove(t, e); + } + spin_unlock_bh(&recent_lock); + /* Note we removed one above */ + *loff += size + 1; + return size + 1; +} + +static const struct file_operations recent_mt_fops = { + .open = recent_seq_open, + .read = seq_read, + .write = recent_mt_proc_write, + .release = seq_release_private, + .owner = THIS_MODULE, +}; #endif /* CONFIG_PROC_FS */ -static struct xt_match recent_mt_reg __read_mostly = { - .name = "recent", - .family = AF_INET, - .match = recent_mt, - .matchsize = sizeof(struct ipt_recent_info), - .checkentry = recent_mt_check, - .destroy = recent_mt_destroy, - .me = THIS_MODULE, +static struct xt_match recent_mt_reg[] __read_mostly = { + { + .name = "recent", + .revision = 0, + .family = NFPROTO_IPV4, + .match = recent_mt, + .matchsize = sizeof(struct xt_recent_mtinfo), + .checkentry = recent_mt_check, + .destroy = recent_mt_destroy, + .me = THIS_MODULE, + }, + { + .name = "recent", + .revision = 0, + .family = NFPROTO_IPV6, + .match = recent_mt, + .matchsize = sizeof(struct xt_recent_mtinfo), + .checkentry = recent_mt_check, + .destroy = recent_mt_destroy, + .me = THIS_MODULE, + }, }; static int __init recent_mt_init(void) @@ -475,26 +648,39 @@ static int __init recent_mt_init(void) return -EINVAL; ip_list_hash_size = 1 << fls(ip_list_tot); - err = xt_register_match(&recent_mt_reg); + err = xt_register_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); #ifdef CONFIG_PROC_FS if (err) return err; - proc_dir = proc_mkdir("ipt_recent", init_net.proc_net); - if (proc_dir == NULL) { - xt_unregister_match(&recent_mt_reg); + recent_proc_dir = proc_mkdir("xt_recent", init_net.proc_net); + if (recent_proc_dir == NULL) { + xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); + err = -ENOMEM; + } +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT + if (err < 0) + return err; + proc_old_dir = proc_mkdir("ipt_recent", init_net.proc_net); + if (proc_old_dir == NULL) { + remove_proc_entry("xt_recent", init_net.proc_net); + xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); err = -ENOMEM; } #endif +#endif return err; } static void __exit recent_mt_exit(void) { BUG_ON(!list_empty(&tables)); - xt_unregister_match(&recent_mt_reg); + xt_unregister_matches(recent_mt_reg, ARRAY_SIZE(recent_mt_reg)); #ifdef CONFIG_PROC_FS +#ifdef CONFIG_NETFILTER_XT_MATCH_RECENT_PROC_COMPAT remove_proc_entry("ipt_recent", init_net.proc_net); #endif + remove_proc_entry("xt_recent", init_net.proc_net); +#endif } module_init(recent_mt_init); diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index e6e4681fa047..e223cb43ae8e 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -117,23 +117,21 @@ match_packet(const struct sk_buff *skb, } static bool -sctp_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_sctp_info *info = matchinfo; + const struct xt_sctp_info *info = par->matchinfo; const sctp_sctphdr_t *sh; sctp_sctphdr_t _sh; - if (offset) { + if (par->fragoff != 0) { duprintf("Dropping non-first fragment.. FIXME\n"); return false; } - sh = skb_header_pointer(skb, protoff, sizeof(_sh), &_sh); + sh = skb_header_pointer(skb, par->thoff, sizeof(_sh), &_sh); if (sh == NULL) { duprintf("Dropping evil TCP offset=0 tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } duprintf("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest)); @@ -144,17 +142,14 @@ sctp_mt(const struct sk_buff *skb, const struct net_device *in, && SCCHECK(ntohs(sh->dest) >= info->dpts[0] && ntohs(sh->dest) <= info->dpts[1], XT_SCTP_DEST_PORTS, info->flags, info->invflags) - && SCCHECK(match_packet(skb, protoff + sizeof (sctp_sctphdr_t), - info, hotdrop), + && SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t), + info, par->hotdrop), XT_SCTP_CHUNK_TYPES, info->flags, info->invflags); } -static bool -sctp_mt_check(const char *tablename, const void *inf, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool sctp_mt_check(const struct xt_mtchk_param *par) { - const struct xt_sctp_info *info = matchinfo; + const struct xt_sctp_info *info = par->matchinfo; return !(info->flags & ~XT_SCTP_VALID_FLAGS) && !(info->invflags & ~XT_SCTP_VALID_FLAGS) @@ -169,7 +164,7 @@ sctp_mt_check(const char *tablename, const void *inf, static struct xt_match sctp_mt_reg[] __read_mostly = { { .name = "sctp", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = sctp_mt_check, .match = sctp_mt, .matchsize = sizeof(struct xt_sctp_info), @@ -178,7 +173,7 @@ static struct xt_match sctp_mt_reg[] __read_mostly = { }, { .name = "sctp", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = sctp_mt_check, .match = sctp_mt, .matchsize = sizeof(struct xt_sctp_info), diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c new file mode 100644 index 000000000000..02a8fed21082 --- /dev/null +++ b/net/netfilter/xt_socket.c @@ -0,0 +1,185 @@ +/* + * Transparent proxy support for Linux/iptables + * + * Copyright (C) 2007-2008 BalaBit IT Ltd. + * Author: Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <net/tcp.h> +#include <net/udp.h> +#include <net/icmp.h> +#include <net/sock.h> +#include <net/inet_sock.h> +#include <net/netfilter/nf_tproxy_core.h> +#include <net/netfilter/ipv4/nf_defrag_ipv4.h> + +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#define XT_SOCKET_HAVE_CONNTRACK 1 +#include <net/netfilter/nf_conntrack.h> +#endif + +static int +extract_icmp_fields(const struct sk_buff *skb, + u8 *protocol, + __be32 *raddr, + __be32 *laddr, + __be16 *rport, + __be16 *lport) +{ + unsigned int outside_hdrlen = ip_hdrlen(skb); + struct iphdr *inside_iph, _inside_iph; + struct icmphdr *icmph, _icmph; + __be16 *ports, _ports[2]; + + icmph = skb_header_pointer(skb, outside_hdrlen, + sizeof(_icmph), &_icmph); + if (icmph == NULL) + return 1; + + switch (icmph->type) { + case ICMP_DEST_UNREACH: + case ICMP_SOURCE_QUENCH: + case ICMP_REDIRECT: + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + break; + default: + return 1; + } + + inside_iph = skb_header_pointer(skb, outside_hdrlen + + sizeof(struct icmphdr), + sizeof(_inside_iph), &_inside_iph); + if (inside_iph == NULL) + return 1; + + if (inside_iph->protocol != IPPROTO_TCP && + inside_iph->protocol != IPPROTO_UDP) + return 1; + + ports = skb_header_pointer(skb, outside_hdrlen + + sizeof(struct icmphdr) + + (inside_iph->ihl << 2), + sizeof(_ports), &_ports); + if (ports == NULL) + return 1; + + /* the inside IP packet is the one quoted from our side, thus + * its saddr is the local address */ + *protocol = inside_iph->protocol; + *laddr = inside_iph->saddr; + *lport = ports[0]; + *raddr = inside_iph->daddr; + *rport = ports[1]; + + return 0; +} + + +static bool +socket_mt(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct iphdr *iph = ip_hdr(skb); + struct udphdr _hdr, *hp = NULL; + struct sock *sk; + __be32 daddr, saddr; + __be16 dport, sport; + u8 protocol; +#ifdef XT_SOCKET_HAVE_CONNTRACK + struct nf_conn const *ct; + enum ip_conntrack_info ctinfo; +#endif + + if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) { + hp = skb_header_pointer(skb, ip_hdrlen(skb), + sizeof(_hdr), &_hdr); + if (hp == NULL) + return false; + + protocol = iph->protocol; + saddr = iph->saddr; + sport = hp->source; + daddr = iph->daddr; + dport = hp->dest; + + } else if (iph->protocol == IPPROTO_ICMP) { + if (extract_icmp_fields(skb, &protocol, &saddr, &daddr, + &sport, &dport)) + return false; + } else { + return false; + } + +#ifdef XT_SOCKET_HAVE_CONNTRACK + /* Do the lookup with the original socket address in case this is a + * reply packet of an established SNAT-ted connection. */ + + ct = nf_ct_get(skb, &ctinfo); + if (ct && (ct != &nf_conntrack_untracked) && + ((iph->protocol != IPPROTO_ICMP && + ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) || + (iph->protocol == IPPROTO_ICMP && + ctinfo == IP_CT_IS_REPLY + IP_CT_RELATED)) && + (ct->status & IPS_SRC_NAT_DONE)) { + + daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; + dport = (iph->protocol == IPPROTO_TCP) ? + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; + } +#endif + + sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, + saddr, daddr, sport, dport, par->in, false); + if (sk != NULL) { + bool wildcard = (inet_sk(sk)->rcv_saddr == 0); + + nf_tproxy_put_sock(sk); + if (wildcard) + sk = NULL; + } + + pr_debug("socket match: proto %u %08x:%u -> %08x:%u " + "(orig %08x:%u) sock %p\n", + protocol, ntohl(saddr), ntohs(sport), + ntohl(daddr), ntohs(dport), + ntohl(iph->daddr), hp ? ntohs(hp->dest) : 0, sk); + + return (sk != NULL); +} + +static struct xt_match socket_mt_reg __read_mostly = { + .name = "socket", + .family = AF_INET, + .match = socket_mt, + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, +}; + +static int __init socket_mt_init(void) +{ + nf_defrag_ipv4_enable(); + return xt_register_match(&socket_mt_reg); +} + +static void __exit socket_mt_exit(void) +{ + xt_unregister_match(&socket_mt_reg); +} + +module_init(socket_mt_init); +module_exit(socket_mt_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs, Balazs Scheidler"); +MODULE_DESCRIPTION("x_tables socket match module"); +MODULE_ALIAS("ipt_socket"); diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index a776dc36a193..4c946cbd731f 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -21,12 +21,9 @@ MODULE_ALIAS("ipt_state"); MODULE_ALIAS("ip6t_state"); static bool -state_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +state_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_state_info *sinfo = matchinfo; + const struct xt_state_info *sinfo = par->matchinfo; enum ip_conntrack_info ctinfo; unsigned int statebit; @@ -40,28 +37,25 @@ state_mt(const struct sk_buff *skb, const struct net_device *in, return (sinfo->statemask & statebit); } -static bool -state_mt_check(const char *tablename, const void *inf, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool state_mt_check(const struct xt_mtchk_param *par) { - if (nf_ct_l3proto_try_module_get(match->family) < 0) { + if (nf_ct_l3proto_try_module_get(par->match->family) < 0) { printk(KERN_WARNING "can't load conntrack support for " - "proto=%u\n", match->family); + "proto=%u\n", par->match->family); return false; } return true; } -static void state_mt_destroy(const struct xt_match *match, void *matchinfo) +static void state_mt_destroy(const struct xt_mtdtor_param *par) { - nf_ct_l3proto_module_put(match->family); + nf_ct_l3proto_module_put(par->match->family); } static struct xt_match state_mt_reg[] __read_mostly = { { .name = "state", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = state_mt_check, .match = state_mt, .destroy = state_mt_destroy, @@ -70,7 +64,7 @@ static struct xt_match state_mt_reg[] __read_mostly = { }, { .name = "state", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = state_mt_check, .match = state_mt, .destroy = state_mt_destroy, diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 43133080da7d..0d75141139d5 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -25,12 +25,9 @@ MODULE_ALIAS("ip6t_statistic"); static DEFINE_SPINLOCK(nth_lock); static bool -statistic_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; + struct xt_statistic_info *info = (void *)par->matchinfo; bool ret = info->flags & XT_STATISTIC_INVERT; switch (info->mode) { @@ -52,12 +49,9 @@ statistic_mt(const struct sk_buff *skb, const struct net_device *in, return ret; } -static bool -statistic_mt_check(const char *tablename, const void *entry, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool statistic_mt_check(const struct xt_mtchk_param *par) { - struct xt_statistic_info *info = matchinfo; + struct xt_statistic_info *info = par->matchinfo; if (info->mode > XT_STATISTIC_MODE_MAX || info->flags & ~XT_STATISTIC_MASK) @@ -66,35 +60,24 @@ statistic_mt_check(const char *tablename, const void *entry, return true; } -static struct xt_match statistic_mt_reg[] __read_mostly = { - { - .name = "statistic", - .family = AF_INET, - .checkentry = statistic_mt_check, - .match = statistic_mt, - .matchsize = sizeof(struct xt_statistic_info), - .me = THIS_MODULE, - }, - { - .name = "statistic", - .family = AF_INET6, - .checkentry = statistic_mt_check, - .match = statistic_mt, - .matchsize = sizeof(struct xt_statistic_info), - .me = THIS_MODULE, - }, +static struct xt_match xt_statistic_mt_reg __read_mostly = { + .name = "statistic", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = statistic_mt, + .checkentry = statistic_mt_check, + .matchsize = sizeof(struct xt_statistic_info), + .me = THIS_MODULE, }; static int __init statistic_mt_init(void) { - return xt_register_matches(statistic_mt_reg, - ARRAY_SIZE(statistic_mt_reg)); + return xt_register_match(&xt_statistic_mt_reg); } static void __exit statistic_mt_exit(void) { - xt_unregister_matches(statistic_mt_reg, - ARRAY_SIZE(statistic_mt_reg)); + xt_unregister_match(&xt_statistic_mt_reg); } module_init(statistic_mt_init); diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c index 4903182a062b..b4d774111311 100644 --- a/net/netfilter/xt_string.c +++ b/net/netfilter/xt_string.c @@ -22,18 +22,15 @@ MODULE_ALIAS("ipt_string"); MODULE_ALIAS("ip6t_string"); static bool -string_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +string_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_string_info *conf = matchinfo; + const struct xt_string_info *conf = par->matchinfo; struct ts_state state; int invert; memset(&state, 0, sizeof(struct ts_state)); - invert = (match->revision == 0 ? conf->u.v0.invert : + invert = (par->match->revision == 0 ? conf->u.v0.invert : conf->u.v1.flags & XT_STRING_FLAG_INVERT); return (skb_find_text((struct sk_buff *)skb, conf->from_offset, @@ -43,12 +40,9 @@ string_mt(const struct sk_buff *skb, const struct net_device *in, #define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m)) -static bool -string_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool string_mt_check(const struct xt_mtchk_param *par) { - struct xt_string_info *conf = matchinfo; + struct xt_string_info *conf = par->matchinfo; struct ts_config *ts_conf; int flags = TS_AUTOLOAD; @@ -59,7 +53,7 @@ string_mt_check(const char *tablename, const void *ip, return false; if (conf->patlen > XT_STRING_MAX_PATTERN_SIZE) return false; - if (match->revision == 1) { + if (par->match->revision == 1) { if (conf->u.v1.flags & ~(XT_STRING_FLAG_IGNORECASE | XT_STRING_FLAG_INVERT)) return false; @@ -76,36 +70,16 @@ string_mt_check(const char *tablename, const void *ip, return true; } -static void string_mt_destroy(const struct xt_match *match, void *matchinfo) +static void string_mt_destroy(const struct xt_mtdtor_param *par) { - textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); + textsearch_destroy(STRING_TEXT_PRIV(par->matchinfo)->config); } -static struct xt_match string_mt_reg[] __read_mostly = { - { - .name = "string", - .revision = 0, - .family = AF_INET, - .checkentry = string_mt_check, - .match = string_mt, - .destroy = string_mt_destroy, - .matchsize = sizeof(struct xt_string_info), - .me = THIS_MODULE - }, - { - .name = "string", - .revision = 1, - .family = AF_INET, - .checkentry = string_mt_check, - .match = string_mt, - .destroy = string_mt_destroy, - .matchsize = sizeof(struct xt_string_info), - .me = THIS_MODULE - }, +static struct xt_match xt_string_mt_reg[] __read_mostly = { { .name = "string", .revision = 0, - .family = AF_INET6, + .family = NFPROTO_UNSPEC, .checkentry = string_mt_check, .match = string_mt, .destroy = string_mt_destroy, @@ -115,7 +89,7 @@ static struct xt_match string_mt_reg[] __read_mostly = { { .name = "string", .revision = 1, - .family = AF_INET6, + .family = NFPROTO_UNSPEC, .checkentry = string_mt_check, .match = string_mt, .destroy = string_mt_destroy, @@ -126,12 +100,13 @@ static struct xt_match string_mt_reg[] __read_mostly = { static int __init string_mt_init(void) { - return xt_register_matches(string_mt_reg, ARRAY_SIZE(string_mt_reg)); + return xt_register_matches(xt_string_mt_reg, + ARRAY_SIZE(xt_string_mt_reg)); } static void __exit string_mt_exit(void) { - xt_unregister_matches(string_mt_reg, ARRAY_SIZE(string_mt_reg)); + xt_unregister_matches(xt_string_mt_reg, ARRAY_SIZE(xt_string_mt_reg)); } module_init(string_mt_init); diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c index 6771bf01275b..4809b34b10f8 100644 --- a/net/netfilter/xt_tcpmss.c +++ b/net/netfilter/xt_tcpmss.c @@ -25,12 +25,9 @@ MODULE_ALIAS("ipt_tcpmss"); MODULE_ALIAS("ip6t_tcpmss"); static bool -tcpmss_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, - bool *hotdrop) +tcpmss_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_tcpmss_match_info *info = matchinfo; + const struct xt_tcpmss_match_info *info = par->matchinfo; const struct tcphdr *th; struct tcphdr _tcph; /* tcp.doff is only 4 bits, ie. max 15 * 4 bytes */ @@ -39,7 +36,7 @@ tcpmss_mt(const struct sk_buff *skb, const struct net_device *in, unsigned int i, optlen; /* If we don't have the whole header, drop packet. */ - th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL) goto dropit; @@ -52,7 +49,7 @@ tcpmss_mt(const struct sk_buff *skb, const struct net_device *in, goto out; /* Truncated options. */ - op = skb_header_pointer(skb, protoff + sizeof(*th), optlen, _opt); + op = skb_header_pointer(skb, par->thoff + sizeof(*th), optlen, _opt); if (op == NULL) goto dropit; @@ -76,14 +73,14 @@ out: return info->invert; dropit: - *hotdrop = true; + *par->hotdrop = true; return false; } static struct xt_match tcpmss_mt_reg[] __read_mostly = { { .name = "tcpmss", - .family = AF_INET, + .family = NFPROTO_IPV4, .match = tcpmss_mt, .matchsize = sizeof(struct xt_tcpmss_match_info), .proto = IPPROTO_TCP, @@ -91,7 +88,7 @@ static struct xt_match tcpmss_mt_reg[] __read_mostly = { }, { .name = "tcpmss", - .family = AF_INET6, + .family = NFPROTO_IPV6, .match = tcpmss_mt, .matchsize = sizeof(struct xt_tcpmss_match_info), .proto = IPPROTO_TCP, diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c index 951b06b8d701..1ebdc4934eed 100644 --- a/net/netfilter/xt_tcpudp.c +++ b/net/netfilter/xt_tcpudp.c @@ -68,25 +68,22 @@ tcp_find_option(u_int8_t option, return invert; } -static bool -tcp_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { const struct tcphdr *th; struct tcphdr _tcph; - const struct xt_tcp *tcpinfo = matchinfo; + const struct xt_tcp *tcpinfo = par->matchinfo; - if (offset) { + if (par->fragoff != 0) { /* To quote Alan: Don't allow a fragment of TCP 8 bytes in. Nobody normal causes this. Its a cracker trying to break in by doing a flag overwrite to pass the direction checks. */ - if (offset == 1) { + if (par->fragoff == 1) { duprintf("Dropping evil TCP offset=1 frag.\n"); - *hotdrop = true; + *par->hotdrop = true; } /* Must not be a fragment. */ return false; @@ -94,12 +91,12 @@ tcp_mt(const struct sk_buff *skb, const struct net_device *in, #define FWINVTCP(bool, invflg) ((bool) ^ !!(tcpinfo->invflags & (invflg))) - th = skb_header_pointer(skb, protoff, sizeof(_tcph), &_tcph); + th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); if (th == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil TCP offset=0 tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -117,49 +114,42 @@ tcp_mt(const struct sk_buff *skb, const struct net_device *in, return false; if (tcpinfo->option) { if (th->doff * 4 < sizeof(_tcph)) { - *hotdrop = true; + *par->hotdrop = true; return false; } - if (!tcp_find_option(tcpinfo->option, skb, protoff, + if (!tcp_find_option(tcpinfo->option, skb, par->thoff, th->doff*4 - sizeof(_tcph), tcpinfo->invflags & XT_TCP_INV_OPTION, - hotdrop)) + par->hotdrop)) return false; } return true; } -/* Called when user tries to insert an entry of this type. */ -static bool -tcp_mt_check(const char *tablename, const void *info, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool tcp_mt_check(const struct xt_mtchk_param *par) { - const struct xt_tcp *tcpinfo = matchinfo; + const struct xt_tcp *tcpinfo = par->matchinfo; /* Must specify no unknown invflags */ return !(tcpinfo->invflags & ~XT_TCP_INV_MASK); } -static bool -udp_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par) { const struct udphdr *uh; struct udphdr _udph; - const struct xt_udp *udpinfo = matchinfo; + const struct xt_udp *udpinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) + if (par->fragoff != 0) return false; - uh = skb_header_pointer(skb, protoff, sizeof(_udph), &_udph); + uh = skb_header_pointer(skb, par->thoff, sizeof(_udph), &_udph); if (uh == NULL) { /* We've been asked to examine this packet, and we can't. Hence, no choice but to drop. */ duprintf("Dropping evil UDP tinygram.\n"); - *hotdrop = true; + *par->hotdrop = true; return false; } @@ -171,13 +161,9 @@ udp_mt(const struct sk_buff *skb, const struct net_device *in, !!(udpinfo->invflags & XT_UDP_INV_DSTPT)); } -/* Called when user tries to insert an entry of this type. */ -static bool -udp_mt_check(const char *tablename, const void *info, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool udp_mt_check(const struct xt_mtchk_param *par) { - const struct xt_udp *udpinfo = matchinfo; + const struct xt_udp *udpinfo = par->matchinfo; /* Must specify no unknown invflags */ return !(udpinfo->invflags & ~XT_UDP_INV_MASK); @@ -186,7 +172,7 @@ udp_mt_check(const char *tablename, const void *info, static struct xt_match tcpudp_mt_reg[] __read_mostly = { { .name = "tcp", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = tcp_mt_check, .match = tcp_mt, .matchsize = sizeof(struct xt_tcp), @@ -195,7 +181,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = { }, { .name = "tcp", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = tcp_mt_check, .match = tcp_mt, .matchsize = sizeof(struct xt_tcp), @@ -204,7 +190,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = { }, { .name = "udp", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), @@ -213,7 +199,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = { }, { .name = "udp", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), @@ -222,7 +208,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = { }, { .name = "udplite", - .family = AF_INET, + .family = NFPROTO_IPV4, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), @@ -231,7 +217,7 @@ static struct xt_match tcpudp_mt_reg[] __read_mostly = { }, { .name = "udplite", - .family = AF_INET6, + .family = NFPROTO_IPV6, .checkentry = udp_mt_check, .match = udp_mt, .matchsize = sizeof(struct xt_udp), diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 9f328593287e..29375ba8db73 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -136,26 +136,26 @@ static void localtime_3(struct xtm *r, time_t time) * from w repeatedly while counting.) */ if (is_leap(year)) { + /* use days_since_leapyear[] in a leap year */ for (i = ARRAY_SIZE(days_since_leapyear) - 1; - i > 0 && days_since_year[i] > w; --i) + i > 0 && days_since_leapyear[i] > w; --i) /* just loop */; + r->monthday = w - days_since_leapyear[i] + 1; } else { for (i = ARRAY_SIZE(days_since_year) - 1; i > 0 && days_since_year[i] > w; --i) /* just loop */; + r->monthday = w - days_since_year[i] + 1; } r->month = i + 1; - r->monthday = w - days_since_year[i] + 1; return; } static bool -time_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +time_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_time_info *info = matchinfo; + const struct xt_time_info *info = par->matchinfo; unsigned int packet_time; struct xtm current_time; s64 stamp; @@ -218,12 +218,9 @@ time_mt(const struct sk_buff *skb, const struct net_device *in, return true; } -static bool -time_mt_check(const char *tablename, const void *ip, - const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) +static bool time_mt_check(const struct xt_mtchk_param *par) { - const struct xt_time_info *info = matchinfo; + const struct xt_time_info *info = par->matchinfo; if (info->daytime_start > XT_TIME_MAX_DAYTIME || info->daytime_stop > XT_TIME_MAX_DAYTIME) { @@ -235,33 +232,23 @@ time_mt_check(const char *tablename, const void *ip, return true; } -static struct xt_match time_mt_reg[] __read_mostly = { - { - .name = "time", - .family = AF_INET, - .match = time_mt, - .matchsize = sizeof(struct xt_time_info), - .checkentry = time_mt_check, - .me = THIS_MODULE, - }, - { - .name = "time", - .family = AF_INET6, - .match = time_mt, - .matchsize = sizeof(struct xt_time_info), - .checkentry = time_mt_check, - .me = THIS_MODULE, - }, +static struct xt_match xt_time_mt_reg __read_mostly = { + .name = "time", + .family = NFPROTO_UNSPEC, + .match = time_mt, + .checkentry = time_mt_check, + .matchsize = sizeof(struct xt_time_info), + .me = THIS_MODULE, }; static int __init time_mt_init(void) { - return xt_register_matches(time_mt_reg, ARRAY_SIZE(time_mt_reg)); + return xt_register_match(&xt_time_mt_reg); } static void __exit time_mt_exit(void) { - xt_unregister_matches(time_mt_reg, ARRAY_SIZE(time_mt_reg)); + xt_unregister_match(&xt_time_mt_reg); } module_init(time_mt_init); diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c index 627e0f336d54..24a527624500 100644 --- a/net/netfilter/xt_u32.c +++ b/net/netfilter/xt_u32.c @@ -87,43 +87,32 @@ static bool u32_match_it(const struct xt_u32 *data, return true; } -static bool -u32_mt(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const struct xt_match *match, - const void *matchinfo, int offset, unsigned int protoff, bool *hotdrop) +static bool u32_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_u32 *data = matchinfo; + const struct xt_u32 *data = par->matchinfo; bool ret; ret = u32_match_it(data, skb); return ret ^ data->invert; } -static struct xt_match u32_mt_reg[] __read_mostly = { - { - .name = "u32", - .family = AF_INET, - .match = u32_mt, - .matchsize = sizeof(struct xt_u32), - .me = THIS_MODULE, - }, - { - .name = "u32", - .family = AF_INET6, - .match = u32_mt, - .matchsize = sizeof(struct xt_u32), - .me = THIS_MODULE, - }, +static struct xt_match xt_u32_mt_reg __read_mostly = { + .name = "u32", + .revision = 0, + .family = NFPROTO_UNSPEC, + .match = u32_mt, + .matchsize = sizeof(struct xt_u32), + .me = THIS_MODULE, }; static int __init u32_mt_init(void) { - return xt_register_matches(u32_mt_reg, ARRAY_SIZE(u32_mt_reg)); + return xt_register_match(&xt_u32_mt_reg); } static void __exit u32_mt_exit(void) { - xt_unregister_matches(u32_mt_reg, ARRAY_SIZE(u32_mt_reg)); + xt_unregister_match(&xt_u32_mt_reg); } module_init(u32_mt_init); diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile index 8af18c0a47d9..ea750e9df65f 100644 --- a/net/netlabel/Makefile +++ b/net/netlabel/Makefile @@ -5,7 +5,8 @@ # # base objects -obj-y := netlabel_user.o netlabel_kapi.o netlabel_domainhash.o +obj-y := netlabel_user.o netlabel_kapi.o +obj-y += netlabel_domainhash.o netlabel_addrlist.o # management objects obj-y += netlabel_mgmt.o diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c new file mode 100644 index 000000000000..249f6b92f153 --- /dev/null +++ b/net/netlabel/netlabel_addrlist.c @@ -0,0 +1,390 @@ +/* + * NetLabel Network Address Lists + * + * This file contains network address list functions used to manage ordered + * lists of network addresses for use by the NetLabel subsystem. The NetLabel + * system manages static and dynamic label mappings for network protocols such + * as CIPSO and RIPSO. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/in.h> +#include <linux/in6.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <net/ip.h> +#include <net/ipv6.h> +#include <linux/audit.h> + +#include "netlabel_addrlist.h" + +/* + * Address List Functions + */ + +/** + * netlbl_af4list_search - Search for a matching IPv4 address entry + * @addr: IPv4 address + * @head: the list head + * + * Description: + * Searches the IPv4 address list given by @head. If a matching address entry + * is found it is returned, otherwise NULL is returned. The caller is + * responsible for calling the rcu_read_[un]lock() functions. + * + */ +struct netlbl_af4list *netlbl_af4list_search(__be32 addr, + struct list_head *head) +{ + struct netlbl_af4list *iter; + + list_for_each_entry_rcu(iter, head, list) + if (iter->valid && (addr & iter->mask) == iter->addr) + return iter; + + return NULL; +} + +/** + * netlbl_af4list_search_exact - Search for an exact IPv4 address entry + * @addr: IPv4 address + * @mask: IPv4 address mask + * @head: the list head + * + * Description: + * Searches the IPv4 address list given by @head. If an exact match if found + * it is returned, otherwise NULL is returned. The caller is responsible for + * calling the rcu_read_[un]lock() functions. + * + */ +struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr, + __be32 mask, + struct list_head *head) +{ + struct netlbl_af4list *iter; + + list_for_each_entry_rcu(iter, head, list) + if (iter->valid && iter->addr == addr && iter->mask == mask) + return iter; + + return NULL; +} + + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_af6list_search - Search for a matching IPv6 address entry + * @addr: IPv6 address + * @head: the list head + * + * Description: + * Searches the IPv6 address list given by @head. If a matching address entry + * is found it is returned, otherwise NULL is returned. The caller is + * responsible for calling the rcu_read_[un]lock() functions. + * + */ +struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr, + struct list_head *head) +{ + struct netlbl_af6list *iter; + + list_for_each_entry_rcu(iter, head, list) + if (iter->valid && + ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0) + return iter; + + return NULL; +} + +/** + * netlbl_af6list_search_exact - Search for an exact IPv6 address entry + * @addr: IPv6 address + * @mask: IPv6 address mask + * @head: the list head + * + * Description: + * Searches the IPv6 address list given by @head. If an exact match if found + * it is returned, otherwise NULL is returned. The caller is responsible for + * calling the rcu_read_[un]lock() functions. + * + */ +struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr, + const struct in6_addr *mask, + struct list_head *head) +{ + struct netlbl_af6list *iter; + + list_for_each_entry_rcu(iter, head, list) + if (iter->valid && + ipv6_addr_equal(&iter->addr, addr) && + ipv6_addr_equal(&iter->mask, mask)) + return iter; + + return NULL; +} +#endif /* IPv6 */ + +/** + * netlbl_af4list_add - Add a new IPv4 address entry to a list + * @entry: address entry + * @head: the list head + * + * Description: + * Add a new address entry to the list pointed to by @head. On success zero is + * returned, otherwise a negative value is returned. The caller is responsible + * for calling the necessary locking functions. + * + */ +int netlbl_af4list_add(struct netlbl_af4list *entry, struct list_head *head) +{ + struct netlbl_af4list *iter; + + iter = netlbl_af4list_search(entry->addr, head); + if (iter != NULL && + iter->addr == entry->addr && iter->mask == entry->mask) + return -EEXIST; + + /* in order to speed up address searches through the list (the common + * case) we need to keep the list in order based on the size of the + * address mask such that the entry with the widest mask (smallest + * numerical value) appears first in the list */ + list_for_each_entry_rcu(iter, head, list) + if (iter->valid && + ntohl(entry->mask) > ntohl(iter->mask)) { + __list_add_rcu(&entry->list, + iter->list.prev, + &iter->list); + return 0; + } + list_add_tail_rcu(&entry->list, head); + return 0; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_af6list_add - Add a new IPv6 address entry to a list + * @entry: address entry + * @head: the list head + * + * Description: + * Add a new address entry to the list pointed to by @head. On success zero is + * returned, otherwise a negative value is returned. The caller is responsible + * for calling the necessary locking functions. + * + */ +int netlbl_af6list_add(struct netlbl_af6list *entry, struct list_head *head) +{ + struct netlbl_af6list *iter; + + iter = netlbl_af6list_search(&entry->addr, head); + if (iter != NULL && + ipv6_addr_equal(&iter->addr, &entry->addr) && + ipv6_addr_equal(&iter->mask, &entry->mask)) + return -EEXIST; + + /* in order to speed up address searches through the list (the common + * case) we need to keep the list in order based on the size of the + * address mask such that the entry with the widest mask (smallest + * numerical value) appears first in the list */ + list_for_each_entry_rcu(iter, head, list) + if (iter->valid && + ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) { + __list_add_rcu(&entry->list, + iter->list.prev, + &iter->list); + return 0; + } + list_add_tail_rcu(&entry->list, head); + return 0; +} +#endif /* IPv6 */ + +/** + * netlbl_af4list_remove_entry - Remove an IPv4 address entry + * @entry: address entry + * + * Description: + * Remove the specified IP address entry. The caller is responsible for + * calling the necessary locking functions. + * + */ +void netlbl_af4list_remove_entry(struct netlbl_af4list *entry) +{ + entry->valid = 0; + list_del_rcu(&entry->list); +} + +/** + * netlbl_af4list_remove - Remove an IPv4 address entry + * @addr: IP address + * @mask: IP address mask + * @head: the list head + * + * Description: + * Remove an IP address entry from the list pointed to by @head. Returns the + * entry on success, NULL on failure. The caller is responsible for calling + * the necessary locking functions. + * + */ +struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask, + struct list_head *head) +{ + struct netlbl_af4list *entry; + + entry = netlbl_af4list_search(addr, head); + if (entry != NULL && entry->addr == addr && entry->mask == mask) { + netlbl_af4list_remove_entry(entry); + return entry; + } + + return NULL; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_af6list_remove_entry - Remove an IPv6 address entry + * @entry: address entry + * + * Description: + * Remove the specified IP address entry. The caller is responsible for + * calling the necessary locking functions. + * + */ +void netlbl_af6list_remove_entry(struct netlbl_af6list *entry) +{ + entry->valid = 0; + list_del_rcu(&entry->list); +} + +/** + * netlbl_af6list_remove - Remove an IPv6 address entry + * @addr: IP address + * @mask: IP address mask + * @head: the list head + * + * Description: + * Remove an IP address entry from the list pointed to by @head. Returns the + * entry on success, NULL on failure. The caller is responsible for calling + * the necessary locking functions. + * + */ +struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr, + const struct in6_addr *mask, + struct list_head *head) +{ + struct netlbl_af6list *entry; + + entry = netlbl_af6list_search(addr, head); + if (entry != NULL && + ipv6_addr_equal(&entry->addr, addr) && + ipv6_addr_equal(&entry->mask, mask)) { + netlbl_af6list_remove_entry(entry); + return entry; + } + + return NULL; +} +#endif /* IPv6 */ + +/* + * Audit Helper Functions + */ + +#ifdef CONFIG_AUDIT +/** + * netlbl_af4list_audit_addr - Audit an IPv4 address + * @audit_buf: audit buffer + * @src: true if source address, false if destination + * @dev: network interface + * @addr: IP address + * @mask: IP address mask + * + * Description: + * Write the IPv4 address and address mask, if necessary, to @audit_buf. + * + */ +void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, + int src, const char *dev, + __be32 addr, __be32 mask) +{ + u32 mask_val = ntohl(mask); + char *dir = (src ? "src" : "dst"); + + if (dev != NULL) + audit_log_format(audit_buf, " netif=%s", dev); + audit_log_format(audit_buf, " %s=" NIPQUAD_FMT, dir, NIPQUAD(addr)); + if (mask_val != 0xffffffff) { + u32 mask_len = 0; + while (mask_val > 0) { + mask_val <<= 1; + mask_len++; + } + audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len); + } +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_af6list_audit_addr - Audit an IPv6 address + * @audit_buf: audit buffer + * @src: true if source address, false if destination + * @dev: network interface + * @addr: IP address + * @mask: IP address mask + * + * Description: + * Write the IPv6 address and address mask, if necessary, to @audit_buf. + * + */ +void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, + int src, + const char *dev, + const struct in6_addr *addr, + const struct in6_addr *mask) +{ + char *dir = (src ? "src" : "dst"); + + if (dev != NULL) + audit_log_format(audit_buf, " netif=%s", dev); + audit_log_format(audit_buf, " %s=" NIP6_FMT, dir, NIP6(*addr)); + if (ntohl(mask->s6_addr32[3]) != 0xffffffff) { + u32 mask_len = 0; + u32 mask_val; + int iter = -1; + while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff) + mask_len += 32; + mask_val = ntohl(mask->s6_addr32[iter]); + while (mask_val > 0) { + mask_val <<= 1; + mask_len++; + } + audit_log_format(audit_buf, " %s_prefixlen=%d", dir, mask_len); + } +} +#endif /* IPv6 */ +#endif /* CONFIG_AUDIT */ diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h new file mode 100644 index 000000000000..07ae7fd82be1 --- /dev/null +++ b/net/netlabel/netlabel_addrlist.h @@ -0,0 +1,211 @@ +/* + * NetLabel Network Address Lists + * + * This file contains network address list functions used to manage ordered + * lists of network addresses for use by the NetLabel subsystem. The NetLabel + * system manages static and dynamic label mappings for network protocols such + * as CIPSO and RIPSO. + * + * Author: Paul Moore <paul.moore@hp.com> + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _NETLABEL_ADDRLIST_H +#define _NETLABEL_ADDRLIST_H + +#include <linux/types.h> +#include <linux/rcupdate.h> +#include <linux/list.h> +#include <linux/in6.h> +#include <linux/audit.h> + +/** + * struct netlbl_af4list - NetLabel IPv4 address list + * @addr: IPv4 address + * @mask: IPv4 address mask + * @valid: valid flag + * @list: list structure, used internally + */ +struct netlbl_af4list { + __be32 addr; + __be32 mask; + + u32 valid; + struct list_head list; +}; + +/** + * struct netlbl_af6list - NetLabel IPv6 address list + * @addr: IPv6 address + * @mask: IPv6 address mask + * @valid: valid flag + * @list: list structure, used internally + */ +struct netlbl_af6list { + struct in6_addr addr; + struct in6_addr mask; + + u32 valid; + struct list_head list; +}; + +#define __af4list_entry(ptr) container_of(ptr, struct netlbl_af4list, list) + +static inline struct netlbl_af4list *__af4list_valid(struct list_head *s, + struct list_head *h) +{ + struct list_head *i = s; + struct netlbl_af4list *n = __af4list_entry(s); + while (i != h && !n->valid) { + i = i->next; + n = __af4list_entry(i); + } + return n; +} + +static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s, + struct list_head *h) +{ + struct list_head *i = s; + struct netlbl_af4list *n = __af4list_entry(s); + while (i != h && !n->valid) { + i = rcu_dereference(i->next); + n = __af4list_entry(i); + } + return n; +} + +#define netlbl_af4list_foreach(iter, head) \ + for (iter = __af4list_valid((head)->next, head); \ + prefetch(iter->list.next), &iter->list != (head); \ + iter = __af4list_valid(iter->list.next, head)) + +#define netlbl_af4list_foreach_rcu(iter, head) \ + for (iter = __af4list_valid_rcu((head)->next, head); \ + prefetch(iter->list.next), &iter->list != (head); \ + iter = __af4list_valid_rcu(iter->list.next, head)) + +#define netlbl_af4list_foreach_safe(iter, tmp, head) \ + for (iter = __af4list_valid((head)->next, head), \ + tmp = __af4list_valid(iter->list.next, head); \ + &iter->list != (head); \ + iter = tmp, tmp = __af4list_valid(iter->list.next, head)) + +int netlbl_af4list_add(struct netlbl_af4list *entry, + struct list_head *head); +struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask, + struct list_head *head); +void netlbl_af4list_remove_entry(struct netlbl_af4list *entry); +struct netlbl_af4list *netlbl_af4list_search(__be32 addr, + struct list_head *head); +struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr, + __be32 mask, + struct list_head *head); + +#ifdef CONFIG_AUDIT +void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, + int src, const char *dev, + __be32 addr, __be32 mask); +#else +static inline void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf, + int src, const char *dev, + __be32 addr, __be32 mask) +{ + return; +} +#endif + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + +#define __af6list_entry(ptr) container_of(ptr, struct netlbl_af6list, list) + +static inline struct netlbl_af6list *__af6list_valid(struct list_head *s, + struct list_head *h) +{ + struct list_head *i = s; + struct netlbl_af6list *n = __af6list_entry(s); + while (i != h && !n->valid) { + i = i->next; + n = __af6list_entry(i); + } + return n; +} + +static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s, + struct list_head *h) +{ + struct list_head *i = s; + struct netlbl_af6list *n = __af6list_entry(s); + while (i != h && !n->valid) { + i = rcu_dereference(i->next); + n = __af6list_entry(i); + } + return n; +} + +#define netlbl_af6list_foreach(iter, head) \ + for (iter = __af6list_valid((head)->next, head); \ + prefetch(iter->list.next), &iter->list != (head); \ + iter = __af6list_valid(iter->list.next, head)) + +#define netlbl_af6list_foreach_rcu(iter, head) \ + for (iter = __af6list_valid_rcu((head)->next, head); \ + prefetch(iter->list.next), &iter->list != (head); \ + iter = __af6list_valid_rcu(iter->list.next, head)) + +#define netlbl_af6list_foreach_safe(iter, tmp, head) \ + for (iter = __af6list_valid((head)->next, head), \ + tmp = __af6list_valid(iter->list.next, head); \ + &iter->list != (head); \ + iter = tmp, tmp = __af6list_valid(iter->list.next, head)) + +int netlbl_af6list_add(struct netlbl_af6list *entry, + struct list_head *head); +struct netlbl_af6list *netlbl_af6list_remove(const struct in6_addr *addr, + const struct in6_addr *mask, + struct list_head *head); +void netlbl_af6list_remove_entry(struct netlbl_af6list *entry); +struct netlbl_af6list *netlbl_af6list_search(const struct in6_addr *addr, + struct list_head *head); +struct netlbl_af6list *netlbl_af6list_search_exact(const struct in6_addr *addr, + const struct in6_addr *mask, + struct list_head *head); + +#ifdef CONFIG_AUDIT +void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, + int src, + const char *dev, + const struct in6_addr *addr, + const struct in6_addr *mask); +#else +static inline void netlbl_af6list_audit_addr(struct audit_buffer *audit_buf, + int src, + const char *dev, + const struct in6_addr *addr, + const struct in6_addr *mask) +{ + return; +} +#endif +#endif /* IPV6 */ + +#endif diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 0aec318bf0ef..fff32b70efa9 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -43,6 +43,7 @@ #include "netlabel_user.h" #include "netlabel_cipso_v4.h" #include "netlabel_mgmt.h" +#include "netlabel_domainhash.h" /* Argument struct for cipso_v4_doi_walk() */ struct netlbl_cipsov4_doiwalk_arg { @@ -51,6 +52,12 @@ struct netlbl_cipsov4_doiwalk_arg { u32 seq; }; +/* Argument struct for netlbl_domhsh_walk() */ +struct netlbl_domhsh_walk_arg { + struct netlbl_audit *audit_info; + u32 doi; +}; + /* NetLabel Generic NETLINK CIPSOv4 family */ static struct genl_family netlbl_cipsov4_gnl_family = { .id = GENL_ID_GENERATE, @@ -81,32 +88,6 @@ static const struct nla_policy netlbl_cipsov4_genl_policy[NLBL_CIPSOV4_A_MAX + 1 */ /** - * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition - * @entry: the entry's RCU field - * - * Description: - * This function is designed to be used as a callback to the call_rcu() - * function so that the memory allocated to the DOI definition can be released - * safely. - * - */ -void netlbl_cipsov4_doi_free(struct rcu_head *entry) -{ - struct cipso_v4_doi *ptr; - - ptr = container_of(entry, struct cipso_v4_doi, rcu); - switch (ptr->type) { - case CIPSO_V4_MAP_STD: - kfree(ptr->map.std->lvl.cipso); - kfree(ptr->map.std->lvl.local); - kfree(ptr->map.std->cat.cipso); - kfree(ptr->map.std->cat.local); - break; - } - kfree(ptr); -} - -/** * netlbl_cipsov4_add_common - Parse the common sections of a ADD message * @info: the Generic NETLINK info block * @doi_def: the CIPSO V4 DOI definition @@ -151,9 +132,9 @@ static int netlbl_cipsov4_add_common(struct genl_info *info, * @info: the Generic NETLINK info block * * Description: - * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message - * and add it to the CIPSO V4 engine. Return zero on success and non-zero on - * error. + * Create a new CIPSO_V4_MAP_TRANS DOI definition based on the given ADD + * message and add it to the CIPSO V4 engine. Return zero on success and + * non-zero on error. * */ static int netlbl_cipsov4_add_std(struct genl_info *info) @@ -183,7 +164,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) ret_val = -ENOMEM; goto add_std_failure; } - doi_def->type = CIPSO_V4_MAP_STD; + doi_def->type = CIPSO_V4_MAP_TRANS; ret_val = netlbl_cipsov4_add_common(info, doi_def); if (ret_val != 0) @@ -342,7 +323,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) add_std_failure: if (doi_def) - netlbl_cipsov4_doi_free(&doi_def->rcu); + cipso_v4_doi_free(doi_def); return ret_val; } @@ -379,7 +360,44 @@ static int netlbl_cipsov4_add_pass(struct genl_info *info) return 0; add_pass_failure: - netlbl_cipsov4_doi_free(&doi_def->rcu); + cipso_v4_doi_free(doi_def); + return ret_val; +} + +/** + * netlbl_cipsov4_add_local - Adds a CIPSO V4 DOI definition + * @info: the Generic NETLINK info block + * + * Description: + * Create a new CIPSO_V4_MAP_LOCAL DOI definition based on the given ADD + * message and add it to the CIPSO V4 engine. Return zero on success and + * non-zero on error. + * + */ +static int netlbl_cipsov4_add_local(struct genl_info *info) +{ + int ret_val; + struct cipso_v4_doi *doi_def = NULL; + + if (!info->attrs[NLBL_CIPSOV4_A_TAGLST]) + return -EINVAL; + + doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); + if (doi_def == NULL) + return -ENOMEM; + doi_def->type = CIPSO_V4_MAP_LOCAL; + + ret_val = netlbl_cipsov4_add_common(info, doi_def); + if (ret_val != 0) + goto add_local_failure; + + ret_val = cipso_v4_doi_add(doi_def); + if (ret_val != 0) + goto add_local_failure; + return 0; + +add_local_failure: + cipso_v4_doi_free(doi_def); return ret_val; } @@ -412,14 +430,18 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); switch (type) { - case CIPSO_V4_MAP_STD: - type_str = "std"; + case CIPSO_V4_MAP_TRANS: + type_str = "trans"; ret_val = netlbl_cipsov4_add_std(info); break; case CIPSO_V4_MAP_PASS: type_str = "pass"; ret_val = netlbl_cipsov4_add_pass(info); break; + case CIPSO_V4_MAP_LOCAL: + type_str = "local"; + ret_val = netlbl_cipsov4_add_local(info); + break; } if (ret_val == 0) atomic_inc(&netlabel_mgmt_protocount); @@ -491,7 +513,7 @@ list_start: doi_def = cipso_v4_doi_getdef(doi); if (doi_def == NULL) { ret_val = -EINVAL; - goto list_failure; + goto list_failure_lock; } ret_val = nla_put_u32(ans_skb, NLBL_CIPSOV4_A_MTYPE, doi_def->type); @@ -516,7 +538,7 @@ list_start: nla_nest_end(ans_skb, nla_a); switch (doi_def->type) { - case CIPSO_V4_MAP_STD: + case CIPSO_V4_MAP_TRANS: nla_a = nla_nest_start(ans_skb, NLBL_CIPSOV4_A_MLSLVLLST); if (nla_a == NULL) { ret_val = -ENOMEM; @@ -655,7 +677,7 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb, struct netlink_callback *cb) { struct netlbl_cipsov4_doiwalk_arg cb_arg; - int doi_skip = cb->args[0]; + u32 doi_skip = cb->args[0]; cb_arg.nl_cb = cb; cb_arg.skb = skb; @@ -668,6 +690,29 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb, } /** + * netlbl_cipsov4_remove_cb - netlbl_cipsov4_remove() callback for REMOVE + * @entry: LSM domain mapping entry + * @arg: the netlbl_domhsh_walk_arg structure + * + * Description: + * This function is intended for use by netlbl_cipsov4_remove() as the callback + * for the netlbl_domhsh_walk() function; it removes LSM domain map entries + * which are associated with the CIPSO DOI specified in @arg. Returns zero on + * success, negative values on failure. + * + */ +static int netlbl_cipsov4_remove_cb(struct netlbl_dom_map *entry, void *arg) +{ + struct netlbl_domhsh_walk_arg *cb_arg = arg; + + if (entry->type == NETLBL_NLTYPE_CIPSOV4 && + entry->type_def.cipsov4->doi == cb_arg->doi) + return netlbl_domhsh_remove_entry(entry, cb_arg->audit_info); + + return 0; +} + +/** * netlbl_cipsov4_remove - Handle a REMOVE message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block @@ -681,8 +726,11 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; u32 doi = 0; + struct netlbl_domhsh_walk_arg cb_arg; struct audit_buffer *audit_buf; struct netlbl_audit audit_info; + u32 skip_bkt = 0; + u32 skip_chain = 0; if (!info->attrs[NLBL_CIPSOV4_A_DOI]) return -EINVAL; @@ -690,11 +738,15 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); netlbl_netlink_auditinfo(skb, &audit_info); - ret_val = cipso_v4_doi_remove(doi, - &audit_info, - netlbl_cipsov4_doi_free); - if (ret_val == 0) - atomic_dec(&netlabel_mgmt_protocount); + cb_arg.doi = doi; + cb_arg.audit_info = &audit_info; + ret_val = netlbl_domhsh_walk(&skip_bkt, &skip_chain, + netlbl_cipsov4_remove_cb, &cb_arg); + if (ret_val == 0 || ret_val == -ENOENT) { + ret_val = cipso_v4_doi_remove(doi, &audit_info); + if (ret_val == 0) + atomic_dec(&netlabel_mgmt_protocount); + } audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, &audit_info); diff --git a/net/netlabel/netlabel_cipso_v4.h b/net/netlabel/netlabel_cipso_v4.h index 220cb9d06b49..c8a4079261f0 100644 --- a/net/netlabel/netlabel_cipso_v4.h +++ b/net/netlabel/netlabel_cipso_v4.h @@ -45,12 +45,13 @@ * NLBL_CIPSOV4_A_MTYPE * NLBL_CIPSOV4_A_TAGLST * - * If using CIPSO_V4_MAP_STD the following attributes are required: + * If using CIPSO_V4_MAP_TRANS the following attributes are required: * * NLBL_CIPSOV4_A_MLSLVLLST * NLBL_CIPSOV4_A_MLSCATLST * - * If using CIPSO_V4_MAP_PASS no additional attributes are required. + * If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes + * are required. * * o REMOVE: * Sent by an application to remove a specific DOI mapping table from the @@ -76,12 +77,13 @@ * NLBL_CIPSOV4_A_MTYPE * NLBL_CIPSOV4_A_TAGLST * - * If using CIPSO_V4_MAP_STD the following attributes are required: + * If using CIPSO_V4_MAP_TRANS the following attributes are required: * * NLBL_CIPSOV4_A_MLSLVLLST * NLBL_CIPSOV4_A_MLSCATLST * - * If using CIPSO_V4_MAP_PASS no additional attributes are required. + * If using CIPSO_V4_MAP_PASS or CIPSO_V4_MAP_LOCAL no additional attributes + * are required. * * o LISTALL: * This message is sent by an application to list the valid DOIs on the diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 643c032a3a57..5fadf10e5ddf 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -11,7 +11,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -40,6 +40,7 @@ #include <asm/bug.h> #include "netlabel_mgmt.h" +#include "netlabel_addrlist.h" #include "netlabel_domainhash.h" #include "netlabel_user.h" @@ -72,8 +73,28 @@ static struct netlbl_dom_map *netlbl_domhsh_def = NULL; static void netlbl_domhsh_free_entry(struct rcu_head *entry) { struct netlbl_dom_map *ptr; + struct netlbl_af4list *iter4; + struct netlbl_af4list *tmp4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct netlbl_af6list *iter6; + struct netlbl_af6list *tmp6; +#endif /* IPv6 */ ptr = container_of(entry, struct netlbl_dom_map, rcu); + if (ptr->type == NETLBL_NLTYPE_ADDRSELECT) { + netlbl_af4list_foreach_safe(iter4, tmp4, + &ptr->type_def.addrsel->list4) { + netlbl_af4list_remove_entry(iter4); + kfree(netlbl_domhsh_addr4_entry(iter4)); + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_safe(iter6, tmp6, + &ptr->type_def.addrsel->list6) { + netlbl_af6list_remove_entry(iter6); + kfree(netlbl_domhsh_addr6_entry(iter6)); + } +#endif /* IPv6 */ + } kfree(ptr->domain); kfree(ptr); } @@ -115,13 +136,13 @@ static u32 netlbl_domhsh_hash(const char *key) static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain) { u32 bkt; + struct list_head *bkt_list; struct netlbl_dom_map *iter; if (domain != NULL) { bkt = netlbl_domhsh_hash(domain); - list_for_each_entry_rcu(iter, - &rcu_dereference(netlbl_domhsh)->tbl[bkt], - list) + bkt_list = &rcu_dereference(netlbl_domhsh)->tbl[bkt]; + list_for_each_entry_rcu(iter, bkt_list, list) if (iter->valid && strcmp(iter->domain, domain) == 0) return iter; } @@ -156,6 +177,69 @@ static struct netlbl_dom_map *netlbl_domhsh_search_def(const char *domain) return entry; } +/** + * netlbl_domhsh_audit_add - Generate an audit entry for an add event + * @entry: the entry being added + * @addr4: the IPv4 address information + * @addr6: the IPv6 address information + * @result: the result code + * @audit_info: NetLabel audit information + * + * Description: + * Generate an audit record for adding a new NetLabel/LSM mapping entry with + * the given information. Caller is responsibile for holding the necessary + * locks. + * + */ +static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry, + struct netlbl_af4list *addr4, + struct netlbl_af6list *addr6, + int result, + struct netlbl_audit *audit_info) +{ + struct audit_buffer *audit_buf; + struct cipso_v4_doi *cipsov4 = NULL; + u32 type; + + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); + if (audit_buf != NULL) { + audit_log_format(audit_buf, " nlbl_domain=%s", + entry->domain ? entry->domain : "(default)"); + if (addr4 != NULL) { + struct netlbl_domaddr4_map *map4; + map4 = netlbl_domhsh_addr4_entry(addr4); + type = map4->type; + cipsov4 = map4->type_def.cipsov4; + netlbl_af4list_audit_addr(audit_buf, 0, NULL, + addr4->addr, addr4->mask); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (addr6 != NULL) { + struct netlbl_domaddr6_map *map6; + map6 = netlbl_domhsh_addr6_entry(addr6); + type = map6->type; + netlbl_af6list_audit_addr(audit_buf, 0, NULL, + &addr6->addr, &addr6->mask); +#endif /* IPv6 */ + } else { + type = entry->type; + cipsov4 = entry->type_def.cipsov4; + } + switch (type) { + case NETLBL_NLTYPE_UNLABELED: + audit_log_format(audit_buf, " nlbl_protocol=unlbl"); + break; + case NETLBL_NLTYPE_CIPSOV4: + BUG_ON(cipsov4 == NULL); + audit_log_format(audit_buf, + " nlbl_protocol=cipsov4 cipso_doi=%u", + cipsov4->doi); + break; + } + audit_log_format(audit_buf, " res=%u", result == 0 ? 1 : 0); + audit_log_end(audit_buf); + } +} + /* * Domain Hash Table Functions */ @@ -213,74 +297,106 @@ int __init netlbl_domhsh_init(u32 size) int netlbl_domhsh_add(struct netlbl_dom_map *entry, struct netlbl_audit *audit_info) { - int ret_val; - u32 bkt; - struct audit_buffer *audit_buf; - - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - ret_val = 0; - break; - case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4, - entry->domain); - break; - default: - return -EINVAL; - } - if (ret_val != 0) - return ret_val; - - entry->valid = 1; - INIT_RCU_HEAD(&entry->rcu); + int ret_val = 0; + struct netlbl_dom_map *entry_old; + struct netlbl_af4list *iter4; + struct netlbl_af4list *tmp4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct netlbl_af6list *iter6; + struct netlbl_af6list *tmp6; +#endif /* IPv6 */ rcu_read_lock(); + spin_lock(&netlbl_domhsh_lock); - if (entry->domain != NULL) { - bkt = netlbl_domhsh_hash(entry->domain); - if (netlbl_domhsh_search(entry->domain) == NULL) + if (entry->domain != NULL) + entry_old = netlbl_domhsh_search(entry->domain); + else + entry_old = netlbl_domhsh_search_def(entry->domain); + if (entry_old == NULL) { + entry->valid = 1; + INIT_RCU_HEAD(&entry->rcu); + + if (entry->domain != NULL) { + u32 bkt = netlbl_domhsh_hash(entry->domain); list_add_tail_rcu(&entry->list, &rcu_dereference(netlbl_domhsh)->tbl[bkt]); - else - ret_val = -EEXIST; - } else { - INIT_LIST_HEAD(&entry->list); - if (rcu_dereference(netlbl_domhsh_def) == NULL) + } else { + INIT_LIST_HEAD(&entry->list); rcu_assign_pointer(netlbl_domhsh_def, entry); - else - ret_val = -EEXIST; - } - spin_unlock(&netlbl_domhsh_lock); - audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); - if (audit_buf != NULL) { - audit_log_format(audit_buf, - " nlbl_domain=%s", - entry->domain ? entry->domain : "(default)"); - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - audit_log_format(audit_buf, " nlbl_protocol=unlbl"); - break; - case NETLBL_NLTYPE_CIPSOV4: - audit_log_format(audit_buf, - " nlbl_protocol=cipsov4 cipso_doi=%u", - entry->type_def.cipsov4->doi); - break; } - audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); - audit_log_end(audit_buf); - } - rcu_read_unlock(); - if (ret_val != 0) { - switch (entry->type) { - case NETLBL_NLTYPE_CIPSOV4: - if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, - entry->domain) != 0) - BUG(); - break; + if (entry->type == NETLBL_NLTYPE_ADDRSELECT) { + netlbl_af4list_foreach_rcu(iter4, + &entry->type_def.addrsel->list4) + netlbl_domhsh_audit_add(entry, iter4, NULL, + ret_val, audit_info); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_rcu(iter6, + &entry->type_def.addrsel->list6) + netlbl_domhsh_audit_add(entry, NULL, iter6, + ret_val, audit_info); +#endif /* IPv6 */ + } else + netlbl_domhsh_audit_add(entry, NULL, NULL, + ret_val, audit_info); + } else if (entry_old->type == NETLBL_NLTYPE_ADDRSELECT && + entry->type == NETLBL_NLTYPE_ADDRSELECT) { + struct list_head *old_list4; + struct list_head *old_list6; + + old_list4 = &entry_old->type_def.addrsel->list4; + old_list6 = &entry_old->type_def.addrsel->list6; + + /* we only allow the addition of address selectors if all of + * the selectors do not exist in the existing domain map */ + netlbl_af4list_foreach_rcu(iter4, + &entry->type_def.addrsel->list4) + if (netlbl_af4list_search_exact(iter4->addr, + iter4->mask, + old_list4)) { + ret_val = -EEXIST; + goto add_return; + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_rcu(iter6, + &entry->type_def.addrsel->list6) + if (netlbl_af6list_search_exact(&iter6->addr, + &iter6->mask, + old_list6)) { + ret_val = -EEXIST; + goto add_return; + } +#endif /* IPv6 */ + + netlbl_af4list_foreach_safe(iter4, tmp4, + &entry->type_def.addrsel->list4) { + netlbl_af4list_remove_entry(iter4); + iter4->valid = 1; + ret_val = netlbl_af4list_add(iter4, old_list4); + netlbl_domhsh_audit_add(entry_old, iter4, NULL, + ret_val, audit_info); + if (ret_val != 0) + goto add_return; } - } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_safe(iter6, tmp6, + &entry->type_def.addrsel->list6) { + netlbl_af6list_remove_entry(iter6); + iter6->valid = 1; + ret_val = netlbl_af6list_add(iter6, old_list6); + netlbl_domhsh_audit_add(entry_old, NULL, iter6, + ret_val, audit_info); + if (ret_val != 0) + goto add_return; + } +#endif /* IPv6 */ + } else + ret_val = -EINVAL; +add_return: + spin_unlock(&netlbl_domhsh_lock); + rcu_read_unlock(); return ret_val; } @@ -302,35 +418,26 @@ int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, } /** - * netlbl_domhsh_remove - Removes an entry from the domain hash table - * @domain: the domain to remove + * netlbl_domhsh_remove_entry - Removes a given entry from the domain table + * @entry: the entry to remove * @audit_info: NetLabel audit information * * Description: * Removes an entry from the domain hash table and handles any updates to the - * lower level protocol handler (i.e. CIPSO). Returns zero on success, - * negative on failure. + * lower level protocol handler (i.e. CIPSO). Caller is responsible for + * ensuring that the RCU read lock is held. Returns zero on success, negative + * on failure. * */ -int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) +int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info) { - int ret_val = -ENOENT; - struct netlbl_dom_map *entry; + int ret_val = 0; struct audit_buffer *audit_buf; - rcu_read_lock(); - if (domain) - entry = netlbl_domhsh_search(domain); - else - entry = netlbl_domhsh_search_def(domain); if (entry == NULL) - goto remove_return; - switch (entry->type) { - case NETLBL_NLTYPE_CIPSOV4: - cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, - entry->domain); - break; - } + return -ENOENT; + spin_lock(&netlbl_domhsh_lock); if (entry->valid) { entry->valid = 0; @@ -338,8 +445,8 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) list_del_rcu(&entry->list); else rcu_assign_pointer(netlbl_domhsh_def, NULL); - ret_val = 0; - } + } else + ret_val = -ENOENT; spin_unlock(&netlbl_domhsh_lock); audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info); @@ -351,10 +458,54 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) audit_log_end(audit_buf); } -remove_return: - rcu_read_unlock(); - if (ret_val == 0) + if (ret_val == 0) { + struct netlbl_af4list *iter4; + struct netlbl_domaddr4_map *map4; + + switch (entry->type) { + case NETLBL_NLTYPE_ADDRSELECT: + netlbl_af4list_foreach_rcu(iter4, + &entry->type_def.addrsel->list4) { + map4 = netlbl_domhsh_addr4_entry(iter4); + cipso_v4_doi_putdef(map4->type_def.cipsov4); + } + /* no need to check the IPv6 list since we currently + * support only unlabeled protocols for IPv6 */ + break; + case NETLBL_NLTYPE_CIPSOV4: + cipso_v4_doi_putdef(entry->type_def.cipsov4); + break; + } call_rcu(&entry->rcu, netlbl_domhsh_free_entry); + } + + return ret_val; +} + +/** + * netlbl_domhsh_remove - Removes an entry from the domain hash table + * @domain: the domain to remove + * @audit_info: NetLabel audit information + * + * Description: + * Removes an entry from the domain hash table and handles any updates to the + * lower level protocol handler (i.e. CIPSO). Returns zero on success, + * negative on failure. + * + */ +int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) +{ + int ret_val; + struct netlbl_dom_map *entry; + + rcu_read_lock(); + if (domain) + entry = netlbl_domhsh_search(domain); + else + entry = netlbl_domhsh_search_def(domain); + ret_val = netlbl_domhsh_remove_entry(entry, audit_info); + rcu_read_unlock(); + return ret_val; } @@ -389,6 +540,70 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) } /** + * netlbl_domhsh_getentry_af4 - Get an entry from the domain hash table + * @domain: the domain name to search for + * @addr: the IP address to search for + * + * Description: + * Look through the domain hash table searching for an entry to match @domain + * and @addr, return a pointer to a copy of the entry or NULL. The caller is + * responsible for ensuring that rcu_read_[un]lock() is called. + * + */ +struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, + __be32 addr) +{ + struct netlbl_dom_map *dom_iter; + struct netlbl_af4list *addr_iter; + + dom_iter = netlbl_domhsh_search_def(domain); + if (dom_iter == NULL) + return NULL; + if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) + return NULL; + + addr_iter = netlbl_af4list_search(addr, + &dom_iter->type_def.addrsel->list4); + if (addr_iter == NULL) + return NULL; + + return netlbl_domhsh_addr4_entry(addr_iter); +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +/** + * netlbl_domhsh_getentry_af6 - Get an entry from the domain hash table + * @domain: the domain name to search for + * @addr: the IP address to search for + * + * Description: + * Look through the domain hash table searching for an entry to match @domain + * and @addr, return a pointer to a copy of the entry or NULL. The caller is + * responsible for ensuring that rcu_read_[un]lock() is called. + * + */ +struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, + const struct in6_addr *addr) +{ + struct netlbl_dom_map *dom_iter; + struct netlbl_af6list *addr_iter; + + dom_iter = netlbl_domhsh_search_def(domain); + if (dom_iter == NULL) + return NULL; + if (dom_iter->type != NETLBL_NLTYPE_ADDRSELECT) + return NULL; + + addr_iter = netlbl_af6list_search(addr, + &dom_iter->type_def.addrsel->list6); + if (addr_iter == NULL) + return NULL; + + return netlbl_domhsh_addr6_entry(addr_iter); +} +#endif /* IPv6 */ + +/** * netlbl_domhsh_walk - Iterate through the domain mapping hash table * @skip_bkt: the number of buckets to skip at the start * @skip_chain: the number of entries to skip in the first iterated bucket @@ -410,6 +625,7 @@ int netlbl_domhsh_walk(u32 *skip_bkt, { int ret_val = -ENOENT; u32 iter_bkt; + struct list_head *iter_list; struct netlbl_dom_map *iter_entry; u32 chain_cnt = 0; @@ -417,9 +633,8 @@ int netlbl_domhsh_walk(u32 *skip_bkt, for (iter_bkt = *skip_bkt; iter_bkt < rcu_dereference(netlbl_domhsh)->size; iter_bkt++, chain_cnt = 0) { - list_for_each_entry_rcu(iter_entry, - &rcu_dereference(netlbl_domhsh)->tbl[iter_bkt], - list) + iter_list = &rcu_dereference(netlbl_domhsh)->tbl[iter_bkt]; + list_for_each_entry_rcu(iter_entry, iter_list, list) if (iter_entry->valid) { if (chain_cnt++ < *skip_chain) continue; diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 8220990ceb96..bfcb6763a1a1 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -11,7 +11,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -36,16 +36,43 @@ #include <linux/rcupdate.h> #include <linux/list.h> +#include "netlabel_addrlist.h" + /* Domain hash table size */ /* XXX - currently this number is an uneducated guess */ #define NETLBL_DOMHSH_BITSIZE 7 -/* Domain mapping definition struct */ +/* Domain mapping definition structures */ +#define netlbl_domhsh_addr4_entry(iter) \ + container_of(iter, struct netlbl_domaddr4_map, list) +struct netlbl_domaddr4_map { + u32 type; + union { + struct cipso_v4_doi *cipsov4; + } type_def; + + struct netlbl_af4list list; +}; +#define netlbl_domhsh_addr6_entry(iter) \ + container_of(iter, struct netlbl_domaddr6_map, list) +struct netlbl_domaddr6_map { + u32 type; + + /* NOTE: no 'type_def' union needed at present since we don't currently + * support any IPv6 labeling protocols */ + + struct netlbl_af6list list; +}; +struct netlbl_domaddr_map { + struct list_head list4; + struct list_head list6; +}; struct netlbl_dom_map { char *domain; u32 type; union { struct cipso_v4_doi *cipsov4; + struct netlbl_domaddr_map *addrsel; } type_def; u32 valid; @@ -61,12 +88,21 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, struct netlbl_audit *audit_info); int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, struct netlbl_audit *audit_info); +int netlbl_domhsh_remove_entry(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info); int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); +struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain, + __be32 addr); int netlbl_domhsh_walk(u32 *skip_bkt, u32 *skip_chain, int (*callback) (struct netlbl_dom_map *entry, void *arg), void *cb_arg); +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain, + const struct in6_addr *addr); +#endif /* IPv6 */ + #endif diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 39793a1a93aa..b32eceb3ab0d 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -10,7 +10,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -82,7 +82,7 @@ int netlbl_cfg_unlbl_add_map(const char *domain, entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) - goto cfg_unlbl_add_map_failure; + return -ENOMEM; if (domain != NULL) { entry->domain = kstrdup(domain, GFP_ATOMIC); if (entry->domain == NULL) @@ -104,49 +104,6 @@ cfg_unlbl_add_map_failure: } /** - * netlbl_cfg_cipsov4_add - Add a new CIPSOv4 DOI definition - * @doi_def: the DOI definition - * @audit_info: NetLabel audit information - * - * Description: - * Add a new CIPSOv4 DOI definition to the NetLabel subsystem. Returns zero on - * success, negative values on failure. - * - */ -int netlbl_cfg_cipsov4_add(struct cipso_v4_doi *doi_def, - struct netlbl_audit *audit_info) -{ - int ret_val; - const char *type_str; - struct audit_buffer *audit_buf; - - ret_val = cipso_v4_doi_add(doi_def); - - audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, - audit_info); - if (audit_buf != NULL) { - switch (doi_def->type) { - case CIPSO_V4_MAP_STD: - type_str = "std"; - break; - case CIPSO_V4_MAP_PASS: - type_str = "pass"; - break; - default: - type_str = "(unknown)"; - } - audit_log_format(audit_buf, - " cipso_doi=%u cipso_type=%s res=%u", - doi_def->doi, - type_str, - ret_val == 0 ? 1 : 0); - audit_log_end(audit_buf); - } - - return ret_val; -} - -/** * netlbl_cfg_cipsov4_add_map - Add a new CIPSOv4 DOI definition and mapping * @doi_def: the DOI definition * @domain: the domain mapping to add @@ -164,58 +121,71 @@ int netlbl_cfg_cipsov4_add_map(struct cipso_v4_doi *doi_def, struct netlbl_audit *audit_info) { int ret_val = -ENOMEM; + u32 doi; + u32 doi_type; struct netlbl_dom_map *entry; + const char *type_str; + struct audit_buffer *audit_buf; + + doi = doi_def->doi; + doi_type = doi_def->type; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) - goto cfg_cipsov4_add_map_failure; + return -ENOMEM; if (domain != NULL) { entry->domain = kstrdup(domain, GFP_ATOMIC); if (entry->domain == NULL) goto cfg_cipsov4_add_map_failure; } - entry->type = NETLBL_NLTYPE_CIPSOV4; - entry->type_def.cipsov4 = doi_def; - - /* Grab a RCU read lock here so nothing happens to the doi_def variable - * between adding it to the CIPSOv4 protocol engine and adding a - * domain mapping for it. */ - rcu_read_lock(); - ret_val = netlbl_cfg_cipsov4_add(doi_def, audit_info); + ret_val = cipso_v4_doi_add(doi_def); if (ret_val != 0) - goto cfg_cipsov4_add_map_failure_unlock; + goto cfg_cipsov4_add_map_failure_remove_doi; + entry->type = NETLBL_NLTYPE_CIPSOV4; + entry->type_def.cipsov4 = cipso_v4_doi_getdef(doi); + if (entry->type_def.cipsov4 == NULL) { + ret_val = -ENOENT; + goto cfg_cipsov4_add_map_failure_remove_doi; + } ret_val = netlbl_domhsh_add(entry, audit_info); if (ret_val != 0) - goto cfg_cipsov4_add_map_failure_remove_doi; - rcu_read_unlock(); + goto cfg_cipsov4_add_map_failure_release_doi; - return 0; +cfg_cipsov4_add_map_return: + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, + audit_info); + if (audit_buf != NULL) { + switch (doi_type) { + case CIPSO_V4_MAP_TRANS: + type_str = "trans"; + break; + case CIPSO_V4_MAP_PASS: + type_str = "pass"; + break; + case CIPSO_V4_MAP_LOCAL: + type_str = "local"; + break; + default: + type_str = "(unknown)"; + } + audit_log_format(audit_buf, + " cipso_doi=%u cipso_type=%s res=%u", + doi, type_str, ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + } + return ret_val; + +cfg_cipsov4_add_map_failure_release_doi: + cipso_v4_doi_putdef(doi_def); cfg_cipsov4_add_map_failure_remove_doi: - cipso_v4_doi_remove(doi_def->doi, audit_info, netlbl_cipsov4_doi_free); -cfg_cipsov4_add_map_failure_unlock: - rcu_read_unlock(); + cipso_v4_doi_remove(doi, audit_info); cfg_cipsov4_add_map_failure: if (entry != NULL) kfree(entry->domain); kfree(entry); - return ret_val; -} - -/** - * netlbl_cfg_cipsov4_del - Removean existing CIPSOv4 DOI definition - * @doi: the CIPSO DOI value - * @audit_info: NetLabel audit information - * - * Description: - * Removes an existing CIPSOv4 DOI definition from the NetLabel subsystem. - * Returns zero on success, negative values on failure. - * - */ -int netlbl_cfg_cipsov4_del(u32 doi, struct netlbl_audit *audit_info) -{ - return cipso_v4_doi_remove(doi, audit_info, netlbl_cipsov4_doi_free); + goto cfg_cipsov4_add_map_return; } /* @@ -452,7 +422,9 @@ int netlbl_enabled(void) * Attach the correct label to the given socket using the security attributes * specified in @secattr. This function requires exclusive access to @sk, * which means it either needs to be in the process of being created or locked. - * Returns zero on success, negative values on failure. + * Returns zero on success, -EDESTADDRREQ if the domain is configured to use + * network address selectors (can't blindly label the socket), and negative + * values on all other failures. * */ int netlbl_sock_setattr(struct sock *sk, @@ -466,6 +438,9 @@ int netlbl_sock_setattr(struct sock *sk, if (dom_entry == NULL) goto socket_setattr_return; switch (dom_entry->type) { + case NETLBL_NLTYPE_ADDRSELECT: + ret_val = -EDESTADDRREQ; + break; case NETLBL_NLTYPE_CIPSOV4: ret_val = cipso_v4_sock_setattr(sk, dom_entry->type_def.cipsov4, @@ -484,6 +459,20 @@ socket_setattr_return: } /** + * netlbl_sock_delattr - Delete all the NetLabel labels on a socket + * @sk: the socket + * + * Description: + * Remove all the NetLabel labeling from @sk. The caller is responsible for + * ensuring that @sk is locked. + * + */ +void netlbl_sock_delattr(struct sock *sk) +{ + cipso_v4_sock_delattr(sk); +} + +/** * netlbl_sock_getattr - Determine the security attributes of a sock * @sk: the sock * @secattr: the security attributes @@ -501,6 +490,128 @@ int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) } /** + * netlbl_conn_setattr - Label a connected socket using the correct protocol + * @sk: the socket to label + * @addr: the destination address + * @secattr: the security attributes + * + * Description: + * Attach the correct label to the given connected socket using the security + * attributes specified in @secattr. The caller is responsible for ensuring + * that @sk is locked. Returns zero on success, negative values on failure. + * + */ +int netlbl_conn_setattr(struct sock *sk, + struct sockaddr *addr, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct sockaddr_in *addr4; + struct netlbl_domaddr4_map *af4_entry; + + rcu_read_lock(); + switch (addr->sa_family) { + case AF_INET: + addr4 = (struct sockaddr_in *)addr; + af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, + addr4->sin_addr.s_addr); + if (af4_entry == NULL) { + ret_val = -ENOENT; + goto conn_setattr_return; + } + switch (af4_entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_sock_setattr(sk, + af4_entry->type_def.cipsov4, + secattr); + break; + case NETLBL_NLTYPE_UNLABELED: + /* just delete the protocols we support for right now + * but we could remove other protocols if needed */ + cipso_v4_sock_delattr(sk); + ret_val = 0; + break; + default: + ret_val = -ENOENT; + } + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + /* since we don't support any IPv6 labeling protocols right + * now we can optimize everything away until we do */ + ret_val = 0; + break; +#endif /* IPv6 */ + default: + ret_val = 0; + } + +conn_setattr_return: + rcu_read_unlock(); + return ret_val; +} + +/** + * netlbl_skbuff_setattr - Label a packet using the correct protocol + * @skb: the packet + * @family: protocol family + * @secattr: the security attributes + * + * Description: + * Attach the correct label to the given packet using the security attributes + * specified in @secattr. Returns zero on success, negative values on failure. + * + */ +int netlbl_skbuff_setattr(struct sk_buff *skb, + u16 family, + const struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + struct iphdr *hdr4; + struct netlbl_domaddr4_map *af4_entry; + + rcu_read_lock(); + switch (family) { + case AF_INET: + hdr4 = ip_hdr(skb); + af4_entry = netlbl_domhsh_getentry_af4(secattr->domain, + hdr4->daddr); + if (af4_entry == NULL) { + ret_val = -ENOENT; + goto skbuff_setattr_return; + } + switch (af4_entry->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = cipso_v4_skbuff_setattr(skb, + af4_entry->type_def.cipsov4, + secattr); + break; + case NETLBL_NLTYPE_UNLABELED: + /* just delete the protocols we support for right now + * but we could remove other protocols if needed */ + ret_val = cipso_v4_skbuff_delattr(skb); + break; + default: + ret_val = -ENOENT; + } + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + /* since we don't support any IPv6 labeling protocols right + * now we can optimize everything away until we do */ + ret_val = 0; + break; +#endif /* IPv6 */ + default: + ret_val = 0; + } + +skbuff_setattr_return: + rcu_read_unlock(); + return ret_val; +} + +/** * netlbl_skbuff_getattr - Determine the security attributes of a packet * @skb: the packet * @family: protocol family @@ -528,6 +639,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, * netlbl_skbuff_err - Handle a LSM error on a sk_buff * @skb: the packet * @error: the error code + * @gateway: true if host is acting as a gateway, false otherwise * * Description: * Deal with a LSM problem when handling the packet in @skb, typically this is @@ -535,10 +647,10 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb, * according to the packet's labeling protocol. * */ -void netlbl_skbuff_err(struct sk_buff *skb, int error) +void netlbl_skbuff_err(struct sk_buff *skb, int error, int gateway) { if (CIPSO_V4_OPTEXIST(skb)) - cipso_v4_error(skb, error, 0); + cipso_v4_error(skb, error, gateway); } /** diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 44be5d5261f4..0a0ef17b2a40 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -10,7 +10,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006, 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -32,9 +32,13 @@ #include <linux/socket.h> #include <linux/string.h> #include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/in6.h> #include <net/sock.h> #include <net/netlink.h> #include <net/genetlink.h> +#include <net/ip.h> +#include <net/ipv6.h> #include <net/netlabel.h> #include <net/cipso_ipv4.h> #include <asm/atomic.h> @@ -71,86 +75,337 @@ static const struct nla_policy netlbl_mgmt_genl_policy[NLBL_MGMT_A_MAX + 1] = { }; /* - * NetLabel Command Handlers + * Helper Functions */ /** * netlbl_mgmt_add - Handle an ADD message - * @skb: the NETLINK buffer * @info: the Generic NETLINK info block + * @audit_info: NetLabel audit information * * Description: - * Process a user generated ADD message and add the domains from the message - * to the hash table. See netlabel.h for a description of the message format. - * Returns zero on success, negative values on failure. + * Helper function for the ADD and ADDDEF messages to add the domain mappings + * from the message to the hash table. See netlabel.h for a description of the + * message format. Returns zero on success, negative values on failure. * */ -static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) +static int netlbl_mgmt_add_common(struct genl_info *info, + struct netlbl_audit *audit_info) { int ret_val = -EINVAL; struct netlbl_dom_map *entry = NULL; - size_t tmp_size; + struct netlbl_domaddr_map *addrmap = NULL; + struct cipso_v4_doi *cipsov4 = NULL; u32 tmp_val; - struct netlbl_audit audit_info; - - if (!info->attrs[NLBL_MGMT_A_DOMAIN] || - !info->attrs[NLBL_MGMT_A_PROTOCOL]) - goto add_failure; - - netlbl_netlink_auditinfo(skb, &audit_info); entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) { ret_val = -ENOMEM; goto add_failure; } - tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); - entry->domain = kmalloc(tmp_size, GFP_KERNEL); - if (entry->domain == NULL) { - ret_val = -ENOMEM; - goto add_failure; - } entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); - nla_strlcpy(entry->domain, info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); + if (info->attrs[NLBL_MGMT_A_DOMAIN]) { + size_t tmp_size = nla_len(info->attrs[NLBL_MGMT_A_DOMAIN]); + entry->domain = kmalloc(tmp_size, GFP_KERNEL); + if (entry->domain == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + nla_strlcpy(entry->domain, + info->attrs[NLBL_MGMT_A_DOMAIN], tmp_size); + } + + /* NOTE: internally we allow/use a entry->type value of + * NETLBL_NLTYPE_ADDRSELECT but we don't currently allow users + * to pass that as a protocol value because we need to know the + * "real" protocol */ switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add(entry, &audit_info); break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) goto add_failure; tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); - /* We should be holding a rcu_read_lock() here while we hold - * the result but since the entry will always be deleted when - * the CIPSO DOI is deleted we aren't going to keep the - * lock. */ - rcu_read_lock(); - entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); - if (entry->type_def.cipsov4 == NULL) { - rcu_read_unlock(); + cipsov4 = cipso_v4_doi_getdef(tmp_val); + if (cipsov4 == NULL) goto add_failure; - } - ret_val = netlbl_domhsh_add(entry, &audit_info); - rcu_read_unlock(); + entry->type_def.cipsov4 = cipsov4; break; default: goto add_failure; } + + if (info->attrs[NLBL_MGMT_A_IPV4ADDR]) { + struct in_addr *addr; + struct in_addr *mask; + struct netlbl_domaddr4_map *map; + + addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); + if (addrmap == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + INIT_LIST_HEAD(&addrmap->list4); + INIT_LIST_HEAD(&addrmap->list6); + + if (nla_len(info->attrs[NLBL_MGMT_A_IPV4ADDR]) != + sizeof(struct in_addr)) { + ret_val = -EINVAL; + goto add_failure; + } + if (nla_len(info->attrs[NLBL_MGMT_A_IPV4MASK]) != + sizeof(struct in_addr)) { + ret_val = -EINVAL; + goto add_failure; + } + addr = nla_data(info->attrs[NLBL_MGMT_A_IPV4ADDR]); + mask = nla_data(info->attrs[NLBL_MGMT_A_IPV4MASK]); + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (map == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + map->list.addr = addr->s_addr & mask->s_addr; + map->list.mask = mask->s_addr; + map->list.valid = 1; + map->type = entry->type; + if (cipsov4) + map->type_def.cipsov4 = cipsov4; + + ret_val = netlbl_af4list_add(&map->list, &addrmap->list4); + if (ret_val != 0) { + kfree(map); + goto add_failure; + } + + entry->type = NETLBL_NLTYPE_ADDRSELECT; + entry->type_def.addrsel = addrmap; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + } else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) { + struct in6_addr *addr; + struct in6_addr *mask; + struct netlbl_domaddr6_map *map; + + addrmap = kzalloc(sizeof(*addrmap), GFP_KERNEL); + if (addrmap == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + INIT_LIST_HEAD(&addrmap->list4); + INIT_LIST_HEAD(&addrmap->list6); + + if (nla_len(info->attrs[NLBL_MGMT_A_IPV6ADDR]) != + sizeof(struct in6_addr)) { + ret_val = -EINVAL; + goto add_failure; + } + if (nla_len(info->attrs[NLBL_MGMT_A_IPV6MASK]) != + sizeof(struct in6_addr)) { + ret_val = -EINVAL; + goto add_failure; + } + addr = nla_data(info->attrs[NLBL_MGMT_A_IPV6ADDR]); + mask = nla_data(info->attrs[NLBL_MGMT_A_IPV6MASK]); + + map = kzalloc(sizeof(*map), GFP_KERNEL); + if (map == NULL) { + ret_val = -ENOMEM; + goto add_failure; + } + ipv6_addr_copy(&map->list.addr, addr); + map->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; + map->list.addr.s6_addr32[1] &= mask->s6_addr32[1]; + map->list.addr.s6_addr32[2] &= mask->s6_addr32[2]; + map->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; + ipv6_addr_copy(&map->list.mask, mask); + map->list.valid = 1; + map->type = entry->type; + + ret_val = netlbl_af6list_add(&map->list, &addrmap->list6); + if (ret_val != 0) { + kfree(map); + goto add_failure; + } + + entry->type = NETLBL_NLTYPE_ADDRSELECT; + entry->type_def.addrsel = addrmap; +#endif /* IPv6 */ + } + + ret_val = netlbl_domhsh_add(entry, audit_info); if (ret_val != 0) goto add_failure; return 0; add_failure: + if (cipsov4) + cipso_v4_doi_putdef(cipsov4); if (entry) kfree(entry->domain); + kfree(addrmap); kfree(entry); return ret_val; } /** + * netlbl_mgmt_listentry - List a NetLabel/LSM domain map entry + * @skb: the NETLINK buffer + * @entry: the map entry + * + * Description: + * This function is a helper function used by the LISTALL and LISTDEF command + * handlers. The caller is responsibile for ensuring that the RCU read lock + * is held. Returns zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_listentry(struct sk_buff *skb, + struct netlbl_dom_map *entry) +{ + int ret_val = 0; + struct nlattr *nla_a; + struct nlattr *nla_b; + struct netlbl_af4list *iter4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct netlbl_af6list *iter6; +#endif + + if (entry->domain != NULL) { + ret_val = nla_put_string(skb, + NLBL_MGMT_A_DOMAIN, entry->domain); + if (ret_val != 0) + return ret_val; + } + + switch (entry->type) { + case NETLBL_NLTYPE_ADDRSELECT: + nla_a = nla_nest_start(skb, NLBL_MGMT_A_SELECTORLIST); + if (nla_a == NULL) + return -ENOMEM; + + netlbl_af4list_foreach_rcu(iter4, + &entry->type_def.addrsel->list4) { + struct netlbl_domaddr4_map *map4; + struct in_addr addr_struct; + + nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); + if (nla_b == NULL) + return -ENOMEM; + + addr_struct.s_addr = iter4->addr; + ret_val = nla_put(skb, NLBL_MGMT_A_IPV4ADDR, + sizeof(struct in_addr), + &addr_struct); + if (ret_val != 0) + return ret_val; + addr_struct.s_addr = iter4->mask; + ret_val = nla_put(skb, NLBL_MGMT_A_IPV4MASK, + sizeof(struct in_addr), + &addr_struct); + if (ret_val != 0) + return ret_val; + map4 = netlbl_domhsh_addr4_entry(iter4); + ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, + map4->type); + if (ret_val != 0) + return ret_val; + switch (map4->type) { + case NETLBL_NLTYPE_CIPSOV4: + ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, + map4->type_def.cipsov4->doi); + if (ret_val != 0) + return ret_val; + break; + } + + nla_nest_end(skb, nla_b); + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_rcu(iter6, + &entry->type_def.addrsel->list6) { + struct netlbl_domaddr6_map *map6; + + nla_b = nla_nest_start(skb, NLBL_MGMT_A_ADDRSELECTOR); + if (nla_b == NULL) + return -ENOMEM; + + ret_val = nla_put(skb, NLBL_MGMT_A_IPV6ADDR, + sizeof(struct in6_addr), + &iter6->addr); + if (ret_val != 0) + return ret_val; + ret_val = nla_put(skb, NLBL_MGMT_A_IPV6MASK, + sizeof(struct in6_addr), + &iter6->mask); + if (ret_val != 0) + return ret_val; + map6 = netlbl_domhsh_addr6_entry(iter6); + ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, + map6->type); + if (ret_val != 0) + return ret_val; + + nla_nest_end(skb, nla_b); + } +#endif /* IPv6 */ + + nla_nest_end(skb, nla_a); + break; + case NETLBL_NLTYPE_UNLABELED: + ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); + break; + case NETLBL_NLTYPE_CIPSOV4: + ret_val = nla_put_u32(skb, NLBL_MGMT_A_PROTOCOL, entry->type); + if (ret_val != 0) + return ret_val; + ret_val = nla_put_u32(skb, NLBL_MGMT_A_CV4DOI, + entry->type_def.cipsov4->doi); + break; + } + + return ret_val; +} + +/* + * NetLabel Command Handlers + */ + +/** + * netlbl_mgmt_add - Handle an ADD message + * @skb: the NETLINK buffer + * @info: the Generic NETLINK info block + * + * Description: + * Process a user generated ADD message and add the domains from the message + * to the hash table. See netlabel.h for a description of the message format. + * Returns zero on success, negative values on failure. + * + */ +static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) +{ + struct netlbl_audit audit_info; + + if ((!info->attrs[NLBL_MGMT_A_DOMAIN]) || + (!info->attrs[NLBL_MGMT_A_PROTOCOL]) || + (info->attrs[NLBL_MGMT_A_IPV4ADDR] && + info->attrs[NLBL_MGMT_A_IPV6ADDR]) || + (info->attrs[NLBL_MGMT_A_IPV4MASK] && + info->attrs[NLBL_MGMT_A_IPV6MASK]) || + ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^ + (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) || + ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^ + (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL))) + return -EINVAL; + + netlbl_netlink_auditinfo(skb, &audit_info); + + return netlbl_mgmt_add_common(info, &audit_info); +} + +/** * netlbl_mgmt_remove - Handle a REMOVE message * @skb: the NETLINK buffer * @info: the Generic NETLINK info block @@ -198,23 +453,9 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) if (data == NULL) goto listall_cb_failure; - ret_val = nla_put_string(cb_arg->skb, - NLBL_MGMT_A_DOMAIN, - entry->domain); + ret_val = netlbl_mgmt_listentry(cb_arg->skb, entry); if (ret_val != 0) goto listall_cb_failure; - ret_val = nla_put_u32(cb_arg->skb, NLBL_MGMT_A_PROTOCOL, entry->type); - if (ret_val != 0) - goto listall_cb_failure; - switch (entry->type) { - case NETLBL_NLTYPE_CIPSOV4: - ret_val = nla_put_u32(cb_arg->skb, - NLBL_MGMT_A_CV4DOI, - entry->type_def.cipsov4->doi); - if (ret_val != 0) - goto listall_cb_failure; - break; - } cb_arg->seq++; return genlmsg_end(cb_arg->skb, data); @@ -268,56 +509,22 @@ static int netlbl_mgmt_listall(struct sk_buff *skb, */ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) { - int ret_val = -EINVAL; - struct netlbl_dom_map *entry = NULL; - u32 tmp_val; struct netlbl_audit audit_info; - if (!info->attrs[NLBL_MGMT_A_PROTOCOL]) - goto adddef_failure; + if ((!info->attrs[NLBL_MGMT_A_PROTOCOL]) || + (info->attrs[NLBL_MGMT_A_IPV4ADDR] && + info->attrs[NLBL_MGMT_A_IPV6ADDR]) || + (info->attrs[NLBL_MGMT_A_IPV4MASK] && + info->attrs[NLBL_MGMT_A_IPV6MASK]) || + ((info->attrs[NLBL_MGMT_A_IPV4ADDR] != NULL) ^ + (info->attrs[NLBL_MGMT_A_IPV4MASK] != NULL)) || + ((info->attrs[NLBL_MGMT_A_IPV6ADDR] != NULL) ^ + (info->attrs[NLBL_MGMT_A_IPV6MASK] != NULL))) + return -EINVAL; netlbl_netlink_auditinfo(skb, &audit_info); - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (entry == NULL) { - ret_val = -ENOMEM; - goto adddef_failure; - } - entry->type = nla_get_u32(info->attrs[NLBL_MGMT_A_PROTOCOL]); - - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add_default(entry, &audit_info); - break; - case NETLBL_NLTYPE_CIPSOV4: - if (!info->attrs[NLBL_MGMT_A_CV4DOI]) - goto adddef_failure; - - tmp_val = nla_get_u32(info->attrs[NLBL_MGMT_A_CV4DOI]); - /* We should be holding a rcu_read_lock() here while we hold - * the result but since the entry will always be deleted when - * the CIPSO DOI is deleted we aren't going to keep the - * lock. */ - rcu_read_lock(); - entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); - if (entry->type_def.cipsov4 == NULL) { - rcu_read_unlock(); - goto adddef_failure; - } - ret_val = netlbl_domhsh_add_default(entry, &audit_info); - rcu_read_unlock(); - break; - default: - goto adddef_failure; - } - if (ret_val != 0) - goto adddef_failure; - - return 0; - -adddef_failure: - kfree(entry); - return ret_val; + return netlbl_mgmt_add_common(info, &audit_info); } /** @@ -371,19 +578,10 @@ static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) ret_val = -ENOENT; goto listdef_failure_lock; } - ret_val = nla_put_u32(ans_skb, NLBL_MGMT_A_PROTOCOL, entry->type); - if (ret_val != 0) - goto listdef_failure_lock; - switch (entry->type) { - case NETLBL_NLTYPE_CIPSOV4: - ret_val = nla_put_u32(ans_skb, - NLBL_MGMT_A_CV4DOI, - entry->type_def.cipsov4->doi); - if (ret_val != 0) - goto listdef_failure_lock; - break; - } + ret_val = netlbl_mgmt_listentry(ans_skb, entry); rcu_read_unlock(); + if (ret_val != 0) + goto listdef_failure; genlmsg_end(ans_skb, data); return genlmsg_reply(ans_skb, info); diff --git a/net/netlabel/netlabel_mgmt.h b/net/netlabel/netlabel_mgmt.h index a43bff169d6b..05d96431f819 100644 --- a/net/netlabel/netlabel_mgmt.h +++ b/net/netlabel/netlabel_mgmt.h @@ -45,6 +45,16 @@ * NLBL_MGMT_A_DOMAIN * NLBL_MGMT_A_PROTOCOL * + * If IPv4 is specified the following attributes are required: + * + * NLBL_MGMT_A_IPV4ADDR + * NLBL_MGMT_A_IPV4MASK + * + * If IPv6 is specified the following attributes are required: + * + * NLBL_MGMT_A_IPV6ADDR + * NLBL_MGMT_A_IPV6MASK + * * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: * * NLBL_MGMT_A_CV4DOI @@ -68,13 +78,24 @@ * Required attributes: * * NLBL_MGMT_A_DOMAIN + * + * If the IP address selectors are not used the following attribute is + * required: + * * NLBL_MGMT_A_PROTOCOL * - * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: + * If the IP address selectors are used then the following attritbute is + * required: + * + * NLBL_MGMT_A_SELECTORLIST + * + * If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following + * attributes are required: * * NLBL_MGMT_A_CV4DOI * - * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. + * If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other + * attributes are required. * * o ADDDEF: * Sent by an application to set the default domain mapping for the NetLabel @@ -100,15 +121,23 @@ * application there is no payload. On success the kernel should send a * response using the following format. * - * Required attributes: + * If the IP address selectors are not used the following attribute is + * required: * * NLBL_MGMT_A_PROTOCOL * - * If using NETLBL_NLTYPE_CIPSOV4 the following attributes are required: + * If the IP address selectors are used then the following attritbute is + * required: + * + * NLBL_MGMT_A_SELECTORLIST + * + * If the mapping is using the NETLBL_NLTYPE_CIPSOV4 type then the following + * attributes are required: * * NLBL_MGMT_A_CV4DOI * - * If using NETLBL_NLTYPE_UNLABELED no other attributes are required. + * If the mapping is using the NETLBL_NLTYPE_UNLABELED type no other + * attributes are required. * * o PROTOCOLS: * Sent by an application to request a list of configured NetLabel protocols @@ -162,6 +191,26 @@ enum { NLBL_MGMT_A_CV4DOI, /* (NLA_U32) * the CIPSOv4 DOI value */ + NLBL_MGMT_A_IPV6ADDR, + /* (NLA_BINARY, struct in6_addr) + * an IPv6 address */ + NLBL_MGMT_A_IPV6MASK, + /* (NLA_BINARY, struct in6_addr) + * an IPv6 address mask */ + NLBL_MGMT_A_IPV4ADDR, + /* (NLA_BINARY, struct in_addr) + * an IPv4 address */ + NLBL_MGMT_A_IPV4MASK, + /* (NLA_BINARY, struct in_addr) + * and IPv4 address mask */ + NLBL_MGMT_A_ADDRSELECTOR, + /* (NLA_NESTED) + * an IP address selector, must contain an address, mask, and protocol + * attribute plus any protocol specific attributes */ + NLBL_MGMT_A_SELECTORLIST, + /* (NLA_NESTED) + * the selector list, there must be at least one + * NLBL_MGMT_A_ADDRSELECTOR attribute */ __NLBL_MGMT_A_MAX, }; #define NLBL_MGMT_A_MAX (__NLBL_MGMT_A_MAX - 1) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 921c118ead89..e8a5c32b0f10 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -10,7 +10,7 @@ */ /* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2007 + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - 2008 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -54,6 +54,7 @@ #include <asm/atomic.h> #include "netlabel_user.h" +#include "netlabel_addrlist.h" #include "netlabel_domainhash.h" #include "netlabel_unlabeled.h" #include "netlabel_mgmt.h" @@ -76,22 +77,20 @@ struct netlbl_unlhsh_tbl { struct list_head *tbl; u32 size; }; +#define netlbl_unlhsh_addr4_entry(iter) \ + container_of(iter, struct netlbl_unlhsh_addr4, list) struct netlbl_unlhsh_addr4 { - __be32 addr; - __be32 mask; u32 secid; - u32 valid; - struct list_head list; + struct netlbl_af4list list; struct rcu_head rcu; }; +#define netlbl_unlhsh_addr6_entry(iter) \ + container_of(iter, struct netlbl_unlhsh_addr6, list) struct netlbl_unlhsh_addr6 { - struct in6_addr addr; - struct in6_addr mask; u32 secid; - u32 valid; - struct list_head list; + struct netlbl_af6list list; struct rcu_head rcu; }; struct netlbl_unlhsh_iface { @@ -147,76 +146,6 @@ static const struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1 }; /* - * Audit Helper Functions - */ - -/** - * netlbl_unlabel_audit_addr4 - Audit an IPv4 address - * @audit_buf: audit buffer - * @dev: network interface - * @addr: IP address - * @mask: IP address mask - * - * Description: - * Write the IPv4 address and address mask, if necessary, to @audit_buf. - * - */ -static void netlbl_unlabel_audit_addr4(struct audit_buffer *audit_buf, - const char *dev, - __be32 addr, __be32 mask) -{ - u32 mask_val = ntohl(mask); - - if (dev != NULL) - audit_log_format(audit_buf, " netif=%s", dev); - audit_log_format(audit_buf, " src=" NIPQUAD_FMT, NIPQUAD(addr)); - if (mask_val != 0xffffffff) { - u32 mask_len = 0; - while (mask_val > 0) { - mask_val <<= 1; - mask_len++; - } - audit_log_format(audit_buf, " src_prefixlen=%d", mask_len); - } -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -/** - * netlbl_unlabel_audit_addr6 - Audit an IPv6 address - * @audit_buf: audit buffer - * @dev: network interface - * @addr: IP address - * @mask: IP address mask - * - * Description: - * Write the IPv6 address and address mask, if necessary, to @audit_buf. - * - */ -static void netlbl_unlabel_audit_addr6(struct audit_buffer *audit_buf, - const char *dev, - const struct in6_addr *addr, - const struct in6_addr *mask) -{ - if (dev != NULL) - audit_log_format(audit_buf, " netif=%s", dev); - audit_log_format(audit_buf, " src=" NIP6_FMT, NIP6(*addr)); - if (ntohl(mask->s6_addr32[3]) != 0xffffffff) { - u32 mask_len = 0; - u32 mask_val; - int iter = -1; - while (ntohl(mask->s6_addr32[++iter]) == 0xffffffff) - mask_len += 32; - mask_val = ntohl(mask->s6_addr32[iter]); - while (mask_val > 0) { - mask_val <<= 1; - mask_len++; - } - audit_log_format(audit_buf, " src_prefixlen=%d", mask_len); - } -} -#endif /* IPv6 */ - -/* * Unlabeled Connection Hash Table Functions */ @@ -274,26 +203,28 @@ static void netlbl_unlhsh_free_addr6(struct rcu_head *entry) static void netlbl_unlhsh_free_iface(struct rcu_head *entry) { struct netlbl_unlhsh_iface *iface; - struct netlbl_unlhsh_addr4 *iter4; - struct netlbl_unlhsh_addr4 *tmp4; - struct netlbl_unlhsh_addr6 *iter6; - struct netlbl_unlhsh_addr6 *tmp6; + struct netlbl_af4list *iter4; + struct netlbl_af4list *tmp4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct netlbl_af6list *iter6; + struct netlbl_af6list *tmp6; +#endif /* IPv6 */ iface = container_of(entry, struct netlbl_unlhsh_iface, rcu); /* no need for locks here since we are the only one with access to this * structure */ - list_for_each_entry_safe(iter4, tmp4, &iface->addr4_list, list) - if (iter4->valid) { - list_del_rcu(&iter4->list); - kfree(iter4); - } - list_for_each_entry_safe(iter6, tmp6, &iface->addr6_list, list) - if (iter6->valid) { - list_del_rcu(&iter6->list); - kfree(iter6); - } + netlbl_af4list_foreach_safe(iter4, tmp4, &iface->addr4_list) { + netlbl_af4list_remove_entry(iter4); + kfree(netlbl_unlhsh_addr4_entry(iter4)); + } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_safe(iter6, tmp6, &iface->addr6_list) { + netlbl_af6list_remove_entry(iter6); + kfree(netlbl_unlhsh_addr6_entry(iter6)); + } +#endif /* IPv6 */ kfree(iface); } @@ -316,59 +247,6 @@ static u32 netlbl_unlhsh_hash(int ifindex) } /** - * netlbl_unlhsh_search_addr4 - Search for a matching IPv4 address entry - * @addr: IPv4 address - * @iface: the network interface entry - * - * Description: - * Searches the IPv4 address list of the network interface specified by @iface. - * If a matching address entry is found it is returned, otherwise NULL is - * returned. The caller is responsible for calling the rcu_read_[un]lock() - * functions. - * - */ -static struct netlbl_unlhsh_addr4 *netlbl_unlhsh_search_addr4( - __be32 addr, - const struct netlbl_unlhsh_iface *iface) -{ - struct netlbl_unlhsh_addr4 *iter; - - list_for_each_entry_rcu(iter, &iface->addr4_list, list) - if (iter->valid && (addr & iter->mask) == iter->addr) - return iter; - - return NULL; -} - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -/** - * netlbl_unlhsh_search_addr6 - Search for a matching IPv6 address entry - * @addr: IPv6 address - * @iface: the network interface entry - * - * Description: - * Searches the IPv6 address list of the network interface specified by @iface. - * If a matching address entry is found it is returned, otherwise NULL is - * returned. The caller is responsible for calling the rcu_read_[un]lock() - * functions. - * - */ -static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6( - const struct in6_addr *addr, - const struct netlbl_unlhsh_iface *iface) -{ - struct netlbl_unlhsh_addr6 *iter; - - list_for_each_entry_rcu(iter, &iface->addr6_list, list) - if (iter->valid && - ipv6_masked_addr_cmp(&iter->addr, &iter->mask, addr) == 0) - return iter; - - return NULL; -} -#endif /* IPv6 */ - -/** * netlbl_unlhsh_search_iface - Search for a matching interface entry * @ifindex: the network interface * @@ -381,12 +259,12 @@ static struct netlbl_unlhsh_addr6 *netlbl_unlhsh_search_addr6( static struct netlbl_unlhsh_iface *netlbl_unlhsh_search_iface(int ifindex) { u32 bkt; + struct list_head *bkt_list; struct netlbl_unlhsh_iface *iter; bkt = netlbl_unlhsh_hash(ifindex); - list_for_each_entry_rcu(iter, - &rcu_dereference(netlbl_unlhsh)->tbl[bkt], - list) + bkt_list = &rcu_dereference(netlbl_unlhsh)->tbl[bkt]; + list_for_each_entry_rcu(iter, bkt_list, list) if (iter->valid && iter->ifindex == ifindex) return iter; @@ -439,43 +317,26 @@ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface, const struct in_addr *mask, u32 secid) { + int ret_val; struct netlbl_unlhsh_addr4 *entry; - struct netlbl_unlhsh_addr4 *iter; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) return -ENOMEM; - entry->addr = addr->s_addr & mask->s_addr; - entry->mask = mask->s_addr; - entry->secid = secid; - entry->valid = 1; + entry->list.addr = addr->s_addr & mask->s_addr; + entry->list.mask = mask->s_addr; + entry->list.valid = 1; INIT_RCU_HEAD(&entry->rcu); + entry->secid = secid; spin_lock(&netlbl_unlhsh_lock); - iter = netlbl_unlhsh_search_addr4(entry->addr, iface); - if (iter != NULL && - iter->addr == addr->s_addr && iter->mask == mask->s_addr) { - spin_unlock(&netlbl_unlhsh_lock); - kfree(entry); - return -EEXIST; - } - /* in order to speed up address searches through the list (the common - * case) we need to keep the list in order based on the size of the - * address mask such that the entry with the widest mask (smallest - * numerical value) appears first in the list */ - list_for_each_entry_rcu(iter, &iface->addr4_list, list) - if (iter->valid && - ntohl(entry->mask) > ntohl(iter->mask)) { - __list_add_rcu(&entry->list, - iter->list.prev, - &iter->list); - spin_unlock(&netlbl_unlhsh_lock); - return 0; - } - list_add_tail_rcu(&entry->list, &iface->addr4_list); + ret_val = netlbl_af4list_add(&entry->list, &iface->addr4_list); spin_unlock(&netlbl_unlhsh_lock); - return 0; + + if (ret_val != 0) + kfree(entry); + return ret_val; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -498,47 +359,29 @@ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface, const struct in6_addr *mask, u32 secid) { + int ret_val; struct netlbl_unlhsh_addr6 *entry; - struct netlbl_unlhsh_addr6 *iter; entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (entry == NULL) return -ENOMEM; - ipv6_addr_copy(&entry->addr, addr); - entry->addr.s6_addr32[0] &= mask->s6_addr32[0]; - entry->addr.s6_addr32[1] &= mask->s6_addr32[1]; - entry->addr.s6_addr32[2] &= mask->s6_addr32[2]; - entry->addr.s6_addr32[3] &= mask->s6_addr32[3]; - ipv6_addr_copy(&entry->mask, mask); - entry->secid = secid; - entry->valid = 1; + ipv6_addr_copy(&entry->list.addr, addr); + entry->list.addr.s6_addr32[0] &= mask->s6_addr32[0]; + entry->list.addr.s6_addr32[1] &= mask->s6_addr32[1]; + entry->list.addr.s6_addr32[2] &= mask->s6_addr32[2]; + entry->list.addr.s6_addr32[3] &= mask->s6_addr32[3]; + ipv6_addr_copy(&entry->list.mask, mask); + entry->list.valid = 1; INIT_RCU_HEAD(&entry->rcu); + entry->secid = secid; spin_lock(&netlbl_unlhsh_lock); - iter = netlbl_unlhsh_search_addr6(&entry->addr, iface); - if (iter != NULL && - (ipv6_addr_equal(&iter->addr, addr) && - ipv6_addr_equal(&iter->mask, mask))) { - spin_unlock(&netlbl_unlhsh_lock); - kfree(entry); - return -EEXIST; - } - /* in order to speed up address searches through the list (the common - * case) we need to keep the list in order based on the size of the - * address mask such that the entry with the widest mask (smallest - * numerical value) appears first in the list */ - list_for_each_entry_rcu(iter, &iface->addr6_list, list) - if (iter->valid && - ipv6_addr_cmp(&entry->mask, &iter->mask) > 0) { - __list_add_rcu(&entry->list, - iter->list.prev, - &iter->list); - spin_unlock(&netlbl_unlhsh_lock); - return 0; - } - list_add_tail_rcu(&entry->list, &iface->addr6_list); + ret_val = netlbl_af6list_add(&entry->list, &iface->addr6_list); spin_unlock(&netlbl_unlhsh_lock); + + if (ret_val != 0) + kfree(entry); return 0; } #endif /* IPv6 */ @@ -658,10 +501,10 @@ static int netlbl_unlhsh_add(struct net *net, mask4 = (struct in_addr *)mask; ret_val = netlbl_unlhsh_add_addr4(iface, addr4, mask4, secid); if (audit_buf != NULL) - netlbl_unlabel_audit_addr4(audit_buf, - dev_name, - addr4->s_addr, - mask4->s_addr); + netlbl_af4list_audit_addr(audit_buf, 1, + dev_name, + addr4->s_addr, + mask4->s_addr); break; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -672,9 +515,9 @@ static int netlbl_unlhsh_add(struct net *net, mask6 = (struct in6_addr *)mask; ret_val = netlbl_unlhsh_add_addr6(iface, addr6, mask6, secid); if (audit_buf != NULL) - netlbl_unlabel_audit_addr6(audit_buf, - dev_name, - addr6, mask6); + netlbl_af6list_audit_addr(audit_buf, 1, + dev_name, + addr6, mask6); break; } #endif /* IPv6 */ @@ -719,35 +562,34 @@ static int netlbl_unlhsh_remove_addr4(struct net *net, const struct in_addr *mask, struct netlbl_audit *audit_info) { - int ret_val = -ENOENT; + int ret_val = 0; + struct netlbl_af4list *list_entry; struct netlbl_unlhsh_addr4 *entry; - struct audit_buffer *audit_buf = NULL; + struct audit_buffer *audit_buf; struct net_device *dev; - char *secctx = NULL; + char *secctx; u32 secctx_len; spin_lock(&netlbl_unlhsh_lock); - entry = netlbl_unlhsh_search_addr4(addr->s_addr, iface); - if (entry != NULL && - entry->addr == addr->s_addr && entry->mask == mask->s_addr) { - entry->valid = 0; - list_del_rcu(&entry->list); - ret_val = 0; - } + list_entry = netlbl_af4list_remove(addr->s_addr, mask->s_addr, + &iface->addr4_list); spin_unlock(&netlbl_unlhsh_lock); + if (list_entry == NULL) + ret_val = -ENOENT; + entry = netlbl_unlhsh_addr4_entry(list_entry); audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); if (audit_buf != NULL) { dev = dev_get_by_index(net, iface->ifindex); - netlbl_unlabel_audit_addr4(audit_buf, - (dev != NULL ? dev->name : NULL), - entry->addr, entry->mask); + netlbl_af4list_audit_addr(audit_buf, 1, + (dev != NULL ? dev->name : NULL), + addr->s_addr, mask->s_addr); if (dev != NULL) dev_put(dev); - if (security_secid_to_secctx(entry->secid, - &secctx, - &secctx_len) == 0) { + if (entry && security_secid_to_secctx(entry->secid, + &secctx, + &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } @@ -781,36 +623,33 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, const struct in6_addr *mask, struct netlbl_audit *audit_info) { - int ret_val = -ENOENT; + int ret_val = 0; + struct netlbl_af6list *list_entry; struct netlbl_unlhsh_addr6 *entry; - struct audit_buffer *audit_buf = NULL; + struct audit_buffer *audit_buf; struct net_device *dev; - char *secctx = NULL; + char *secctx; u32 secctx_len; spin_lock(&netlbl_unlhsh_lock); - entry = netlbl_unlhsh_search_addr6(addr, iface); - if (entry != NULL && - (ipv6_addr_equal(&entry->addr, addr) && - ipv6_addr_equal(&entry->mask, mask))) { - entry->valid = 0; - list_del_rcu(&entry->list); - ret_val = 0; - } + list_entry = netlbl_af6list_remove(addr, mask, &iface->addr6_list); spin_unlock(&netlbl_unlhsh_lock); + if (list_entry == NULL) + ret_val = -ENOENT; + entry = netlbl_unlhsh_addr6_entry(list_entry); audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_STCDEL, audit_info); if (audit_buf != NULL) { dev = dev_get_by_index(net, iface->ifindex); - netlbl_unlabel_audit_addr6(audit_buf, - (dev != NULL ? dev->name : NULL), - addr, mask); + netlbl_af6list_audit_addr(audit_buf, 1, + (dev != NULL ? dev->name : NULL), + addr, mask); if (dev != NULL) dev_put(dev); - if (security_secid_to_secctx(entry->secid, - &secctx, - &secctx_len) == 0) { + if (entry && security_secid_to_secctx(entry->secid, + &secctx, + &secctx_len) == 0) { audit_log_format(audit_buf, " sec_obj=%s", secctx); security_release_secctx(secctx, secctx_len); } @@ -836,16 +675,18 @@ static int netlbl_unlhsh_remove_addr6(struct net *net, */ static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface) { - struct netlbl_unlhsh_addr4 *iter4; - struct netlbl_unlhsh_addr6 *iter6; + struct netlbl_af4list *iter4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct netlbl_af6list *iter6; +#endif /* IPv6 */ spin_lock(&netlbl_unlhsh_lock); - list_for_each_entry_rcu(iter4, &iface->addr4_list, list) - if (iter4->valid) - goto unlhsh_condremove_failure; - list_for_each_entry_rcu(iter6, &iface->addr6_list, list) - if (iter6->valid) - goto unlhsh_condremove_failure; + netlbl_af4list_foreach_rcu(iter4, &iface->addr4_list) + goto unlhsh_condremove_failure; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_rcu(iter6, &iface->addr6_list) + goto unlhsh_condremove_failure; +#endif /* IPv6 */ iface->valid = 0; if (iface->ifindex > 0) list_del_rcu(&iface->list); @@ -1349,7 +1190,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, if (addr4) { struct in_addr addr_struct; - addr_struct.s_addr = addr4->addr; + addr_struct.s_addr = addr4->list.addr; ret_val = nla_put(cb_arg->skb, NLBL_UNLABEL_A_IPV4ADDR, sizeof(struct in_addr), @@ -1357,7 +1198,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, if (ret_val != 0) goto list_cb_failure; - addr_struct.s_addr = addr4->mask; + addr_struct.s_addr = addr4->list.mask; ret_val = nla_put(cb_arg->skb, NLBL_UNLABEL_A_IPV4MASK, sizeof(struct in_addr), @@ -1370,14 +1211,14 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, ret_val = nla_put(cb_arg->skb, NLBL_UNLABEL_A_IPV6ADDR, sizeof(struct in6_addr), - &addr6->addr); + &addr6->list.addr); if (ret_val != 0) goto list_cb_failure; ret_val = nla_put(cb_arg->skb, NLBL_UNLABEL_A_IPV6MASK, sizeof(struct in6_addr), - &addr6->mask); + &addr6->list.mask); if (ret_val != 0) goto list_cb_failure; @@ -1425,8 +1266,11 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, u32 iter_bkt; u32 iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0; struct netlbl_unlhsh_iface *iface; - struct netlbl_unlhsh_addr4 *addr4; - struct netlbl_unlhsh_addr6 *addr6; + struct list_head *iter_list; + struct netlbl_af4list *addr4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct netlbl_af6list *addr6; +#endif cb_arg.nl_cb = cb; cb_arg.skb = skb; @@ -1436,44 +1280,43 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb, for (iter_bkt = skip_bkt; iter_bkt < rcu_dereference(netlbl_unlhsh)->size; iter_bkt++, iter_chain = 0, iter_addr4 = 0, iter_addr6 = 0) { - list_for_each_entry_rcu(iface, - &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt], - list) { + iter_list = &rcu_dereference(netlbl_unlhsh)->tbl[iter_bkt]; + list_for_each_entry_rcu(iface, iter_list, list) { if (!iface->valid || iter_chain++ < skip_chain) continue; - list_for_each_entry_rcu(addr4, - &iface->addr4_list, - list) { - if (!addr4->valid || iter_addr4++ < skip_addr4) + netlbl_af4list_foreach_rcu(addr4, + &iface->addr4_list) { + if (iter_addr4++ < skip_addr4) continue; if (netlbl_unlabel_staticlist_gen( - NLBL_UNLABEL_C_STATICLIST, - iface, - addr4, - NULL, - &cb_arg) < 0) { + NLBL_UNLABEL_C_STATICLIST, + iface, + netlbl_unlhsh_addr4_entry(addr4), + NULL, + &cb_arg) < 0) { iter_addr4--; iter_chain--; goto unlabel_staticlist_return; } } - list_for_each_entry_rcu(addr6, - &iface->addr6_list, - list) { - if (!addr6->valid || iter_addr6++ < skip_addr6) +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_rcu(addr6, + &iface->addr6_list) { + if (iter_addr6++ < skip_addr6) continue; if (netlbl_unlabel_staticlist_gen( - NLBL_UNLABEL_C_STATICLIST, - iface, - NULL, - addr6, - &cb_arg) < 0) { + NLBL_UNLABEL_C_STATICLIST, + iface, + NULL, + netlbl_unlhsh_addr6_entry(addr6), + &cb_arg) < 0) { iter_addr6--; iter_chain--; goto unlabel_staticlist_return; } } +#endif /* IPv6 */ } } @@ -1504,9 +1347,12 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, struct netlbl_unlhsh_iface *iface; u32 skip_addr4 = cb->args[0]; u32 skip_addr6 = cb->args[1]; - u32 iter_addr4 = 0, iter_addr6 = 0; - struct netlbl_unlhsh_addr4 *addr4; - struct netlbl_unlhsh_addr6 *addr6; + u32 iter_addr4 = 0; + struct netlbl_af4list *addr4; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + u32 iter_addr6 = 0; + struct netlbl_af6list *addr6; +#endif cb_arg.nl_cb = cb; cb_arg.skb = skb; @@ -1517,30 +1363,32 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb, if (iface == NULL || !iface->valid) goto unlabel_staticlistdef_return; - list_for_each_entry_rcu(addr4, &iface->addr4_list, list) { - if (!addr4->valid || iter_addr4++ < skip_addr4) + netlbl_af4list_foreach_rcu(addr4, &iface->addr4_list) { + if (iter_addr4++ < skip_addr4) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, - iface, - addr4, - NULL, - &cb_arg) < 0) { + iface, + netlbl_unlhsh_addr4_entry(addr4), + NULL, + &cb_arg) < 0) { iter_addr4--; goto unlabel_staticlistdef_return; } } - list_for_each_entry_rcu(addr6, &iface->addr6_list, list) { - if (!addr6->valid || iter_addr6++ < skip_addr6) +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) { + if (iter_addr6++ < skip_addr6) continue; if (netlbl_unlabel_staticlist_gen(NLBL_UNLABEL_C_STATICLISTDEF, - iface, - NULL, - addr6, - &cb_arg) < 0) { + iface, + NULL, + netlbl_unlhsh_addr6_entry(addr6), + &cb_arg) < 0) { iter_addr6--; goto unlabel_staticlistdef_return; } } +#endif /* IPv6 */ unlabel_staticlistdef_return: rcu_read_unlock(); @@ -1718,25 +1566,27 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb, switch (family) { case PF_INET: { struct iphdr *hdr4; - struct netlbl_unlhsh_addr4 *addr4; + struct netlbl_af4list *addr4; hdr4 = ip_hdr(skb); - addr4 = netlbl_unlhsh_search_addr4(hdr4->saddr, iface); + addr4 = netlbl_af4list_search(hdr4->saddr, + &iface->addr4_list); if (addr4 == NULL) goto unlabel_getattr_nolabel; - secattr->attr.secid = addr4->secid; + secattr->attr.secid = netlbl_unlhsh_addr4_entry(addr4)->secid; break; } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) case PF_INET6: { struct ipv6hdr *hdr6; - struct netlbl_unlhsh_addr6 *addr6; + struct netlbl_af6list *addr6; hdr6 = ipv6_hdr(skb); - addr6 = netlbl_unlhsh_search_addr6(&hdr6->saddr, iface); + addr6 = netlbl_af6list_search(&hdr6->saddr, + &iface->addr6_list); if (addr6 == NULL) goto unlabel_getattr_nolabel; - secattr->attr.secid = addr6->secid; + secattr->attr.secid = netlbl_unlhsh_addr6_entry(addr6)->secid; break; } #endif /* IPv6 */ diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index b0eacc0007cc..480184a857d2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1,7 +1,7 @@ /* * NETLINK Kernel-user communication protocol. * - * Authors: Alan Cox <alan@redhat.com> + * Authors: Alan Cox <alan@lxorguk.ukuu.org.uk> * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> * * This program is free software; you can redistribute it and/or @@ -435,7 +435,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol) return -EPROTONOSUPPORT; netlink_lock_table(); -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES if (!nl_table[protocol].registered) { netlink_unlock_table(); request_module("net-pf-%d-proto-%d", PF_NETLINK, protocol); diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index 532e4faa29f7..9f1ea4a27b35 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -525,6 +525,7 @@ static int nr_release(struct socket *sock) if (sk == NULL) return 0; sock_hold(sk); + sock_orphan(sk); lock_sock(sk); nr = nr_sk(sk); @@ -548,7 +549,6 @@ static int nr_release(struct socket *sock) sk->sk_state = TCP_CLOSE; sk->sk_shutdown |= SEND_SHUTDOWN; sk->sk_state_change(sk); - sock_orphan(sk); sock_set_flag(sk, SOCK_DESTROY); break; diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig new file mode 100644 index 000000000000..51a5669573f2 --- /dev/null +++ b/net/phonet/Kconfig @@ -0,0 +1,16 @@ +# +# Phonet protocol +# + +config PHONET + tristate "Phonet protocols family" + help + The Phone Network protocol (PhoNet) is a packet-oriented + communication protocol developped by Nokia for use with its modems. + + This is required for Maemo to use cellular data connectivity (if + supported). It can also be used to control Nokia phones + from a Linux computer, although AT commands may be easier to use. + + To compile this driver as a module, choose M here: the module + will be called phonet. If unsure, say N. diff --git a/net/phonet/Makefile b/net/phonet/Makefile new file mode 100644 index 000000000000..d62bbba649b3 --- /dev/null +++ b/net/phonet/Makefile @@ -0,0 +1,11 @@ +obj-$(CONFIG_PHONET) += phonet.o pn_pep.o + +phonet-objs := \ + pn_dev.o \ + pn_netlink.o \ + socket.o \ + datagram.o \ + sysctl.o \ + af_phonet.o + +pn_pep-objs := pep.o pep-gprs.o diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c new file mode 100644 index 000000000000..9d211f12582b --- /dev/null +++ b/net/phonet/af_phonet.c @@ -0,0 +1,475 @@ +/* + * File: af_phonet.c + * + * Phonet protocols family + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com> + * Original author: Sakari Ailus <sakari.ailus@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <asm/unaligned.h> +#include <net/sock.h> + +#include <linux/if_phonet.h> +#include <linux/phonet.h> +#include <net/phonet/phonet.h> +#include <net/phonet/pn_dev.h> + +/* Transport protocol registration */ +static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; +static DEFINE_SPINLOCK(proto_tab_lock); + +static struct phonet_protocol *phonet_proto_get(int protocol) +{ + struct phonet_protocol *pp; + + if (protocol >= PHONET_NPROTO) + return NULL; + + spin_lock(&proto_tab_lock); + pp = proto_tab[protocol]; + if (pp && !try_module_get(pp->prot->owner)) + pp = NULL; + spin_unlock(&proto_tab_lock); + + return pp; +} + +static inline void phonet_proto_put(struct phonet_protocol *pp) +{ + module_put(pp->prot->owner); +} + +/* protocol family functions */ + +static int pn_socket_create(struct net *net, struct socket *sock, int protocol) +{ + struct sock *sk; + struct pn_sock *pn; + struct phonet_protocol *pnp; + int err; + + if (net != &init_net) + return -EAFNOSUPPORT; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (protocol == 0) { + /* Default protocol selection */ + switch (sock->type) { + case SOCK_DGRAM: + protocol = PN_PROTO_PHONET; + break; + case SOCK_SEQPACKET: + protocol = PN_PROTO_PIPE; + break; + default: + return -EPROTONOSUPPORT; + } + } + + pnp = phonet_proto_get(protocol); + if (pnp == NULL && + request_module("net-pf-%d-proto-%d", PF_PHONET, protocol) == 0) + pnp = phonet_proto_get(protocol); + + if (pnp == NULL) + return -EPROTONOSUPPORT; + if (sock->type != pnp->sock_type) { + err = -EPROTONOSUPPORT; + goto out; + } + + sk = sk_alloc(net, PF_PHONET, GFP_KERNEL, pnp->prot); + if (sk == NULL) { + err = -ENOMEM; + goto out; + } + + sock_init_data(sock, sk); + sock->state = SS_UNCONNECTED; + sock->ops = pnp->ops; + sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; + sk->sk_protocol = protocol; + pn = pn_sk(sk); + pn->sobject = 0; + pn->resource = 0; + sk->sk_prot->init(sk); + err = 0; + +out: + phonet_proto_put(pnp); + return err; +} + +static struct net_proto_family phonet_proto_family = { + .family = PF_PHONET, + .create = pn_socket_create, + .owner = THIS_MODULE, +}; + +/* Phonet device header operations */ +static int pn_header_create(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned len) +{ + u8 *media = skb_push(skb, 1); + + if (type != ETH_P_PHONET) + return -1; + + if (!saddr) + saddr = dev->dev_addr; + *media = *(const u8 *)saddr; + return 1; +} + +static int pn_header_parse(const struct sk_buff *skb, unsigned char *haddr) +{ + const u8 *media = skb_mac_header(skb); + *haddr = *media; + return 1; +} + +struct header_ops phonet_header_ops = { + .create = pn_header_create, + .parse = pn_header_parse, +}; +EXPORT_SYMBOL(phonet_header_ops); + +/* + * Prepends an ISI header and sends a datagram. + */ +static int pn_send(struct sk_buff *skb, struct net_device *dev, + u16 dst, u16 src, u8 res, u8 irq) +{ + struct phonethdr *ph; + int err; + + if (skb->len + 2 > 0xffff /* Phonet length field limit */ || + skb->len + sizeof(struct phonethdr) > dev->mtu) { + err = -EMSGSIZE; + goto drop; + } + + skb_reset_transport_header(skb); + WARN_ON(skb_headroom(skb) & 1); /* HW assumes word alignment */ + skb_push(skb, sizeof(struct phonethdr)); + skb_reset_network_header(skb); + ph = pn_hdr(skb); + ph->pn_rdev = pn_dev(dst); + ph->pn_sdev = pn_dev(src); + ph->pn_res = res; + ph->pn_length = __cpu_to_be16(skb->len + 2 - sizeof(*ph)); + ph->pn_robj = pn_obj(dst); + ph->pn_sobj = pn_obj(src); + + skb->protocol = htons(ETH_P_PHONET); + skb->priority = 0; + skb->dev = dev; + + if (pn_addr(src) == pn_addr(dst)) { + skb_reset_mac_header(skb); + skb->pkt_type = PACKET_LOOPBACK; + skb_orphan(skb); + if (irq) + netif_rx(skb); + else + netif_rx_ni(skb); + err = 0; + } else { + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + NULL, NULL, skb->len); + if (err < 0) { + err = -EHOSTUNREACH; + goto drop; + } + err = dev_queue_xmit(skb); + } + + return err; +drop: + kfree_skb(skb); + return err; +} + +static int pn_raw_send(const void *data, int len, struct net_device *dev, + u16 dst, u16 src, u8 res) +{ + struct sk_buff *skb = alloc_skb(MAX_PHONET_HEADER + len, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + skb_reserve(skb, MAX_PHONET_HEADER); + __skb_put(skb, len); + skb_copy_to_linear_data(skb, data, len); + return pn_send(skb, dev, dst, src, res, 1); +} + +/* + * Create a Phonet header for the skb and send it out. Returns + * non-zero error code if failed. The skb is freed then. + */ +int pn_skb_send(struct sock *sk, struct sk_buff *skb, + const struct sockaddr_pn *target) +{ + struct net_device *dev; + struct pn_sock *pn = pn_sk(sk); + int err; + u16 src; + u8 daddr = pn_sockaddr_get_addr(target), saddr = PN_NO_ADDR; + + err = -EHOSTUNREACH; + if (sk->sk_bound_dev_if) + dev = dev_get_by_index(sock_net(sk), sk->sk_bound_dev_if); + else + dev = phonet_device_get(sock_net(sk)); + if (!dev || !(dev->flags & IFF_UP)) + goto drop; + + saddr = phonet_address_get(dev, daddr); + if (saddr == PN_NO_ADDR) + goto drop; + + src = pn->sobject; + if (!pn_addr(src)) + src = pn_object(saddr, pn_obj(src)); + + err = pn_send(skb, dev, pn_sockaddr_get_object(target), + src, pn_sockaddr_get_resource(target), 0); + dev_put(dev); + return err; + +drop: + kfree_skb(skb); + if (dev) + dev_put(dev); + return err; +} +EXPORT_SYMBOL(pn_skb_send); + +/* Do not send an error message in response to an error message */ +static inline int can_respond(struct sk_buff *skb) +{ + const struct phonethdr *ph; + const struct phonetmsg *pm; + u8 submsg_id; + + if (!pskb_may_pull(skb, 3)) + return 0; + + ph = pn_hdr(skb); + if (phonet_address_get(skb->dev, ph->pn_rdev) != ph->pn_rdev) + return 0; /* we are not the destination */ + if (ph->pn_res == PN_PREFIX && !pskb_may_pull(skb, 5)) + return 0; + if (ph->pn_res == PN_COMMGR) /* indications */ + return 0; + + ph = pn_hdr(skb); /* re-acquires the pointer */ + pm = pn_msg(skb); + if (pm->pn_msg_id != PN_COMMON_MESSAGE) + return 1; + submsg_id = (ph->pn_res == PN_PREFIX) + ? pm->pn_e_submsg_id : pm->pn_submsg_id; + if (submsg_id != PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP && + pm->pn_e_submsg_id != PN_COMM_SERVICE_NOT_IDENTIFIED_RESP) + return 1; + return 0; +} + +static int send_obj_unreachable(struct sk_buff *rskb) +{ + const struct phonethdr *oph = pn_hdr(rskb); + const struct phonetmsg *opm = pn_msg(rskb); + struct phonetmsg resp; + + memset(&resp, 0, sizeof(resp)); + resp.pn_trans_id = opm->pn_trans_id; + resp.pn_msg_id = PN_COMMON_MESSAGE; + if (oph->pn_res == PN_PREFIX) { + resp.pn_e_res_id = opm->pn_e_res_id; + resp.pn_e_submsg_id = PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP; + resp.pn_e_orig_msg_id = opm->pn_msg_id; + resp.pn_e_status = 0; + } else { + resp.pn_submsg_id = PN_COMM_ISA_ENTITY_NOT_REACHABLE_RESP; + resp.pn_orig_msg_id = opm->pn_msg_id; + resp.pn_status = 0; + } + return pn_raw_send(&resp, sizeof(resp), rskb->dev, + pn_object(oph->pn_sdev, oph->pn_sobj), + pn_object(oph->pn_rdev, oph->pn_robj), + oph->pn_res); +} + +static int send_reset_indications(struct sk_buff *rskb) +{ + struct phonethdr *oph = pn_hdr(rskb); + static const u8 data[4] = { + 0x00 /* trans ID */, 0x10 /* subscribe msg */, + 0x00 /* subscription count */, 0x00 /* dummy */ + }; + + return pn_raw_send(data, sizeof(data), rskb->dev, + pn_object(oph->pn_sdev, 0x00), + pn_object(oph->pn_rdev, oph->pn_robj), + PN_COMMGR); +} + + +/* packet type functions */ + +/* + * Stuff received packets to associated sockets. + * On error, returns non-zero and releases the skb. + */ +static int phonet_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pkttype, + struct net_device *orig_dev) +{ + struct phonethdr *ph; + struct sock *sk; + struct sockaddr_pn sa; + u16 len; + + if (dev_net(dev) != &init_net) + goto out; + + /* check we have at least a full Phonet header */ + if (!pskb_pull(skb, sizeof(struct phonethdr))) + goto out; + + /* check that the advertised length is correct */ + ph = pn_hdr(skb); + len = get_unaligned_be16(&ph->pn_length); + if (len < 2) + goto out; + len -= 2; + if ((len > skb->len) || pskb_trim(skb, len)) + goto out; + skb_reset_transport_header(skb); + + pn_skb_get_dst_sockaddr(skb, &sa); + if (pn_sockaddr_get_addr(&sa) == 0) + goto out; /* currently, we cannot be device 0 */ + + sk = pn_find_sock_by_sa(&sa); + if (sk == NULL) { + if (can_respond(skb)) { + send_obj_unreachable(skb); + send_reset_indications(skb); + } + goto out; + } + + /* Push data to the socket (or other sockets connected to it). */ + return sk_receive_skb(sk, skb, 0); + +out: + kfree_skb(skb); + return NET_RX_DROP; +} + +static struct packet_type phonet_packet_type = { + .type = __constant_htons(ETH_P_PHONET), + .dev = NULL, + .func = phonet_rcv, +}; + +int __init_or_module phonet_proto_register(int protocol, + struct phonet_protocol *pp) +{ + int err = 0; + + if (protocol >= PHONET_NPROTO) + return -EINVAL; + + err = proto_register(pp->prot, 1); + if (err) + return err; + + spin_lock(&proto_tab_lock); + if (proto_tab[protocol]) + err = -EBUSY; + else + proto_tab[protocol] = pp; + spin_unlock(&proto_tab_lock); + + return err; +} +EXPORT_SYMBOL(phonet_proto_register); + +void phonet_proto_unregister(int protocol, struct phonet_protocol *pp) +{ + spin_lock(&proto_tab_lock); + BUG_ON(proto_tab[protocol] != pp); + proto_tab[protocol] = NULL; + spin_unlock(&proto_tab_lock); + proto_unregister(pp->prot); +} +EXPORT_SYMBOL(phonet_proto_unregister); + +/* Module registration */ +static int __init phonet_init(void) +{ + int err; + + err = sock_register(&phonet_proto_family); + if (err) { + printk(KERN_ALERT + "phonet protocol family initialization failed\n"); + return err; + } + + phonet_device_init(); + dev_add_pack(&phonet_packet_type); + phonet_netlink_register(); + phonet_sysctl_init(); + + err = isi_register(); + if (err) + goto err; + return 0; + +err: + phonet_sysctl_exit(); + sock_unregister(PF_PHONET); + dev_remove_pack(&phonet_packet_type); + phonet_device_exit(); + return err; +} + +static void __exit phonet_exit(void) +{ + isi_unregister(); + phonet_sysctl_exit(); + sock_unregister(PF_PHONET); + dev_remove_pack(&phonet_packet_type); + phonet_device_exit(); +} + +module_init(phonet_init); +module_exit(phonet_exit); +MODULE_DESCRIPTION("Phonet protocol stack for Linux"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_PHONET); diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c new file mode 100644 index 000000000000..e087862ed7e4 --- /dev/null +++ b/net/phonet/datagram.c @@ -0,0 +1,197 @@ +/* + * File: datagram.c + * + * Datagram (ISI) Phonet sockets + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com> + * Original author: Sakari Ailus <sakari.ailus@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/kernel.h> +#include <linux/socket.h> +#include <asm/ioctls.h> +#include <net/sock.h> + +#include <linux/phonet.h> +#include <net/phonet/phonet.h> + +static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb); + +/* associated socket ceases to exist */ +static void pn_sock_close(struct sock *sk, long timeout) +{ + sk_common_release(sk); +} + +static int pn_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + struct sk_buff *skb; + int answ; + + switch (cmd) { + case SIOCINQ: + lock_sock(sk); + skb = skb_peek(&sk->sk_receive_queue); + answ = skb ? skb->len : 0; + release_sock(sk); + return put_user(answ, (int __user *)arg); + } + + return -ENOIOCTLCMD; +} + +/* Destroy socket. All references are gone. */ +static void pn_destruct(struct sock *sk) +{ + skb_queue_purge(&sk->sk_receive_queue); +} + +static int pn_init(struct sock *sk) +{ + sk->sk_destruct = pn_destruct; + return 0; +} + +static int pn_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len) +{ + struct sockaddr_pn *target; + struct sk_buff *skb; + int err; + + if (msg->msg_flags & MSG_OOB) + return -EOPNOTSUPP; + + if (msg->msg_name == NULL) + return -EDESTADDRREQ; + + if (msg->msg_namelen < sizeof(struct sockaddr_pn)) + return -EINVAL; + + target = (struct sockaddr_pn *)msg->msg_name; + if (target->spn_family != AF_PHONET) + return -EAFNOSUPPORT; + + skb = sock_alloc_send_skb(sk, MAX_PHONET_HEADER + len, + msg->msg_flags & MSG_DONTWAIT, &err); + if (skb == NULL) + return err; + skb_reserve(skb, MAX_PHONET_HEADER); + + err = memcpy_fromiovec((void *)skb_put(skb, len), msg->msg_iov, len); + if (err < 0) { + kfree_skb(skb); + return err; + } + + /* + * Fill in the Phonet header and + * finally pass the packet forwards. + */ + err = pn_skb_send(sk, skb, target); + + /* If ok, return len. */ + return (err >= 0) ? len : err; +} + +static int pn_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) +{ + struct sk_buff *skb = NULL; + struct sockaddr_pn sa; + int rval = -EOPNOTSUPP; + int copylen; + + if (flags & MSG_OOB) + goto out_nofree; + + if (addr_len) + *addr_len = sizeof(sa); + + skb = skb_recv_datagram(sk, flags, noblock, &rval); + if (skb == NULL) + goto out_nofree; + + pn_skb_get_src_sockaddr(skb, &sa); + + copylen = skb->len; + if (len < copylen) { + msg->msg_flags |= MSG_TRUNC; + copylen = len; + } + + rval = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copylen); + if (rval) { + rval = -EFAULT; + goto out; + } + + rval = (flags & MSG_TRUNC) ? skb->len : copylen; + + if (msg->msg_name != NULL) + memcpy(msg->msg_name, &sa, sizeof(struct sockaddr_pn)); + +out: + skb_free_datagram(sk, skb); + +out_nofree: + return rval; +} + +/* Queue an skb for a sock. */ +static int pn_backlog_rcv(struct sock *sk, struct sk_buff *skb) +{ + int err = sock_queue_rcv_skb(sk, skb); + if (err < 0) + kfree_skb(skb); + return err ? NET_RX_DROP : NET_RX_SUCCESS; +} + +/* Module registration */ +static struct proto pn_proto = { + .close = pn_sock_close, + .ioctl = pn_ioctl, + .init = pn_init, + .sendmsg = pn_sendmsg, + .recvmsg = pn_recvmsg, + .backlog_rcv = pn_backlog_rcv, + .hash = pn_sock_hash, + .unhash = pn_sock_unhash, + .get_port = pn_sock_get_port, + .obj_size = sizeof(struct pn_sock), + .owner = THIS_MODULE, + .name = "PHONET", +}; + +static struct phonet_protocol pn_dgram_proto = { + .ops = &phonet_dgram_ops, + .prot = &pn_proto, + .sock_type = SOCK_DGRAM, +}; + +int __init isi_register(void) +{ + return phonet_proto_register(PN_PROTO_PHONET, &pn_dgram_proto); +} + +void __exit isi_unregister(void) +{ + phonet_proto_unregister(PN_PROTO_PHONET, &pn_dgram_proto); +} diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c new file mode 100644 index 000000000000..9978afbd9f2a --- /dev/null +++ b/net/phonet/pep-gprs.c @@ -0,0 +1,347 @@ +/* + * File: pep-gprs.c + * + * GPRS over Phonet pipe end point socket + * + * Copyright (C) 2008 Nokia Corporation. + * + * Author: Rémi Denis-Courmont <remi.denis-courmont@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/if_ether.h> +#include <linux/if_arp.h> +#include <net/sock.h> + +#include <linux/if_phonet.h> +#include <net/tcp_states.h> +#include <net/phonet/gprs.h> + +#define GPRS_DEFAULT_MTU 1400 + +struct gprs_dev { + struct sock *sk; + void (*old_state_change)(struct sock *); + void (*old_data_ready)(struct sock *, int); + void (*old_write_space)(struct sock *); + + struct net_device *net; + struct net_device_stats stats; + + struct sk_buff_head tx_queue; + struct work_struct tx_work; + spinlock_t tx_lock; + unsigned tx_max; +}; + +static int gprs_type_trans(struct sk_buff *skb) +{ + const u8 *pvfc; + u8 buf; + + pvfc = skb_header_pointer(skb, 0, 1, &buf); + if (!pvfc) + return 0; + /* Look at IP version field */ + switch (*pvfc >> 4) { + case 4: + return htons(ETH_P_IP); + case 6: + return htons(ETH_P_IPV6); + } + return 0; +} + +/* + * Socket callbacks + */ + +static void gprs_state_change(struct sock *sk) +{ + struct gprs_dev *dev = sk->sk_user_data; + + if (sk->sk_state == TCP_CLOSE_WAIT) { + netif_stop_queue(dev->net); + netif_carrier_off(dev->net); + } +} + +static int gprs_recv(struct gprs_dev *dev, struct sk_buff *skb) +{ + int err = 0; + u16 protocol = gprs_type_trans(skb); + + if (!protocol) { + err = -EINVAL; + goto drop; + } + + if (likely(skb_headroom(skb) & 3)) { + struct sk_buff *rskb, *fs; + int flen = 0; + + /* Phonet Pipe data header is misaligned (3 bytes), + * so wrap the IP packet as a single fragment of an head-less + * socket buffer. The network stack will pull what it needs, + * but at least, the whole IP payload is not memcpy'd. */ + rskb = netdev_alloc_skb(dev->net, 0); + if (!rskb) { + err = -ENOBUFS; + goto drop; + } + skb_shinfo(rskb)->frag_list = skb; + rskb->len += skb->len; + rskb->data_len += rskb->len; + rskb->truesize += rskb->len; + + /* Avoid nested fragments */ + for (fs = skb_shinfo(skb)->frag_list; fs; fs = fs->next) + flen += fs->len; + skb->next = skb_shinfo(skb)->frag_list; + skb_shinfo(skb)->frag_list = NULL; + skb->len -= flen; + skb->data_len -= flen; + skb->truesize -= flen; + + skb = rskb; + } + + skb->protocol = protocol; + skb_reset_mac_header(skb); + skb->dev = dev->net; + + if (likely(dev->net->flags & IFF_UP)) { + dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; + netif_rx(skb); + skb = NULL; + } else + err = -ENODEV; + +drop: + if (skb) { + dev_kfree_skb(skb); + dev->stats.rx_dropped++; + } + return err; +} + +static void gprs_data_ready(struct sock *sk, int len) +{ + struct gprs_dev *dev = sk->sk_user_data; + struct sk_buff *skb; + + while ((skb = pep_read(sk)) != NULL) { + skb_orphan(skb); + gprs_recv(dev, skb); + } +} + +static void gprs_write_space(struct sock *sk) +{ + struct gprs_dev *dev = sk->sk_user_data; + unsigned credits = pep_writeable(sk); + + spin_lock_bh(&dev->tx_lock); + dev->tx_max = credits; + if (credits > skb_queue_len(&dev->tx_queue)) + netif_wake_queue(dev->net); + spin_unlock_bh(&dev->tx_lock); +} + +/* + * Network device callbacks + */ + +static int gprs_xmit(struct sk_buff *skb, struct net_device *net) +{ + struct gprs_dev *dev = netdev_priv(net); + + switch (skb->protocol) { + case htons(ETH_P_IP): + case htons(ETH_P_IPV6): + break; + default: + dev_kfree_skb(skb); + return 0; + } + + spin_lock(&dev->tx_lock); + if (likely(skb_queue_len(&dev->tx_queue) < dev->tx_max)) { + skb_queue_tail(&dev->tx_queue, skb); + skb = NULL; + } + if (skb_queue_len(&dev->tx_queue) >= dev->tx_max) + netif_stop_queue(net); + spin_unlock(&dev->tx_lock); + + schedule_work(&dev->tx_work); + if (unlikely(skb)) + dev_kfree_skb(skb); + return 0; +} + +static void gprs_tx(struct work_struct *work) +{ + struct gprs_dev *dev = container_of(work, struct gprs_dev, tx_work); + struct sock *sk = dev->sk; + struct sk_buff *skb; + + while ((skb = skb_dequeue(&dev->tx_queue)) != NULL) { + int err; + + dev->stats.tx_bytes += skb->len; + dev->stats.tx_packets++; + + skb_orphan(skb); + skb_set_owner_w(skb, sk); + + lock_sock(sk); + err = pep_write(sk, skb); + if (err) { + LIMIT_NETDEBUG(KERN_WARNING"%s: TX error (%d)\n", + dev->net->name, err); + dev->stats.tx_aborted_errors++; + dev->stats.tx_errors++; + } + release_sock(sk); + } + + lock_sock(sk); + gprs_write_space(sk); + release_sock(sk); +} + +static int gprs_set_mtu(struct net_device *net, int new_mtu) +{ + if ((new_mtu < 576) || (new_mtu > (PHONET_MAX_MTU - 11))) + return -EINVAL; + + net->mtu = new_mtu; + return 0; +} + +static struct net_device_stats *gprs_get_stats(struct net_device *net) +{ + struct gprs_dev *dev = netdev_priv(net); + + return &dev->stats; +} + +static void gprs_setup(struct net_device *net) +{ + net->features = NETIF_F_FRAGLIST; + net->type = ARPHRD_NONE; + net->flags = IFF_POINTOPOINT | IFF_NOARP; + net->mtu = GPRS_DEFAULT_MTU; + net->hard_header_len = 0; + net->addr_len = 0; + net->tx_queue_len = 10; + + net->destructor = free_netdev; + net->hard_start_xmit = gprs_xmit; /* mandatory */ + net->change_mtu = gprs_set_mtu; + net->get_stats = gprs_get_stats; +} + +/* + * External interface + */ + +/* + * Attach a GPRS interface to a datagram socket. + * Returns the interface index on success, negative error code on error. + */ +int gprs_attach(struct sock *sk) +{ + static const char ifname[] = "gprs%d"; + struct gprs_dev *dev; + struct net_device *net; + int err; + + if (unlikely(sk->sk_type == SOCK_STREAM)) + return -EINVAL; /* need packet boundaries */ + + /* Create net device */ + net = alloc_netdev(sizeof(*dev), ifname, gprs_setup); + if (!net) + return -ENOMEM; + dev = netdev_priv(net); + dev->net = net; + dev->tx_max = 0; + spin_lock_init(&dev->tx_lock); + skb_queue_head_init(&dev->tx_queue); + INIT_WORK(&dev->tx_work, gprs_tx); + + netif_stop_queue(net); + err = register_netdev(net); + if (err) { + free_netdev(net); + return err; + } + + lock_sock(sk); + if (unlikely(sk->sk_user_data)) { + err = -EBUSY; + goto out_rel; + } + if (unlikely((1 << sk->sk_state & (TCPF_CLOSE|TCPF_LISTEN)) || + sock_flag(sk, SOCK_DEAD))) { + err = -EINVAL; + goto out_rel; + } + sk->sk_user_data = dev; + dev->old_state_change = sk->sk_state_change; + dev->old_data_ready = sk->sk_data_ready; + dev->old_write_space = sk->sk_write_space; + sk->sk_state_change = gprs_state_change; + sk->sk_data_ready = gprs_data_ready; + sk->sk_write_space = gprs_write_space; + release_sock(sk); + + sock_hold(sk); + dev->sk = sk; + + printk(KERN_DEBUG"%s: attached\n", net->name); + gprs_write_space(sk); /* kick off TX */ + return net->ifindex; + +out_rel: + release_sock(sk); + unregister_netdev(net); + return err; +} + +void gprs_detach(struct sock *sk) +{ + struct gprs_dev *dev = sk->sk_user_data; + struct net_device *net = dev->net; + + lock_sock(sk); + sk->sk_user_data = NULL; + sk->sk_state_change = dev->old_state_change; + sk->sk_data_ready = dev->old_data_ready; + sk->sk_write_space = dev->old_write_space; + release_sock(sk); + + printk(KERN_DEBUG"%s: detached\n", net->name); + unregister_netdev(net); + flush_scheduled_work(); + sock_put(sk); + skb_queue_purge(&dev->tx_queue); +} diff --git a/net/phonet/pep.c b/net/phonet/pep.c new file mode 100644 index 000000000000..bc6d50f83249 --- /dev/null +++ b/net/phonet/pep.c @@ -0,0 +1,1076 @@ +/* + * File: pep.c + * + * Phonet pipe protocol end point socket + * + * Copyright (C) 2008 Nokia Corporation. + * + * Author: Rémi Denis-Courmont <remi.denis-courmont@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/kernel.h> +#include <linux/socket.h> +#include <net/sock.h> +#include <net/tcp_states.h> +#include <asm/ioctls.h> + +#include <linux/phonet.h> +#include <net/phonet/phonet.h> +#include <net/phonet/pep.h> +#include <net/phonet/gprs.h> + +/* sk_state values: + * TCP_CLOSE sock not in use yet + * TCP_CLOSE_WAIT disconnected pipe + * TCP_LISTEN listening pipe endpoint + * TCP_SYN_RECV connected pipe in disabled state + * TCP_ESTABLISHED connected pipe in enabled state + * + * pep_sock locking: + * - sk_state, ackq, hlist: sock lock needed + * - listener: read only + * - pipe_handle: read only + */ + +#define CREDITS_MAX 10 +#define CREDITS_THR 7 + +static const struct sockaddr_pn pipe_srv = { + .spn_family = AF_PHONET, + .spn_resource = 0xD9, /* pipe service */ +}; + +#define pep_sb_size(s) (((s) + 5) & ~3) /* 2-bytes head, 32-bits aligned */ + +/* Get the next TLV sub-block. */ +static unsigned char *pep_get_sb(struct sk_buff *skb, u8 *ptype, u8 *plen, + void *buf) +{ + void *data = NULL; + struct { + u8 sb_type; + u8 sb_len; + } *ph, h; + int buflen = *plen; + + ph = skb_header_pointer(skb, 0, 2, &h); + if (ph == NULL || ph->sb_len < 2 || !pskb_may_pull(skb, ph->sb_len)) + return NULL; + ph->sb_len -= 2; + *ptype = ph->sb_type; + *plen = ph->sb_len; + + if (buflen > ph->sb_len) + buflen = ph->sb_len; + data = skb_header_pointer(skb, 2, buflen, buf); + __skb_pull(skb, 2 + ph->sb_len); + return data; +} + +static int pep_reply(struct sock *sk, struct sk_buff *oskb, + u8 code, const void *data, int len, gfp_t priority) +{ + const struct pnpipehdr *oph = pnp_hdr(oskb); + struct pnpipehdr *ph; + struct sk_buff *skb; + + skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); + if (!skb) + return -ENOMEM; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PNPIPE_HEADER); + __skb_put(skb, len); + skb_copy_to_linear_data(skb, data, len); + __skb_push(skb, sizeof(*ph)); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = oph->utid; + ph->message_id = oph->message_id + 1; /* REQ -> RESP */ + ph->pipe_handle = oph->pipe_handle; + ph->error_code = code; + + return pn_skb_send(sk, skb, &pipe_srv); +} + +#define PAD 0x00 +static int pep_accept_conn(struct sock *sk, struct sk_buff *skb) +{ + static const u8 data[20] = { + PAD, PAD, PAD, 2 /* sub-blocks */, + PN_PIPE_SB_REQUIRED_FC_TX, pep_sb_size(5), 3, PAD, + PN_MULTI_CREDIT_FLOW_CONTROL, + PN_ONE_CREDIT_FLOW_CONTROL, + PN_LEGACY_FLOW_CONTROL, + PAD, + PN_PIPE_SB_PREFERRED_FC_RX, pep_sb_size(5), 3, PAD, + PN_MULTI_CREDIT_FLOW_CONTROL, + PN_ONE_CREDIT_FLOW_CONTROL, + PN_LEGACY_FLOW_CONTROL, + PAD, + }; + + might_sleep(); + return pep_reply(sk, skb, PN_PIPE_NO_ERROR, data, sizeof(data), + GFP_KERNEL); +} + +static int pep_reject_conn(struct sock *sk, struct sk_buff *skb, u8 code) +{ + static const u8 data[4] = { PAD, PAD, PAD, 0 /* sub-blocks */ }; + WARN_ON(code == PN_PIPE_NO_ERROR); + return pep_reply(sk, skb, code, data, sizeof(data), GFP_ATOMIC); +} + +/* Control requests are not sent by the pipe service and have a specific + * message format. */ +static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code, + gfp_t priority) +{ + const struct pnpipehdr *oph = pnp_hdr(oskb); + struct sk_buff *skb; + struct pnpipehdr *ph; + struct sockaddr_pn dst; + + skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); + if (!skb) + return -ENOMEM; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PHONET_HEADER); + ph = (struct pnpipehdr *)skb_put(skb, sizeof(*ph) + 4); + + ph->utid = oph->utid; + ph->message_id = PNS_PEP_CTRL_RESP; + ph->pipe_handle = oph->pipe_handle; + ph->data[0] = oph->data[1]; /* CTRL id */ + ph->data[1] = oph->data[0]; /* PEP type */ + ph->data[2] = code; /* error code, at an usual offset */ + ph->data[3] = PAD; + ph->data[4] = PAD; + + pn_skb_get_src_sockaddr(oskb, &dst); + return pn_skb_send(sk, skb, &dst); +} + +static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *ph; + struct sk_buff *skb; + + skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); + if (!skb) + return -ENOMEM; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PNPIPE_HEADER + 4); + __skb_push(skb, sizeof(*ph) + 4); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = 0; + ph->message_id = PNS_PEP_STATUS_IND; + ph->pipe_handle = pn->pipe_handle; + ph->pep_type = PN_PEP_TYPE_COMMON; + ph->data[1] = type; + ph->data[2] = PAD; + ph->data[3] = PAD; + ph->data[4] = status; + + return pn_skb_send(sk, skb, &pipe_srv); +} + +/* Send our RX flow control information to the sender. + * Socket must be locked. */ +static void pipe_grant_credits(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + + BUG_ON(sk->sk_state != TCP_ESTABLISHED); + + switch (pn->rx_fc) { + case PN_LEGACY_FLOW_CONTROL: /* TODO */ + break; + case PN_ONE_CREDIT_FLOW_CONTROL: + pipe_snd_status(sk, PN_PEP_IND_FLOW_CONTROL, + PEP_IND_READY, GFP_ATOMIC); + pn->rx_credits = 1; + break; + case PN_MULTI_CREDIT_FLOW_CONTROL: + if ((pn->rx_credits + CREDITS_THR) > CREDITS_MAX) + break; + if (pipe_snd_status(sk, PN_PEP_IND_ID_MCFC_GRANT_CREDITS, + CREDITS_MAX - pn->rx_credits, + GFP_ATOMIC) == 0) + pn->rx_credits = CREDITS_MAX; + break; + } +} + +static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *hdr = pnp_hdr(skb); + + if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) + return -EINVAL; + + if (hdr->data[0] != PN_PEP_TYPE_COMMON) { + LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP type: %u\n", + (unsigned)hdr->data[0]); + return -EOPNOTSUPP; + } + + switch (hdr->data[1]) { + case PN_PEP_IND_FLOW_CONTROL: + switch (pn->tx_fc) { + case PN_LEGACY_FLOW_CONTROL: + switch (hdr->data[4]) { + case PEP_IND_BUSY: + pn->tx_credits = 0; + break; + case PEP_IND_READY: + pn->tx_credits = 1; + break; + } + break; + case PN_ONE_CREDIT_FLOW_CONTROL: + if (hdr->data[4] == PEP_IND_READY) + pn->tx_credits = 1; + break; + } + break; + + case PN_PEP_IND_ID_MCFC_GRANT_CREDITS: + if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL) + break; + if (pn->tx_credits + hdr->data[4] > 0xff) + pn->tx_credits = 0xff; + else + pn->tx_credits += hdr->data[4]; + break; + + default: + LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP indication: %u\n", + (unsigned)hdr->data[1]); + return -EOPNOTSUPP; + } + if (pn->tx_credits) + sk->sk_write_space(sk); + return 0; +} + +static int pipe_rcv_created(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *hdr = pnp_hdr(skb); + u8 n_sb = hdr->data[0]; + + pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL; + __skb_pull(skb, sizeof(*hdr)); + while (n_sb > 0) { + u8 type, buf[2], len = sizeof(buf); + u8 *data = pep_get_sb(skb, &type, &len, buf); + + if (data == NULL) + return -EINVAL; + switch (type) { + case PN_PIPE_SB_NEGOTIATED_FC: + if (len < 2 || (data[0] | data[1]) > 3) + break; + pn->tx_fc = data[0] & 3; + pn->rx_fc = data[1] & 3; + break; + } + n_sb--; + } + return 0; +} + +/* Queue an skb to a connected sock. + * Socket lock must be held. */ +static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *hdr = pnp_hdr(skb); + struct sk_buff_head *queue; + int err = 0; + + BUG_ON(sk->sk_state == TCP_CLOSE_WAIT); + + switch (hdr->message_id) { + case PNS_PEP_CONNECT_REQ: + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); + break; + + case PNS_PEP_DISCONNECT_REQ: + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + sk->sk_state = TCP_CLOSE_WAIT; + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_state_change(sk); + break; + + case PNS_PEP_ENABLE_REQ: + /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */ + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + break; + + case PNS_PEP_RESET_REQ: + switch (hdr->state_after_reset) { + case PN_PIPE_DISABLE: + pn->init_enable = 0; + break; + case PN_PIPE_ENABLE: + pn->init_enable = 1; + break; + default: /* not allowed to send an error here!? */ + err = -EINVAL; + goto out; + } + /* fall through */ + case PNS_PEP_DISABLE_REQ: + pn->tx_credits = 0; + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + break; + + case PNS_PEP_CTRL_REQ: + if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) + break; + __skb_pull(skb, 4); + queue = &pn->ctrlreq_queue; + goto queue; + + case PNS_PIPE_DATA: + __skb_pull(skb, 3); /* Pipe data header */ + if (!pn_flow_safe(pn->rx_fc)) { + err = sock_queue_rcv_skb(sk, skb); + if (!err) + return 0; + break; + } + + if (pn->rx_credits == 0) { + err = -ENOBUFS; + break; + } + pn->rx_credits--; + queue = &sk->sk_receive_queue; + goto queue; + + case PNS_PEP_STATUS_IND: + pipe_rcv_status(sk, skb); + break; + + case PNS_PIPE_REDIRECTED_IND: + err = pipe_rcv_created(sk, skb); + break; + + case PNS_PIPE_CREATED_IND: + err = pipe_rcv_created(sk, skb); + if (err) + break; + /* fall through */ + case PNS_PIPE_RESET_IND: + if (!pn->init_enable) + break; + /* fall through */ + case PNS_PIPE_ENABLED_IND: + if (!pn_flow_safe(pn->tx_fc)) { + pn->tx_credits = 1; + sk->sk_write_space(sk); + } + if (sk->sk_state == TCP_ESTABLISHED) + break; /* Nothing to do */ + sk->sk_state = TCP_ESTABLISHED; + pipe_grant_credits(sk); + break; + + case PNS_PIPE_DISABLED_IND: + sk->sk_state = TCP_SYN_RECV; + pn->rx_credits = 0; + break; + + default: + LIMIT_NETDEBUG(KERN_DEBUG"Phonet unknown PEP message: %u\n", + hdr->message_id); + err = -EINVAL; + } +out: + kfree_skb(skb); + return err; + +queue: + skb->dev = NULL; + skb_set_owner_r(skb, sk); + err = skb->len; + skb_queue_tail(queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, err); + return 0; +} + +/* Destroy connected sock. */ +static void pipe_destruct(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + + skb_queue_purge(&sk->sk_receive_queue); + skb_queue_purge(&pn->ctrlreq_queue); +} + +static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct sock *newsk; + struct pep_sock *newpn, *pn = pep_sk(sk); + struct pnpipehdr *hdr; + struct sockaddr_pn dst; + u16 peer_type; + u8 pipe_handle, enabled, n_sb; + + if (!pskb_pull(skb, sizeof(*hdr) + 4)) + return -EINVAL; + + hdr = pnp_hdr(skb); + pipe_handle = hdr->pipe_handle; + switch (hdr->state_after_connect) { + case PN_PIPE_DISABLE: + enabled = 0; + break; + case PN_PIPE_ENABLE: + enabled = 1; + break; + default: + pep_reject_conn(sk, skb, PN_PIPE_ERR_INVALID_PARAM); + return -EINVAL; + } + peer_type = hdr->other_pep_type << 8; + + if (unlikely(sk->sk_state != TCP_LISTEN) || sk_acceptq_is_full(sk)) { + pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); + return -ENOBUFS; + } + + /* Parse sub-blocks (options) */ + n_sb = hdr->data[4]; + while (n_sb > 0) { + u8 type, buf[1], len = sizeof(buf); + const u8 *data = pep_get_sb(skb, &type, &len, buf); + + if (data == NULL) + return -EINVAL; + switch (type) { + case PN_PIPE_SB_CONNECT_REQ_PEP_SUB_TYPE: + if (len < 1) + return -EINVAL; + peer_type = (peer_type & 0xff00) | data[0]; + break; + } + n_sb--; + } + + skb = skb_clone(skb, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + + /* Create a new to-be-accepted sock */ + newsk = sk_alloc(sock_net(sk), PF_PHONET, GFP_ATOMIC, sk->sk_prot); + if (!newsk) { + kfree_skb(skb); + return -ENOMEM; + } + sock_init_data(NULL, newsk); + newsk->sk_state = TCP_SYN_RECV; + newsk->sk_backlog_rcv = pipe_do_rcv; + newsk->sk_protocol = sk->sk_protocol; + newsk->sk_destruct = pipe_destruct; + + newpn = pep_sk(newsk); + pn_skb_get_dst_sockaddr(skb, &dst); + newpn->pn_sk.sobject = pn_sockaddr_get_object(&dst); + newpn->pn_sk.resource = pn->pn_sk.resource; + skb_queue_head_init(&newpn->ctrlreq_queue); + newpn->pipe_handle = pipe_handle; + newpn->peer_type = peer_type; + newpn->rx_credits = newpn->tx_credits = 0; + newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; + newpn->init_enable = enabled; + + BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue)); + skb_queue_head(&newsk->sk_receive_queue, skb); + if (!sock_flag(sk, SOCK_DEAD)) + sk->sk_data_ready(sk, 0); + + sk_acceptq_added(sk); + sk_add_node(newsk, &pn->ackq); + return 0; +} + +/* Listening sock must be locked */ +static struct sock *pep_find_pipe(const struct hlist_head *hlist, + const struct sockaddr_pn *dst, + u8 pipe_handle) +{ + struct hlist_node *node; + struct sock *sknode; + u16 dobj = pn_sockaddr_get_object(dst); + + sk_for_each(sknode, node, hlist) { + struct pep_sock *pnnode = pep_sk(sknode); + + /* Ports match, but addresses might not: */ + if (pnnode->pn_sk.sobject != dobj) + continue; + if (pnnode->pipe_handle != pipe_handle) + continue; + if (sknode->sk_state == TCP_CLOSE_WAIT) + continue; + + sock_hold(sknode); + return sknode; + } + return NULL; +} + +/* + * Deliver an skb to a listening sock. + * Socket lock must be held. + * We then queue the skb to the right connected sock (if any). + */ +static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct sock *sknode; + struct pnpipehdr *hdr = pnp_hdr(skb); + struct sockaddr_pn dst; + int err = NET_RX_SUCCESS; + u8 pipe_handle; + + if (!pskb_may_pull(skb, sizeof(*hdr))) + goto drop; + + hdr = pnp_hdr(skb); + pipe_handle = hdr->pipe_handle; + if (pipe_handle == PN_PIPE_INVALID_HANDLE) + goto drop; + + pn_skb_get_dst_sockaddr(skb, &dst); + + /* Look for an existing pipe handle */ + sknode = pep_find_pipe(&pn->hlist, &dst, pipe_handle); + if (sknode) + return sk_receive_skb(sknode, skb, 1); + + /* Look for a pipe handle pending accept */ + sknode = pep_find_pipe(&pn->ackq, &dst, pipe_handle); + if (sknode) { + sock_put(sknode); + if (net_ratelimit()) + printk(KERN_WARNING"Phonet unconnected PEP ignored"); + err = NET_RX_DROP; + goto drop; + } + + switch (hdr->message_id) { + case PNS_PEP_CONNECT_REQ: + err = pep_connreq_rcv(sk, skb); + break; + + case PNS_PEP_DISCONNECT_REQ: + pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); + break; + + case PNS_PEP_CTRL_REQ: + pep_ctrlreq_error(sk, skb, PN_PIPE_INVALID_HANDLE, GFP_ATOMIC); + break; + + case PNS_PEP_RESET_REQ: + case PNS_PEP_ENABLE_REQ: + case PNS_PEP_DISABLE_REQ: + /* invalid handle is not even allowed here! */ + default: + err = NET_RX_DROP; + } +drop: + kfree_skb(skb); + return err; +} + +/* associated socket ceases to exist */ +static void pep_sock_close(struct sock *sk, long timeout) +{ + struct pep_sock *pn = pep_sk(sk); + int ifindex = 0; + + sk_common_release(sk); + + lock_sock(sk); + if (sk->sk_state == TCP_LISTEN) { + /* Destroy the listen queue */ + struct sock *sknode; + struct hlist_node *p, *n; + + sk_for_each_safe(sknode, p, n, &pn->ackq) + sk_del_node_init(sknode); + sk->sk_state = TCP_CLOSE; + } + ifindex = pn->ifindex; + pn->ifindex = 0; + release_sock(sk); + + if (ifindex) + gprs_detach(sk); +} + +static int pep_wait_connreq(struct sock *sk, int noblock) +{ + struct task_struct *tsk = current; + struct pep_sock *pn = pep_sk(sk); + long timeo = sock_rcvtimeo(sk, noblock); + + for (;;) { + DEFINE_WAIT(wait); + + if (sk->sk_state != TCP_LISTEN) + return -EINVAL; + if (!hlist_empty(&pn->ackq)) + break; + if (!timeo) + return -EWOULDBLOCK; + if (signal_pending(tsk)) + return sock_intr_errno(timeo); + + prepare_to_wait_exclusive(&sk->sk_socket->wait, &wait, + TASK_INTERRUPTIBLE); + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + finish_wait(&sk->sk_socket->wait, &wait); + } + + return 0; +} + +static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp) +{ + struct pep_sock *pn = pep_sk(sk); + struct sock *newsk = NULL; + struct sk_buff *oskb; + int err; + + lock_sock(sk); + err = pep_wait_connreq(sk, flags & O_NONBLOCK); + if (err) + goto out; + + newsk = __sk_head(&pn->ackq); + + oskb = skb_dequeue(&newsk->sk_receive_queue); + err = pep_accept_conn(newsk, oskb); + if (err) { + skb_queue_head(&newsk->sk_receive_queue, oskb); + newsk = NULL; + goto out; + } + + sock_hold(sk); + pep_sk(newsk)->listener = sk; + + sock_hold(newsk); + sk_del_node_init(newsk); + sk_acceptq_removed(sk); + sk_add_node(newsk, &pn->hlist); + __sock_put(newsk); + +out: + release_sock(sk); + *errp = err; + return newsk; +} + +static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) +{ + struct pep_sock *pn = pep_sk(sk); + int answ; + + switch (cmd) { + case SIOCINQ: + if (sk->sk_state == TCP_LISTEN) + return -EINVAL; + + lock_sock(sk); + if (sock_flag(sk, SOCK_URGINLINE) + && !skb_queue_empty(&pn->ctrlreq_queue)) + answ = skb_peek(&pn->ctrlreq_queue)->len; + else if (!skb_queue_empty(&sk->sk_receive_queue)) + answ = skb_peek(&sk->sk_receive_queue)->len; + else + answ = 0; + release_sock(sk); + return put_user(answ, (int __user *)arg); + } + + return -ENOIOCTLCMD; +} + +static int pep_init(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + + INIT_HLIST_HEAD(&pn->ackq); + INIT_HLIST_HEAD(&pn->hlist); + skb_queue_head_init(&pn->ctrlreq_queue); + pn->pipe_handle = PN_PIPE_INVALID_HANDLE; + return 0; +} + +static int pep_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) +{ + struct pep_sock *pn = pep_sk(sk); + int val = 0, err = 0; + + if (level != SOL_PNPIPE) + return -ENOPROTOOPT; + if (optlen >= sizeof(int)) { + if (get_user(val, (int __user *) optval)) + return -EFAULT; + } + + lock_sock(sk); + switch (optname) { + case PNPIPE_ENCAP: + if (val && val != PNPIPE_ENCAP_IP) { + err = -EINVAL; + break; + } + if (!pn->ifindex == !val) + break; /* Nothing to do! */ + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + break; + } + if (val) { + release_sock(sk); + err = gprs_attach(sk); + if (err > 0) { + pn->ifindex = err; + err = 0; + } + } else { + pn->ifindex = 0; + release_sock(sk); + gprs_detach(sk); + err = 0; + } + goto out_norel; + default: + err = -ENOPROTOOPT; + } + release_sock(sk); + +out_norel: + return err; +} + +static int pep_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + struct pep_sock *pn = pep_sk(sk); + int len, val; + + if (level != SOL_PNPIPE) + return -ENOPROTOOPT; + if (get_user(len, optlen)) + return -EFAULT; + + switch (optname) { + case PNPIPE_ENCAP: + val = pn->ifindex ? PNPIPE_ENCAP_IP : PNPIPE_ENCAP_NONE; + break; + case PNPIPE_IFINDEX: + val = pn->ifindex; + break; + default: + return -ENOPROTOOPT; + } + + len = min_t(unsigned int, sizeof(int), len); + if (put_user(len, optlen)) + return -EFAULT; + if (put_user(val, (int __user *) optval)) + return -EFAULT; + return 0; +} + +static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *ph; + + skb_push(skb, 3); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = 0; + ph->message_id = PNS_PIPE_DATA; + ph->pipe_handle = pn->pipe_handle; + if (pn_flow_safe(pn->tx_fc) && pn->tx_credits) + pn->tx_credits--; + + return pn_skb_send(sk, skb, &pipe_srv); +} + +static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len) +{ + struct pep_sock *pn = pep_sk(sk); + struct sk_buff *skb = NULL; + long timeo; + int flags = msg->msg_flags; + int err, done; + + if (msg->msg_flags & MSG_OOB || !(msg->msg_flags & MSG_EOR)) + return -EOPNOTSUPP; + + lock_sock(sk); + timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + if ((1 << sk->sk_state) & (TCPF_LISTEN|TCPF_CLOSE)) { + err = -ENOTCONN; + goto out; + } + if (sk->sk_state != TCP_ESTABLISHED) { + /* Wait until the pipe gets to enabled state */ +disabled: + err = sk_stream_wait_connect(sk, &timeo); + if (err) + goto out; + + if (sk->sk_state == TCP_CLOSE_WAIT) { + err = -ECONNRESET; + goto out; + } + } + BUG_ON(sk->sk_state != TCP_ESTABLISHED); + + /* Wait until flow control allows TX */ + done = pn->tx_credits > 0; + while (!done) { + DEFINE_WAIT(wait); + + if (!timeo) { + err = -EAGAIN; + goto out; + } + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + goto out; + } + + prepare_to_wait(&sk->sk_socket->wait, &wait, + TASK_INTERRUPTIBLE); + done = sk_wait_event(sk, &timeo, pn->tx_credits > 0); + finish_wait(&sk->sk_socket->wait, &wait); + + if (sk->sk_state != TCP_ESTABLISHED) + goto disabled; + } + + if (!skb) { + skb = sock_alloc_send_skb(sk, MAX_PNPIPE_HEADER + len, + flags & MSG_DONTWAIT, &err); + if (skb == NULL) + goto out; + skb_reserve(skb, MAX_PHONET_HEADER + 3); + + if (sk->sk_state != TCP_ESTABLISHED || !pn->tx_credits) + goto disabled; /* sock_alloc_send_skb might sleep */ + } + + err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + if (err < 0) + goto out; + + err = pipe_skb_send(sk, skb); + if (err >= 0) + err = len; /* success! */ + skb = NULL; +out: + release_sock(sk); + kfree_skb(skb); + return err; +} + +int pep_writeable(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + + return (sk->sk_state == TCP_ESTABLISHED) ? pn->tx_credits : 0; +} + +int pep_write(struct sock *sk, struct sk_buff *skb) +{ + struct sk_buff *rskb, *fs; + int flen = 0; + + rskb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC); + if (!rskb) { + kfree_skb(skb); + return -ENOMEM; + } + skb_shinfo(rskb)->frag_list = skb; + rskb->len += skb->len; + rskb->data_len += rskb->len; + rskb->truesize += rskb->len; + + /* Avoid nested fragments */ + for (fs = skb_shinfo(skb)->frag_list; fs; fs = fs->next) + flen += fs->len; + skb->next = skb_shinfo(skb)->frag_list; + skb_shinfo(skb)->frag_list = NULL; + skb->len -= flen; + skb->data_len -= flen; + skb->truesize -= flen; + + skb_reserve(rskb, MAX_PHONET_HEADER + 3); + return pipe_skb_send(sk, rskb); +} + +struct sk_buff *pep_read(struct sock *sk) +{ + struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); + + if (sk->sk_state == TCP_ESTABLISHED) + pipe_grant_credits(sk); + return skb; +} + +static int pep_recvmsg(struct kiocb *iocb, struct sock *sk, + struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len) +{ + struct sk_buff *skb; + int err; + + if (unlikely(1 << sk->sk_state & (TCPF_LISTEN | TCPF_CLOSE))) + return -ENOTCONN; + + if ((flags & MSG_OOB) || sock_flag(sk, SOCK_URGINLINE)) { + /* Dequeue and acknowledge control request */ + struct pep_sock *pn = pep_sk(sk); + + skb = skb_dequeue(&pn->ctrlreq_queue); + if (skb) { + pep_ctrlreq_error(sk, skb, PN_PIPE_NO_ERROR, + GFP_KERNEL); + msg->msg_flags |= MSG_OOB; + goto copy; + } + if (flags & MSG_OOB) + return -EINVAL; + } + + skb = skb_recv_datagram(sk, flags, noblock, &err); + lock_sock(sk); + if (skb == NULL) { + if (err == -ENOTCONN && sk->sk_state == TCP_CLOSE_WAIT) + err = -ECONNRESET; + release_sock(sk); + return err; + } + + if (sk->sk_state == TCP_ESTABLISHED) + pipe_grant_credits(sk); + release_sock(sk); +copy: + msg->msg_flags |= MSG_EOR; + if (skb->len > len) + msg->msg_flags |= MSG_TRUNC; + else + len = skb->len; + + err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len); + if (!err) + err = (flags & MSG_TRUNC) ? skb->len : len; + + skb_free_datagram(sk, skb); + return err; +} + +static void pep_sock_unhash(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + struct sock *skparent = NULL; + + lock_sock(sk); + if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) { + skparent = pn->listener; + sk_del_node_init(sk); + release_sock(sk); + + sk = skparent; + pn = pep_sk(skparent); + lock_sock(sk); + } + /* Unhash a listening sock only when it is closed + * and all of its active connected pipes are closed. */ + if (hlist_empty(&pn->hlist)) + pn_sock_unhash(&pn->pn_sk.sk); + release_sock(sk); + + if (skparent) + sock_put(skparent); +} + +static struct proto pep_proto = { + .close = pep_sock_close, + .accept = pep_sock_accept, + .ioctl = pep_ioctl, + .init = pep_init, + .setsockopt = pep_setsockopt, + .getsockopt = pep_getsockopt, + .sendmsg = pep_sendmsg, + .recvmsg = pep_recvmsg, + .backlog_rcv = pep_do_rcv, + .hash = pn_sock_hash, + .unhash = pep_sock_unhash, + .get_port = pn_sock_get_port, + .obj_size = sizeof(struct pep_sock), + .owner = THIS_MODULE, + .name = "PNPIPE", +}; + +static struct phonet_protocol pep_pn_proto = { + .ops = &phonet_stream_ops, + .prot = &pep_proto, + .sock_type = SOCK_SEQPACKET, +}; + +static int __init pep_register(void) +{ + return phonet_proto_register(PN_PROTO_PIPE, &pep_pn_proto); +} + +static void __exit pep_unregister(void) +{ + phonet_proto_unregister(PN_PROTO_PIPE, &pep_pn_proto); +} + +module_init(pep_register); +module_exit(pep_unregister); +MODULE_AUTHOR("Remi Denis-Courmont, Nokia"); +MODULE_DESCRIPTION("Phonet pipe protocol"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_PHONET, PN_PROTO_PIPE); diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c new file mode 100644 index 000000000000..53be9fc82aaa --- /dev/null +++ b/net/phonet/pn_dev.c @@ -0,0 +1,208 @@ +/* + * File: pn_dev.c + * + * Phonet network device + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com> + * Original author: Sakari Ailus <sakari.ailus@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/kernel.h> +#include <linux/net.h> +#include <linux/netdevice.h> +#include <linux/phonet.h> +#include <net/sock.h> +#include <net/phonet/pn_dev.h> + +/* when accessing, remember to lock with spin_lock(&pndevs.lock); */ +struct phonet_device_list pndevs = { + .list = LIST_HEAD_INIT(pndevs.list), + .lock = __SPIN_LOCK_UNLOCKED(pndevs.lock), +}; + +/* Allocate new Phonet device. */ +static struct phonet_device *__phonet_device_alloc(struct net_device *dev) +{ + struct phonet_device *pnd = kmalloc(sizeof(*pnd), GFP_ATOMIC); + if (pnd == NULL) + return NULL; + pnd->netdev = dev; + bitmap_zero(pnd->addrs, 64); + + list_add(&pnd->list, &pndevs.list); + return pnd; +} + +static struct phonet_device *__phonet_get(struct net_device *dev) +{ + struct phonet_device *pnd; + + list_for_each_entry(pnd, &pndevs.list, list) { + if (pnd->netdev == dev) + return pnd; + } + return NULL; +} + +static void __phonet_device_free(struct phonet_device *pnd) +{ + list_del(&pnd->list); + kfree(pnd); +} + +struct net_device *phonet_device_get(struct net *net) +{ + struct phonet_device *pnd; + struct net_device *dev; + + spin_lock_bh(&pndevs.lock); + list_for_each_entry(pnd, &pndevs.list, list) { + dev = pnd->netdev; + BUG_ON(!dev); + + if (dev_net(dev) == net && + (dev->reg_state == NETREG_REGISTERED) && + ((pnd->netdev->flags & IFF_UP)) == IFF_UP) + break; + dev = NULL; + } + if (dev) + dev_hold(dev); + spin_unlock_bh(&pndevs.lock); + return dev; +} + +int phonet_address_add(struct net_device *dev, u8 addr) +{ + struct phonet_device *pnd; + int err = 0; + + spin_lock_bh(&pndevs.lock); + /* Find or create Phonet-specific device data */ + pnd = __phonet_get(dev); + if (pnd == NULL) + pnd = __phonet_device_alloc(dev); + if (unlikely(pnd == NULL)) + err = -ENOMEM; + else if (test_and_set_bit(addr >> 2, pnd->addrs)) + err = -EEXIST; + spin_unlock_bh(&pndevs.lock); + return err; +} + +int phonet_address_del(struct net_device *dev, u8 addr) +{ + struct phonet_device *pnd; + int err = 0; + + spin_lock_bh(&pndevs.lock); + pnd = __phonet_get(dev); + if (!pnd || !test_and_clear_bit(addr >> 2, pnd->addrs)) + err = -EADDRNOTAVAIL; + if (bitmap_empty(pnd->addrs, 64)) + __phonet_device_free(pnd); + spin_unlock_bh(&pndevs.lock); + return err; +} + +/* Gets a source address toward a destination, through a interface. */ +u8 phonet_address_get(struct net_device *dev, u8 addr) +{ + struct phonet_device *pnd; + + spin_lock_bh(&pndevs.lock); + pnd = __phonet_get(dev); + if (pnd) { + BUG_ON(bitmap_empty(pnd->addrs, 64)); + + /* Use same source address as destination, if possible */ + if (!test_bit(addr >> 2, pnd->addrs)) + addr = find_first_bit(pnd->addrs, 64) << 2; + } else + addr = PN_NO_ADDR; + spin_unlock_bh(&pndevs.lock); + return addr; +} + +int phonet_address_lookup(u8 addr) +{ + struct phonet_device *pnd; + + spin_lock_bh(&pndevs.lock); + list_for_each_entry(pnd, &pndevs.list, list) { + /* Don't allow unregistering devices! */ + if ((pnd->netdev->reg_state != NETREG_REGISTERED) || + ((pnd->netdev->flags & IFF_UP)) != IFF_UP) + continue; + + if (test_bit(addr >> 2, pnd->addrs)) { + spin_unlock_bh(&pndevs.lock); + return 0; + } + } + spin_unlock_bh(&pndevs.lock); + return -EADDRNOTAVAIL; +} + +/* notify Phonet of device events */ +static int phonet_device_notify(struct notifier_block *me, unsigned long what, + void *arg) +{ + struct net_device *dev = arg; + + if (what == NETDEV_UNREGISTER) { + struct phonet_device *pnd; + + /* Destroy phonet-specific device data */ + spin_lock_bh(&pndevs.lock); + pnd = __phonet_get(dev); + if (pnd) + __phonet_device_free(pnd); + spin_unlock_bh(&pndevs.lock); + } + return 0; + +} + +static struct notifier_block phonet_device_notifier = { + .notifier_call = phonet_device_notify, + .priority = 0, +}; + +/* Initialize Phonet devices list */ +void phonet_device_init(void) +{ + register_netdevice_notifier(&phonet_device_notifier); +} + +void phonet_device_exit(void) +{ + struct phonet_device *pnd, *n; + + rtnl_unregister_all(PF_PHONET); + rtnl_lock(); + spin_lock_bh(&pndevs.lock); + + list_for_each_entry_safe(pnd, n, &pndevs.list, list) + __phonet_device_free(pnd); + + spin_unlock_bh(&pndevs.lock); + rtnl_unlock(); + unregister_netdevice_notifier(&phonet_device_notifier); +} diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c new file mode 100644 index 000000000000..b1770d66bc8d --- /dev/null +++ b/net/phonet/pn_netlink.c @@ -0,0 +1,165 @@ +/* + * File: pn_netlink.c + * + * Phonet netlink interface + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com> + * Original author: Sakari Ailus <sakari.ailus@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/kernel.h> +#include <linux/netlink.h> +#include <linux/phonet.h> +#include <net/sock.h> +#include <net/phonet/pn_dev.h> + +static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, + u32 pid, u32 seq, int event); + +static void rtmsg_notify(int event, struct net_device *dev, u8 addr) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(NLMSG_ALIGN(sizeof(struct ifaddrmsg)) + + nla_total_size(1), GFP_KERNEL); + if (skb == NULL) + goto errout; + err = fill_addr(skb, dev, addr, 0, 0, event); + if (err < 0) { + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + err = rtnl_notify(skb, dev_net(dev), 0, + RTNLGRP_PHONET_IFADDR, NULL, GFP_KERNEL); +errout: + if (err < 0) + rtnl_set_sk_err(dev_net(dev), RTNLGRP_PHONET_IFADDR, err); +} + +static const struct nla_policy ifa_phonet_policy[IFA_MAX+1] = { + [IFA_LOCAL] = { .type = NLA_U8 }, +}; + +static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[IFA_MAX+1]; + struct net_device *dev; + struct ifaddrmsg *ifm; + int err; + u8 pnaddr; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + ASSERT_RTNL(); + + err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_phonet_policy); + if (err < 0) + return err; + + ifm = nlmsg_data(nlh); + if (tb[IFA_LOCAL] == NULL) + return -EINVAL; + pnaddr = nla_get_u8(tb[IFA_LOCAL]); + if (pnaddr & 3) + /* Phonet addresses only have 6 high-order bits */ + return -EINVAL; + + dev = __dev_get_by_index(net, ifm->ifa_index); + if (dev == NULL) + return -ENODEV; + + if (nlh->nlmsg_type == RTM_NEWADDR) + err = phonet_address_add(dev, pnaddr); + else + err = phonet_address_del(dev, pnaddr); + if (!err) + rtmsg_notify(nlh->nlmsg_type, dev, pnaddr); + return err; +} + +static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, + u32 pid, u32 seq, int event) +{ + struct ifaddrmsg *ifm; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), 0); + if (nlh == NULL) + return -EMSGSIZE; + + ifm = nlmsg_data(nlh); + ifm->ifa_family = AF_PHONET; + ifm->ifa_prefixlen = 0; + ifm->ifa_flags = IFA_F_PERMANENT; + ifm->ifa_scope = RT_SCOPE_LINK; + ifm->ifa_index = dev->ifindex; + NLA_PUT_U8(skb, IFA_LOCAL, addr); + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct phonet_device *pnd; + int dev_idx = 0, dev_start_idx = cb->args[0]; + int addr_idx = 0, addr_start_idx = cb->args[1]; + + spin_lock_bh(&pndevs.lock); + list_for_each_entry(pnd, &pndevs.list, list) { + u8 addr; + + if (dev_idx > dev_start_idx) + addr_start_idx = 0; + if (dev_idx++ < dev_start_idx) + continue; + + addr_idx = 0; + for (addr = find_first_bit(pnd->addrs, 64); addr < 64; + addr = find_next_bit(pnd->addrs, 64, 1+addr)) { + if (addr_idx++ < addr_start_idx) + continue; + + if (fill_addr(skb, pnd->netdev, addr << 2, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWADDR)) + goto out; + } + } + +out: + spin_unlock_bh(&pndevs.lock); + cb->args[0] = dev_idx; + cb->args[1] = addr_idx; + + return skb->len; +} + +void __init phonet_netlink_register(void) +{ + rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, NULL); + rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL); + rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit); +} diff --git a/net/phonet/socket.c b/net/phonet/socket.c new file mode 100644 index 000000000000..d81740187fb4 --- /dev/null +++ b/net/phonet/socket.c @@ -0,0 +1,411 @@ +/* + * File: socket.c + * + * Phonet sockets + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com> + * Original author: Sakari Ailus <sakari.ailus@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/kernel.h> +#include <linux/net.h> +#include <linux/poll.h> +#include <net/sock.h> +#include <net/tcp_states.h> + +#include <linux/phonet.h> +#include <net/phonet/phonet.h> +#include <net/phonet/pep.h> +#include <net/phonet/pn_dev.h> + +static int pn_socket_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + + if (sk) { + sock->sk = NULL; + sk->sk_prot->close(sk, 0); + } + return 0; +} + +static struct { + struct hlist_head hlist; + spinlock_t lock; +} pnsocks = { + .hlist = HLIST_HEAD_INIT, + .lock = __SPIN_LOCK_UNLOCKED(pnsocks.lock), +}; + +/* + * Find address based on socket address, match only certain fields. + * Also grab sock if it was found. Remember to sock_put it later. + */ +struct sock *pn_find_sock_by_sa(const struct sockaddr_pn *spn) +{ + struct hlist_node *node; + struct sock *sknode; + struct sock *rval = NULL; + u16 obj = pn_sockaddr_get_object(spn); + u8 res = spn->spn_resource; + + spin_lock_bh(&pnsocks.lock); + + sk_for_each(sknode, node, &pnsocks.hlist) { + struct pn_sock *pn = pn_sk(sknode); + BUG_ON(!pn->sobject); /* unbound socket */ + + if (pn_port(obj)) { + /* Look up socket by port */ + if (pn_port(pn->sobject) != pn_port(obj)) + continue; + } else { + /* If port is zero, look up by resource */ + if (pn->resource != res) + continue; + } + if (pn_addr(pn->sobject) + && pn_addr(pn->sobject) != pn_addr(obj)) + continue; + + rval = sknode; + sock_hold(sknode); + break; + } + + spin_unlock_bh(&pnsocks.lock); + + return rval; + +} + +void pn_sock_hash(struct sock *sk) +{ + spin_lock_bh(&pnsocks.lock); + sk_add_node(sk, &pnsocks.hlist); + spin_unlock_bh(&pnsocks.lock); +} +EXPORT_SYMBOL(pn_sock_hash); + +void pn_sock_unhash(struct sock *sk) +{ + spin_lock_bh(&pnsocks.lock); + sk_del_node_init(sk); + spin_unlock_bh(&pnsocks.lock); +} +EXPORT_SYMBOL(pn_sock_unhash); + +static int pn_socket_bind(struct socket *sock, struct sockaddr *addr, int len) +{ + struct sock *sk = sock->sk; + struct pn_sock *pn = pn_sk(sk); + struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; + int err; + u16 handle; + u8 saddr; + + if (sk->sk_prot->bind) + return sk->sk_prot->bind(sk, addr, len); + + if (len < sizeof(struct sockaddr_pn)) + return -EINVAL; + if (spn->spn_family != AF_PHONET) + return -EAFNOSUPPORT; + + handle = pn_sockaddr_get_object((struct sockaddr_pn *)addr); + saddr = pn_addr(handle); + if (saddr && phonet_address_lookup(saddr)) + return -EADDRNOTAVAIL; + + lock_sock(sk); + if (sk->sk_state != TCP_CLOSE || pn_port(pn->sobject)) { + err = -EINVAL; /* attempt to rebind */ + goto out; + } + err = sk->sk_prot->get_port(sk, pn_port(handle)); + if (err) + goto out; + + /* get_port() sets the port, bind() sets the address if applicable */ + pn->sobject = pn_object(saddr, pn_port(pn->sobject)); + pn->resource = spn->spn_resource; + + /* Enable RX on the socket */ + sk->sk_prot->hash(sk); +out: + release_sock(sk); + return err; +} + +static int pn_socket_autobind(struct socket *sock) +{ + struct sockaddr_pn sa; + int err; + + memset(&sa, 0, sizeof(sa)); + sa.spn_family = AF_PHONET; + err = pn_socket_bind(sock, (struct sockaddr *)&sa, + sizeof(struct sockaddr_pn)); + if (err != -EINVAL) + return err; + BUG_ON(!pn_port(pn_sk(sock->sk)->sobject)); + return 0; /* socket was already bound */ +} + +static int pn_socket_accept(struct socket *sock, struct socket *newsock, + int flags) +{ + struct sock *sk = sock->sk; + struct sock *newsk; + int err; + + newsk = sk->sk_prot->accept(sk, flags, &err); + if (!newsk) + return err; + + lock_sock(newsk); + sock_graft(newsk, newsock); + newsock->state = SS_CONNECTED; + release_sock(newsk); + return 0; +} + +static int pn_socket_getname(struct socket *sock, struct sockaddr *addr, + int *sockaddr_len, int peer) +{ + struct sock *sk = sock->sk; + struct pn_sock *pn = pn_sk(sk); + + memset(addr, 0, sizeof(struct sockaddr_pn)); + addr->sa_family = AF_PHONET; + if (!peer) /* Race with bind() here is userland's problem. */ + pn_sockaddr_set_object((struct sockaddr_pn *)addr, + pn->sobject); + + *sockaddr_len = sizeof(struct sockaddr_pn); + return 0; +} + +static unsigned int pn_socket_poll(struct file *file, struct socket *sock, + poll_table *wait) +{ + struct sock *sk = sock->sk; + struct pep_sock *pn = pep_sk(sk); + unsigned int mask = 0; + + poll_wait(file, &sock->wait, wait); + + switch (sk->sk_state) { + case TCP_LISTEN: + return hlist_empty(&pn->ackq) ? 0 : POLLIN; + case TCP_CLOSE: + return POLLERR; + } + + if (!skb_queue_empty(&sk->sk_receive_queue)) + mask |= POLLIN | POLLRDNORM; + if (!skb_queue_empty(&pn->ctrlreq_queue)) + mask |= POLLPRI; + if (!mask && sk->sk_state == TCP_CLOSE_WAIT) + return POLLHUP; + + if (sk->sk_state == TCP_ESTABLISHED && pn->tx_credits) + mask |= POLLOUT | POLLWRNORM | POLLWRBAND; + + return mask; +} + +static int pn_socket_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + struct sock *sk = sock->sk; + struct pn_sock *pn = pn_sk(sk); + + if (cmd == SIOCPNGETOBJECT) { + struct net_device *dev; + u16 handle; + u8 saddr; + + if (get_user(handle, (__u16 __user *)arg)) + return -EFAULT; + + lock_sock(sk); + if (sk->sk_bound_dev_if) + dev = dev_get_by_index(sock_net(sk), + sk->sk_bound_dev_if); + else + dev = phonet_device_get(sock_net(sk)); + if (dev && (dev->flags & IFF_UP)) + saddr = phonet_address_get(dev, pn_addr(handle)); + else + saddr = PN_NO_ADDR; + release_sock(sk); + + if (dev) + dev_put(dev); + if (saddr == PN_NO_ADDR) + return -EHOSTUNREACH; + + handle = pn_object(saddr, pn_port(pn->sobject)); + return put_user(handle, (__u16 __user *)arg); + } + + return sk->sk_prot->ioctl(sk, cmd, arg); +} + +static int pn_socket_listen(struct socket *sock, int backlog) +{ + struct sock *sk = sock->sk; + int err = 0; + + if (sock->state != SS_UNCONNECTED) + return -EINVAL; + if (pn_socket_autobind(sock)) + return -ENOBUFS; + + lock_sock(sk); + if (sk->sk_state != TCP_CLOSE) { + err = -EINVAL; + goto out; + } + + sk->sk_state = TCP_LISTEN; + sk->sk_ack_backlog = 0; + sk->sk_max_ack_backlog = backlog; +out: + release_sock(sk); + return err; +} + +static int pn_socket_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len) +{ + struct sock *sk = sock->sk; + + if (pn_socket_autobind(sock)) + return -EAGAIN; + + return sk->sk_prot->sendmsg(iocb, sk, m, total_len); +} + +const struct proto_ops phonet_dgram_ops = { + .family = AF_PHONET, + .owner = THIS_MODULE, + .release = pn_socket_release, + .bind = pn_socket_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = pn_socket_getname, + .poll = datagram_poll, + .ioctl = pn_socket_ioctl, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, +#ifdef CONFIG_COMPAT + .compat_setsockopt = sock_no_setsockopt, + .compat_getsockopt = sock_no_getsockopt, +#endif + .sendmsg = pn_socket_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; + +const struct proto_ops phonet_stream_ops = { + .family = AF_PHONET, + .owner = THIS_MODULE, + .release = pn_socket_release, + .bind = pn_socket_bind, + .connect = sock_no_connect, + .socketpair = sock_no_socketpair, + .accept = pn_socket_accept, + .getname = pn_socket_getname, + .poll = pn_socket_poll, + .ioctl = pn_socket_ioctl, + .listen = pn_socket_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif + .sendmsg = pn_socket_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +}; +EXPORT_SYMBOL(phonet_stream_ops); + +static DEFINE_MUTEX(port_mutex); + +/* allocate port for a socket */ +int pn_sock_get_port(struct sock *sk, unsigned short sport) +{ + static int port_cur; + struct pn_sock *pn = pn_sk(sk); + struct sockaddr_pn try_sa; + struct sock *tmpsk; + + memset(&try_sa, 0, sizeof(struct sockaddr_pn)); + try_sa.spn_family = AF_PHONET; + + mutex_lock(&port_mutex); + + if (!sport) { + /* search free port */ + int port, pmin, pmax; + + phonet_get_local_port_range(&pmin, &pmax); + for (port = pmin; port <= pmax; port++) { + port_cur++; + if (port_cur < pmin || port_cur > pmax) + port_cur = pmin; + + pn_sockaddr_set_port(&try_sa, port_cur); + tmpsk = pn_find_sock_by_sa(&try_sa); + if (tmpsk == NULL) { + sport = port_cur; + goto found; + } else + sock_put(tmpsk); + } + } else { + /* try to find specific port */ + pn_sockaddr_set_port(&try_sa, sport); + tmpsk = pn_find_sock_by_sa(&try_sa); + if (tmpsk == NULL) + /* No sock there! We can use that port... */ + goto found; + else + sock_put(tmpsk); + } + mutex_unlock(&port_mutex); + + /* the port must be in use already */ + return -EADDRINUSE; + +found: + mutex_unlock(&port_mutex); + pn->sobject = pn_object(pn_addr(pn->sobject), sport); + return 0; +} +EXPORT_SYMBOL(pn_sock_get_port); diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c new file mode 100644 index 000000000000..600a4309b8c8 --- /dev/null +++ b/net/phonet/sysctl.c @@ -0,0 +1,113 @@ +/* + * File: sysctl.c + * + * Phonet /proc/sys/net/phonet interface implementation + * + * Copyright (C) 2008 Nokia Corporation. + * + * Contact: Remi Denis-Courmont <remi.denis-courmont@nokia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA + * 02110-1301 USA + */ + +#include <linux/seqlock.h> +#include <linux/sysctl.h> +#include <linux/errno.h> +#include <linux/init.h> + +#define DYNAMIC_PORT_MIN 0x40 +#define DYNAMIC_PORT_MAX 0x7f + +static DEFINE_SEQLOCK(local_port_range_lock); +static int local_port_range_min[2] = {0, 0}; +static int local_port_range_max[2] = {1023, 1023}; +static int local_port_range[2] = {DYNAMIC_PORT_MIN, DYNAMIC_PORT_MAX}; +static struct ctl_table_header *phonet_table_hrd; + +static void set_local_port_range(int range[2]) +{ + write_seqlock(&local_port_range_lock); + local_port_range[0] = range[0]; + local_port_range[1] = range[1]; + write_sequnlock(&local_port_range_lock); +} + +void phonet_get_local_port_range(int *min, int *max) +{ + unsigned seq; + do { + seq = read_seqbegin(&local_port_range_lock); + if (min) + *min = local_port_range[0]; + if (max) + *max = local_port_range[1]; + } while (read_seqretry(&local_port_range_lock, seq)); +} + +static int proc_local_port_range(ctl_table *table, int write, struct file *filp, + void __user *buffer, + size_t *lenp, loff_t *ppos) +{ + int ret; + int range[2] = {local_port_range[0], local_port_range[1]}; + ctl_table tmp = { + .data = &range, + .maxlen = sizeof(range), + .mode = table->mode, + .extra1 = &local_port_range_min, + .extra2 = &local_port_range_max, + }; + + ret = proc_dointvec_minmax(&tmp, write, filp, buffer, lenp, ppos); + + if (write && ret == 0) { + if (range[1] < range[0]) + ret = -EINVAL; + else + set_local_port_range(range); + } + + return ret; +} + +static struct ctl_table phonet_table[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "local_port_range", + .data = &local_port_range, + .maxlen = sizeof(local_port_range), + .mode = 0644, + .proc_handler = &proc_local_port_range, + .strategy = NULL, + }, + { .ctl_name = 0 } +}; + +struct ctl_path phonet_ctl_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "phonet", .ctl_name = CTL_UNNUMBERED, }, + { }, +}; + +int __init phonet_sysctl_init(void) +{ + phonet_table_hrd = register_sysctl_paths(phonet_ctl_path, phonet_table); + return phonet_table_hrd == NULL ? -ENOMEM : 0; +} + +void phonet_sysctl_exit(void) +{ + unregister_sysctl_table(phonet_table_hrd); +} diff --git a/net/rfkill/rfkill-input.c b/net/rfkill/rfkill-input.c index e5b69556bb5b..bfdade72e066 100644 --- a/net/rfkill/rfkill-input.c +++ b/net/rfkill/rfkill-input.c @@ -16,6 +16,7 @@ #include <linux/workqueue.h> #include <linux/init.h> #include <linux/rfkill.h> +#include <linux/sched.h> #include "rfkill-input.h" @@ -255,6 +256,11 @@ static struct input_handler rfkill_handler = { static int __init rfkill_handler_init(void) { + unsigned long last_run = jiffies - msecs_to_jiffies(500); + rfkill_wlan.last = last_run; + rfkill_bt.last = last_run; + rfkill_uwb.last = last_run; + rfkill_wimax.last = last_run; return input_register_handler(&rfkill_handler); } diff --git a/net/rfkill/rfkill-input.h b/net/rfkill/rfkill-input.h index f63d05045685..bbfa646157c6 100644 --- a/net/rfkill/rfkill-input.h +++ b/net/rfkill/rfkill-input.h @@ -13,5 +13,6 @@ void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state); void rfkill_epo(void); +void rfkill_restore_states(void); #endif /* __RFKILL_INPUT_H */ diff --git a/net/rfkill/rfkill.c b/net/rfkill/rfkill.c index 74aecc098bad..25ba3bd57e66 100644 --- a/net/rfkill/rfkill.c +++ b/net/rfkill/rfkill.c @@ -37,14 +37,20 @@ MODULE_DESCRIPTION("RF switch support"); MODULE_LICENSE("GPL"); static LIST_HEAD(rfkill_list); /* list of registered rf switches */ -static DEFINE_MUTEX(rfkill_mutex); +static DEFINE_MUTEX(rfkill_global_mutex); static unsigned int rfkill_default_state = RFKILL_STATE_UNBLOCKED; module_param_named(default_state, rfkill_default_state, uint, 0444); MODULE_PARM_DESC(default_state, "Default initial state for all radio types, 0 = radio off"); -static enum rfkill_state rfkill_states[RFKILL_TYPE_MAX]; +struct rfkill_gsw_state { + enum rfkill_state current_state; + enum rfkill_state default_state; +}; + +static struct rfkill_gsw_state rfkill_global_states[RFKILL_TYPE_MAX]; +static unsigned long rfkill_states_lockdflt[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; static BLOCKING_NOTIFIER_HEAD(rfkill_notifier_list); @@ -70,6 +76,7 @@ static BLOCKING_NOTIFIER_HEAD(rfkill_notifier_list); */ int register_rfkill_notifier(struct notifier_block *nb) { + BUG_ON(!nb); return blocking_notifier_chain_register(&rfkill_notifier_list, nb); } EXPORT_SYMBOL_GPL(register_rfkill_notifier); @@ -85,6 +92,7 @@ EXPORT_SYMBOL_GPL(register_rfkill_notifier); */ int unregister_rfkill_notifier(struct notifier_block *nb) { + BUG_ON(!nb); return blocking_notifier_chain_unregister(&rfkill_notifier_list, nb); } EXPORT_SYMBOL_GPL(unregister_rfkill_notifier); @@ -117,6 +125,7 @@ static void rfkill_led_trigger_activate(struct led_classdev *led) static void notify_rfkill_state_change(struct rfkill *rfkill) { + rfkill_led_trigger(rfkill, rfkill->state); blocking_notifier_call_chain(&rfkill_notifier_list, RFKILL_STATE_CHANGED, rfkill); @@ -195,6 +204,11 @@ static int rfkill_toggle_radio(struct rfkill *rfkill, * BLOCK even a transmitter that is already in state * RFKILL_STATE_HARD_BLOCKED */ break; + default: + WARN(1, KERN_WARNING + "rfkill: illegal state %d passed as parameter " + "to rfkill_toggle_radio\n", state); + return -EINVAL; } if (force || state != rfkill->state) { @@ -204,31 +218,36 @@ static int rfkill_toggle_radio(struct rfkill *rfkill, rfkill->state = state; } - if (force || rfkill->state != oldstate) { - rfkill_led_trigger(rfkill, rfkill->state); + if (force || rfkill->state != oldstate) notify_rfkill_state_change(rfkill); - } return retval; } /** - * rfkill_switch_all - Toggle state of all switches of given type + * __rfkill_switch_all - Toggle state of all switches of given type * @type: type of interfaces to be affected * @state: the new state * * This function toggles the state of all switches of given type, * unless a specific switch is claimed by userspace (in which case, * that switch is left alone) or suspended. + * + * Caller must have acquired rfkill_global_mutex. */ -void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state) +static void __rfkill_switch_all(const enum rfkill_type type, + const enum rfkill_state state) { struct rfkill *rfkill; - mutex_lock(&rfkill_mutex); - - rfkill_states[type] = state; + if (WARN((state >= RFKILL_STATE_MAX || type >= RFKILL_TYPE_MAX), + KERN_WARNING + "rfkill: illegal state %d or type %d " + "passed as parameter to __rfkill_switch_all\n", + state, type)) + return; + rfkill_global_states[type].current_state = state; list_for_each_entry(rfkill, &rfkill_list, node) { if ((!rfkill->user_claim) && (rfkill->type == type)) { mutex_lock(&rfkill->mutex); @@ -236,8 +255,21 @@ void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state) mutex_unlock(&rfkill->mutex); } } +} - mutex_unlock(&rfkill_mutex); +/** + * rfkill_switch_all - Toggle state of all switches of given type + * @type: type of interfaces to be affected + * @state: the new state + * + * Acquires rfkill_global_mutex and calls __rfkill_switch_all(@type, @state). + * Please refer to __rfkill_switch_all() for details. + */ +void rfkill_switch_all(enum rfkill_type type, enum rfkill_state state) +{ + mutex_lock(&rfkill_global_mutex); + __rfkill_switch_all(type, state); + mutex_unlock(&rfkill_global_mutex); } EXPORT_SYMBOL(rfkill_switch_all); @@ -245,23 +277,53 @@ EXPORT_SYMBOL(rfkill_switch_all); * rfkill_epo - emergency power off all transmitters * * This kicks all non-suspended rfkill devices to RFKILL_STATE_SOFT_BLOCKED, - * ignoring everything in its path but rfkill_mutex and rfkill->mutex. + * ignoring everything in its path but rfkill_global_mutex and rfkill->mutex. + * + * The global state before the EPO is saved and can be restored later + * using rfkill_restore_states(). */ void rfkill_epo(void) { struct rfkill *rfkill; + int i; + + mutex_lock(&rfkill_global_mutex); - mutex_lock(&rfkill_mutex); list_for_each_entry(rfkill, &rfkill_list, node) { mutex_lock(&rfkill->mutex); rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1); mutex_unlock(&rfkill->mutex); } - mutex_unlock(&rfkill_mutex); + for (i = 0; i < RFKILL_TYPE_MAX; i++) { + rfkill_global_states[i].default_state = + rfkill_global_states[i].current_state; + rfkill_global_states[i].current_state = + RFKILL_STATE_SOFT_BLOCKED; + } + mutex_unlock(&rfkill_global_mutex); } EXPORT_SYMBOL_GPL(rfkill_epo); /** + * rfkill_restore_states - restore global states + * + * Restore (and sync switches to) the global state from the + * states in rfkill_default_states. This can undo the effects of + * a call to rfkill_epo(). + */ +void rfkill_restore_states(void) +{ + int i; + + mutex_lock(&rfkill_global_mutex); + + for (i = 0; i < RFKILL_TYPE_MAX; i++) + __rfkill_switch_all(i, rfkill_global_states[i].default_state); + mutex_unlock(&rfkill_global_mutex); +} +EXPORT_SYMBOL_GPL(rfkill_restore_states); + +/** * rfkill_force_state - Force the internal rfkill radio state * @rfkill: pointer to the rfkill class to modify. * @state: the current radio state the class should be forced to. @@ -282,9 +344,11 @@ int rfkill_force_state(struct rfkill *rfkill, enum rfkill_state state) { enum rfkill_state oldstate; - if (state != RFKILL_STATE_SOFT_BLOCKED && - state != RFKILL_STATE_UNBLOCKED && - state != RFKILL_STATE_HARD_BLOCKED) + BUG_ON(!rfkill); + if (WARN((state >= RFKILL_STATE_MAX), + KERN_WARNING + "rfkill: illegal state %d passed as parameter " + "to rfkill_force_state\n", state)) return -EINVAL; mutex_lock(&rfkill->mutex); @@ -352,12 +416,16 @@ static ssize_t rfkill_state_store(struct device *dev, const char *buf, size_t count) { struct rfkill *rfkill = to_rfkill(dev); - unsigned int state = simple_strtoul(buf, NULL, 0); + unsigned long state; int error; if (!capable(CAP_NET_ADMIN)) return -EPERM; + error = strict_strtoul(buf, 0, &state); + if (error) + return error; + /* RFKILL_STATE_HARD_BLOCKED is illegal here... */ if (state != RFKILL_STATE_UNBLOCKED && state != RFKILL_STATE_SOFT_BLOCKED) @@ -385,7 +453,8 @@ static ssize_t rfkill_claim_store(struct device *dev, const char *buf, size_t count) { struct rfkill *rfkill = to_rfkill(dev); - bool claim = !!simple_strtoul(buf, NULL, 0); + unsigned long claim_tmp; + bool claim; int error; if (!capable(CAP_NET_ADMIN)) @@ -394,11 +463,16 @@ static ssize_t rfkill_claim_store(struct device *dev, if (rfkill->user_claim_unsupported) return -EOPNOTSUPP; + error = strict_strtoul(buf, 0, &claim_tmp); + if (error) + return error; + claim = !!claim_tmp; + /* * Take the global lock to make sure the kernel is not in * the middle of rfkill_switch_all */ - error = mutex_lock_interruptible(&rfkill_mutex); + error = mutex_lock_interruptible(&rfkill_global_mutex); if (error) return error; @@ -406,14 +480,14 @@ static ssize_t rfkill_claim_store(struct device *dev, if (!claim) { mutex_lock(&rfkill->mutex); rfkill_toggle_radio(rfkill, - rfkill_states[rfkill->type], - 0); + rfkill_global_states[rfkill->type].current_state, + 0); mutex_unlock(&rfkill->mutex); } rfkill->user_claim = claim; } - mutex_unlock(&rfkill_mutex); + mutex_unlock(&rfkill_global_mutex); return error ? error : count; } @@ -437,21 +511,9 @@ static void rfkill_release(struct device *dev) #ifdef CONFIG_PM static int rfkill_suspend(struct device *dev, pm_message_t state) { - struct rfkill *rfkill = to_rfkill(dev); - - if (dev->power.power_state.event != state.event) { - if (state.event & PM_EVENT_SLEEP) { - /* Stop transmitter, keep state, no notifies */ - update_rfkill_state(rfkill); - - mutex_lock(&rfkill->mutex); - rfkill->toggle_radio(rfkill->data, - RFKILL_STATE_SOFT_BLOCKED); - mutex_unlock(&rfkill->mutex); - } - + /* mark class device as suspended */ + if (dev->power.power_state.event != state.event) dev->power.power_state = state; - } return 0; } @@ -525,24 +587,60 @@ static struct class rfkill_class = { .dev_uevent = rfkill_dev_uevent, }; +static int rfkill_check_duplicity(const struct rfkill *rfkill) +{ + struct rfkill *p; + unsigned long seen[BITS_TO_LONGS(RFKILL_TYPE_MAX)]; + + memset(seen, 0, sizeof(seen)); + + list_for_each_entry(p, &rfkill_list, node) { + if (WARN((p == rfkill), KERN_WARNING + "rfkill: illegal attempt to register " + "an already registered rfkill struct\n")) + return -EEXIST; + set_bit(p->type, seen); + } + + /* 0: first switch of its kind */ + return (test_bit(rfkill->type, seen)) ? 1 : 0; +} + static int rfkill_add_switch(struct rfkill *rfkill) { - mutex_lock(&rfkill_mutex); + int error; + + mutex_lock(&rfkill_global_mutex); - rfkill_toggle_radio(rfkill, rfkill_states[rfkill->type], 0); + error = rfkill_check_duplicity(rfkill); + if (error < 0) + goto unlock_out; + + if (!error) { + /* lock default after first use */ + set_bit(rfkill->type, rfkill_states_lockdflt); + rfkill_global_states[rfkill->type].current_state = + rfkill_global_states[rfkill->type].default_state; + } + + rfkill_toggle_radio(rfkill, + rfkill_global_states[rfkill->type].current_state, + 0); list_add_tail(&rfkill->node, &rfkill_list); - mutex_unlock(&rfkill_mutex); + error = 0; +unlock_out: + mutex_unlock(&rfkill_global_mutex); - return 0; + return error; } static void rfkill_remove_switch(struct rfkill *rfkill) { - mutex_lock(&rfkill_mutex); + mutex_lock(&rfkill_global_mutex); list_del_init(&rfkill->node); - mutex_unlock(&rfkill_mutex); + mutex_unlock(&rfkill_global_mutex); mutex_lock(&rfkill->mutex); rfkill_toggle_radio(rfkill, RFKILL_STATE_SOFT_BLOCKED, 1); @@ -562,11 +660,18 @@ static void rfkill_remove_switch(struct rfkill *rfkill) * NOTE: If registration fails the structure shoudl be freed by calling * rfkill_free() otherwise rfkill_unregister() should be used. */ -struct rfkill *rfkill_allocate(struct device *parent, enum rfkill_type type) +struct rfkill * __must_check rfkill_allocate(struct device *parent, + enum rfkill_type type) { struct rfkill *rfkill; struct device *dev; + if (WARN((type >= RFKILL_TYPE_MAX), + KERN_WARNING + "rfkill: illegal type %d passed as parameter " + "to rfkill_allocate\n", type)) + return NULL; + rfkill = kzalloc(sizeof(struct rfkill), GFP_KERNEL); if (!rfkill) return NULL; @@ -633,15 +738,18 @@ static void rfkill_led_trigger_unregister(struct rfkill *rfkill) * structure needs to be registered. Immediately from registration the * switch driver should be able to service calls to toggle_radio. */ -int rfkill_register(struct rfkill *rfkill) +int __must_check rfkill_register(struct rfkill *rfkill) { static atomic_t rfkill_no = ATOMIC_INIT(0); struct device *dev = &rfkill->dev; int error; - if (!rfkill->toggle_radio) - return -EINVAL; - if (rfkill->type >= RFKILL_TYPE_MAX) + if (WARN((!rfkill || !rfkill->toggle_radio || + rfkill->type >= RFKILL_TYPE_MAX || + rfkill->state >= RFKILL_STATE_MAX), + KERN_WARNING + "rfkill: attempt to register a " + "badly initialized rfkill struct\n")) return -EINVAL; snprintf(dev->bus_id, sizeof(dev->bus_id), @@ -676,6 +784,7 @@ EXPORT_SYMBOL(rfkill_register); */ void rfkill_unregister(struct rfkill *rfkill) { + BUG_ON(!rfkill); device_del(&rfkill->dev); rfkill_remove_switch(rfkill); rfkill_led_trigger_unregister(rfkill); @@ -683,6 +792,56 @@ void rfkill_unregister(struct rfkill *rfkill) } EXPORT_SYMBOL(rfkill_unregister); +/** + * rfkill_set_default - set initial value for a switch type + * @type - the type of switch to set the default state of + * @state - the new default state for that group of switches + * + * Sets the initial state rfkill should use for a given type. + * The following initial states are allowed: RFKILL_STATE_SOFT_BLOCKED + * and RFKILL_STATE_UNBLOCKED. + * + * This function is meant to be used by platform drivers for platforms + * that can save switch state across power down/reboot. + * + * The default state for each switch type can be changed exactly once. + * After a switch of that type is registered, the default state cannot + * be changed anymore. This guards against multiple drivers it the + * same platform trying to set the initial switch default state, which + * is not allowed. + * + * Returns -EPERM if the state has already been set once or is in use, + * so drivers likely want to either ignore or at most printk(KERN_NOTICE) + * if this function returns -EPERM. + * + * Returns 0 if the new default state was set, or an error if it + * could not be set. + */ +int rfkill_set_default(enum rfkill_type type, enum rfkill_state state) +{ + int error; + + if (WARN((type >= RFKILL_TYPE_MAX || + (state != RFKILL_STATE_SOFT_BLOCKED && + state != RFKILL_STATE_UNBLOCKED)), + KERN_WARNING + "rfkill: illegal state %d or type %d passed as " + "parameter to rfkill_set_default\n", state, type)) + return -EINVAL; + + mutex_lock(&rfkill_global_mutex); + + if (!test_and_set_bit(type, rfkill_states_lockdflt)) { + rfkill_global_states[type].default_state = state; + error = 0; + } else + error = -EPERM; + + mutex_unlock(&rfkill_global_mutex); + return error; +} +EXPORT_SYMBOL_GPL(rfkill_set_default); + /* * Rfkill module initialization/deinitialization. */ @@ -696,8 +855,8 @@ static int __init rfkill_init(void) rfkill_default_state != RFKILL_STATE_UNBLOCKED) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(rfkill_states); i++) - rfkill_states[i] = rfkill_default_state; + for (i = 0; i < RFKILL_TYPE_MAX; i++) + rfkill_global_states[i].default_state = rfkill_default_state; error = class_register(&rfkill_class); if (error) { diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 9437b27ff84d..6767e54155db 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -106,6 +106,15 @@ config NET_SCH_PRIO To compile this code as a module, choose M here: the module will be called sch_prio. +config NET_SCH_MULTIQ + tristate "Hardware Multiqueue-aware Multi Band Queuing (MULTIQ)" + ---help--- + Say Y here if you want to use an n-band queue packet scheduler + to support devices that have multiple hardware transmit queues. + + To compile this code as a module, choose M here: the + module will be called sch_multiq. + config NET_SCH_RED tristate "Random Early Detection (RED)" ---help--- @@ -476,6 +485,17 @@ config NET_ACT_SIMP To compile this code as a module, choose M here: the module will be called simple. +config NET_ACT_SKBEDIT + tristate "SKB Editing" + depends on NET_CLS_ACT + ---help--- + Say Y here to change skb priority or queue_mapping settings. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called skbedit. + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 1d2b0f7df848..e60c9925b269 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -14,6 +14,7 @@ obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o +obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o @@ -26,6 +27,7 @@ obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o +obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 9974b3f04f05..8f457f1e0acf 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -494,7 +494,7 @@ struct tc_action *tcf_action_init_1(struct nlattr *nla, struct nlattr *est, a_o = tc_lookup_action_n(act_name); if (a_o == NULL) { -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES rtnl_unlock(); request_module("act_%s", act_name); rtnl_lock(); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index d1263b3c96c3..0453d79ebf57 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -40,6 +40,7 @@ static struct tcf_hashinfo ipt_hash_info = { static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) { + struct xt_tgchk_param par; struct xt_target *target; int ret = 0; @@ -49,29 +50,30 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int return -ENOENT; t->u.kernel.target = target; - - ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), - table, hook, 0, 0); - if (ret) { + par.table = table; + par.entryinfo = NULL; + par.target = target; + par.targinfo = t->data; + par.hook_mask = hook; + par.family = NFPROTO_IPV4; + + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); + if (ret < 0) { module_put(t->u.kernel.target->me); return ret; } - if (t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(table, NULL, - t->u.kernel.target, t->data, - hook)) { - module_put(t->u.kernel.target->me); - ret = -EINVAL; - } - - return ret; + return 0; } static void ipt_destroy_target(struct ipt_entry_target *t) { - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); - module_put(t->u.kernel.target->me); + struct xt_tgdtor_param par = { + .target = t->u.kernel.target, + .targinfo = t->data, + }; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); } static int tcf_ipt_release(struct tcf_ipt *ipt, int bind) @@ -196,6 +198,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, { int ret = 0, result = 0; struct tcf_ipt *ipt = a->priv; + struct xt_target_param par; if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) @@ -211,10 +214,13 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, /* yes, we have to worry about both in and out dev worry later - danger - this API seems to have changed from earlier kernels */ - ret = ipt->tcfi_t->u.kernel.target->target(skb, skb->dev, NULL, - ipt->tcfi_hook, - ipt->tcfi_t->u.kernel.target, - ipt->tcfi_t->data); + par.in = skb->dev; + par.out = NULL; + par.hooknum = ipt->tcfi_hook; + par.target = ipt->tcfi_t->u.kernel.target; + par.targinfo = ipt->tcfi_t->data; + ret = par.target->target(skb, &par); + switch (ret) { case NF_ACCEPT: result = TC_ACT_OK; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c new file mode 100644 index 000000000000..fe9777e77f35 --- /dev/null +++ b/net/sched/act_skbedit.c @@ -0,0 +1,203 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Alexander Duyck <alexander.h.duyck@intel.com> + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/rtnetlink.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> + +#include <linux/tc_act/tc_skbedit.h> +#include <net/tc_act/tc_skbedit.h> + +#define SKBEDIT_TAB_MASK 15 +static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1]; +static u32 skbedit_idx_gen; +static DEFINE_RWLOCK(skbedit_lock); + +static struct tcf_hashinfo skbedit_hash_info = { + .htab = tcf_skbedit_ht, + .hmask = SKBEDIT_TAB_MASK, + .lock = &skbedit_lock, +}; + +static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_skbedit *d = a->priv; + + spin_lock(&d->tcf_lock); + d->tcf_tm.lastuse = jiffies; + d->tcf_bstats.bytes += qdisc_pkt_len(skb); + d->tcf_bstats.packets++; + + if (d->flags & SKBEDIT_F_PRIORITY) + skb->priority = d->priority; + if (d->flags & SKBEDIT_F_QUEUE_MAPPING && + skb->dev->real_num_tx_queues > d->queue_mapping) + skb_set_queue_mapping(skb, d->queue_mapping); + + spin_unlock(&d->tcf_lock); + return d->tcf_action; +} + +static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = { + [TCA_SKBEDIT_PARMS] = { .len = sizeof(struct tc_skbedit) }, + [TCA_SKBEDIT_PRIORITY] = { .len = sizeof(u32) }, + [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) }, +}; + +static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est, + struct tc_action *a, int ovr, int bind) +{ + struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; + struct tc_skbedit *parm; + struct tcf_skbedit *d; + struct tcf_common *pc; + u32 flags = 0, *priority = NULL; + u16 *queue_mapping = NULL; + int ret = 0, err; + + if (nla == NULL) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy); + if (err < 0) + return err; + + if (tb[TCA_SKBEDIT_PARMS] == NULL) + return -EINVAL; + + if (tb[TCA_SKBEDIT_PRIORITY] != NULL) { + flags |= SKBEDIT_F_PRIORITY; + priority = nla_data(tb[TCA_SKBEDIT_PRIORITY]); + } + + if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) { + flags |= SKBEDIT_F_QUEUE_MAPPING; + queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]); + } + if (!flags) + return -EINVAL; + + parm = nla_data(tb[TCA_SKBEDIT_PARMS]); + + pc = tcf_hash_check(parm->index, a, bind, &skbedit_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, + &skbedit_idx_gen, &skbedit_hash_info); + if (unlikely(!pc)) + return -ENOMEM; + + d = to_skbedit(pc); + ret = ACT_P_CREATED; + } else { + d = to_skbedit(pc); + if (!ovr) { + tcf_hash_release(pc, bind, &skbedit_hash_info); + return -EEXIST; + } + } + + spin_lock_bh(&d->tcf_lock); + + d->flags = flags; + if (flags & SKBEDIT_F_PRIORITY) + d->priority = *priority; + if (flags & SKBEDIT_F_QUEUE_MAPPING) + d->queue_mapping = *queue_mapping; + d->tcf_action = parm->action; + + spin_unlock_bh(&d->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(pc, &skbedit_hash_info); + return ret; +} + +static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind) +{ + struct tcf_skbedit *d = a->priv; + + if (d) + return tcf_hash_release(&d->common, bind, &skbedit_hash_info); + return 0; +} + +static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_skbedit *d = a->priv; + struct tc_skbedit opt; + struct tcf_t t; + + opt.index = d->tcf_index; + opt.refcnt = d->tcf_refcnt - ref; + opt.bindcnt = d->tcf_bindcnt - bind; + opt.action = d->tcf_action; + NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt); + if (d->flags & SKBEDIT_F_PRIORITY) + NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority), + &d->priority); + if (d->flags & SKBEDIT_F_QUEUE_MAPPING) + NLA_PUT(skb, TCA_SKBEDIT_QUEUE_MAPPING, + sizeof(d->queue_mapping), &d->queue_mapping); + t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(d->tcf_tm.expires); + NLA_PUT(skb, TCA_SKBEDIT_TM, sizeof(t), &t); + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tc_action_ops act_skbedit_ops = { + .kind = "skbedit", + .hinfo = &skbedit_hash_info, + .type = TCA_ACT_SKBEDIT, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_skbedit, + .dump = tcf_skbedit_dump, + .cleanup = tcf_skbedit_cleanup, + .init = tcf_skbedit_init, + .walk = tcf_generic_walker, +}; + +MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>"); +MODULE_DESCRIPTION("SKB Editing"); +MODULE_LICENSE("GPL"); + +static int __init skbedit_init_module(void) +{ + return tcf_register_action(&act_skbedit_ops); +} + +static void __exit skbedit_cleanup_module(void) +{ + tcf_unregister_action(&act_skbedit_ops); +} + +module_init(skbedit_init_module); +module_exit(skbedit_cleanup_module); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 8eb79e92e94c..16e7ac9774e5 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -227,7 +227,7 @@ replay: err = -ENOENT; tp_ops = tcf_proto_lookup_ops(tca[TCA_KIND]); if (tp_ops == NULL) { -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES struct nlattr *kind = tca[TCA_KIND]; char name[IFNAMSIZ]; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 8f63a1a94014..0ebaff637e31 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -67,9 +67,9 @@ static inline u32 addr_fold(void *addr) static u32 flow_get_src(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(ip_hdr(skb)->saddr); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]); default: return addr_fold(skb->sk); @@ -79,9 +79,9 @@ static u32 flow_get_src(const struct sk_buff *skb) static u32 flow_get_dst(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(ip_hdr(skb)->daddr); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]); default: return addr_fold(skb->dst) ^ (__force u16)skb->protocol; @@ -91,9 +91,9 @@ static u32 flow_get_dst(const struct sk_buff *skb) static u32 flow_get_proto(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ip_hdr(skb)->protocol; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ipv6_hdr(skb)->nexthdr; default: return 0; @@ -120,7 +120,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb) u32 res = 0; switch (skb->protocol) { - case __constant_htons(ETH_P_IP): { + case htons(ETH_P_IP): { struct iphdr *iph = ip_hdr(skb); if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && @@ -128,7 +128,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb) res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4)); break; } - case __constant_htons(ETH_P_IPV6): { + case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); if (has_ports(iph->nexthdr)) @@ -147,7 +147,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb) u32 res = 0; switch (skb->protocol) { - case __constant_htons(ETH_P_IP): { + case htons(ETH_P_IP): { struct iphdr *iph = ip_hdr(skb); if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && @@ -155,7 +155,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb) res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2)); break; } - case __constant_htons(ETH_P_IPV6): { + case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); if (has_ports(iph->nexthdr)) @@ -213,9 +213,9 @@ static u32 flow_get_nfct(const struct sk_buff *skb) static u32 flow_get_nfct_src(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(CTTUPLE(skb, src.u3.ip6[3])); } fallback: @@ -225,9 +225,9 @@ fallback: static u32 flow_get_nfct_dst(const struct sk_buff *skb) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): return ntohl(CTTUPLE(skb, dst.u3.ip6[3])); } fallback: diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index cc49c932641d..bc450397487a 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c @@ -14,6 +14,7 @@ #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/tc_ematch/tc_em_cmp.h> +#include <asm/unaligned.h> #include <net/pkt_cls.h> static inline int cmp_needs_transformation(struct tcf_em_cmp *cmp) @@ -37,8 +38,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em, break; case TCF_EM_ALIGN_U16: - val = *ptr << 8; - val |= *(ptr+1); + val = get_unaligned_be16(ptr); if (cmp_needs_transformation(cmp)) val = be16_to_cpu(val); @@ -47,10 +47,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em, case TCF_EM_ALIGN_U32: /* Worth checking boundries? The branching seems * to get worse. Visit again. */ - val = *ptr << 24; - val |= *(ptr+1) << 16; - val |= *(ptr+2) << 8; - val |= *(ptr+3); + val = get_unaligned_be32(ptr); if (cmp_needs_transformation(cmp)) val = be32_to_cpu(val); diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 5e6f82e0e6f3..e82519e548d7 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -224,7 +224,7 @@ static int tcf_em_validate(struct tcf_proto *tp, if (em->ops == NULL) { err = -ENOENT; -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES __rtnl_unlock(); request_module("ematch-kind-%u", em_hdr->kind); rtnl_lock(); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 1122c952aa99..6ab4a2f92ca0 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -417,6 +417,8 @@ static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab) struct nlattr *nest; nest = nla_nest_start(skb, TCA_STAB); + if (nest == NULL) + goto nla_put_failure; NLA_PUT(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts); nla_nest_end(skb, nest); @@ -764,7 +766,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, struct qdisc_size_table *stab; ops = qdisc_lookup_ops(kind); -#ifdef CONFIG_KMOD +#ifdef CONFIG_MODULES if (ops == NULL && kind != NULL) { char name[IFNAMSIZ]; if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) { diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 8b06fa900482..03e389e8d945 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -545,9 +545,10 @@ static void cbq_ovl_delay(struct cbq_class *cl) expires = ktime_set(0, 0); expires = ktime_add_ns(expires, PSCHED_US2NS(sched)); if (hrtimer_try_to_cancel(&q->delay_timer) && - ktime_to_ns(ktime_sub(q->delay_timer.expires, - expires)) > 0) - q->delay_timer.expires = expires; + ktime_to_ns(ktime_sub( + hrtimer_get_expires(&q->delay_timer), + expires)) > 0) + hrtimer_set_expires(&q->delay_timer, expires); hrtimer_restart(&q->delay_timer); cl->delayed = 1; cl->xstats.overactions++; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index edd1298f85f6..ba43aab3a851 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -202,7 +202,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (p->set_tc_index) { switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): if (skb_cow_head(skb, sizeof(struct iphdr))) goto drop; @@ -210,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) & ~INET_ECN_MASK; break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): if (skb_cow_head(skb, sizeof(struct ipv6hdr))) goto drop; @@ -289,11 +289,11 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) pr_debug("index %d->%d\n", skb->tc_index, index); switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): ipv4_change_dsfield(ip_hdr(skb), p->mask[index], p->value[index]); break; - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index], p->value[index]); break; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index ec0a0839ce51..cdcd16fcfeda 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -44,23 +44,30 @@ static inline int qdisc_qlen(struct Qdisc *q) static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { - if (unlikely(skb->next)) - q->gso_skb = skb; - else - q->ops->requeue(skb, q); - + q->gso_skb = skb; + q->qstats.requeues++; __netif_schedule(q); + return 0; } static inline struct sk_buff *dequeue_skb(struct Qdisc *q) { - struct sk_buff *skb; + struct sk_buff *skb = q->gso_skb; - if ((skb = q->gso_skb)) - q->gso_skb = NULL; - else + if (unlikely(skb)) { + struct net_device *dev = qdisc_dev(q); + struct netdev_queue *txq; + + /* check the reason of requeuing without tx lock first */ + txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq)) + q->gso_skb = NULL; + else + skb = NULL; + } else { skb = q->dequeue(q); + } return skb; } @@ -263,6 +270,8 @@ static void dev_watchdog_down(struct net_device *dev) void netif_carrier_on(struct net_device *dev) { if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) { + if (dev->reg_state == NETREG_UNINITIALIZED) + return; linkwatch_fire_event(dev); if (netif_running(dev)) __netdev_watchdog_up(dev); @@ -278,8 +287,11 @@ EXPORT_SYMBOL(netif_carrier_on); */ void netif_carrier_off(struct net_device *dev) { - if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) + if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) { + if (dev->reg_state == NETREG_UNINITIALIZED) + return; linkwatch_fire_event(dev); + } } EXPORT_SYMBOL(netif_carrier_off); @@ -319,6 +331,7 @@ struct Qdisc_ops noop_qdisc_ops __read_mostly = { static struct netdev_queue noop_netdev_queue = { .qdisc = &noop_qdisc, + .qdisc_sleeping = &noop_qdisc, }; struct Qdisc noop_qdisc = { @@ -327,6 +340,7 @@ struct Qdisc noop_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, .list = LIST_HEAD_INIT(noop_qdisc.list), + .requeue.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, }; @@ -344,6 +358,7 @@ static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = { static struct Qdisc noqueue_qdisc; static struct netdev_queue noqueue_netdev_queue = { .qdisc = &noqueue_qdisc, + .qdisc_sleeping = &noqueue_qdisc, }; static struct Qdisc noqueue_qdisc = { @@ -352,6 +367,7 @@ static struct Qdisc noqueue_qdisc = { .flags = TCQ_F_BUILTIN, .ops = &noqueue_qdisc_ops, .list = LIST_HEAD_INIT(noqueue_qdisc.list), + .requeue.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock), .dev_queue = &noqueue_netdev_queue, }; @@ -472,6 +488,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch->padded = (char *) sch - (char *) p; INIT_LIST_HEAD(&sch->list); + skb_queue_head_init(&sch->requeue); skb_queue_head_init(&sch->q); sch->ops = ops; sch->enqueue = ops->enqueue; @@ -540,6 +557,7 @@ void qdisc_destroy(struct Qdisc *qdisc) dev_put(qdisc_dev(qdisc)); kfree_skb(qdisc->gso_skb); + __skb_queue_purge(&qdisc->requeue); kfree((char *) qdisc - qdisc->padded); } diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c new file mode 100644 index 000000000000..915f3149dde2 --- /dev/null +++ b/net/sched/sch_multiq.c @@ -0,0 +1,477 @@ +/* + * Copyright (c) 2008, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * Author: Alexander Duyck <alexander.h.duyck@intel.com> + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <net/netlink.h> +#include <net/pkt_sched.h> + + +struct multiq_sched_data { + u16 bands; + u16 max_bands; + u16 curband; + struct tcf_proto *filter_list; + struct Qdisc **queues; +}; + + +static struct Qdisc * +multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + u32 band; + struct tcf_result res; + int err; + + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; + err = tc_classify(skb, q->filter_list, &res); +#ifdef CONFIG_NET_CLS_ACT + switch (err) { + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + case TC_ACT_SHOT: + return NULL; + } +#endif + band = skb_get_queue_mapping(skb); + + if (band >= q->bands) + return q->queues[0]; + + return q->queues[band]; +} + +static int +multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct Qdisc *qdisc; + int ret; + + qdisc = multiq_classify(skb, sch, &ret); +#ifdef CONFIG_NET_CLS_ACT + if (qdisc == NULL) { + + if (ret & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return ret; + } +#endif + + ret = qdisc_enqueue(skb, qdisc); + if (ret == NET_XMIT_SUCCESS) { + sch->bstats.bytes += qdisc_pkt_len(skb); + sch->bstats.packets++; + sch->q.qlen++; + return NET_XMIT_SUCCESS; + } + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; + return ret; +} + + +static int +multiq_requeue(struct sk_buff *skb, struct Qdisc *sch) +{ + struct Qdisc *qdisc; + struct multiq_sched_data *q = qdisc_priv(sch); + int ret; + + qdisc = multiq_classify(skb, sch, &ret); +#ifdef CONFIG_NET_CLS_ACT + if (qdisc == NULL) { + if (ret & __NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb(skb); + return ret; + } +#endif + + ret = qdisc->ops->requeue(skb, qdisc); + if (ret == NET_XMIT_SUCCESS) { + sch->q.qlen++; + sch->qstats.requeues++; + if (q->curband) + q->curband--; + else + q->curband = q->bands - 1; + return NET_XMIT_SUCCESS; + } + if (net_xmit_drop_count(ret)) + sch->qstats.drops++; + return ret; +} + + +static struct sk_buff *multiq_dequeue(struct Qdisc *sch) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + struct Qdisc *qdisc; + struct sk_buff *skb; + int band; + + for (band = 0; band < q->bands; band++) { + /* cycle through bands to ensure fairness */ + q->curband++; + if (q->curband >= q->bands) + q->curband = 0; + + /* Check that target subqueue is available before + * pulling an skb to avoid excessive requeues + */ + if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) { + qdisc = q->queues[q->curband]; + skb = qdisc->dequeue(qdisc); + if (skb) { + sch->q.qlen--; + return skb; + } + } + } + return NULL; + +} + +static unsigned int multiq_drop(struct Qdisc *sch) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + int band; + unsigned int len; + struct Qdisc *qdisc; + + for (band = q->bands-1; band >= 0; band--) { + qdisc = q->queues[band]; + if (qdisc->ops->drop) { + len = qdisc->ops->drop(qdisc); + if (len != 0) { + sch->q.qlen--; + return len; + } + } + } + return 0; +} + + +static void +multiq_reset(struct Qdisc *sch) +{ + u16 band; + struct multiq_sched_data *q = qdisc_priv(sch); + + for (band = 0; band < q->bands; band++) + qdisc_reset(q->queues[band]); + sch->q.qlen = 0; + q->curband = 0; +} + +static void +multiq_destroy(struct Qdisc *sch) +{ + int band; + struct multiq_sched_data *q = qdisc_priv(sch); + + tcf_destroy_chain(&q->filter_list); + for (band = 0; band < q->bands; band++) + qdisc_destroy(q->queues[band]); + + kfree(q->queues); +} + +static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + struct tc_multiq_qopt *qopt; + int i; + + if (!netif_is_multiqueue(qdisc_dev(sch))) + return -EINVAL; + if (nla_len(opt) < sizeof(*qopt)) + return -EINVAL; + + qopt = nla_data(opt); + + qopt->bands = qdisc_dev(sch)->real_num_tx_queues; + + sch_tree_lock(sch); + q->bands = qopt->bands; + for (i = q->bands; i < q->max_bands; i++) { + if (q->queues[i] != &noop_qdisc) { + struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc); + qdisc_tree_decrease_qlen(child, child->q.qlen); + qdisc_destroy(child); + } + } + + sch_tree_unlock(sch); + + for (i = 0; i < q->bands; i++) { + if (q->queues[i] == &noop_qdisc) { + struct Qdisc *child; + child = qdisc_create_dflt(qdisc_dev(sch), + sch->dev_queue, + &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, + i + 1)); + if (child) { + sch_tree_lock(sch); + child = xchg(&q->queues[i], child); + + if (child != &noop_qdisc) { + qdisc_tree_decrease_qlen(child, + child->q.qlen); + qdisc_destroy(child); + } + sch_tree_unlock(sch); + } + } + } + return 0; +} + +static int multiq_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + int i, err; + + q->queues = NULL; + + if (opt == NULL) + return -EINVAL; + + q->max_bands = qdisc_dev(sch)->num_tx_queues; + + q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL); + if (!q->queues) + return -ENOBUFS; + for (i = 0; i < q->max_bands; i++) + q->queues[i] = &noop_qdisc; + + err = multiq_tune(sch,opt); + + if (err) + kfree(q->queues); + + return err; +} + +static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned char *b = skb_tail_pointer(skb); + struct tc_multiq_qopt opt; + + opt.bands = q->bands; + opt.max_bands = q->max_bands; + + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, + struct Qdisc **old) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned long band = arg - 1; + + if (band >= q->bands) + return -EINVAL; + + if (new == NULL) + new = &noop_qdisc; + + sch_tree_lock(sch); + *old = q->queues[band]; + q->queues[band] = new; + qdisc_tree_decrease_qlen(*old, (*old)->q.qlen); + qdisc_reset(*old); + sch_tree_unlock(sch); + + return 0; +} + +static struct Qdisc * +multiq_leaf(struct Qdisc *sch, unsigned long arg) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned long band = arg - 1; + + if (band >= q->bands) + return NULL; + + return q->queues[band]; +} + +static unsigned long multiq_get(struct Qdisc *sch, u32 classid) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + unsigned long band = TC_H_MIN(classid); + + if (band - 1 >= q->bands) + return 0; + return band; +} + +static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent, + u32 classid) +{ + return multiq_get(sch, classid); +} + + +static void multiq_put(struct Qdisc *q, unsigned long cl) +{ + return; +} + +static int multiq_change(struct Qdisc *sch, u32 handle, u32 parent, + struct nlattr **tca, unsigned long *arg) +{ + unsigned long cl = *arg; + struct multiq_sched_data *q = qdisc_priv(sch); + + if (cl - 1 > q->bands) + return -ENOENT; + return 0; +} + +static int multiq_delete(struct Qdisc *sch, unsigned long cl) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + if (cl - 1 > q->bands) + return -ENOENT; + return 0; +} + + +static int multiq_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + + if (cl - 1 > q->bands) + return -ENOENT; + tcm->tcm_handle |= TC_H_MIN(cl); + if (q->queues[cl-1]) + tcm->tcm_info = q->queues[cl-1]->handle; + return 0; +} + +static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + struct Qdisc *cl_q; + + cl_q = q->queues[cl - 1]; + if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 || + gnet_stats_copy_queue(d, &cl_q->qstats) < 0) + return -1; + + return 0; +} + +static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + int band; + + if (arg->stop) + return; + + for (band = 0; band < q->bands; band++) { + if (arg->count < arg->skip) { + arg->count++; + continue; + } + if (arg->fn(sch, band+1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } +} + +static struct tcf_proto **multiq_find_tcf(struct Qdisc *sch, unsigned long cl) +{ + struct multiq_sched_data *q = qdisc_priv(sch); + + if (cl) + return NULL; + return &q->filter_list; +} + +static const struct Qdisc_class_ops multiq_class_ops = { + .graft = multiq_graft, + .leaf = multiq_leaf, + .get = multiq_get, + .put = multiq_put, + .change = multiq_change, + .delete = multiq_delete, + .walk = multiq_walk, + .tcf_chain = multiq_find_tcf, + .bind_tcf = multiq_bind, + .unbind_tcf = multiq_put, + .dump = multiq_dump_class, + .dump_stats = multiq_dump_class_stats, +}; + +static struct Qdisc_ops multiq_qdisc_ops __read_mostly = { + .next = NULL, + .cl_ops = &multiq_class_ops, + .id = "multiq", + .priv_size = sizeof(struct multiq_sched_data), + .enqueue = multiq_enqueue, + .dequeue = multiq_dequeue, + .requeue = multiq_requeue, + .drop = multiq_drop, + .init = multiq_init, + .reset = multiq_reset, + .destroy = multiq_destroy, + .change = multiq_tune, + .dump = multiq_dump, + .owner = THIS_MODULE, +}; + +static int __init multiq_module_init(void) +{ + return register_qdisc(&multiq_qdisc_ops); +} + +static void __exit multiq_module_exit(void) +{ + unregister_qdisc(&multiq_qdisc_ops); +} + +module_init(multiq_module_init) +module_exit(multiq_module_exit) + +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 3781e55046d0..a11959908d9a 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -388,6 +388,20 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) }, }; +static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy, int len) +{ + int nested_len = nla_len(nla) - NLA_ALIGN(len); + + if (nested_len < 0) + return -EINVAL; + if (nested_len >= nla_attr_size(0)) + return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), + nested_len, policy); + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + return 0; +} + /* Parse netlink message to set options */ static int netem_change(struct Qdisc *sch, struct nlattr *opt) { @@ -399,8 +413,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - ret = nla_parse_nested_compat(tb, TCA_NETEM_MAX, opt, netem_policy, - qopt, sizeof(*qopt)); + qopt = nla_data(opt); + ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt)); if (ret < 0) return ret; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index a6697c686c7f..504a78cdb718 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -254,16 +254,12 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct prio_sched_data *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); - struct nlattr *nest; struct tc_prio_qopt opt; opt.bands = q->bands; memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); - nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt); - if (nest == NULL) - goto nla_put_failure; - nla_nest_compat_end(skb, nest); + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); return skb->len; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 6e041d10dbdb..fe1508ef0d3d 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -119,7 +119,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) u32 h, h2; switch (skb->protocol) { - case __constant_htons(ETH_P_IP): + case htons(ETH_P_IP): { const struct iphdr *iph = ip_hdr(skb); h = iph->daddr; @@ -134,7 +134,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb) h2 ^= *(((u32*)iph) + iph->ihl); break; } - case __constant_htons(ETH_P_IPV6): + case htons(ETH_P_IPV6): { struct ipv6hdr *iph = ipv6_hdr(skb); h = iph->daddr.s6_addr32[3]; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index abd51cef2413..f4b23043b610 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -283,8 +283,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a if (!sctp_ulpq_init(&asoc->ulpq, asoc)) goto fail_init; - /* Set up the tsn tracking. */ - sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_SIZE, 0); + memset(&asoc->peer.tsn_map, 0, sizeof(struct sctp_tsnmap)); asoc->need_ecne = 0; @@ -402,6 +401,8 @@ void sctp_association_free(struct sctp_association *asoc) /* Dispose of any pending chunks on the inqueue. */ sctp_inq_free(&asoc->base.inqueue); + sctp_tsnmap_free(&asoc->peer.tsn_map); + /* Free ssnmap storage. */ sctp_ssnmap_free(asoc->ssnmap); @@ -1122,8 +1123,8 @@ void sctp_assoc_update(struct sctp_association *asoc, asoc->peer.rwnd = new->peer.rwnd; asoc->peer.sack_needed = new->peer.sack_needed; asoc->peer.i = new->peer.i; - sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_SIZE, - asoc->peer.i.initial_tsn); + sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, + asoc->peer.i.initial_tsn, GFP_ATOMIC); /* Remove any peer addresses not present in the new association. */ list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index f62bc2468935..6d5944a745d4 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -457,7 +457,7 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, { int error = 0; - if (sctp_is_any(addr)) { + if (sctp_is_any(NULL, addr)) { error = sctp_copy_local_addr_list(dest, scope, gfp, flags); } else if (sctp_in_scope(addr, scope)) { /* Now that the address is in scope, check to see if @@ -477,11 +477,21 @@ static int sctp_copy_one_addr(struct sctp_bind_addr *dest, } /* Is this a wildcard address? */ -int sctp_is_any(const union sctp_addr *addr) +int sctp_is_any(struct sock *sk, const union sctp_addr *addr) { - struct sctp_af *af = sctp_get_af_specific(addr->sa.sa_family); + unsigned short fam = 0; + struct sctp_af *af; + + /* Try to get the right address family */ + if (addr->sa.sa_family != AF_UNSPEC) + fam = addr->sa.sa_family; + else if (sk) + fam = sk->sk_family; + + af = sctp_get_af_specific(fam); if (!af) return 0; + return af->is_any(addr); } diff --git a/net/sctp/input.c b/net/sctp/input.c index a49fa80b57b9..bf612d954d41 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -369,7 +369,7 @@ static void sctp_add_backlog(struct sock *sk, struct sk_buff *skb) void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc, struct sctp_transport *t, __u32 pmtu) { - if (!t || (t->pathmtu == pmtu)) + if (!t || (t->pathmtu <= pmtu)) return; if (sock_owned_by_user(sk)) { diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 47f91afa0211..4124bbb99947 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -156,7 +156,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, skb->network_header = saveip; skb->transport_header = savesctp; if (!sk) { - ICMP6_INC_STATS_BH(idev, ICMP6_MIB_INERRORS); + ICMP6_INC_STATS_BH(dev_net(skb->dev), idev, ICMP6_MIB_INERRORS); goto out; } @@ -837,6 +837,7 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, struct sctp_sock *opt) { struct sctp_af *af1, *af2; + struct sock *sk = sctp_opt2sk(opt); af1 = sctp_get_af_specific(addr1->sa.sa_family); af2 = sctp_get_af_specific(addr2->sa.sa_family); @@ -845,11 +846,11 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, return 0; /* If the socket is IPv6 only, v4 addrs will not match */ - if (__ipv6_only_sock(sctp_opt2sk(opt)) && af1 != af2) + if (__ipv6_only_sock(sk) && af1 != af2) return 0; /* Today, wildcard AF_INET/AF_INET6. */ - if (sctp_is_any(addr1) || sctp_is_any(addr2)) + if (sctp_is_any(sk, addr1) || sctp_is_any(sk, addr2)) return 1; if (addr1->sa.sa_family != addr2->sa.sa_family) diff --git a/net/sctp/output.c b/net/sctp/output.c index 225c7123c41f..c3f417f7ec6e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -699,7 +699,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, * When a Fast Retransmit is being performed the sender SHOULD * ignore the value of cwnd and SHOULD NOT delay retransmission. */ - if (chunk->fast_retransmit <= 0) + if (chunk->fast_retransmit != SCTP_NEED_FRTX) if (transport->flight_size >= transport->cwnd) { retval = SCTP_XMIT_RWND_FULL; goto finish; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 4328ad5439c9..247ebc95c1e5 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -420,7 +420,7 @@ void sctp_retransmit_mark(struct sctp_outq *q, * be added to the retransmit queue. */ if ((reason == SCTP_RTXR_FAST_RTX && - (chunk->fast_retransmit > 0)) || + (chunk->fast_retransmit == SCTP_NEED_FRTX)) || (reason != SCTP_RTXR_FAST_RTX && !chunk->tsn_gap_acked)) { /* If this chunk was sent less then 1 rto ago, do not * retransmit this chunk, but give the peer time @@ -650,8 +650,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, /* Mark the chunk as ineligible for fast retransmit * after it is retransmitted. */ - if (chunk->fast_retransmit > 0) - chunk->fast_retransmit = -1; + if (chunk->fast_retransmit == SCTP_NEED_FRTX) + chunk->fast_retransmit = SCTP_DONT_FRTX; /* Force start T3-rtx timer when fast retransmitting * the earliest outstanding TSN @@ -680,8 +680,8 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, */ if (rtx_timeout || fast_rtx) { list_for_each_entry(chunk1, lqueue, transmitted_list) { - if (chunk1->fast_retransmit > 0) - chunk1->fast_retransmit = -1; + if (chunk1->fast_retransmit == SCTP_NEED_FRTX) + chunk1->fast_retransmit = SCTP_DONT_FRTX; } } @@ -1129,12 +1129,13 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) unsigned outstanding; struct sctp_transport *primary = asoc->peer.primary_path; int count_of_newacks = 0; + int gap_ack_blocks; /* Grab the association's destination address list. */ transport_list = &asoc->peer.transport_addr_list; sack_ctsn = ntohl(sack->cum_tsn_ack); - + gap_ack_blocks = ntohs(sack->num_gap_ack_blocks); /* * SFR-CACC algorithm: * On receipt of a SACK the sender SHOULD execute the @@ -1144,35 +1145,38 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) * on the current primary, the CHANGEOVER_ACTIVE flag SHOULD be * cleared. The CYCLING_CHANGEOVER flag SHOULD also be cleared for * all destinations. - */ - if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) { - primary->cacc.changeover_active = 0; - list_for_each_entry(transport, transport_list, - transports) { - transport->cacc.cycling_changeover = 0; - } - } - - /* - * SFR-CACC algorithm: * 2) If the SACK contains gap acks and the flag CHANGEOVER_ACTIVE * is set the receiver of the SACK MUST take the following actions: * * A) Initialize the cacc_saw_newack to 0 for all destination * addresses. + * + * Only bother if changeover_active is set. Otherwise, this is + * totally suboptimal to do on every SACK. */ - if (sack->num_gap_ack_blocks && - primary->cacc.changeover_active) { - list_for_each_entry(transport, transport_list, transports) { - transport->cacc.cacc_saw_newack = 0; + if (primary->cacc.changeover_active) { + u8 clear_cycling = 0; + + if (TSN_lte(primary->cacc.next_tsn_at_change, sack_ctsn)) { + primary->cacc.changeover_active = 0; + clear_cycling = 1; + } + + if (clear_cycling || gap_ack_blocks) { + list_for_each_entry(transport, transport_list, + transports) { + if (clear_cycling) + transport->cacc.cycling_changeover = 0; + if (gap_ack_blocks) + transport->cacc.cacc_saw_newack = 0; + } } } /* Get the highest TSN in the sack. */ highest_tsn = sack_ctsn; - if (sack->num_gap_ack_blocks) - highest_tsn += - ntohs(frags[ntohs(sack->num_gap_ack_blocks) - 1].gab.end); + if (gap_ack_blocks) + highest_tsn += ntohs(frags[gap_ack_blocks - 1].gab.end); if (TSN_lt(asoc->highest_sacked, highest_tsn)) { highest_new_tsn = highest_tsn; @@ -1181,11 +1185,11 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) highest_new_tsn = sctp_highest_new_tsn(sack, asoc); } + /* Run through the retransmit queue. Credit bytes received * and free those chunks that we can. */ sctp_check_transmitted(q, &q->retransmit, NULL, sack, highest_new_tsn); - sctp_mark_missing(q, &q->retransmit, NULL, highest_new_tsn, 0); /* Run through the transmitted queue. * Credit bytes received and free those chunks which we can. @@ -1204,9 +1208,10 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_sackhdr *sack) count_of_newacks ++; } - list_for_each_entry(transport, transport_list, transports) { - sctp_mark_missing(q, &transport->transmitted, transport, - highest_new_tsn, count_of_newacks); + if (gap_ack_blocks) { + list_for_each_entry(transport, transport_list, transports) + sctp_mark_missing(q, &transport->transmitted, transport, + highest_new_tsn, count_of_newacks); } /* Move the Cumulative TSN Ack Point if appropriate. */ @@ -1651,7 +1656,7 @@ static void sctp_mark_missing(struct sctp_outq *q, * chunk if it has NOT been fast retransmitted or marked for * fast retransmit already. */ - if (!chunk->fast_retransmit && + if (chunk->fast_retransmit == SCTP_CAN_FRTX && !chunk->tsn_gap_acked && TSN_lt(tsn, highest_new_tsn_in_sack)) { @@ -1676,7 +1681,7 @@ static void sctp_mark_missing(struct sctp_outq *q, */ if (chunk->tsn_missing_report >= 3) { - chunk->fast_retransmit = 1; + chunk->fast_retransmit = SCTP_NEED_FRTX; do_fast_retransmit = 1; } } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index b599cbba4fbe..fd8acb48c3f2 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -702,12 +702,14 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) __u32 ctsn; __u16 num_gabs, num_dup_tsns; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; + struct sctp_gap_ack_block gabs[SCTP_MAX_GABS]; + memset(gabs, 0, sizeof(gabs)); ctsn = sctp_tsnmap_get_ctsn(map); SCTP_DEBUG_PRINTK("sackCTSNAck sent: 0x%x.\n", ctsn); /* How much room is needed in the chunk? */ - num_gabs = sctp_tsnmap_num_gabs(map); + num_gabs = sctp_tsnmap_num_gabs(map, gabs); num_dup_tsns = sctp_tsnmap_num_dups(map); /* Initialize the SACK header. */ @@ -763,7 +765,7 @@ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) /* Add the gap ack block information. */ if (num_gabs) sctp_addto_chunk(retval, sizeof(__u32) * num_gabs, - sctp_tsnmap_get_gabs(map)); + gabs); /* Add the duplicate TSN information. */ if (num_dup_tsns) @@ -1012,6 +1014,29 @@ end: return retval; } +struct sctp_chunk *sctp_make_violation_paramlen( + const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + struct sctp_paramhdr *param) +{ + struct sctp_chunk *retval; + static const char error[] = "The following parameter had invalid length:"; + size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) + + sizeof(sctp_paramhdr_t); + + retval = sctp_make_abort(asoc, chunk, payload_len); + if (!retval) + goto nodata; + + sctp_init_cause(retval, SCTP_ERROR_PROTO_VIOLATION, + sizeof(error) + sizeof(sctp_paramhdr_t)); + sctp_addto_chunk(retval, sizeof(error), error); + sctp_addto_param(retval, sizeof(sctp_paramhdr_t), param); + +nodata: + return retval; +} + /* Make a HEARTBEAT chunk. */ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, const struct sctp_transport *transport, @@ -1188,7 +1213,7 @@ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, */ retval->tsn_missing_report = 0; retval->tsn_gap_acked = 0; - retval->fast_retransmit = 0; + retval->fast_retransmit = SCTP_CAN_FRTX; /* If this is a fragmented message, track all fragments * of the message (for SEND_FAILED). @@ -1782,11 +1807,6 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc, const struct sctp_chunk *chunk, struct sctp_chunk **errp) { - static const char error[] = "The following parameter had invalid length:"; - size_t payload_len = WORD_ROUND(sizeof(error)) + - sizeof(sctp_paramhdr_t); - - /* This is a fatal error. Any accumulated non-fatal errors are * not reported. */ @@ -1794,14 +1814,7 @@ static int sctp_process_inv_paramlength(const struct sctp_association *asoc, sctp_chunk_free(*errp); /* Create an error chunk and fill it in with our payload. */ - *errp = sctp_make_op_error_space(asoc, chunk, payload_len); - - if (*errp) { - sctp_init_cause(*errp, SCTP_ERROR_PROTO_VIOLATION, - sizeof(error) + sizeof(sctp_paramhdr_t)); - sctp_addto_chunk(*errp, sizeof(error), error); - sctp_addto_param(*errp, sizeof(sctp_paramhdr_t), param); - } + *errp = sctp_make_violation_paramlen(asoc, chunk, param); return 0; } @@ -2277,8 +2290,9 @@ int sctp_process_init(struct sctp_association *asoc, sctp_cid_t cid, } /* Set up the TSN tracking pieces. */ - sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_SIZE, - asoc->peer.i.initial_tsn); + if (!sctp_tsnmap_init(&asoc->peer.tsn_map, SCTP_TSN_MAP_INITIAL, + asoc->peer.i.initial_tsn, gfp)) + goto clean_up; /* RFC 2960 6.5 Stream Identifier and Stream Sequence Number * @@ -2456,7 +2470,7 @@ do_addr_param: break; case SCTP_PARAM_ADAPTATION_LAYER_IND: - asoc->peer.adaptation_ind = param.aind->adaptation_ind; + asoc->peer.adaptation_ind = ntohl(param.aind->adaptation_ind); break; case SCTP_PARAM_SET_PRIMARY: diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 9732c797e8ed..e1d6076b4f59 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -889,6 +889,35 @@ static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands, sctp_ulpq_tail_event(&asoc->ulpq, ev); } + +static void sctp_cmd_t1_timer_update(struct sctp_association *asoc, + sctp_event_timeout_t timer, + char *name) +{ + struct sctp_transport *t; + + t = asoc->init_last_sent_to; + asoc->init_err_counter++; + + if (t->init_sent_count > (asoc->init_cycle + 1)) { + asoc->timeouts[timer] *= 2; + if (asoc->timeouts[timer] > asoc->max_init_timeo) { + asoc->timeouts[timer] = asoc->max_init_timeo; + } + asoc->init_cycle++; + SCTP_DEBUG_PRINTK( + "T1 %s Timeout adjustment" + " init_err_counter: %d" + " cycle: %d" + " timeout: %ld\n", + name, + asoc->init_err_counter, + asoc->init_cycle, + asoc->timeouts[timer]); + } + +} + /* These three macros allow us to pull the debugging code out of the * main flow of sctp_do_sm() to keep attention focused on the real * functionality there. @@ -1123,7 +1152,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_REPORT_TSN: /* Record the arrival of a TSN. */ - sctp_tsnmap_mark(&asoc->peer.tsn_map, cmd->obj.u32); + error = sctp_tsnmap_mark(&asoc->peer.tsn_map, + cmd->obj.u32); break; case SCTP_CMD_REPORT_FWDTSN: @@ -1196,6 +1226,11 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(cmd->obj.ptr)); + if (new_obj->transport) { + new_obj->transport->init_sent_count++; + asoc->init_last_sent_to = new_obj->transport; + } + /* FIXME - Eventually come up with a cleaner way to * enabling COOKIE-ECHO + DATA bundling during * multihoming stale cookie scenarios, the following @@ -1345,26 +1380,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, * all transports have been tried at the current * timeout. */ - t = asoc->init_last_sent_to; - asoc->init_err_counter++; - - if (t->init_sent_count > (asoc->init_cycle + 1)) { - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] *= 2; - if (asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] > - asoc->max_init_timeo) { - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT] = - asoc->max_init_timeo; - } - asoc->init_cycle++; - SCTP_DEBUG_PRINTK( - "T1 INIT Timeout adjustment" - " init_err_counter: %d" - " cycle: %d" - " timeout: %ld\n", - asoc->init_err_counter, - asoc->init_cycle, - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_INIT]); - } + sctp_cmd_t1_timer_update(asoc, + SCTP_EVENT_TIMEOUT_T1_INIT, + "INIT"); sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); @@ -1377,20 +1395,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, * all transports have been tried at the current * timeout. */ - asoc->init_err_counter++; - - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] *= 2; - if (asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] > - asoc->max_init_timeo) { - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE] = - asoc->max_init_timeo; - } - SCTP_DEBUG_PRINTK( - "T1 COOKIE Timeout adjustment" - " init_err_counter: %d" - " timeout: %ld\n", - asoc->init_err_counter, - asoc->timeouts[SCTP_EVENT_TIMEOUT_T1_COOKIE]); + sctp_cmd_t1_timer_update(asoc, + SCTP_EVENT_TIMEOUT_T1_COOKIE, + "COOKIE"); /* If we've sent any data bundled with * COOKIE-ECHO we need to resend. @@ -1422,6 +1429,10 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_INIT_COUNTER_RESET: asoc->init_err_counter = 0; asoc->init_cycle = 0; + list_for_each_entry(t, &asoc->peer.transport_addr_list, + transports) { + t->init_sent_count = 0; + } break; case SCTP_CMD_REPORT_DUP: diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 8848d329aa2c..a6a0ea71ae93 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -119,7 +119,7 @@ static sctp_disposition_t sctp_sf_violation_paramlen( const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, - void *arg, + void *arg, void *ext, sctp_cmd_seq_t *commands); static sctp_disposition_t sctp_sf_violation_ctsn( @@ -315,8 +315,10 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) + if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) { + SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + } /* 3.1 A packet containing an INIT chunk MUST have a zero Verification * Tag. @@ -635,8 +637,10 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. */ - if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) + if (ep == sctp_sk((sctp_get_ctl_sock()))->ep) { + SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); return sctp_sf_tabort_8_4_8(ep, asoc, type, arg, commands); + } /* Make sure that the COOKIE_ECHO chunk has a valid length. * In this case, we check that we have enough for at least a @@ -2076,10 +2080,6 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( sctp_bind_addr_state(&asoc->base.bind_addr, &chunk->dest)) return sctp_sf_discard_chunk(ep, asoc, type, arg, commands); - /* Stop the T5-shutdown guard timer. */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, - SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - return __sctp_sf_do_9_1_abort(ep, asoc, type, arg, commands); } @@ -2544,6 +2544,7 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, sctp_shutdownhdr_t *sdh; sctp_disposition_t disposition; struct sctp_ulpevent *ev; + __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(ep, asoc, type, arg, commands); @@ -2558,6 +2559,14 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(const struct sctp_endpoint *ep, sdh = (sctp_shutdownhdr_t *)chunk->skb->data; skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t)); chunk->subh.shutdown_hdr = sdh; + ctsn = ntohl(sdh->cum_tsn_ack); + + /* If Cumulative TSN Ack beyond the max tsn currently + * send, terminating the association and respond to the + * sender with an ABORT. + */ + if (!TSN_lt(ctsn, asoc->next_tsn)) + return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); /* API 5.3.1.5 SCTP_SHUTDOWN_EVENT * When a peer sends a SHUTDOWN, SCTP delivers this notification to @@ -2599,6 +2608,51 @@ out: return disposition; } +/* + * sctp_sf_do_9_2_shut_ctsn + * + * Once an endpoint has reached the SHUTDOWN-RECEIVED state, + * it MUST NOT send a SHUTDOWN in response to a ULP request. + * The Cumulative TSN Ack of the received SHUTDOWN chunk + * MUST be processed. + */ +sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const sctp_subtype_t type, + void *arg, + sctp_cmd_seq_t *commands) +{ + struct sctp_chunk *chunk = arg; + sctp_shutdownhdr_t *sdh; + + if (!sctp_vtag_verify(chunk, asoc)) + return sctp_sf_pdiscard(ep, asoc, type, arg, commands); + + /* Make sure that the SHUTDOWN chunk has a valid length. */ + if (!sctp_chunk_length_valid(chunk, + sizeof(struct sctp_shutdown_chunk_t))) + return sctp_sf_violation_chunklen(ep, asoc, type, arg, + commands); + + sdh = (sctp_shutdownhdr_t *)chunk->skb->data; + + /* If Cumulative TSN Ack beyond the max tsn currently + * send, terminating the association and respond to the + * sender with an ABORT. + */ + if (!TSN_lt(ntohl(sdh->cum_tsn_ack), asoc->next_tsn)) + return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands); + + /* verify, by checking the Cumulative TSN Ack field of the + * chunk, that all its outstanding DATA chunks have been + * received by the SHUTDOWN sender. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_CTSN, + SCTP_BE32(sdh->cum_tsn_ack)); + + return SCTP_DISPOSITION_CONSUME; +} + /* RFC 2960 9.2 * If an endpoint is in SHUTDOWN-ACK-SENT state and receives an INIT chunk * (e.g., if the SHUTDOWN COMPLETE was lost) with source and destination @@ -3382,6 +3436,8 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(const struct sctp_endpoint *ep, * packet and the state function that handles OOTB SHUTDOWN_ACK is * called with a NULL association. */ + SCTP_INC_STATS(SCTP_MIB_OUTOFBLUES); + return sctp_sf_shut_8_4_5(ep, NULL, type, arg, commands); } @@ -3425,7 +3481,7 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, addr_param = (union sctp_addr_param *)hdr->params; length = ntohs(addr_param->p.length); if (length < sizeof(sctp_paramhdr_t)) - return sctp_sf_violation_paramlen(ep, asoc, type, + return sctp_sf_violation_paramlen(ep, asoc, type, arg, (void *)addr_param, commands); /* Verify the ASCONF chunk before processing it. */ @@ -3433,8 +3489,8 @@ sctp_disposition_t sctp_sf_do_asconf(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)((void *)addr_param + length), (void *)chunk->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, - (void *)&err_param, commands); + return sctp_sf_violation_paramlen(ep, asoc, type, arg, + (void *)err_param, commands); /* ADDIP 5.2 E1) Compare the value of the serial number to the value * the endpoint stored in a new association variable @@ -3542,8 +3598,8 @@ sctp_disposition_t sctp_sf_do_asconf_ack(const struct sctp_endpoint *ep, (sctp_paramhdr_t *)addip_hdr->params, (void *)asconf_ack->chunk_end, &err_param)) - return sctp_sf_violation_paramlen(ep, asoc, type, - (void *)&err_param, commands); + return sctp_sf_violation_paramlen(ep, asoc, type, arg, + (void *)err_param, commands); if (last_asconf) { addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr; @@ -4186,11 +4242,10 @@ static sctp_disposition_t sctp_sf_abort_violation( SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); } -discard: - sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); - SCTP_INC_STATS(SCTP_MIB_ABORTEDS); +discard: + sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); return SCTP_DISPOSITION_ABORT; nomem_pkt: @@ -4240,12 +4295,36 @@ static sctp_disposition_t sctp_sf_violation_paramlen( const struct sctp_endpoint *ep, const struct sctp_association *asoc, const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) { - static const char err_str[] = "The following parameter had invalid length:"; + void *arg, void *ext, + sctp_cmd_seq_t *commands) +{ + struct sctp_chunk *chunk = arg; + struct sctp_paramhdr *param = ext; + struct sctp_chunk *abort = NULL; - return sctp_sf_abort_violation(ep, asoc, arg, commands, err_str, - sizeof(err_str)); + if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) + goto discard; + + /* Make the abort chunk. */ + abort = sctp_make_violation_paramlen(asoc, chunk, param); + if (!abort) + goto nomem; + + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); + SCTP_INC_STATS(SCTP_MIB_OUTCTRLCHUNKS); + + sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, + SCTP_ERROR(ECONNABORTED)); + sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, + SCTP_PERR(SCTP_ERROR_PROTO_VIOLATION)); + SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + +discard: + sctp_sf_pdiscard(ep, asoc, SCTP_ST_CHUNK(0), arg, commands); + return SCTP_DISPOSITION_ABORT; +nomem: + return SCTP_DISPOSITION_NOMEM; } /* Handle a protocol violation when the peer trying to advance the @@ -4517,13 +4596,6 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_SHUTDOWN_PENDING)); - /* sctpimpguide-05 Section 2.12.2 - * The sender of the SHUTDOWN MAY also start an overall guard timer - * 'T5-shutdown-guard' to bound the overall time for shutdown sequence. - */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, - SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); - disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type, @@ -4968,6 +5040,13 @@ sctp_disposition_t sctp_sf_do_9_2_start_shutdown( sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, SCTP_TO(SCTP_EVENT_TIMEOUT_T2_SHUTDOWN)); + /* RFC 4960 Section 9.2 + * The sender of the SHUTDOWN MAY also start an overall guard timer + * 'T5-shutdown-guard' to bound the overall time for shutdown sequence. + */ + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, + SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); + if (asoc->autoclose) sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); @@ -5279,6 +5358,8 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep if (!repl) return SCTP_DISPOSITION_NOMEM; + sctp_add_cmd_sf(commands, SCTP_CMD_INIT_CHOOSE_TRANSPORT, + SCTP_CHUNK(repl)); /* Issue a sideeffect to do the needed accounting. */ sctp_add_cmd_sf(commands, SCTP_CMD_COOKIEECHO_RESTART, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_COOKIE)); @@ -5406,7 +5487,7 @@ sctp_disposition_t sctp_sf_t4_timer_expire( sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); SCTP_INC_STATS(SCTP_MIB_ABORTEDS); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); + SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); return SCTP_DISPOSITION_ABORT; } @@ -5462,6 +5543,9 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_ASSOC_FAILED, SCTP_PERR(SCTP_ERROR_NO_ERROR)); + SCTP_INC_STATS(SCTP_MIB_ABORTEDS); + SCTP_DEC_STATS(SCTP_MIB_CURRESTAB); + return SCTP_DISPOSITION_DELETE_TCB; nomem: return SCTP_DISPOSITION_NOMEM; @@ -5494,12 +5578,6 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_SHUTDOWN_PENDING)); - /* sctpimpguide-05 Section 2.12.2 - * The sender of the SHUTDOWN MAY also start an overall guard timer - * 'T5-shutdown-guard' to bound the overall time for shutdown sequence. - */ - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, - SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); disposition = SCTP_DISPOSITION_CONSUME; if (sctp_outq_is_empty(&asoc->outqueue)) { disposition = sctp_sf_do_9_2_start_shutdown(ep, asoc, type, diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index d991237fb400..5c8186d88c61 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -266,11 +266,11 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, /* SCTP_STATE_ESTABLISHED */ \ TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ + TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ TYPE_SCTP_FUNC(sctp_sf_do_9_2_shutdown_ack), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ - TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ + TYPE_SCTP_FUNC(sctp_sf_do_9_2_shut_ctsn), \ /* SCTP_STATE_SHUTDOWN_ACK_SENT */ \ TYPE_SCTP_FUNC(sctp_sf_discard_chunk), \ } /* TYPE_SCTP_SHUTDOWN */ @@ -897,7 +897,7 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_ /* SCTP_STATE_ESTABLISHED */ \ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_PENDING */ \ - TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \ + TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ /* SCTP_STATE_SHUTDOWN_SENT */ \ TYPE_SCTP_FUNC(sctp_sf_t5_timer_expire), \ /* SCTP_STATE_SHUTDOWN_RECEIVED */ \ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 5ffb9dec1c3f..a1b904529d5e 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2309,7 +2309,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ - if (!sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { + if (!sctp_is_any(sk, ( union sctp_addr *)¶ms.spp_address)) { trans = sctp_addr_id2transport(sk, ¶ms.spp_address, params.spp_assoc_id); if (!trans) @@ -4062,7 +4062,7 @@ static int sctp_getsockopt_peer_addr_params(struct sock *sk, int len, /* If an address other than INADDR_ANY is specified, and * no transport is found, then the request is invalid. */ - if (!sctp_is_any(( union sctp_addr *)¶ms.spp_address)) { + if (!sctp_is_any(sk, ( union sctp_addr *)¶ms.spp_address)) { trans = sctp_addr_id2transport(sk, ¶ms.spp_address, params.spp_assoc_id); if (!trans) { @@ -4414,7 +4414,7 @@ static int sctp_getsockopt_local_addrs_num_old(struct sock *sk, int len, if (sctp_list_single_entry(&bp->address_list)) { addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); - if (sctp_is_any(&addr->a)) { + if (sctp_is_any(sk, &addr->a)) { rcu_read_lock(); list_for_each_entry_rcu(addr, &sctp_local_addr_list, list) { @@ -4602,7 +4602,7 @@ static int sctp_getsockopt_local_addrs_old(struct sock *sk, int len, if (sctp_list_single_entry(&bp->address_list)) { addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); - if (sctp_is_any(&addr->a)) { + if (sctp_is_any(sk, &addr->a)) { cnt = sctp_copy_laddrs_old(sk, bp->port, getaddrs.addr_num, addrs, &bytes_copied); @@ -4695,7 +4695,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, if (sctp_list_single_entry(&bp->address_list)) { addr = list_entry(bp->address_list.next, struct sctp_sockaddr_entry, list); - if (sctp_is_any(&addr->a)) { + if (sctp_is_any(sk, &addr->a)) { cnt = sctp_copy_laddrs(sk, bp->port, addrs, space_left, &bytes_copied); if (cnt < 0) { diff --git a/net/sctp/tsnmap.c b/net/sctp/tsnmap.c index f3e58b275905..35c73e82553a 100644 --- a/net/sctp/tsnmap.c +++ b/net/sctp/tsnmap.c @@ -43,37 +43,44 @@ */ #include <linux/types.h> +#include <linux/bitmap.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> static void sctp_tsnmap_update(struct sctp_tsnmap *map); -static void sctp_tsnmap_find_gap_ack(__u8 *map, __u16 off, - __u16 len, __u16 base, - int *started, __u16 *start, - int *ended, __u16 *end); +static void sctp_tsnmap_find_gap_ack(unsigned long *map, __u16 off, + __u16 len, __u16 *start, __u16 *end); +static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap); /* Initialize a block of memory as a tsnmap. */ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *map, __u16 len, - __u32 initial_tsn) + __u32 initial_tsn, gfp_t gfp) { - map->tsn_map = map->raw_map; - map->overflow_map = map->tsn_map + len; - map->len = len; - - /* Clear out a TSN ack status. */ - memset(map->tsn_map, 0x00, map->len + map->len); + if (!map->tsn_map) { + map->tsn_map = kzalloc(len>>3, gfp); + if (map->tsn_map == NULL) + return NULL; + + map->len = len; + } else { + bitmap_zero(map->tsn_map, map->len); + } /* Keep track of TSNs represented by tsn_map. */ map->base_tsn = initial_tsn; - map->overflow_tsn = initial_tsn + map->len; map->cumulative_tsn_ack_point = initial_tsn - 1; map->max_tsn_seen = map->cumulative_tsn_ack_point; - map->malloced = 0; map->num_dup_tsns = 0; return map; } +void sctp_tsnmap_free(struct sctp_tsnmap *map) +{ + map->len = 0; + kfree(map->tsn_map); +} + /* Test the tracking state of this TSN. * Returns: * 0 if the TSN has not yet been seen @@ -82,66 +89,69 @@ struct sctp_tsnmap *sctp_tsnmap_init(struct sctp_tsnmap *map, __u16 len, */ int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn) { - __s32 gap; - int dup; + u32 gap; + + /* Check to see if this is an old TSN */ + if (TSN_lte(tsn, map->cumulative_tsn_ack_point)) + return 1; + + /* Verify that we can hold this TSN and that it will not + * overlfow our map + */ + if (!TSN_lt(tsn, map->base_tsn + SCTP_TSN_MAP_SIZE)) + return -1; /* Calculate the index into the mapping arrays. */ gap = tsn - map->base_tsn; - /* Verify that we can hold this TSN. */ - if (gap >= (/* base */ map->len + /* overflow */ map->len)) { - dup = -1; - goto out; - } - - /* Honk if we've already seen this TSN. - * We have three cases: - * 1. The TSN is ancient or belongs to a previous tsn_map. - * 2. The TSN is already marked in the tsn_map. - * 3. The TSN is already marked in the tsn_map_overflow. - */ - if (gap < 0 || - (gap < map->len && map->tsn_map[gap]) || - (gap >= map->len && map->overflow_map[gap - map->len])) - dup = 1; + /* Check to see if TSN has already been recorded. */ + if (gap < map->len && test_bit(gap, map->tsn_map)) + return 1; else - dup = 0; - -out: - return dup; + return 0; } /* Mark this TSN as seen. */ -void sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn) +int sctp_tsnmap_mark(struct sctp_tsnmap *map, __u32 tsn) { - __s32 gap; + u16 gap; - /* Vacuously mark any TSN which precedes the map base or - * exceeds the end of the map. - */ if (TSN_lt(tsn, map->base_tsn)) - return; - if (!TSN_lt(tsn, map->base_tsn + map->len + map->len)) - return; - - /* Bump the max. */ - if (TSN_lt(map->max_tsn_seen, tsn)) - map->max_tsn_seen = tsn; + return 0; - /* Assert: TSN is in range. */ gap = tsn - map->base_tsn; - /* Mark the TSN as received. */ - if (gap < map->len) - map->tsn_map[gap]++; - else - map->overflow_map[gap - map->len]++; + if (gap >= map->len && !sctp_tsnmap_grow(map, gap)) + return -ENOMEM; - /* Go fixup any internal TSN mapping variables including - * cumulative_tsn_ack_point. - */ - sctp_tsnmap_update(map); + if (!sctp_tsnmap_has_gap(map) && gap == 0) { + /* In this case the map has no gaps and the tsn we are + * recording is the next expected tsn. We don't touch + * the map but simply bump the values. + */ + map->max_tsn_seen++; + map->cumulative_tsn_ack_point++; + map->base_tsn++; + } else { + /* Either we already have a gap, or about to record a gap, so + * have work to do. + * + * Bump the max. + */ + if (TSN_lt(map->max_tsn_seen, tsn)) + map->max_tsn_seen = tsn; + + /* Mark the TSN as received. */ + set_bit(gap, map->tsn_map); + + /* Go fixup any internal TSN mapping variables including + * cumulative_tsn_ack_point. + */ + sctp_tsnmap_update(map); + } + + return 0; } @@ -160,66 +170,34 @@ SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, struct sctp_tsnmap_iter *iter, __u16 *start, __u16 *end) { - int started, ended; - __u16 start_, end_, offset; - - /* We haven't found a gap yet. */ - started = ended = 0; + int ended = 0; + __u16 start_ = 0, end_ = 0, offset; /* If there are no more gap acks possible, get out fast. */ if (TSN_lte(map->max_tsn_seen, iter->start)) return 0; - /* Search the first mapping array. */ - if (iter->start - map->base_tsn < map->len) { - - offset = iter->start - map->base_tsn; - sctp_tsnmap_find_gap_ack(map->tsn_map, offset, map->len, 0, - &started, &start_, &ended, &end_); - } - - /* Do we need to check the overflow map? */ - if (!ended) { - /* Fix up where we'd like to start searching in the - * overflow map. - */ - if (iter->start - map->base_tsn < map->len) - offset = 0; - else - offset = iter->start - map->base_tsn - map->len; - - /* Search the overflow map. */ - sctp_tsnmap_find_gap_ack(map->overflow_map, - offset, - map->len, - map->len, - &started, &start_, - &ended, &end_); - } + offset = iter->start - map->base_tsn; + sctp_tsnmap_find_gap_ack(map->tsn_map, offset, map->len, + &start_, &end_); - /* The Gap Ack Block happens to end at the end of the - * overflow map. - */ - if (started && !ended) { - ended++; - end_ = map->len + map->len - 1; - } + /* The Gap Ack Block happens to end at the end of the map. */ + if (start_ && !end_) + end_ = map->len - 1; /* If we found a Gap Ack Block, return the start and end and * bump the iterator forward. */ - if (ended) { + if (end_) { /* Fix up the start and end based on the - * Cumulative TSN Ack offset into the map. + * Cumulative TSN Ack which is always 1 behind base. */ - int gap = map->cumulative_tsn_ack_point - - map->base_tsn; - - *start = start_ - gap; - *end = end_ - gap; + *start = start_ + 1; + *end = end_ + 1; /* Move the iterator forward. */ iter->start = map->cumulative_tsn_ack_point + *end + 1; + ended = 1; } return ended; @@ -228,35 +206,33 @@ SCTP_STATIC int sctp_tsnmap_next_gap_ack(const struct sctp_tsnmap *map, /* Mark this and any lower TSN as seen. */ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn) { - __s32 gap; + u32 gap; - /* Vacuously mark any TSN which precedes the map base or - * exceeds the end of the map. - */ if (TSN_lt(tsn, map->base_tsn)) return; - if (!TSN_lt(tsn, map->base_tsn + map->len + map->len)) + if (!TSN_lt(tsn, map->base_tsn + SCTP_TSN_MAP_SIZE)) return; /* Bump the max. */ if (TSN_lt(map->max_tsn_seen, tsn)) map->max_tsn_seen = tsn; - /* Assert: TSN is in range. */ gap = tsn - map->base_tsn + 1; - /* Mark the TSNs as received. */ - if (gap <= map->len) - memset(map->tsn_map, 0x01, gap); - else { - memset(map->tsn_map, 0x01, map->len); - memset(map->overflow_map, 0x01, (gap - map->len)); + map->base_tsn += gap; + map->cumulative_tsn_ack_point += gap; + if (gap >= map->len) { + /* If our gap is larger then the map size, just + * zero out the map. + */ + bitmap_zero(map->tsn_map, map->len); + } else { + /* If the gap is smaller then the map size, + * shift the map by 'gap' bits and update further. + */ + bitmap_shift_right(map->tsn_map, map->tsn_map, gap, map->len); + sctp_tsnmap_update(map); } - - /* Go fixup any internal TSN mapping variables including - * cumulative_tsn_ack_point. - */ - sctp_tsnmap_update(map); } /******************************************************************** @@ -268,27 +244,19 @@ void sctp_tsnmap_skip(struct sctp_tsnmap *map, __u32 tsn) */ static void sctp_tsnmap_update(struct sctp_tsnmap *map) { - __u32 ctsn; - - ctsn = map->cumulative_tsn_ack_point; - do { - ctsn++; - if (ctsn == map->overflow_tsn) { - /* Now tsn_map must have been all '1's, - * so we swap the map and check the overflow table - */ - __u8 *tmp = map->tsn_map; - memset(tmp, 0, map->len); - map->tsn_map = map->overflow_map; - map->overflow_map = tmp; - - /* Update the tsn_map boundaries. */ - map->base_tsn += map->len; - map->overflow_tsn += map->len; - } - } while (map->tsn_map[ctsn - map->base_tsn]); + u16 len; + unsigned long zero_bit; + + + len = map->max_tsn_seen - map->cumulative_tsn_ack_point; + zero_bit = find_first_zero_bit(map->tsn_map, len); + if (!zero_bit) + return; /* The first 0-bit is bit 0. nothing to do */ + + map->base_tsn += zero_bit; + map->cumulative_tsn_ack_point += zero_bit; - map->cumulative_tsn_ack_point = ctsn - 1; /* Back up one. */ + bitmap_shift_right(map->tsn_map, map->tsn_map, zero_bit, map->len); } /* How many data chunks are we missing from our peer? @@ -299,31 +267,19 @@ __u16 sctp_tsnmap_pending(struct sctp_tsnmap *map) __u32 max_tsn = map->max_tsn_seen; __u32 base_tsn = map->base_tsn; __u16 pending_data; - __s32 gap, start, end, i; + u32 gap, i; pending_data = max_tsn - cum_tsn; gap = max_tsn - base_tsn; - if (gap <= 0 || gap >= (map->len + map->len)) + if (gap == 0 || gap >= map->len) goto out; - start = ((cum_tsn >= base_tsn) ? (cum_tsn - base_tsn + 1) : 0); - end = ((gap > map->len ) ? map->len : gap + 1); - - for (i = start; i < end; i++) { - if (map->tsn_map[i]) + for (i = 0; i < gap+1; i++) { + if (test_bit(i, map->tsn_map)) pending_data--; } - if (gap >= map->len) { - start = 0; - end = gap - map->len + 1; - for (i = start; i < end; i++) { - if (map->overflow_map[i]) - pending_data--; - } - } - out: return pending_data; } @@ -334,10 +290,8 @@ out: * The flags "started" and "ended" tell is if we found the beginning * or (respectively) the end of a Gap Ack Block. */ -static void sctp_tsnmap_find_gap_ack(__u8 *map, __u16 off, - __u16 len, __u16 base, - int *started, __u16 *start, - int *ended, __u16 *end) +static void sctp_tsnmap_find_gap_ack(unsigned long *map, __u16 off, + __u16 len, __u16 *start, __u16 *end) { int i = off; @@ -348,56 +302,44 @@ static void sctp_tsnmap_find_gap_ack(__u8 *map, __u16 off, /* Also, stop looking past the maximum TSN seen. */ /* Look for the start. */ - if (!(*started)) { - for (; i < len; i++) { - if (map[i]) { - (*started)++; - *start = base + i; - break; - } - } - } + i = find_next_bit(map, len, off); + if (i < len) + *start = i; /* Look for the end. */ - if (*started) { + if (*start) { /* We have found the start, let's find the * end. If we find the end, break out. */ - for (; i < len; i++) { - if (!map[i]) { - (*ended)++; - *end = base + i - 1; - break; - } - } + i = find_next_zero_bit(map, len, i); + if (i < len) + *end = i - 1; } } /* Renege that we have seen a TSN. */ void sctp_tsnmap_renege(struct sctp_tsnmap *map, __u32 tsn) { - __s32 gap; + u32 gap; if (TSN_lt(tsn, map->base_tsn)) return; - if (!TSN_lt(tsn, map->base_tsn + map->len + map->len)) + /* Assert: TSN is in range. */ + if (!TSN_lt(tsn, map->base_tsn + map->len)) return; - /* Assert: TSN is in range. */ gap = tsn - map->base_tsn; /* Pretend we never saw the TSN. */ - if (gap < map->len) - map->tsn_map[gap] = 0; - else - map->overflow_map[gap - map->len] = 0; + clear_bit(gap, map->tsn_map); } /* How many gap ack blocks do we have recorded? */ -__u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map) +__u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map, + struct sctp_gap_ack_block *gabs) { struct sctp_tsnmap_iter iter; - int gabs = 0; + int ngaps = 0; /* Refresh the gap ack information. */ if (sctp_tsnmap_has_gap(map)) { @@ -407,12 +349,36 @@ __u16 sctp_tsnmap_num_gabs(struct sctp_tsnmap *map) &start, &end)) { - map->gabs[gabs].start = htons(start); - map->gabs[gabs].end = htons(end); - gabs++; - if (gabs >= SCTP_MAX_GABS) + gabs[ngaps].start = htons(start); + gabs[ngaps].end = htons(end); + ngaps++; + if (ngaps >= SCTP_MAX_GABS) break; } } - return gabs; + return ngaps; +} + +static int sctp_tsnmap_grow(struct sctp_tsnmap *map, u16 gap) +{ + unsigned long *new; + unsigned long inc; + u16 len; + + if (gap >= SCTP_TSN_MAP_SIZE) + return 0; + + inc = ALIGN((gap - map->len),BITS_PER_LONG) + SCTP_TSN_MAP_INCREMENT; + len = min_t(u16, map->len + inc, SCTP_TSN_MAP_SIZE); + + new = kzalloc(len>>3, GFP_ATOMIC); + if (!new) + return 0; + + bitmap_copy(new, map->tsn_map, map->max_tsn_seen - map->base_tsn); + kfree(map->tsn_map); + map->tsn_map = new; + map->len = len; + + return 1; } diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index a1f654aea268..5f186ca550d7 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -713,7 +713,9 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, /* Now that all memory allocations for this chunk succeeded, we * can mark it as received so the tsn_map is updated correctly. */ - sctp_tsnmap_mark(&asoc->peer.tsn_map, ntohl(chunk->subh.data_hdr->tsn)); + if (sctp_tsnmap_mark(&asoc->peer.tsn_map, + ntohl(chunk->subh.data_hdr->tsn))) + goto fail_mark; /* First calculate the padding, so we don't inadvertently * pass up the wrong length to the user. @@ -755,8 +757,12 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, event->msg_flags |= chunk->chunk_hdr->flags; event->iif = sctp_chunk_iif(chunk); -fail: return event; + +fail_mark: + kfree_skb(skb); +fail: + return NULL; } /* Create a partial delivery related event. diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 5061a26c5028..7b23803343cc 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -317,7 +317,7 @@ static void sctp_ulpq_store_reasm(struct sctp_ulpq *ulpq, } /* Insert before pos. */ - __skb_insert(sctp_event2skb(event), pos->prev, pos, &ulpq->reasm); + __skb_queue_before(&ulpq->reasm, pos, sctp_event2skb(event)); } @@ -825,8 +825,7 @@ static void sctp_ulpq_store_ordered(struct sctp_ulpq *ulpq, /* Insert before pos. */ - __skb_insert(sctp_event2skb(event), pos->prev, pos, &ulpq->lobby); - + __skb_queue_before(&ulpq->lobby, pos, sctp_event2skb(event)); } static struct sctp_ulpevent *sctp_ulpq_order(struct sctp_ulpq *ulpq, diff --git a/net/socket.c b/net/socket.c index 3e8d4e35c08f..92764d836891 100644 --- a/net/socket.c +++ b/net/socket.c @@ -990,7 +990,6 @@ static int sock_close(struct inode *inode, struct file *filp) printk(KERN_DEBUG "sock_close: NULL inode\n"); return 0; } - sock_fasync(-1, filp, 0); sock_release(SOCKET_I(inode)); return 0; } @@ -1142,7 +1141,7 @@ static int __sock_create(struct net *net, int family, int type, int protocol, sock->type = type; -#if defined(CONFIG_KMOD) +#ifdef CONFIG_MODULES /* Attempt to load a protocol module if the find failed. * * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user @@ -1427,8 +1426,8 @@ asmlinkage long sys_listen(int fd, int backlog) * clean when we restucture accept also. */ -long do_accept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, int flags) +asmlinkage long sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags) { struct socket *sock, *newsock; struct file *newfile; @@ -1511,66 +1510,10 @@ out_fd: goto out_put; } -#if 0 -#ifdef HAVE_SET_RESTORE_SIGMASK -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, - const sigset_t __user *sigmask, - size_t sigsetsize, int flags) -{ - sigset_t ksigmask, sigsaved; - int ret; - - if (sigmask) { - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(sigset_t)) - return -EINVAL; - if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) - return -EFAULT; - - sigdelsetmask(&ksigmask, sigmask(SIGKILL)|sigmask(SIGSTOP)); - sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); - } - - ret = do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); - - if (ret < 0 && signal_pending(current)) { - /* - * Don't restore the signal mask yet. Let do_signal() deliver - * the signal on the way back to userspace, before the signal - * mask is restored. - */ - if (sigmask) { - memcpy(¤t->saved_sigmask, &sigsaved, - sizeof(sigsaved)); - set_restore_sigmask(); - } - } else if (sigmask) - sigprocmask(SIG_SETMASK, &sigsaved, NULL); - - return ret; -} -#else -asmlinkage long sys_paccept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen, - const sigset_t __user *sigmask, - size_t sigsetsize, int flags) -{ - /* The platform does not support restoring the signal mask in the - * return path. So we do not allow using paccept() with a signal - * mask. */ - if (sigmask) - return -EINVAL; - - return do_accept(fd, upeer_sockaddr, upeer_addrlen, flags); -} -#endif -#endif - asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) { - return do_accept(fd, upeer_sockaddr, upeer_addrlen, 0); + return sys_accept4(fd, upeer_sockaddr, upeer_addrlen, 0); } /* @@ -2097,7 +2040,7 @@ static const unsigned char nargs[19]={ AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(6) + AL(4) }; #undef AL @@ -2116,7 +2059,7 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) unsigned long a0, a1; int err; - if (call < 1 || call > SYS_PACCEPT) + if (call < 1 || call > SYS_ACCEPT4) return -EINVAL; /* copy_from_user should be SMP safe. */ @@ -2144,9 +2087,8 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) err = sys_listen(a0, a1); break; case SYS_ACCEPT: - err = - do_accept(a0, (struct sockaddr __user *)a1, - (int __user *)a[2], 0); + err = sys_accept4(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], 0); break; case SYS_GETSOCKNAME: err = @@ -2193,12 +2135,9 @@ asmlinkage long sys_socketcall(int call, unsigned long __user *args) case SYS_RECVMSG: err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; - case SYS_PACCEPT: - err = - sys_paccept(a0, (struct sockaddr __user *)a1, - (int __user *)a[2], - (const sigset_t __user *) a[3], - a[4], a[5]); + case SYS_ACCEPT4: + err = sys_accept4(a0, (struct sockaddr __user *)a1, + (int __user *)a[2], a[3]); break; default: err = -EINVAL; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 6bfea9ed6869..cb216b2df666 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -83,10 +83,8 @@ rpcauth_create(rpc_authflavor_t pseudoflavor, struct rpc_clnt *clnt) if (flavor >= RPC_AUTH_MAXFLAVOR) goto out; -#ifdef CONFIG_KMOD if ((ops = auth_flavors[flavor]) == NULL) request_module("rpc-auth-%u", flavor); -#endif spin_lock(&rpc_authflavor_lock); ops = auth_flavors[flavor]; if (ops == NULL || !try_module_get(ops->owner)) { @@ -230,19 +228,21 @@ static int rpcauth_prune_expired(struct list_head *free, int nr_to_scan) { spinlock_t *cache_lock; - struct rpc_cred *cred; + struct rpc_cred *cred, *next; unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM; - while (!list_empty(&cred_unused)) { - cred = list_entry(cred_unused.next, struct rpc_cred, cr_lru); + list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) { + + /* Enforce a 60 second garbage collection moratorium */ + if (time_in_range(cred->cr_expire, expired, jiffies) && + test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) + continue; + list_del_init(&cred->cr_lru); number_cred_unused--; if (atomic_read(&cred->cr_count) != 0) continue; - /* Enforce a 5 second garbage collection moratorium */ - if (time_in_range(cred->cr_expire, expired, jiffies) && - test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) - continue; + cache_lock = &cred->cr_auth->au_credcache->lock; spin_lock(cache_lock); if (atomic_read(&cred->cr_count) == 0) { @@ -455,7 +455,7 @@ need_lock: } if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) == 0) rpcauth_unhash_cred(cred); - else if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { + if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) { cred->cr_expire = jiffies; list_add_tail(&cred->cr_lru, &cred_unused); number_cred_unused++; diff --git a/net/sunrpc/auth_generic.c b/net/sunrpc/auth_generic.c index 744b79fdcb19..4028502f0528 100644 --- a/net/sunrpc/auth_generic.c +++ b/net/sunrpc/auth_generic.c @@ -133,13 +133,29 @@ static int generic_match(struct auth_cred *acred, struct rpc_cred *cred, int flags) { struct generic_cred *gcred = container_of(cred, struct generic_cred, gc_base); + int i; if (gcred->acred.uid != acred->uid || gcred->acred.gid != acred->gid || - gcred->acred.group_info != acred->group_info || gcred->acred.machine_cred != acred->machine_cred) - return 0; + goto out_nomatch; + + /* Optimisation in the case where pointers are identical... */ + if (gcred->acred.group_info == acred->group_info) + goto out_match; + + /* Slow path... */ + if (gcred->acred.group_info->ngroups != acred->group_info->ngroups) + goto out_nomatch; + for (i = 0; i < gcred->acred.group_info->ngroups; i++) { + if (GROUP_AT(gcred->acred.group_info, i) != + GROUP_AT(acred->group_info, i)) + goto out_nomatch; + } +out_match: return 1; +out_nomatch: + return 0; } void __init rpc_init_generic_auth(void) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 76739e928d0d..4895c341e46d 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -174,7 +174,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru clnt->cl_procinfo = version->procs; clnt->cl_maxproc = version->nrprocs; clnt->cl_protname = program->name; - clnt->cl_prog = program->number; + clnt->cl_prog = args->prognumber ? : program->number; clnt->cl_vers = version->number; clnt->cl_stats = program->stats; clnt->cl_metrics = rpc_alloc_iostats(clnt); @@ -213,10 +213,10 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru } /* save the nodename */ - clnt->cl_nodelen = strlen(utsname()->nodename); + clnt->cl_nodelen = strlen(init_utsname()->nodename); if (clnt->cl_nodelen > UNX_MAXNODENAME) clnt->cl_nodelen = UNX_MAXNODENAME; - memcpy(clnt->cl_nodename, utsname()->nodename, clnt->cl_nodelen); + memcpy(clnt->cl_nodename, init_utsname()->nodename, clnt->cl_nodelen); rpc_register_client(clnt); return clnt; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 24db2b4d12d3..41013dd66ac3 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -20,6 +20,7 @@ #include <linux/in6.h> #include <linux/kernel.h> #include <linux/errno.h> +#include <net/ipv6.h> #include <linux/sunrpc/clnt.h> #include <linux/sunrpc/sched.h> @@ -176,13 +177,12 @@ static struct rpc_clnt *rpcb_create(char *hostname, struct sockaddr *srvaddr, } static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, - u32 version, struct rpc_message *msg, - int *result) + u32 version, struct rpc_message *msg) { struct rpc_clnt *rpcb_clnt; - int error = 0; + int result, error = 0; - *result = 0; + msg->rpc_resp = &result; rpcb_clnt = rpcb_create_local(addr, addrlen, version); if (!IS_ERR(rpcb_clnt)) { @@ -191,12 +191,15 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, } else error = PTR_ERR(rpcb_clnt); - if (error < 0) + if (error < 0) { printk(KERN_WARNING "RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); - dprintk("RPC: registration status %d/%d\n", error, *result); + return error; + } - return error; + if (!result) + return -EACCES; + return 0; } /** @@ -205,7 +208,11 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, * @vers: RPC version number to bind * @prot: transport protocol to register * @port: port value to register - * @okay: OUT: result code + * + * Returns zero if the registration request was dispatched successfully + * and the rpcbind daemon returned success. Otherwise, returns an errno + * value that reflects the nature of the error (request could not be + * dispatched, timed out, or rpcbind returned an error). * * RPC services invoke this function to advertise their contact * information via the system's rpcbind daemon. RPC services @@ -217,15 +224,6 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, * all registered transports for [program, version] from the local * rpcbind database. * - * Returns zero if the registration request was dispatched - * successfully and a reply was received. The rpcbind daemon's - * boolean result code is stored in *okay. - * - * Returns an errno value and sets *result to zero if there was - * some problem that prevented the rpcbind request from being - * dispatched, or if the rpcbind daemon did not respond within - * the timeout. - * * This function uses rpcbind protocol version 2 to contact the * local rpcbind daemon. * @@ -236,7 +234,7 @@ static int rpcb_register_call(struct sockaddr *addr, size_t addrlen, * IN6ADDR_ANY (ie available for all AF_INET and AF_INET6 * addresses). */ -int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) +int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port) { struct rpcbind_args map = { .r_prog = prog, @@ -246,7 +244,6 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) }; struct rpc_message msg = { .rpc_argp = &map, - .rpc_resp = okay, }; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " @@ -259,7 +256,7 @@ int rpcb_register(u32 prog, u32 vers, int prot, unsigned short port, int *okay) return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, sizeof(rpcb_inaddr_loopback), - RPCBVERS_2, &msg, okay); + RPCBVERS_2, &msg); } /* @@ -290,7 +287,7 @@ static int rpcb_register_netid4(struct sockaddr_in *address_to_register, return rpcb_register_call((struct sockaddr *)&rpcb_inaddr_loopback, sizeof(rpcb_inaddr_loopback), - RPCBVERS_4, msg, msg->rpc_resp); + RPCBVERS_4, msg); } /* @@ -304,10 +301,13 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, char buf[64]; /* Construct AF_INET6 universal address */ - snprintf(buf, sizeof(buf), - NIP6_FMT".%u.%u", - NIP6(address_to_register->sin6_addr), - port >> 8, port & 0xff); + if (ipv6_addr_any(&address_to_register->sin6_addr)) + snprintf(buf, sizeof(buf), "::.%u.%u", + port >> 8, port & 0xff); + else + snprintf(buf, sizeof(buf), NIP6_FMT".%u.%u", + NIP6(address_to_register->sin6_addr), + port >> 8, port & 0xff); map->r_addr = buf; dprintk("RPC: %sregistering [%u, %u, %s, '%s'] with " @@ -321,7 +321,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, return rpcb_register_call((struct sockaddr *)&rpcb_in6addr_loopback, sizeof(rpcb_in6addr_loopback), - RPCBVERS_4, msg, msg->rpc_resp); + RPCBVERS_4, msg); } /** @@ -330,7 +330,11 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, * @version: RPC version number of service to (un)register * @address: address family, IP address, and port to (un)register * @netid: netid of transport protocol to (un)register - * @result: result code from rpcbind RPC call + * + * Returns zero if the registration request was dispatched successfully + * and the rpcbind daemon returned success. Otherwise, returns an errno + * value that reflects the nature of the error (request could not be + * dispatched, timed out, or rpcbind returned an error). * * RPC services invoke this function to advertise their contact * information via the system's rpcbind daemon. RPC services @@ -342,15 +346,6 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, * to zero. Callers pass a netid of "" to unregister all * transport netids associated with [program, version, address]. * - * Returns zero if the registration request was dispatched - * successfully and a reply was received. The rpcbind daemon's - * result code is stored in *result. - * - * Returns an errno value and sets *result to zero if there was - * some problem that prevented the rpcbind request from being - * dispatched, or if the rpcbind daemon did not respond within - * the timeout. - * * This function uses rpcbind protocol version 4 to contact the * local rpcbind daemon. The local rpcbind daemon must support * version 4 of the rpcbind protocol in order for these functions @@ -372,8 +367,7 @@ static int rpcb_register_netid6(struct sockaddr_in6 *address_to_register, * advertises the service on all IPv4 and IPv6 addresses. */ int rpcb_v4_register(const u32 program, const u32 version, - const struct sockaddr *address, const char *netid, - int *result) + const struct sockaddr *address, const char *netid) { struct rpcbind_args map = { .r_prog = program, @@ -383,11 +377,8 @@ int rpcb_v4_register(const u32 program, const u32 version, }; struct rpc_message msg = { .rpc_argp = &map, - .rpc_resp = result, }; - *result = 0; - switch (address->sa_family) { case AF_INET: return rpcb_register_netid4((struct sockaddr_in *)address, @@ -469,6 +460,28 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi return rpc_run_task(&task_setup_data); } +/* + * In the case where rpc clients have been cloned, we want to make + * sure that we use the program number/version etc of the actual + * owner of the xprt. To do so, we walk back up the tree of parents + * to find whoever created the transport and/or whoever has the + * autobind flag set. + */ +static struct rpc_clnt *rpcb_find_transport_owner(struct rpc_clnt *clnt) +{ + struct rpc_clnt *parent = clnt->cl_parent; + + while (parent != clnt) { + if (parent->cl_xprt != clnt->cl_xprt) + break; + if (clnt->cl_autobind) + break; + clnt = parent; + parent = parent->cl_parent; + } + return clnt; +} + /** * rpcb_getport_async - obtain the port for a given RPC service on a given host * @task: task that is waiting for portmapper request @@ -478,10 +491,10 @@ static struct rpc_task *rpcb_call_async(struct rpc_clnt *rpcb_clnt, struct rpcbi */ void rpcb_getport_async(struct rpc_task *task) { - struct rpc_clnt *clnt = task->tk_client; + struct rpc_clnt *clnt; struct rpc_procinfo *proc; u32 bind_version; - struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_xprt *xprt; struct rpc_clnt *rpcb_clnt; static struct rpcbind_args *map; struct rpc_task *child; @@ -490,13 +503,13 @@ void rpcb_getport_async(struct rpc_task *task) size_t salen; int status; + clnt = rpcb_find_transport_owner(task->tk_client); + xprt = clnt->cl_xprt; + dprintk("RPC: %5u %s(%s, %u, %u, %d)\n", task->tk_pid, __func__, clnt->cl_server, clnt->cl_prog, clnt->cl_vers, xprt->prot); - /* Autobind on cloned rpc clients is discouraged */ - BUG_ON(clnt->cl_parent != clnt); - /* Put self on the wait queue to ensure we get notified if * some other task is already attempting to bind the port */ rpc_sleep_on(&xprt->binding, task, NULL); @@ -558,7 +571,7 @@ void rpcb_getport_async(struct rpc_task *task) status = -ENOMEM; dprintk("RPC: %5u %s: no memory available\n", task->tk_pid, __func__); - goto bailout_nofree; + goto bailout_release_client; } map->r_prog = clnt->cl_prog; map->r_vers = clnt->cl_vers; @@ -578,11 +591,13 @@ void rpcb_getport_async(struct rpc_task *task) task->tk_pid, __func__); return; } - rpc_put_task(child); - task->tk_xprt->stat.bind_count++; + xprt->stat.bind_count++; + rpc_put_task(child); return; +bailout_release_client: + rpc_release_client(rpcb_clnt); bailout_nofree: rpcb_wake_rpcbind_waiters(xprt, status); task->tk_status = status; @@ -633,7 +648,7 @@ static void rpcb_getport_done(struct rpc_task *child, void *data) static int rpcb_encode_mapping(struct rpc_rqst *req, __be32 *p, struct rpcbind_args *rpcb) { - dprintk("RPC: rpcb_encode_mapping(%u, %u, %d, %u)\n", + dprintk("RPC: encoding rpcb request (%u, %u, %d, %u)\n", rpcb->r_prog, rpcb->r_vers, rpcb->r_prot, rpcb->r_port); *p++ = htonl(rpcb->r_prog); *p++ = htonl(rpcb->r_vers); @@ -648,7 +663,7 @@ static int rpcb_decode_getport(struct rpc_rqst *req, __be32 *p, unsigned short *portp) { *portp = (unsigned short) ntohl(*p++); - dprintk("RPC: rpcb_decode_getport result %u\n", + dprintk("RPC: rpcb getport result: %u\n", *portp); return 0; } @@ -657,7 +672,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, unsigned int *boolp) { *boolp = (unsigned int) ntohl(*p++); - dprintk("RPC: rpcb_decode_set: call %s\n", + dprintk("RPC: rpcb set/unset call %s\n", (*boolp ? "succeeded" : "failed")); return 0; } @@ -665,7 +680,7 @@ static int rpcb_decode_set(struct rpc_rqst *req, __be32 *p, static int rpcb_encode_getaddr(struct rpc_rqst *req, __be32 *p, struct rpcbind_args *rpcb) { - dprintk("RPC: rpcb_encode_getaddr(%u, %u, %s)\n", + dprintk("RPC: encoding rpcb request (%u, %u, %s)\n", rpcb->r_prog, rpcb->r_vers, rpcb->r_addr); *p++ = htonl(rpcb->r_prog); *p++ = htonl(rpcb->r_vers); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 5a32cb7c4bb4..54c98d876847 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -28,6 +28,8 @@ #define RPCDBG_FACILITY RPCDBG_SVCDSP +static void svc_unregister(const struct svc_serv *serv); + #define svc_serv_is_pooled(serv) ((serv)->sv_function) /* @@ -357,7 +359,7 @@ svc_pool_for_cpu(struct svc_serv *serv, int cpu) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - void (*shutdown)(struct svc_serv *serv)) + sa_family_t family, void (*shutdown)(struct svc_serv *serv)) { struct svc_serv *serv; unsigned int vers; @@ -366,6 +368,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL))) return NULL; + serv->sv_family = family; serv->sv_name = prog->pg_name; serv->sv_program = prog; serv->sv_nrthreads = 1; @@ -416,30 +419,29 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, spin_lock_init(&pool->sp_lock); } - /* Remove any stale portmap registrations */ - svc_register(serv, 0, 0); + svc_unregister(serv); return serv; } struct svc_serv * svc_create(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv)) + sa_family_t family, void (*shutdown)(struct svc_serv *serv)) { - return __svc_create(prog, bufsize, /*npools*/1, shutdown); + return __svc_create(prog, bufsize, /*npools*/1, family, shutdown); } EXPORT_SYMBOL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - void (*shutdown)(struct svc_serv *serv), + sa_family_t family, void (*shutdown)(struct svc_serv *serv), svc_thread_fn func, struct module *mod) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); - serv = __svc_create(prog, bufsize, npools, shutdown); + serv = __svc_create(prog, bufsize, npools, family, shutdown); if (serv != NULL) { serv->sv_function = func; @@ -486,8 +488,7 @@ svc_destroy(struct svc_serv *serv) if (svc_serv_is_pooled(serv)) svc_pool_map_put(); - /* Unregister service with the portmapper */ - svc_register(serv, 0, 0); + svc_unregister(serv); kfree(serv->sv_pools); kfree(serv); } @@ -718,55 +719,245 @@ svc_exit_thread(struct svc_rqst *rqstp) } EXPORT_SYMBOL(svc_exit_thread); +#ifdef CONFIG_SUNRPC_REGISTER_V4 + /* - * Register an RPC service with the local portmapper. - * To unregister a service, call this routine with - * proto and port == 0. + * Register an "inet" protocol family netid with the local + * rpcbind daemon via an rpcbind v4 SET request. + * + * No netconfig infrastructure is available in the kernel, so + * we map IP_ protocol numbers to netids by hand. + * + * Returns zero on success; a negative errno value is returned + * if any error occurs. */ -int -svc_register(struct svc_serv *serv, int proto, unsigned short port) +static int __svc_rpcb_register4(const u32 program, const u32 version, + const unsigned short protocol, + const unsigned short port) +{ + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_addr.s_addr = htonl(INADDR_ANY), + .sin_port = htons(port), + }; + char *netid; + + switch (protocol) { + case IPPROTO_UDP: + netid = RPCBIND_NETID_UDP; + break; + case IPPROTO_TCP: + netid = RPCBIND_NETID_TCP; + break; + default: + return -EPROTONOSUPPORT; + } + + return rpcb_v4_register(program, version, + (struct sockaddr *)&sin, netid); +} + +/* + * Register an "inet6" protocol family netid with the local + * rpcbind daemon via an rpcbind v4 SET request. + * + * No netconfig infrastructure is available in the kernel, so + * we map IP_ protocol numbers to netids by hand. + * + * Returns zero on success; a negative errno value is returned + * if any error occurs. + */ +static int __svc_rpcb_register6(const u32 program, const u32 version, + const unsigned short protocol, + const unsigned short port) +{ + struct sockaddr_in6 sin6 = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_ANY_INIT, + .sin6_port = htons(port), + }; + char *netid; + + switch (protocol) { + case IPPROTO_UDP: + netid = RPCBIND_NETID_UDP6; + break; + case IPPROTO_TCP: + netid = RPCBIND_NETID_TCP6; + break; + default: + return -EPROTONOSUPPORT; + } + + return rpcb_v4_register(program, version, + (struct sockaddr *)&sin6, netid); +} + +/* + * Register a kernel RPC service via rpcbind version 4. + * + * Returns zero on success; a negative errno value is returned + * if any error occurs. + */ +static int __svc_register(const u32 program, const u32 version, + const sa_family_t family, + const unsigned short protocol, + const unsigned short port) +{ + int error; + + switch (family) { + case AF_INET: + return __svc_rpcb_register4(program, version, + protocol, port); + case AF_INET6: + error = __svc_rpcb_register6(program, version, + protocol, port); + if (error < 0) + return error; + + /* + * Work around bug in some versions of Linux rpcbind + * which don't allow registration of both inet and + * inet6 netids. + * + * Error return ignored for now. + */ + __svc_rpcb_register4(program, version, + protocol, port); + return 0; + } + + return -EAFNOSUPPORT; +} + +#else /* CONFIG_SUNRPC_REGISTER_V4 */ + +/* + * Register a kernel RPC service via rpcbind version 2. + * + * Returns zero on success; a negative errno value is returned + * if any error occurs. + */ +static int __svc_register(const u32 program, const u32 version, + sa_family_t family, + const unsigned short protocol, + const unsigned short port) +{ + if (family != AF_INET) + return -EAFNOSUPPORT; + + return rpcb_register(program, version, protocol, port); +} + +#endif /* CONFIG_SUNRPC_REGISTER_V4 */ + +/** + * svc_register - register an RPC service with the local portmapper + * @serv: svc_serv struct for the service to register + * @proto: transport protocol number to advertise + * @port: port to advertise + * + * Service is registered for any address in serv's address family + */ +int svc_register(const struct svc_serv *serv, const unsigned short proto, + const unsigned short port) { struct svc_program *progp; - unsigned long flags; unsigned int i; - int error = 0, dummy; + int error = 0; - if (!port) - clear_thread_flag(TIF_SIGPENDING); + BUG_ON(proto == 0 && port == 0); for (progp = serv->sv_program; progp; progp = progp->pg_next) { for (i = 0; i < progp->pg_nvers; i++) { if (progp->pg_vers[i] == NULL) continue; - dprintk("svc: svc_register(%s, %s, %d, %d)%s\n", + dprintk("svc: svc_register(%sv%d, %s, %u, %u)%s\n", progp->pg_name, + i, proto == IPPROTO_UDP? "udp" : "tcp", port, - i, + serv->sv_family, progp->pg_vers[i]->vs_hidden? " (but not telling portmap)" : ""); if (progp->pg_vers[i]->vs_hidden) continue; - error = rpcb_register(progp->pg_prog, i, proto, port, &dummy); + error = __svc_register(progp->pg_prog, i, + serv->sv_family, proto, port); if (error < 0) break; - if (port && !dummy) { - error = -EACCES; - break; - } } } - if (!port) { - spin_lock_irqsave(¤t->sighand->siglock, flags); - recalc_sigpending(); - spin_unlock_irqrestore(¤t->sighand->siglock, flags); + return error; +} + +#ifdef CONFIG_SUNRPC_REGISTER_V4 + +static void __svc_unregister(const u32 program, const u32 version, + const char *progname) +{ + struct sockaddr_in6 sin6 = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_ANY_INIT, + .sin6_port = 0, + }; + int error; + + error = rpcb_v4_register(program, version, + (struct sockaddr *)&sin6, ""); + dprintk("svc: %s(%sv%u), error %d\n", + __func__, progname, version, error); +} + +#else /* CONFIG_SUNRPC_REGISTER_V4 */ + +static void __svc_unregister(const u32 program, const u32 version, + const char *progname) +{ + int error; + + error = rpcb_register(program, version, 0, 0); + dprintk("svc: %s(%sv%u), error %d\n", + __func__, progname, version, error); +} + +#endif /* CONFIG_SUNRPC_REGISTER_V4 */ + +/* + * All netids, bind addresses and ports registered for [program, version] + * are removed from the local rpcbind database (if the service is not + * hidden) to make way for a new instance of the service. + * + * The result of unregistration is reported via dprintk for those who want + * verification of the result, but is otherwise not important. + */ +static void svc_unregister(const struct svc_serv *serv) +{ + struct svc_program *progp; + unsigned long flags; + unsigned int i; + + clear_thread_flag(TIF_SIGPENDING); + + for (progp = serv->sv_program; progp; progp = progp->pg_next) { + for (i = 0; i < progp->pg_nvers; i++) { + if (progp->pg_vers[i] == NULL) + continue; + if (progp->pg_vers[i]->vs_hidden) + continue; + + __svc_unregister(progp->pg_prog, i, progp->pg_name); + } } - return error; + spin_lock_irqsave(¤t->sighand->siglock, flags); + recalc_sigpending(); + spin_unlock_irqrestore(¤t->sighand->siglock, flags); } /* diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index e46c825f4954..bf5b5cdafebf 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -159,15 +159,44 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt, } EXPORT_SYMBOL_GPL(svc_xprt_init); -int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, - int flags) +static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, + struct svc_serv *serv, + unsigned short port, int flags) { - struct svc_xprt_class *xcl; struct sockaddr_in sin = { .sin_family = AF_INET, .sin_addr.s_addr = htonl(INADDR_ANY), .sin_port = htons(port), }; + struct sockaddr_in6 sin6 = { + .sin6_family = AF_INET6, + .sin6_addr = IN6ADDR_ANY_INIT, + .sin6_port = htons(port), + }; + struct sockaddr *sap; + size_t len; + + switch (serv->sv_family) { + case AF_INET: + sap = (struct sockaddr *)&sin; + len = sizeof(sin); + break; + case AF_INET6: + sap = (struct sockaddr *)&sin6; + len = sizeof(sin6); + break; + default: + return ERR_PTR(-EAFNOSUPPORT); + } + + return xcl->xcl_ops->xpo_create(serv, sap, len, flags); +} + +int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, + int flags) +{ + struct svc_xprt_class *xcl; + dprintk("svc: creating transport %s[%d]\n", xprt_name, port); spin_lock(&svc_xprt_class_lock); list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) { @@ -180,9 +209,7 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port, goto err; spin_unlock(&svc_xprt_class_lock); - newxprt = xcl->xcl_ops-> - xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin), - flags); + newxprt = __svc_xpo_create(xcl, serv, port, flags); if (IS_ERR(newxprt)) { module_put(xcl->xcl_owner); return PTR_ERR(newxprt); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 3e65719f1ef6..95293f549e9c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -1114,6 +1114,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, struct svc_sock *svsk; struct sock *inet; int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); + int val; dprintk("svc: svc_setup_socket %p\n", sock); if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { @@ -1146,6 +1147,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, else svc_tcp_init(svsk, serv); + /* + * We start one listener per sv_serv. We want AF_INET + * requests to be automatically shunted to our AF_INET6 + * listener using a mapped IPv4 address. Make sure + * no-one starts an equivalent IPv4 listener, which + * would steal our incoming connections. + */ + val = 0; + if (serv->sv_family == AF_INET6) + kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY, + (char *)&val, sizeof(val)); + dprintk("svc: svc_setup_socket created %p (inet %p)\n", svsk, svsk->sk_sk); @@ -1154,8 +1167,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, int svc_addsock(struct svc_serv *serv, int fd, - char *name_return, - int *proto) + char *name_return) { int err = 0; struct socket *so = sockfd_lookup(fd, &err); @@ -1190,7 +1202,6 @@ int svc_addsock(struct svc_serv *serv, sockfd_put(so); return err; } - if (proto) *proto = so->sk->sk_protocol; return one_sock_name(name_return, svsk); } EXPORT_SYMBOL_GPL(svc_addsock); diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 99a52aabe332..29e401bb612e 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -108,13 +108,10 @@ int xprt_register_transport(struct xprt_class *transport) goto out; } - result = -EINVAL; - if (try_module_get(THIS_MODULE)) { - list_add_tail(&transport->list, &xprt_list); - printk(KERN_INFO "RPC: Registered %s transport module.\n", - transport->name); - result = 0; - } + list_add_tail(&transport->list, &xprt_list); + printk(KERN_INFO "RPC: Registered %s transport module.\n", + transport->name); + result = 0; out: spin_unlock(&xprt_list_lock); @@ -143,7 +140,6 @@ int xprt_unregister_transport(struct xprt_class *transport) "RPC: Unregistered %s transport module.\n", transport->name); list_del_init(&transport->list); - module_put(THIS_MODULE); goto out; } } diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index e55427f73dfe..14106d26bb95 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -118,6 +118,10 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos, } if (xdrbuf->tail[0].iov_len) { + /* the rpcrdma protocol allows us to omit any trailing + * xdr pad bytes, saving the server an RDMA operation. */ + if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize) + return n; if (n == nsegs) return 0; seg[n].mr_page = NULL; @@ -508,8 +512,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) if (hdrlen == 0) return -1; - dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd\n" - " headerp 0x%p base 0x%p lkey 0x%x\n", + dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd" + " headerp 0x%p base 0x%p lkey 0x%x\n", __func__, transfertypes[wtype], hdrlen, rpclen, padlen, headerp, base, req->rl_iov.lkey); @@ -594,7 +598,7 @@ rpcrdma_count_chunks(struct rpcrdma_rep *rep, unsigned int max, int wrchunk, __b * Scatter inline received data back into provided iov's. */ static void -rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) +rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad) { int i, npages, curlen, olen; char *destp; @@ -660,6 +664,13 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len) } else rqst->rq_rcv_buf.tail[0].iov_len = 0; + if (pad) { + /* implicit padding on terminal chunk */ + unsigned char *p = rqst->rq_rcv_buf.tail[0].iov_base; + while (pad--) + p[rqst->rq_rcv_buf.tail[0].iov_len++] = 0; + } + if (copy_len) dprintk("RPC: %s: %d bytes in" " %d extra segments (%d lost)\n", @@ -681,12 +692,14 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep) struct rpc_xprt *xprt = ep->rep_xprt; spin_lock_bh(&xprt->transport_lock); + if (++xprt->connect_cookie == 0) /* maintain a reserved value */ + ++xprt->connect_cookie; if (ep->rep_connected > 0) { if (!xprt_test_and_set_connected(xprt)) xprt_wake_pending_tasks(xprt, 0); } else { if (xprt_test_and_clear_connected(xprt)) - xprt_wake_pending_tasks(xprt, ep->rep_connected); + xprt_wake_pending_tasks(xprt, -ENOTCONN); } spin_unlock_bh(&xprt->transport_lock); } @@ -769,7 +782,7 @@ repost: /* check for expected message types */ /* The order of some of these tests is important. */ switch (headerp->rm_type) { - case __constant_htonl(RDMA_MSG): + case htonl(RDMA_MSG): /* never expect read chunks */ /* never expect reply chunks (two ways to check) */ /* never expect write chunks without having offered RDMA */ @@ -792,17 +805,23 @@ repost: ((unsigned char *)iptr - (unsigned char *)headerp); status = rep->rr_len + rdmalen; r_xprt->rx_stats.total_rdma_reply += rdmalen; + /* special case - last chunk may omit padding */ + if (rdmalen &= 3) { + rdmalen = 4 - rdmalen; + status += rdmalen; + } } else { /* else ordinary inline */ + rdmalen = 0; iptr = (__be32 *)((unsigned char *)headerp + 28); rep->rr_len -= 28; /*sizeof *headerp;*/ status = rep->rr_len; } /* Fix up the rpc results for upper layer */ - rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len); + rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, rdmalen); break; - case __constant_htonl(RDMA_NOMSG): + case htonl(RDMA_NOMSG): /* never expect read or write chunks, always reply chunks */ if (headerp->rm_body.rm_chunks[0] != xdr_zero || headerp->rm_body.rm_chunks[1] != xdr_zero || diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index 74de31a06616..a4756576d687 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -116,7 +116,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, * * Assumptions: * - chunk[0]->position points to pages[0] at an offset of 0 - * - pages[] is not physically or virtually contigous and consists of + * - pages[] is not physically or virtually contiguous and consists of * PAGE_SIZE elements. * * Output: @@ -125,7 +125,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp, * chunk in the read list * */ -static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, +static int map_read_chunks(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, struct svc_rdma_op_ctxt *head, struct rpcrdma_msg *rmsgp, @@ -211,26 +211,128 @@ static int rdma_rcl_to_sge(struct svcxprt_rdma *xprt, return sge_no; } -static void rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, - struct svc_rdma_op_ctxt *ctxt, - struct kvec *vec, - u64 *sgl_offset, - int count) +/* Map a read-chunk-list to an XDR and fast register the page-list. + * + * Assumptions: + * - chunk[0] position points to pages[0] at an offset of 0 + * - pages[] will be made physically contiguous by creating a one-off memory + * region using the fastreg verb. + * - byte_count is # of bytes in read-chunk-list + * - ch_count is # of chunks in read-chunk-list + * + * Output: + * - sge array pointing into pages[] array. + * - chunk_sge array specifying sge index and count for each + * chunk in the read list + */ +static int fast_reg_read_chunks(struct svcxprt_rdma *xprt, + struct svc_rqst *rqstp, + struct svc_rdma_op_ctxt *head, + struct rpcrdma_msg *rmsgp, + struct svc_rdma_req_map *rpl_map, + struct svc_rdma_req_map *chl_map, + int ch_count, + int byte_count) +{ + int page_no; + int ch_no; + u32 offset; + struct rpcrdma_read_chunk *ch; + struct svc_rdma_fastreg_mr *frmr; + int ret = 0; + + frmr = svc_rdma_get_frmr(xprt); + if (IS_ERR(frmr)) + return -ENOMEM; + + head->frmr = frmr; + head->arg.head[0] = rqstp->rq_arg.head[0]; + head->arg.tail[0] = rqstp->rq_arg.tail[0]; + head->arg.pages = &head->pages[head->count]; + head->hdr_count = head->count; /* save count of hdr pages */ + head->arg.page_base = 0; + head->arg.page_len = byte_count; + head->arg.len = rqstp->rq_arg.len + byte_count; + head->arg.buflen = rqstp->rq_arg.buflen + byte_count; + + /* Fast register the page list */ + frmr->kva = page_address(rqstp->rq_arg.pages[0]); + frmr->direction = DMA_FROM_DEVICE; + frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE); + frmr->map_len = byte_count; + frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT; + for (page_no = 0; page_no < frmr->page_list_len; page_no++) { + frmr->page_list->page_list[page_no] = + ib_dma_map_single(xprt->sc_cm_id->device, + page_address(rqstp->rq_arg.pages[page_no]), + PAGE_SIZE, DMA_TO_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + frmr->page_list->page_list[page_no])) + goto fatal_err; + atomic_inc(&xprt->sc_dma_used); + head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no]; + } + head->count += page_no; + + /* rq_respages points one past arg pages */ + rqstp->rq_respages = &rqstp->rq_arg.pages[page_no]; + + /* Create the reply and chunk maps */ + offset = 0; + ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0]; + for (ch_no = 0; ch_no < ch_count; ch_no++) { + rpl_map->sge[ch_no].iov_base = frmr->kva + offset; + rpl_map->sge[ch_no].iov_len = ch->rc_target.rs_length; + chl_map->ch[ch_no].count = 1; + chl_map->ch[ch_no].start = ch_no; + offset += ch->rc_target.rs_length; + ch++; + } + + ret = svc_rdma_fastreg(xprt, frmr); + if (ret) + goto fatal_err; + + return ch_no; + + fatal_err: + printk("svcrdma: error fast registering xdr for xprt %p", xprt); + svc_rdma_put_frmr(xprt, frmr); + return -EIO; +} + +static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt, + struct svc_rdma_op_ctxt *ctxt, + struct svc_rdma_fastreg_mr *frmr, + struct kvec *vec, + u64 *sgl_offset, + int count) { int i; ctxt->count = count; ctxt->direction = DMA_FROM_DEVICE; for (i = 0; i < count; i++) { - atomic_inc(&xprt->sc_dma_used); - ctxt->sge[i].addr = - ib_dma_map_single(xprt->sc_cm_id->device, - vec[i].iov_base, vec[i].iov_len, - DMA_FROM_DEVICE); + ctxt->sge[i].length = 0; /* in case map fails */ + if (!frmr) { + ctxt->sge[i].addr = + ib_dma_map_single(xprt->sc_cm_id->device, + vec[i].iov_base, + vec[i].iov_len, + DMA_FROM_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + ctxt->sge[i].addr)) + return -EINVAL; + ctxt->sge[i].lkey = xprt->sc_dma_lkey; + atomic_inc(&xprt->sc_dma_used); + } else { + ctxt->sge[i].addr = (unsigned long)vec[i].iov_base; + ctxt->sge[i].lkey = frmr->mr->lkey; + } ctxt->sge[i].length = vec[i].iov_len; - ctxt->sge[i].lkey = xprt->sc_phys_mr->lkey; *sgl_offset = *sgl_offset + vec[i].iov_len; } + return 0; } static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count) @@ -278,6 +380,7 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, struct svc_rdma_op_ctxt *hdr_ctxt) { struct ib_send_wr read_wr; + struct ib_send_wr inv_wr; int err = 0; int ch_no; int ch_count; @@ -301,9 +404,20 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, svc_rdma_rcl_chunk_counts(ch, &ch_count, &byte_count); if (ch_count > RPCSVC_MAXPAGES) return -EINVAL; - sge_count = rdma_rcl_to_sge(xprt, rqstp, hdr_ctxt, rmsgp, - rpl_map, chl_map, - ch_count, byte_count); + + if (!xprt->sc_frmr_pg_list_len) + sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, + rpl_map, chl_map, ch_count, + byte_count); + else + sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp, + rpl_map, chl_map, ch_count, + byte_count); + if (sge_count < 0) { + err = -EIO; + goto out; + } + sgl_offset = 0; ch_no = 0; @@ -312,13 +426,16 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt, next_sge: ctxt = svc_rdma_get_context(xprt); ctxt->direction = DMA_FROM_DEVICE; + ctxt->frmr = hdr_ctxt->frmr; + ctxt->read_hdr = NULL; clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); /* Prepare READ WR */ memset(&read_wr, 0, sizeof read_wr); - ctxt->wr_op = IB_WR_RDMA_READ; read_wr.wr_id = (unsigned long)ctxt; read_wr.opcode = IB_WR_RDMA_READ; + ctxt->wr_op = read_wr.opcode; read_wr.send_flags = IB_SEND_SIGNALED; read_wr.wr.rdma.rkey = ch->rc_target.rs_handle; read_wr.wr.rdma.remote_addr = @@ -327,10 +444,15 @@ next_sge: read_wr.sg_list = ctxt->sge; read_wr.num_sge = rdma_read_max_sge(xprt, chl_map->ch[ch_no].count); - rdma_set_ctxt_sge(xprt, ctxt, - &rpl_map->sge[chl_map->ch[ch_no].start], - &sgl_offset, - read_wr.num_sge); + err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr, + &rpl_map->sge[chl_map->ch[ch_no].start], + &sgl_offset, + read_wr.num_sge); + if (err) { + svc_rdma_unmap_dma(ctxt); + svc_rdma_put_context(ctxt, 0); + goto out; + } if (((ch+1)->rc_discrim == 0) && (read_wr.num_sge == chl_map->ch[ch_no].count)) { /* @@ -339,6 +461,29 @@ next_sge: * the client and the RPC needs to be enqueued. */ set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags); + if (hdr_ctxt->frmr) { + set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); + /* + * Invalidate the local MR used to map the data + * sink. + */ + if (xprt->sc_dev_caps & + SVCRDMA_DEVCAP_READ_W_INV) { + read_wr.opcode = + IB_WR_RDMA_READ_WITH_INV; + ctxt->wr_op = read_wr.opcode; + read_wr.ex.invalidate_rkey = + ctxt->frmr->mr->lkey; + } else { + /* Prepare INVALIDATE WR */ + memset(&inv_wr, 0, sizeof inv_wr); + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.send_flags = IB_SEND_SIGNALED; + inv_wr.ex.invalidate_rkey = + hdr_ctxt->frmr->mr->lkey; + read_wr.next = &inv_wr; + } + } ctxt->read_hdr = hdr_ctxt; } /* Post the read */ diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index 84d328329d98..9a7a8e7ae038 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -69,9 +69,127 @@ * array is only concerned with the reply we are assured that we have * on extra page for the RPCRMDA header. */ -static void xdr_to_sge(struct svcxprt_rdma *xprt, - struct xdr_buf *xdr, - struct svc_rdma_req_map *vec) +int fast_reg_xdr(struct svcxprt_rdma *xprt, + struct xdr_buf *xdr, + struct svc_rdma_req_map *vec) +{ + int sge_no; + u32 sge_bytes; + u32 page_bytes; + u32 page_off; + int page_no = 0; + u8 *frva; + struct svc_rdma_fastreg_mr *frmr; + + frmr = svc_rdma_get_frmr(xprt); + if (IS_ERR(frmr)) + return -ENOMEM; + vec->frmr = frmr; + + /* Skip the RPCRDMA header */ + sge_no = 1; + + /* Map the head. */ + frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK); + vec->sge[sge_no].iov_base = xdr->head[0].iov_base; + vec->sge[sge_no].iov_len = xdr->head[0].iov_len; + vec->count = 2; + sge_no++; + + /* Build the FRMR */ + frmr->kva = frva; + frmr->direction = DMA_TO_DEVICE; + frmr->access_flags = 0; + frmr->map_len = PAGE_SIZE; + frmr->page_list_len = 1; + frmr->page_list->page_list[page_no] = + ib_dma_map_single(xprt->sc_cm_id->device, + (void *)xdr->head[0].iov_base, + PAGE_SIZE, DMA_TO_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + frmr->page_list->page_list[page_no])) + goto fatal_err; + atomic_inc(&xprt->sc_dma_used); + + page_off = xdr->page_base; + page_bytes = xdr->page_len + page_off; + if (!page_bytes) + goto encode_tail; + + /* Map the pages */ + vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; + vec->sge[sge_no].iov_len = page_bytes; + sge_no++; + while (page_bytes) { + struct page *page; + + page = xdr->pages[page_no++]; + sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off)); + page_bytes -= sge_bytes; + + frmr->page_list->page_list[page_no] = + ib_dma_map_page(xprt->sc_cm_id->device, page, 0, + PAGE_SIZE, DMA_TO_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + frmr->page_list->page_list[page_no])) + goto fatal_err; + + atomic_inc(&xprt->sc_dma_used); + page_off = 0; /* reset for next time through loop */ + frmr->map_len += PAGE_SIZE; + frmr->page_list_len++; + } + vec->count++; + + encode_tail: + /* Map tail */ + if (0 == xdr->tail[0].iov_len) + goto done; + + vec->count++; + vec->sge[sge_no].iov_len = xdr->tail[0].iov_len; + + if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) == + ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) { + /* + * If head and tail use the same page, we don't need + * to map it again. + */ + vec->sge[sge_no].iov_base = xdr->tail[0].iov_base; + } else { + void *va; + + /* Map another page for the tail */ + page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK; + va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK); + vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off; + + frmr->page_list->page_list[page_no] = + ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + frmr->page_list->page_list[page_no])) + goto fatal_err; + atomic_inc(&xprt->sc_dma_used); + frmr->map_len += PAGE_SIZE; + frmr->page_list_len++; + } + + done: + if (svc_rdma_fastreg(xprt, frmr)) + goto fatal_err; + + return 0; + + fatal_err: + printk("svcrdma: Error fast registering memory for xprt %p\n", xprt); + svc_rdma_put_frmr(xprt, frmr); + return -EIO; +} + +static int map_xdr(struct svcxprt_rdma *xprt, + struct xdr_buf *xdr, + struct svc_rdma_req_map *vec) { int sge_max = (xdr->len+PAGE_SIZE-1) / PAGE_SIZE + 3; int sge_no; @@ -83,6 +201,9 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt, BUG_ON(xdr->len != (xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len)); + if (xprt->sc_frmr_pg_list_len) + return fast_reg_xdr(xprt, xdr, vec); + /* Skip the first sge, this is for the RPCRDMA header */ sge_no = 1; @@ -116,9 +237,12 @@ static void xdr_to_sge(struct svcxprt_rdma *xprt, BUG_ON(sge_no > sge_max); vec->count = sge_no; + return 0; } /* Assumptions: + * - We are using FRMR + * - or - * - The specified write_len can be represented in sc_max_sge * PAGE_SIZE */ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, @@ -158,30 +282,35 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp, sge_no = 0; /* Copy the remaining SGE */ - while (bc != 0 && xdr_sge_no < vec->count) { - sge[sge_no].lkey = xprt->sc_phys_mr->lkey; - sge_bytes = min((size_t)bc, - (size_t)(vec->sge[xdr_sge_no].iov_len-sge_off)); + while (bc != 0) { + sge_bytes = min_t(size_t, + bc, vec->sge[xdr_sge_no].iov_len-sge_off); sge[sge_no].length = sge_bytes; - atomic_inc(&xprt->sc_dma_used); - sge[sge_no].addr = - ib_dma_map_single(xprt->sc_cm_id->device, - (void *) - vec->sge[xdr_sge_no].iov_base + sge_off, - sge_bytes, DMA_TO_DEVICE); - if (dma_mapping_error(xprt->sc_cm_id->device->dma_device, - sge[sge_no].addr)) - goto err; + if (!vec->frmr) { + sge[sge_no].addr = + ib_dma_map_single(xprt->sc_cm_id->device, + (void *) + vec->sge[xdr_sge_no].iov_base + sge_off, + sge_bytes, DMA_TO_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, + sge[sge_no].addr)) + goto err; + atomic_inc(&xprt->sc_dma_used); + sge[sge_no].lkey = xprt->sc_dma_lkey; + } else { + sge[sge_no].addr = (unsigned long) + vec->sge[xdr_sge_no].iov_base + sge_off; + sge[sge_no].lkey = vec->frmr->mr->lkey; + } + ctxt->count++; + ctxt->frmr = vec->frmr; sge_off = 0; sge_no++; - ctxt->count++; xdr_sge_no++; + BUG_ON(xdr_sge_no > vec->count); bc -= sge_bytes; } - BUG_ON(bc != 0); - BUG_ON(xdr_sge_no > vec->count); - /* Prepare WRITE WR */ memset(&write_wr, 0, sizeof write_wr); ctxt->wr_op = IB_WR_RDMA_WRITE; @@ -226,7 +355,10 @@ static int send_write_chunks(struct svcxprt_rdma *xprt, res_ary = (struct rpcrdma_write_array *) &rdma_resp->rm_body.rm_chunks[1]; - max_write = xprt->sc_max_sge * PAGE_SIZE; + if (vec->frmr) + max_write = vec->frmr->map_len; + else + max_write = xprt->sc_max_sge * PAGE_SIZE; /* Write chunks start at the pagelist */ for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0; @@ -297,7 +429,10 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, res_ary = (struct rpcrdma_write_array *) &rdma_resp->rm_body.rm_chunks[2]; - max_write = xprt->sc_max_sge * PAGE_SIZE; + if (vec->frmr) + max_write = vec->frmr->map_len; + else + max_write = xprt->sc_max_sge * PAGE_SIZE; /* xdr offset starts at RPC message */ for (xdr_off = 0, chunk_no = 0; @@ -307,7 +442,6 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt, ch = &arg_ary->wc_array[chunk_no].wc_target; write_len = min(xfer_len, ch->rs_length); - /* Prepare the reply chunk given the length actually * written */ rs_offset = get_unaligned(&(ch->rs_offset)); @@ -366,6 +500,7 @@ static int send_reply(struct svcxprt_rdma *rdma, int byte_count) { struct ib_send_wr send_wr; + struct ib_send_wr inv_wr; int sge_no; int sge_bytes; int page_no; @@ -385,27 +520,45 @@ static int send_reply(struct svcxprt_rdma *rdma, /* Prepare the context */ ctxt->pages[0] = page; ctxt->count = 1; + ctxt->frmr = vec->frmr; + if (vec->frmr) + set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); + else + clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags); /* Prepare the SGE for the RPCRDMA Header */ - atomic_inc(&rdma->sc_dma_used); ctxt->sge[0].addr = ib_dma_map_page(rdma->sc_cm_id->device, page, 0, PAGE_SIZE, DMA_TO_DEVICE); + if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr)) + goto err; + atomic_inc(&rdma->sc_dma_used); + ctxt->direction = DMA_TO_DEVICE; + ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp); - ctxt->sge[0].lkey = rdma->sc_phys_mr->lkey; + ctxt->sge[0].lkey = rdma->sc_dma_lkey; /* Determine how many of our SGE are to be transmitted */ for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) { sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count); byte_count -= sge_bytes; - atomic_inc(&rdma->sc_dma_used); - ctxt->sge[sge_no].addr = - ib_dma_map_single(rdma->sc_cm_id->device, - vec->sge[sge_no].iov_base, - sge_bytes, DMA_TO_DEVICE); + if (!vec->frmr) { + ctxt->sge[sge_no].addr = + ib_dma_map_single(rdma->sc_cm_id->device, + vec->sge[sge_no].iov_base, + sge_bytes, DMA_TO_DEVICE); + if (ib_dma_mapping_error(rdma->sc_cm_id->device, + ctxt->sge[sge_no].addr)) + goto err; + atomic_inc(&rdma->sc_dma_used); + ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey; + } else { + ctxt->sge[sge_no].addr = (unsigned long) + vec->sge[sge_no].iov_base; + ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey; + } ctxt->sge[sge_no].length = sge_bytes; - ctxt->sge[sge_no].lkey = rdma->sc_phys_mr->lkey; } BUG_ON(byte_count != 0); @@ -417,11 +570,16 @@ static int send_reply(struct svcxprt_rdma *rdma, ctxt->pages[page_no+1] = rqstp->rq_respages[page_no]; ctxt->count++; rqstp->rq_respages[page_no] = NULL; - /* If there are more pages than SGE, terminate SGE list */ + /* + * If there are more pages than SGE, terminate SGE + * list so that svc_rdma_unmap_dma doesn't attempt to + * unmap garbage. + */ if (page_no+1 >= sge_no) ctxt->sge[page_no+1].length = 0; } BUG_ON(sge_no > rdma->sc_max_sge); + BUG_ON(sge_no > ctxt->count); memset(&send_wr, 0, sizeof send_wr); ctxt->wr_op = IB_WR_SEND; send_wr.wr_id = (unsigned long)ctxt; @@ -429,12 +587,26 @@ static int send_reply(struct svcxprt_rdma *rdma, send_wr.num_sge = sge_no; send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; + if (vec->frmr) { + /* Prepare INVALIDATE WR */ + memset(&inv_wr, 0, sizeof inv_wr); + inv_wr.opcode = IB_WR_LOCAL_INV; + inv_wr.send_flags = IB_SEND_SIGNALED; + inv_wr.ex.invalidate_rkey = + vec->frmr->mr->lkey; + send_wr.next = &inv_wr; + } ret = svc_rdma_send(rdma, &send_wr); if (ret) - svc_rdma_put_context(ctxt, 1); + goto err; - return ret; + return 0; + + err: + svc_rdma_put_frmr(rdma, vec->frmr); + svc_rdma_put_context(ctxt, 1); + return -EIO; } void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp) @@ -477,8 +649,9 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) ctxt = svc_rdma_get_context(rdma); ctxt->direction = DMA_TO_DEVICE; vec = svc_rdma_get_req_map(); - xdr_to_sge(rdma, &rqstp->rq_res, vec); - + ret = map_xdr(rdma, &rqstp->rq_res, vec); + if (ret) + goto err0; inline_bytes = rqstp->rq_res.len; /* Create the RDMA response header */ @@ -498,7 +671,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) if (ret < 0) { printk(KERN_ERR "svcrdma: failed to send write chunks, rc=%d\n", ret); - goto error; + goto err1; } inline_bytes -= ret; @@ -508,7 +681,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) if (ret < 0) { printk(KERN_ERR "svcrdma: failed to send reply chunks, rc=%d\n", ret); - goto error; + goto err1; } inline_bytes -= ret; @@ -517,9 +690,11 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) svc_rdma_put_req_map(vec); dprintk("svcrdma: send_reply returns %d\n", ret); return ret; - error: + + err1: + put_page(res_page); + err0: svc_rdma_put_req_map(vec); svc_rdma_put_context(ctxt, 0); - put_page(res_page); return ret; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 900cb69728c6..6fb493cbd29f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -100,20 +100,29 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt) ctxt->xprt = xprt; INIT_LIST_HEAD(&ctxt->dto_q); ctxt->count = 0; + ctxt->frmr = NULL; atomic_inc(&xprt->sc_ctxt_used); return ctxt; } -static void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) +void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt) { struct svcxprt_rdma *xprt = ctxt->xprt; int i; for (i = 0; i < ctxt->count && ctxt->sge[i].length; i++) { - atomic_dec(&xprt->sc_dma_used); - ib_dma_unmap_single(xprt->sc_cm_id->device, - ctxt->sge[i].addr, - ctxt->sge[i].length, - ctxt->direction); + /* + * Unmap the DMA addr in the SGE if the lkey matches + * the sc_dma_lkey, otherwise, ignore it since it is + * an FRMR lkey and will be unmapped later when the + * last WR that uses it completes. + */ + if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) { + atomic_dec(&xprt->sc_dma_used); + ib_dma_unmap_single(xprt->sc_cm_id->device, + ctxt->sge[i].addr, + ctxt->sge[i].length, + ctxt->direction); + } } } @@ -150,6 +159,7 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void) schedule_timeout_uninterruptible(msecs_to_jiffies(500)); } map->count = 0; + map->frmr = NULL; return map; } @@ -316,6 +326,50 @@ static void rq_cq_reap(struct svcxprt_rdma *xprt) } /* + * Processs a completion context + */ +static void process_context(struct svcxprt_rdma *xprt, + struct svc_rdma_op_ctxt *ctxt) +{ + svc_rdma_unmap_dma(ctxt); + + switch (ctxt->wr_op) { + case IB_WR_SEND: + if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) + svc_rdma_put_frmr(xprt, ctxt->frmr); + svc_rdma_put_context(ctxt, 1); + break; + + case IB_WR_RDMA_WRITE: + svc_rdma_put_context(ctxt, 0); + break; + + case IB_WR_RDMA_READ: + case IB_WR_RDMA_READ_WITH_INV: + if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { + struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; + BUG_ON(!read_hdr); + if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags)) + svc_rdma_put_frmr(xprt, ctxt->frmr); + spin_lock_bh(&xprt->sc_rq_dto_lock); + set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); + list_add_tail(&read_hdr->dto_q, + &xprt->sc_read_complete_q); + spin_unlock_bh(&xprt->sc_rq_dto_lock); + svc_xprt_enqueue(&xprt->sc_xprt); + } + svc_rdma_put_context(ctxt, 0); + break; + + default: + printk(KERN_ERR "svcrdma: unexpected completion type, " + "opcode=%d\n", + ctxt->wr_op); + break; + } +} + +/* * Send Queue Completion Handler - potentially called on interrupt context. * * Note that caller must hold a transport reference. @@ -327,17 +381,12 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) struct ib_cq *cq = xprt->sc_sq_cq; int ret; - if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags)) return; ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP); atomic_inc(&rdma_stat_sq_poll); while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) { - ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; - xprt = ctxt->xprt; - - svc_rdma_unmap_dma(ctxt); if (wc.status != IB_WC_SUCCESS) /* Close the transport */ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); @@ -346,35 +395,10 @@ static void sq_cq_reap(struct svcxprt_rdma *xprt) atomic_dec(&xprt->sc_sq_count); wake_up(&xprt->sc_send_wait); - switch (ctxt->wr_op) { - case IB_WR_SEND: - svc_rdma_put_context(ctxt, 1); - break; - - case IB_WR_RDMA_WRITE: - svc_rdma_put_context(ctxt, 0); - break; - - case IB_WR_RDMA_READ: - if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) { - struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr; - BUG_ON(!read_hdr); - spin_lock_bh(&xprt->sc_rq_dto_lock); - set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); - list_add_tail(&read_hdr->dto_q, - &xprt->sc_read_complete_q); - spin_unlock_bh(&xprt->sc_rq_dto_lock); - svc_xprt_enqueue(&xprt->sc_xprt); - } - svc_rdma_put_context(ctxt, 0); - break; + ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id; + if (ctxt) + process_context(xprt, ctxt); - default: - printk(KERN_ERR "svcrdma: unexpected completion type, " - "opcode=%d, status=%d\n", - wc.opcode, wc.status); - break; - } svc_xprt_put(&xprt->sc_xprt); } @@ -425,10 +449,12 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv, INIT_LIST_HEAD(&cma_xprt->sc_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q); INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q); + INIT_LIST_HEAD(&cma_xprt->sc_frmr_q); init_waitqueue_head(&cma_xprt->sc_send_wait); spin_lock_init(&cma_xprt->sc_lock); spin_lock_init(&cma_xprt->sc_rq_dto_lock); + spin_lock_init(&cma_xprt->sc_frmr_q_lock); cma_xprt->sc_ord = svcrdma_ord; @@ -462,7 +488,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) struct ib_recv_wr recv_wr, *bad_recv_wr; struct svc_rdma_op_ctxt *ctxt; struct page *page; - unsigned long pa; + dma_addr_t pa; int sge_no; int buflen; int ret; @@ -474,13 +500,15 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) BUG_ON(sge_no >= xprt->sc_max_sge); page = svc_rdma_get_page(); ctxt->pages[sge_no] = page; - atomic_inc(&xprt->sc_dma_used); pa = ib_dma_map_page(xprt->sc_cm_id->device, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa)) + goto err_put_ctxt; + atomic_inc(&xprt->sc_dma_used); ctxt->sge[sge_no].addr = pa; ctxt->sge[sge_no].length = PAGE_SIZE; - ctxt->sge[sge_no].lkey = xprt->sc_phys_mr->lkey; + ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey; buflen += PAGE_SIZE; } ctxt->count = sge_no; @@ -496,6 +524,10 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt) svc_rdma_put_context(ctxt, 1); } return ret; + + err_put_ctxt: + svc_rdma_put_context(ctxt, 1); + return -ENOMEM; } /* @@ -566,7 +598,7 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, dprintk("svcrdma: Connect request on cma_id=%p, xprt = %p, " "event=%d\n", cma_id, cma_id->context, event->event); handle_connect_req(cma_id, - event->param.conn.responder_resources); + event->param.conn.initiator_depth); break; case RDMA_CM_EVENT_ESTABLISHED: @@ -686,6 +718,97 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, return ERR_PTR(ret); } +static struct svc_rdma_fastreg_mr *rdma_alloc_frmr(struct svcxprt_rdma *xprt) +{ + struct ib_mr *mr; + struct ib_fast_reg_page_list *pl; + struct svc_rdma_fastreg_mr *frmr; + + frmr = kmalloc(sizeof(*frmr), GFP_KERNEL); + if (!frmr) + goto err; + + mr = ib_alloc_fast_reg_mr(xprt->sc_pd, RPCSVC_MAXPAGES); + if (!mr) + goto err_free_frmr; + + pl = ib_alloc_fast_reg_page_list(xprt->sc_cm_id->device, + RPCSVC_MAXPAGES); + if (!pl) + goto err_free_mr; + + frmr->mr = mr; + frmr->page_list = pl; + INIT_LIST_HEAD(&frmr->frmr_list); + return frmr; + + err_free_mr: + ib_dereg_mr(mr); + err_free_frmr: + kfree(frmr); + err: + return ERR_PTR(-ENOMEM); +} + +static void rdma_dealloc_frmr_q(struct svcxprt_rdma *xprt) +{ + struct svc_rdma_fastreg_mr *frmr; + + while (!list_empty(&xprt->sc_frmr_q)) { + frmr = list_entry(xprt->sc_frmr_q.next, + struct svc_rdma_fastreg_mr, frmr_list); + list_del_init(&frmr->frmr_list); + ib_dereg_mr(frmr->mr); + ib_free_fast_reg_page_list(frmr->page_list); + kfree(frmr); + } +} + +struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma) +{ + struct svc_rdma_fastreg_mr *frmr = NULL; + + spin_lock_bh(&rdma->sc_frmr_q_lock); + if (!list_empty(&rdma->sc_frmr_q)) { + frmr = list_entry(rdma->sc_frmr_q.next, + struct svc_rdma_fastreg_mr, frmr_list); + list_del_init(&frmr->frmr_list); + frmr->map_len = 0; + frmr->page_list_len = 0; + } + spin_unlock_bh(&rdma->sc_frmr_q_lock); + if (frmr) + return frmr; + + return rdma_alloc_frmr(rdma); +} + +static void frmr_unmap_dma(struct svcxprt_rdma *xprt, + struct svc_rdma_fastreg_mr *frmr) +{ + int page_no; + for (page_no = 0; page_no < frmr->page_list_len; page_no++) { + dma_addr_t addr = frmr->page_list->page_list[page_no]; + if (ib_dma_mapping_error(frmr->mr->device, addr)) + continue; + atomic_dec(&xprt->sc_dma_used); + ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE, + frmr->direction); + } +} + +void svc_rdma_put_frmr(struct svcxprt_rdma *rdma, + struct svc_rdma_fastreg_mr *frmr) +{ + if (frmr) { + frmr_unmap_dma(rdma, frmr); + spin_lock_bh(&rdma->sc_frmr_q_lock); + BUG_ON(!list_empty(&frmr->frmr_list)); + list_add(&frmr->frmr_list, &rdma->sc_frmr_q); + spin_unlock_bh(&rdma->sc_frmr_q_lock); + } +} + /* * This is the xpo_recvfrom function for listening endpoints. Its * purpose is to accept incoming connections. The CMA callback handler @@ -704,6 +827,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct ib_device_attr devattr; + int dma_mr_acc; + int need_dma_mr; int ret; int i; @@ -819,15 +944,77 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) } newxprt->sc_qp = newxprt->sc_cm_id->qp; - /* Register all of physical memory */ - newxprt->sc_phys_mr = ib_get_dma_mr(newxprt->sc_pd, - IB_ACCESS_LOCAL_WRITE | - IB_ACCESS_REMOTE_WRITE); - if (IS_ERR(newxprt->sc_phys_mr)) { - dprintk("svcrdma: Failed to create DMA MR ret=%d\n", ret); + /* + * Use the most secure set of MR resources based on the + * transport type and available memory management features in + * the device. Here's the table implemented below: + * + * Fast Global DMA Remote WR + * Reg LKEY MR Access + * Sup'd Sup'd Needed Needed + * + * IWARP N N Y Y + * N Y Y Y + * Y N Y N + * Y Y N - + * + * IB N N Y N + * N Y N - + * Y N Y N + * Y Y N - + * + * NB: iWARP requires remote write access for the data sink + * of an RDMA_READ. IB does not. + */ + if (devattr.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) { + newxprt->sc_frmr_pg_list_len = + devattr.max_fast_reg_page_list_len; + newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_FAST_REG; + } + + /* + * Determine if a DMA MR is required and if so, what privs are required + */ + switch (rdma_node_get_transport(newxprt->sc_cm_id->device->node_type)) { + case RDMA_TRANSPORT_IWARP: + newxprt->sc_dev_caps |= SVCRDMA_DEVCAP_READ_W_INV; + if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) { + need_dma_mr = 1; + dma_mr_acc = + (IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE); + } else if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { + need_dma_mr = 1; + dma_mr_acc = IB_ACCESS_LOCAL_WRITE; + } else + need_dma_mr = 0; + break; + case RDMA_TRANSPORT_IB: + if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) { + need_dma_mr = 1; + dma_mr_acc = IB_ACCESS_LOCAL_WRITE; + } else + need_dma_mr = 0; + break; + default: goto errout; } + /* Create the DMA MR if needed, otherwise, use the DMA LKEY */ + if (need_dma_mr) { + /* Register all of physical memory */ + newxprt->sc_phys_mr = + ib_get_dma_mr(newxprt->sc_pd, dma_mr_acc); + if (IS_ERR(newxprt->sc_phys_mr)) { + dprintk("svcrdma: Failed to create DMA MR ret=%d\n", + ret); + goto errout; + } + newxprt->sc_dma_lkey = newxprt->sc_phys_mr->lkey; + } else + newxprt->sc_dma_lkey = + newxprt->sc_cm_id->device->local_dma_lkey; + /* Post receive buffers */ for (i = 0; i < newxprt->sc_max_requests; i++) { ret = svc_rdma_post_recv(newxprt); @@ -961,6 +1148,9 @@ static void __svc_rdma_free(struct work_struct *work) WARN_ON(atomic_read(&rdma->sc_ctxt_used) != 0); WARN_ON(atomic_read(&rdma->sc_dma_used) != 0); + /* De-allocate fastreg mr */ + rdma_dealloc_frmr_q(rdma); + /* Destroy the QP if present (not a listener) */ if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) ib_destroy_qp(rdma->sc_qp); @@ -1014,21 +1204,59 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt) return 1; } +/* + * Attempt to register the kvec representing the RPC memory with the + * device. + * + * Returns: + * NULL : The device does not support fastreg or there were no more + * fastreg mr. + * frmr : The kvec register request was successfully posted. + * <0 : An error was encountered attempting to register the kvec. + */ +int svc_rdma_fastreg(struct svcxprt_rdma *xprt, + struct svc_rdma_fastreg_mr *frmr) +{ + struct ib_send_wr fastreg_wr; + u8 key; + + /* Bump the key */ + key = (u8)(frmr->mr->lkey & 0x000000FF); + ib_update_fast_reg_key(frmr->mr, ++key); + + /* Prepare FASTREG WR */ + memset(&fastreg_wr, 0, sizeof fastreg_wr); + fastreg_wr.opcode = IB_WR_FAST_REG_MR; + fastreg_wr.send_flags = IB_SEND_SIGNALED; + fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva; + fastreg_wr.wr.fast_reg.page_list = frmr->page_list; + fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len; + fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + fastreg_wr.wr.fast_reg.length = frmr->map_len; + fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags; + fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey; + return svc_rdma_send(xprt, &fastreg_wr); +} + int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) { - struct ib_send_wr *bad_wr; + struct ib_send_wr *bad_wr, *n_wr; + int wr_count; + int i; int ret; if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags)) return -ENOTCONN; BUG_ON(wr->send_flags != IB_SEND_SIGNALED); - BUG_ON(((struct svc_rdma_op_ctxt *)(unsigned long)wr->wr_id)->wr_op != - wr->opcode); + wr_count = 1; + for (n_wr = wr->next; n_wr; n_wr = n_wr->next) + wr_count++; + /* If the SQ is full, wait until an SQ entry is available */ while (1) { spin_lock_bh(&xprt->sc_lock); - if (xprt->sc_sq_depth == atomic_read(&xprt->sc_sq_count)) { + if (xprt->sc_sq_depth < atomic_read(&xprt->sc_sq_count) + wr_count) { spin_unlock_bh(&xprt->sc_lock); atomic_inc(&rdma_stat_sq_starve); @@ -1043,19 +1271,26 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr) return 0; continue; } - /* Bumped used SQ WR count and post */ - svc_xprt_get(&xprt->sc_xprt); + /* Take a transport ref for each WR posted */ + for (i = 0; i < wr_count; i++) + svc_xprt_get(&xprt->sc_xprt); + + /* Bump used SQ WR count and post */ + atomic_add(wr_count, &xprt->sc_sq_count); ret = ib_post_send(xprt->sc_qp, wr, &bad_wr); - if (!ret) - atomic_inc(&xprt->sc_sq_count); - else { - svc_xprt_put(&xprt->sc_xprt); + if (ret) { + set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + atomic_sub(wr_count, &xprt->sc_sq_count); + for (i = 0; i < wr_count; i ++) + svc_xprt_put(&xprt->sc_xprt); dprintk("svcrdma: failed to post SQ WR rc=%d, " "sc_sq_count=%d, sc_sq_depth=%d\n", ret, atomic_read(&xprt->sc_sq_count), xprt->sc_sq_depth); } spin_unlock_bh(&xprt->sc_lock); + if (ret) + wake_up(&xprt->sc_send_wait); break; } return ret; @@ -1079,10 +1314,14 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va); /* Prepare SGE for local address */ - atomic_inc(&xprt->sc_dma_used); sge.addr = ib_dma_map_page(xprt->sc_cm_id->device, p, 0, PAGE_SIZE, DMA_FROM_DEVICE); - sge.lkey = xprt->sc_phys_mr->lkey; + if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) { + put_page(p); + return; + } + atomic_inc(&xprt->sc_dma_used); + sge.lkey = xprt->sc_dma_lkey; sge.length = length; ctxt = svc_rdma_get_context(xprt); @@ -1103,6 +1342,9 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp, if (ret) { dprintk("svcrdma: Error %d posting send for protocol error\n", ret); + ib_dma_unmap_page(xprt->sc_cm_id->device, + sge.addr, PAGE_SIZE, + DMA_FROM_DEVICE); svc_rdma_put_context(ctxt, 1); } } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index a564c1a39ec5..9839c3d94145 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -70,11 +70,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE; static unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; static unsigned int xprt_rdma_inline_write_padding; -#if !RPCRDMA_PERSISTENT_REGISTRATION -static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_REGISTER; /* FMR? */ -#else -static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_ALLPHYSICAL; -#endif +static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; + int xprt_rdma_pad_optimize = 0; #ifdef RPC_DEBUG @@ -140,6 +137,14 @@ static ctl_table xr_tunables_table[] = { .extra2 = &max_memreg, }, { + .ctl_name = CTL_UNNUMBERED, + .procname = "rdma_pad_optimize", + .data = &xprt_rdma_pad_optimize, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0, }, }; @@ -458,6 +463,8 @@ xprt_rdma_close(struct rpc_xprt *xprt) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); dprintk("RPC: %s: closing\n", __func__); + if (r_xprt->rx_ep.rep_connected > 0) + xprt->reestablish_timeout = 0; xprt_disconnect_done(xprt); (void) rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); } @@ -485,6 +492,11 @@ xprt_rdma_connect(struct rpc_task *task) /* Reconnect */ schedule_delayed_work(&r_xprt->rdma_connect, xprt->reestablish_timeout); + xprt->reestablish_timeout <<= 1; + if (xprt->reestablish_timeout > (30 * HZ)) + xprt->reestablish_timeout = (30 * HZ); + else if (xprt->reestablish_timeout < (5 * HZ)) + xprt->reestablish_timeout = (5 * HZ); } else { schedule_delayed_work(&r_xprt->rdma_connect, 0); if (!RPC_IS_ASYNC(task)) @@ -591,6 +603,7 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size) } dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req); out: + req->rl_connect_cookie = 0; /* our reserved value */ return req->rl_xdr_buf; outfail: @@ -694,13 +707,21 @@ xprt_rdma_send_request(struct rpc_task *task) req->rl_reply->rr_xprt = xprt; } - if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) { - xprt_disconnect_done(xprt); - return -ENOTCONN; /* implies disconnect */ - } + /* Must suppress retransmit to maintain credits */ + if (req->rl_connect_cookie == xprt->connect_cookie) + goto drop_connection; + req->rl_connect_cookie = xprt->connect_cookie; + + if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) + goto drop_connection; + task->tk_bytes_sent += rqst->rq_snd_buf.len; rqst->rq_bytes_sent = 0; return 0; + +drop_connection: + xprt_disconnect_done(xprt); + return -ENOTCONN; /* implies disconnect */ } static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) @@ -770,7 +791,7 @@ static void __exit xprt_rdma_cleanup(void) { int rc; - dprintk("RPCRDMA Module Removed, deregister RPC RDMA transport\n"); + dprintk(KERN_INFO "RPCRDMA Module Removed, deregister RPC RDMA transport\n"); #ifdef RPC_DEBUG if (sunrpc_table_header) { unregister_sysctl_table(sunrpc_table_header); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8ea283ecc522..a5fef5e6c323 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -284,6 +284,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: case RDMA_CM_EVENT_ROUTE_RESOLVED: + ia->ri_async_rc = 0; complete(&ia->ri_done); break; case RDMA_CM_EVENT_ADDR_ERROR: @@ -338,13 +339,32 @@ connected: wake_up_all(&ep->rep_connect_wait); break; default: - ia->ri_async_rc = -EINVAL; - dprintk("RPC: %s: unexpected CM event %X\n", + dprintk("RPC: %s: unexpected CM event %d\n", __func__, event->event); - complete(&ia->ri_done); break; } +#ifdef RPC_DEBUG + if (connstate == 1) { + int ird = attr.max_dest_rd_atomic; + int tird = ep->rep_remote_cma.responder_resources; + printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " + "on %s, memreg %d slots %d ird %d%s\n", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port), + ia->ri_id->device->name, + ia->ri_memreg_strategy, + xprt->rx_buf.rb_max_requests, + ird, ird < 4 && ird < tird / 2 ? " (low!)" : ""); + } else if (connstate < 0) { + printk(KERN_INFO "rpcrdma: connection to %u.%u.%u.%u:%u " + "closed (%d)\n", + NIPQUAD(addr->sin_addr.s_addr), + ntohs(addr->sin_port), + connstate); + } +#endif + return 0; } @@ -355,6 +375,8 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rdma_cm_id *id; int rc; + init_completion(&ia->ri_done); + id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP); if (IS_ERR(id)) { rc = PTR_ERR(id); @@ -363,26 +385,28 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt, return id; } - ia->ri_async_rc = 0; + ia->ri_async_rc = -ETIMEDOUT; rc = rdma_resolve_addr(id, NULL, addr, RDMA_RESOLVE_TIMEOUT); if (rc) { dprintk("RPC: %s: rdma_resolve_addr() failed %i\n", __func__, rc); goto out; } - wait_for_completion(&ia->ri_done); + wait_for_completion_interruptible_timeout(&ia->ri_done, + msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); rc = ia->ri_async_rc; if (rc) goto out; - ia->ri_async_rc = 0; + ia->ri_async_rc = -ETIMEDOUT; rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); if (rc) { dprintk("RPC: %s: rdma_resolve_route() failed %i\n", __func__, rc); goto out; } - wait_for_completion(&ia->ri_done); + wait_for_completion_interruptible_timeout(&ia->ri_done, + msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1); rc = ia->ri_async_rc; if (rc) goto out; @@ -423,11 +447,10 @@ rpcrdma_clean_cq(struct ib_cq *cq) int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) { - int rc; + int rc, mem_priv; + struct ib_device_attr devattr; struct rpcrdma_ia *ia = &xprt->rx_ia; - init_completion(&ia->ri_done); - ia->ri_id = rpcrdma_create_id(xprt, ia, addr); if (IS_ERR(ia->ri_id)) { rc = PTR_ERR(ia->ri_id); @@ -443,6 +466,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) } /* + * Query the device to determine if the requested memory + * registration strategy is supported. If it isn't, set the + * strategy to a globally supported model. + */ + rc = ib_query_device(ia->ri_id->device, &devattr); + if (rc) { + dprintk("RPC: %s: ib_query_device failed %d\n", + __func__, rc); + goto out2; + } + + if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) { + ia->ri_have_dma_lkey = 1; + ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey; + } + + switch (memreg) { + case RPCRDMA_MEMWINDOWS: + case RPCRDMA_MEMWINDOWS_ASYNC: + if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) { + dprintk("RPC: %s: MEMWINDOWS registration " + "specified but not supported by adapter, " + "using slower RPCRDMA_REGISTER\n", + __func__); + memreg = RPCRDMA_REGISTER; + } + break; + case RPCRDMA_MTHCAFMR: + if (!ia->ri_id->device->alloc_fmr) { +#if RPCRDMA_PERSISTENT_REGISTRATION + dprintk("RPC: %s: MTHCAFMR registration " + "specified but not supported by adapter, " + "using riskier RPCRDMA_ALLPHYSICAL\n", + __func__); + memreg = RPCRDMA_ALLPHYSICAL; +#else + dprintk("RPC: %s: MTHCAFMR registration " + "specified but not supported by adapter, " + "using slower RPCRDMA_REGISTER\n", + __func__); + memreg = RPCRDMA_REGISTER; +#endif + } + break; + case RPCRDMA_FRMR: + /* Requires both frmr reg and local dma lkey */ + if ((devattr.device_cap_flags & + (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) != + (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) { +#if RPCRDMA_PERSISTENT_REGISTRATION + dprintk("RPC: %s: FRMR registration " + "specified but not supported by adapter, " + "using riskier RPCRDMA_ALLPHYSICAL\n", + __func__); + memreg = RPCRDMA_ALLPHYSICAL; +#else + dprintk("RPC: %s: FRMR registration " + "specified but not supported by adapter, " + "using slower RPCRDMA_REGISTER\n", + __func__); + memreg = RPCRDMA_REGISTER; +#endif + } + break; + } + + /* * Optionally obtain an underlying physical identity mapping in * order to do a memory window-based bind. This base registration * is protected from remote access - that is enabled only by binding @@ -450,22 +540,28 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) * revoked after the corresponding completion similar to a storage * adapter. */ - if (memreg > RPCRDMA_REGISTER) { - int mem_priv = IB_ACCESS_LOCAL_WRITE; - switch (memreg) { + switch (memreg) { + case RPCRDMA_BOUNCEBUFFERS: + case RPCRDMA_REGISTER: + case RPCRDMA_FRMR: + break; #if RPCRDMA_PERSISTENT_REGISTRATION - case RPCRDMA_ALLPHYSICAL: - mem_priv |= IB_ACCESS_REMOTE_WRITE; - mem_priv |= IB_ACCESS_REMOTE_READ; - break; + case RPCRDMA_ALLPHYSICAL: + mem_priv = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_REMOTE_READ; + goto register_setup; #endif - case RPCRDMA_MEMWINDOWS_ASYNC: - case RPCRDMA_MEMWINDOWS: - mem_priv |= IB_ACCESS_MW_BIND; - break; - default: + case RPCRDMA_MEMWINDOWS_ASYNC: + case RPCRDMA_MEMWINDOWS: + mem_priv = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_MW_BIND; + goto register_setup; + case RPCRDMA_MTHCAFMR: + if (ia->ri_have_dma_lkey) break; - } + mem_priv = IB_ACCESS_LOCAL_WRITE; + register_setup: ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv); if (IS_ERR(ia->ri_bind_mem)) { printk(KERN_ALERT "%s: ib_get_dma_mr for " @@ -475,7 +571,15 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) memreg = RPCRDMA_REGISTER; ia->ri_bind_mem = NULL; } + break; + default: + printk(KERN_ERR "%s: invalid memory registration mode %d\n", + __func__, memreg); + rc = -EINVAL; + goto out2; } + dprintk("RPC: %s: memory registration strategy is %d\n", + __func__, memreg); /* Else will do memory reg/dereg for each chunk */ ia->ri_memreg_strategy = memreg; @@ -483,6 +587,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) return 0; out2: rdma_destroy_id(ia->ri_id); + ia->ri_id = NULL; out1: return rc; } @@ -503,15 +608,17 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) dprintk("RPC: %s: ib_dereg_mr returned %i\n", __func__, rc); } - if (ia->ri_id != NULL && !IS_ERR(ia->ri_id) && ia->ri_id->qp) - rdma_destroy_qp(ia->ri_id); + if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { + if (ia->ri_id->qp) + rdma_destroy_qp(ia->ri_id); + rdma_destroy_id(ia->ri_id); + ia->ri_id = NULL; + } if (ia->ri_pd != NULL && !IS_ERR(ia->ri_pd)) { rc = ib_dealloc_pd(ia->ri_pd); dprintk("RPC: %s: ib_dealloc_pd returned %i\n", __func__, rc); } - if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) - rdma_destroy_id(ia->ri_id); } /* @@ -541,6 +648,12 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.srq = NULL; ep->rep_attr.cap.max_send_wr = cdata->max_requests; switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: + /* Add room for frmr register and invalidate WRs */ + ep->rep_attr.cap.max_send_wr *= 3; + if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) + return -EINVAL; + break; case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: /* Add room for mw_binds+unbinds - overkill! */ @@ -617,29 +730,13 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_remote_cma.private_data_len = 0; /* Client offers RDMA Read but does not initiate */ - switch (ia->ri_memreg_strategy) { - case RPCRDMA_BOUNCEBUFFERS: + ep->rep_remote_cma.initiator_depth = 0; + if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS) ep->rep_remote_cma.responder_resources = 0; - break; - case RPCRDMA_MTHCAFMR: - case RPCRDMA_REGISTER: - ep->rep_remote_cma.responder_resources = cdata->max_requests * - (RPCRDMA_MAX_DATA_SEGS / 8); - break; - case RPCRDMA_MEMWINDOWS: - case RPCRDMA_MEMWINDOWS_ASYNC: -#if RPCRDMA_PERSISTENT_REGISTRATION - case RPCRDMA_ALLPHYSICAL: -#endif - ep->rep_remote_cma.responder_resources = cdata->max_requests * - (RPCRDMA_MAX_DATA_SEGS / 2); - break; - default: - break; - } - if (ep->rep_remote_cma.responder_resources > devattr.max_qp_rd_atom) + else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */ + ep->rep_remote_cma.responder_resources = 32; + else ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom; - ep->rep_remote_cma.initiator_depth = 0; ep->rep_remote_cma.retry_count = 7; ep->rep_remote_cma.flow_control = 0; @@ -679,21 +776,16 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) if (rc) dprintk("RPC: %s: rpcrdma_ep_disconnect" " returned %i\n", __func__, rc); + rdma_destroy_qp(ia->ri_id); + ia->ri_id->qp = NULL; } - ep->rep_func = NULL; - /* padding - could be done in rpcrdma_buffer_destroy... */ if (ep->rep_pad_mr) { rpcrdma_deregister_internal(ia, ep->rep_pad_mr, &ep->rep_pad); ep->rep_pad_mr = NULL; } - if (ia->ri_id->qp) { - rdma_destroy_qp(ia->ri_id); - ia->ri_id->qp = NULL; - } - rpcrdma_clean_cq(ep->rep_cq); rc = ib_destroy_cq(ep->rep_cq); if (rc) @@ -712,9 +804,8 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) struct rdma_cm_id *id; int rc = 0; int retry_count = 0; - int reconnect = (ep->rep_connected != 0); - if (reconnect) { + if (ep->rep_connected != 0) { struct rpcrdma_xprt *xprt; retry: rc = rpcrdma_ep_disconnect(ep, ia); @@ -745,6 +836,7 @@ retry: goto out; } /* END TEMP */ + rdma_destroy_qp(ia->ri_id); rdma_destroy_id(ia->ri_id); ia->ri_id = id; } @@ -769,14 +861,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { } } - /* Theoretically a client initiator_depth > 0 is not needed, - * but many peers fail to complete the connection unless they - * == responder_resources! */ - if (ep->rep_remote_cma.initiator_depth != - ep->rep_remote_cma.responder_resources) - ep->rep_remote_cma.initiator_depth = - ep->rep_remote_cma.responder_resources; - ep->rep_connected = 0; rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); @@ -786,9 +870,6 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { goto out; } - if (reconnect) - return 0; - wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); /* @@ -805,14 +886,16 @@ if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) { if (ep->rep_connected <= 0) { /* Sometimes, the only way to reliably connect to remote * CMs is to use same nonzero values for ORD and IRD. */ - ep->rep_remote_cma.initiator_depth = - ep->rep_remote_cma.responder_resources; - if (ep->rep_remote_cma.initiator_depth == 0) - ++ep->rep_remote_cma.initiator_depth; - if (ep->rep_remote_cma.responder_resources == 0) - ++ep->rep_remote_cma.responder_resources; - if (retry_count++ == 0) + if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 && + (ep->rep_remote_cma.responder_resources == 0 || + ep->rep_remote_cma.initiator_depth != + ep->rep_remote_cma.responder_resources)) { + if (ep->rep_remote_cma.responder_resources == 0) + ep->rep_remote_cma.responder_resources = 1; + ep->rep_remote_cma.initiator_depth = + ep->rep_remote_cma.responder_resources; goto retry; + } rc = ep->rep_connected; } else { dprintk("RPC: %s: connected\n", __func__); @@ -863,6 +946,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, char *p; size_t len; int i, rc; + struct rpcrdma_mw *r; buf->rb_max_requests = cdata->max_requests; spin_lock_init(&buf->rb_lock); @@ -873,7 +957,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, * 2. arrays of struct rpcrdma_req to fill in pointers * 3. array of struct rpcrdma_rep for replies * 4. padding, if any - * 5. mw's, if any + * 5. mw's, fmr's or frmr's, if any * Send/recv buffers in req/rep need to be registered */ @@ -881,6 +965,10 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, (sizeof(struct rpcrdma_req *) + sizeof(struct rpcrdma_rep *)); len += cdata->padding; switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: + len += buf->rb_max_requests * RPCRDMA_MAX_SEGS * + sizeof(struct rpcrdma_mw); + break; case RPCRDMA_MTHCAFMR: /* TBD we are perhaps overallocating here */ len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS * @@ -927,15 +1015,37 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, * and also reduce unbind-to-bind collision. */ INIT_LIST_HEAD(&buf->rb_mws); + r = (struct rpcrdma_mw *)p; switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: + for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) { + r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd, + RPCRDMA_MAX_SEGS); + if (IS_ERR(r->r.frmr.fr_mr)) { + rc = PTR_ERR(r->r.frmr.fr_mr); + dprintk("RPC: %s: ib_alloc_fast_reg_mr" + " failed %i\n", __func__, rc); + goto out; + } + r->r.frmr.fr_pgl = + ib_alloc_fast_reg_page_list(ia->ri_id->device, + RPCRDMA_MAX_SEGS); + if (IS_ERR(r->r.frmr.fr_pgl)) { + rc = PTR_ERR(r->r.frmr.fr_pgl); + dprintk("RPC: %s: " + "ib_alloc_fast_reg_page_list " + "failed %i\n", __func__, rc); + goto out; + } + list_add(&r->mw_list, &buf->rb_mws); + ++r; + } + break; case RPCRDMA_MTHCAFMR: - { - struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; - struct ib_fmr_attr fa = { - RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT - }; /* TBD we are perhaps overallocating here */ for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { + static struct ib_fmr_attr fa = + { RPCRDMA_MAX_DATA_SEGS, 1, PAGE_SHIFT }; r->r.fmr = ib_alloc_fmr(ia->ri_pd, IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ, &fa); @@ -948,12 +1058,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, list_add(&r->mw_list, &buf->rb_mws); ++r; } - } break; case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: - { - struct rpcrdma_mw *r = (struct rpcrdma_mw *)p; /* Allocate one extra request's worth, for full cycling */ for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) { r->r.mw = ib_alloc_mw(ia->ri_pd); @@ -966,7 +1073,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep, list_add(&r->mw_list, &buf->rb_mws); ++r; } - } break; default: break; @@ -1046,6 +1152,7 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) { int rc, i; struct rpcrdma_ia *ia = rdmab_to_ia(buf); + struct rpcrdma_mw *r; /* clean up in reverse order from create * 1. recv mr memory (mr free, then kfree) @@ -1065,11 +1172,19 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) } if (buf->rb_send_bufs && buf->rb_send_bufs[i]) { while (!list_empty(&buf->rb_mws)) { - struct rpcrdma_mw *r; r = list_entry(buf->rb_mws.next, struct rpcrdma_mw, mw_list); list_del(&r->mw_list); switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: + rc = ib_dereg_mr(r->r.frmr.fr_mr); + if (rc) + dprintk("RPC: %s:" + " ib_dereg_mr" + " failed %i\n", + __func__, rc); + ib_free_fast_reg_page_list(r->r.frmr.fr_pgl); + break; case RPCRDMA_MTHCAFMR: rc = ib_dealloc_fmr(r->r.fmr); if (rc) @@ -1115,6 +1230,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) { struct rpcrdma_req *req; unsigned long flags; + int i; + struct rpcrdma_mw *r; spin_lock_irqsave(&buffers->rb_lock, flags); if (buffers->rb_send_index == buffers->rb_max_requests) { @@ -1135,9 +1252,8 @@ rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) } buffers->rb_send_bufs[buffers->rb_send_index++] = NULL; if (!list_empty(&buffers->rb_mws)) { - int i = RPCRDMA_MAX_SEGS - 1; + i = RPCRDMA_MAX_SEGS - 1; do { - struct rpcrdma_mw *r; r = list_entry(buffers->rb_mws.next, struct rpcrdma_mw, mw_list); list_del(&r->mw_list); @@ -1171,6 +1287,7 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) req->rl_reply = NULL; } switch (ia->ri_memreg_strategy) { + case RPCRDMA_FRMR: case RPCRDMA_MTHCAFMR: case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: @@ -1252,7 +1369,11 @@ rpcrdma_register_internal(struct rpcrdma_ia *ia, void *va, int len, va, len, DMA_BIDIRECTIONAL); iov->length = len; - if (ia->ri_bind_mem != NULL) { + if (ia->ri_have_dma_lkey) { + *mrp = NULL; + iov->lkey = ia->ri_dma_lkey; + return 0; + } else if (ia->ri_bind_mem != NULL) { *mrp = NULL; iov->lkey = ia->ri_bind_mem->lkey; return 0; @@ -1329,15 +1450,292 @@ rpcrdma_unmap_one(struct rpcrdma_ia *ia, struct rpcrdma_mr_seg *seg) seg->mr_dma, seg->mr_dmalen, seg->mr_dir); } +static int +rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg, + int *nsegs, int writing, struct rpcrdma_ia *ia, + struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_mr_seg *seg1 = seg; + struct ib_send_wr frmr_wr, *bad_wr; + u8 key; + int len, pageoff; + int i, rc; + + pageoff = offset_in_page(seg1->mr_offset); + seg1->mr_offset -= pageoff; /* start of page */ + seg1->mr_len += pageoff; + len = -pageoff; + if (*nsegs > RPCRDMA_MAX_DATA_SEGS) + *nsegs = RPCRDMA_MAX_DATA_SEGS; + for (i = 0; i < *nsegs;) { + rpcrdma_map_one(ia, seg, writing); + seg1->mr_chunk.rl_mw->r.frmr.fr_pgl->page_list[i] = seg->mr_dma; + len += seg->mr_len; + ++seg; + ++i; + /* Check for holes */ + if ((i < *nsegs && offset_in_page(seg->mr_offset)) || + offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) + break; + } + dprintk("RPC: %s: Using frmr %p to map %d segments\n", + __func__, seg1->mr_chunk.rl_mw, i); + + /* Bump the key */ + key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF); + ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key); + + /* Prepare FRMR WR */ + memset(&frmr_wr, 0, sizeof frmr_wr); + frmr_wr.opcode = IB_WR_FAST_REG_MR; + frmr_wr.send_flags = 0; /* unsignaled */ + frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma; + frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl; + frmr_wr.wr.fast_reg.page_list_len = i; + frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; + frmr_wr.wr.fast_reg.length = i << PAGE_SHIFT; + frmr_wr.wr.fast_reg.access_flags = (writing ? + IB_ACCESS_REMOTE_WRITE : IB_ACCESS_REMOTE_READ); + frmr_wr.wr.fast_reg.rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; + DECR_CQCOUNT(&r_xprt->rx_ep); + + rc = ib_post_send(ia->ri_id->qp, &frmr_wr, &bad_wr); + + if (rc) { + dprintk("RPC: %s: failed ib_post_send for register," + " status %i\n", __func__, rc); + while (i--) + rpcrdma_unmap_one(ia, --seg); + } else { + seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; + seg1->mr_base = seg1->mr_dma + pageoff; + seg1->mr_nsegs = i; + seg1->mr_len = len; + } + *nsegs = i; + return rc; +} + +static int +rpcrdma_deregister_frmr_external(struct rpcrdma_mr_seg *seg, + struct rpcrdma_ia *ia, struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_mr_seg *seg1 = seg; + struct ib_send_wr invalidate_wr, *bad_wr; + int rc; + + while (seg1->mr_nsegs--) + rpcrdma_unmap_one(ia, seg++); + + memset(&invalidate_wr, 0, sizeof invalidate_wr); + invalidate_wr.opcode = IB_WR_LOCAL_INV; + invalidate_wr.send_flags = 0; /* unsignaled */ + invalidate_wr.ex.invalidate_rkey = seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey; + DECR_CQCOUNT(&r_xprt->rx_ep); + + rc = ib_post_send(ia->ri_id->qp, &invalidate_wr, &bad_wr); + if (rc) + dprintk("RPC: %s: failed ib_post_send for invalidate," + " status %i\n", __func__, rc); + return rc; +} + +static int +rpcrdma_register_fmr_external(struct rpcrdma_mr_seg *seg, + int *nsegs, int writing, struct rpcrdma_ia *ia) +{ + struct rpcrdma_mr_seg *seg1 = seg; + u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; + int len, pageoff, i, rc; + + pageoff = offset_in_page(seg1->mr_offset); + seg1->mr_offset -= pageoff; /* start of page */ + seg1->mr_len += pageoff; + len = -pageoff; + if (*nsegs > RPCRDMA_MAX_DATA_SEGS) + *nsegs = RPCRDMA_MAX_DATA_SEGS; + for (i = 0; i < *nsegs;) { + rpcrdma_map_one(ia, seg, writing); + physaddrs[i] = seg->mr_dma; + len += seg->mr_len; + ++seg; + ++i; + /* Check for holes */ + if ((i < *nsegs && offset_in_page(seg->mr_offset)) || + offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) + break; + } + rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, + physaddrs, i, seg1->mr_dma); + if (rc) { + dprintk("RPC: %s: failed ib_map_phys_fmr " + "%u@0x%llx+%i (%d)... status %i\n", __func__, + len, (unsigned long long)seg1->mr_dma, + pageoff, i, rc); + while (i--) + rpcrdma_unmap_one(ia, --seg); + } else { + seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; + seg1->mr_base = seg1->mr_dma + pageoff; + seg1->mr_nsegs = i; + seg1->mr_len = len; + } + *nsegs = i; + return rc; +} + +static int +rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg, + struct rpcrdma_ia *ia) +{ + struct rpcrdma_mr_seg *seg1 = seg; + LIST_HEAD(l); + int rc; + + list_add(&seg1->mr_chunk.rl_mw->r.fmr->list, &l); + rc = ib_unmap_fmr(&l); + while (seg1->mr_nsegs--) + rpcrdma_unmap_one(ia, seg++); + if (rc) + dprintk("RPC: %s: failed ib_unmap_fmr," + " status %i\n", __func__, rc); + return rc; +} + +static int +rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg, + int *nsegs, int writing, struct rpcrdma_ia *ia, + struct rpcrdma_xprt *r_xprt) +{ + int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : + IB_ACCESS_REMOTE_READ); + struct ib_mw_bind param; + int rc; + + *nsegs = 1; + rpcrdma_map_one(ia, seg, writing); + param.mr = ia->ri_bind_mem; + param.wr_id = 0ULL; /* no send cookie */ + param.addr = seg->mr_dma; + param.length = seg->mr_len; + param.send_flags = 0; + param.mw_access_flags = mem_priv; + + DECR_CQCOUNT(&r_xprt->rx_ep); + rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); + if (rc) { + dprintk("RPC: %s: failed ib_bind_mw " + "%u@0x%llx status %i\n", + __func__, seg->mr_len, + (unsigned long long)seg->mr_dma, rc); + rpcrdma_unmap_one(ia, seg); + } else { + seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; + seg->mr_base = param.addr; + seg->mr_nsegs = 1; + } + return rc; +} + +static int +rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg, + struct rpcrdma_ia *ia, + struct rpcrdma_xprt *r_xprt, void **r) +{ + struct ib_mw_bind param; + LIST_HEAD(l); + int rc; + + BUG_ON(seg->mr_nsegs != 1); + param.mr = ia->ri_bind_mem; + param.addr = 0ULL; /* unbind */ + param.length = 0; + param.mw_access_flags = 0; + if (*r) { + param.wr_id = (u64) (unsigned long) *r; + param.send_flags = IB_SEND_SIGNALED; + INIT_CQCOUNT(&r_xprt->rx_ep); + } else { + param.wr_id = 0ULL; + param.send_flags = 0; + DECR_CQCOUNT(&r_xprt->rx_ep); + } + rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, ¶m); + rpcrdma_unmap_one(ia, seg); + if (rc) + dprintk("RPC: %s: failed ib_(un)bind_mw," + " status %i\n", __func__, rc); + else + *r = NULL; /* will upcall on completion */ + return rc; +} + +static int +rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg, + int *nsegs, int writing, struct rpcrdma_ia *ia) +{ + int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : + IB_ACCESS_REMOTE_READ); + struct rpcrdma_mr_seg *seg1 = seg; + struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; + int len, i, rc = 0; + + if (*nsegs > RPCRDMA_MAX_DATA_SEGS) + *nsegs = RPCRDMA_MAX_DATA_SEGS; + for (len = 0, i = 0; i < *nsegs;) { + rpcrdma_map_one(ia, seg, writing); + ipb[i].addr = seg->mr_dma; + ipb[i].size = seg->mr_len; + len += seg->mr_len; + ++seg; + ++i; + /* Check for holes */ + if ((i < *nsegs && offset_in_page(seg->mr_offset)) || + offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) + break; + } + seg1->mr_base = seg1->mr_dma; + seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, + ipb, i, mem_priv, &seg1->mr_base); + if (IS_ERR(seg1->mr_chunk.rl_mr)) { + rc = PTR_ERR(seg1->mr_chunk.rl_mr); + dprintk("RPC: %s: failed ib_reg_phys_mr " + "%u@0x%llx (%d)... status %i\n", + __func__, len, + (unsigned long long)seg1->mr_dma, i, rc); + while (i--) + rpcrdma_unmap_one(ia, --seg); + } else { + seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; + seg1->mr_nsegs = i; + seg1->mr_len = len; + } + *nsegs = i; + return rc; +} + +static int +rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg, + struct rpcrdma_ia *ia) +{ + struct rpcrdma_mr_seg *seg1 = seg; + int rc; + + rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); + seg1->mr_chunk.rl_mr = NULL; + while (seg1->mr_nsegs--) + rpcrdma_unmap_one(ia, seg++); + if (rc) + dprintk("RPC: %s: failed ib_dereg_mr," + " status %i\n", __func__, rc); + return rc; +} + int rpcrdma_register_external(struct rpcrdma_mr_seg *seg, int nsegs, int writing, struct rpcrdma_xprt *r_xprt) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE : - IB_ACCESS_REMOTE_READ); - struct rpcrdma_mr_seg *seg1 = seg; - int i; int rc = 0; switch (ia->ri_memreg_strategy) { @@ -1352,114 +1750,25 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg, break; #endif - /* Registration using fast memory registration */ + /* Registration using frmr registration */ + case RPCRDMA_FRMR: + rc = rpcrdma_register_frmr_external(seg, &nsegs, writing, ia, r_xprt); + break; + + /* Registration using fmr memory registration */ case RPCRDMA_MTHCAFMR: - { - u64 physaddrs[RPCRDMA_MAX_DATA_SEGS]; - int len, pageoff = offset_in_page(seg->mr_offset); - seg1->mr_offset -= pageoff; /* start of page */ - seg1->mr_len += pageoff; - len = -pageoff; - if (nsegs > RPCRDMA_MAX_DATA_SEGS) - nsegs = RPCRDMA_MAX_DATA_SEGS; - for (i = 0; i < nsegs;) { - rpcrdma_map_one(ia, seg, writing); - physaddrs[i] = seg->mr_dma; - len += seg->mr_len; - ++seg; - ++i; - /* Check for holes */ - if ((i < nsegs && offset_in_page(seg->mr_offset)) || - offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) - break; - } - nsegs = i; - rc = ib_map_phys_fmr(seg1->mr_chunk.rl_mw->r.fmr, - physaddrs, nsegs, seg1->mr_dma); - if (rc) { - dprintk("RPC: %s: failed ib_map_phys_fmr " - "%u@0x%llx+%i (%d)... status %i\n", __func__, - len, (unsigned long long)seg1->mr_dma, - pageoff, nsegs, rc); - while (nsegs--) - rpcrdma_unmap_one(ia, --seg); - } else { - seg1->mr_rkey = seg1->mr_chunk.rl_mw->r.fmr->rkey; - seg1->mr_base = seg1->mr_dma + pageoff; - seg1->mr_nsegs = nsegs; - seg1->mr_len = len; - } - } + rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia); break; /* Registration using memory windows */ case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: - { - struct ib_mw_bind param; - rpcrdma_map_one(ia, seg, writing); - param.mr = ia->ri_bind_mem; - param.wr_id = 0ULL; /* no send cookie */ - param.addr = seg->mr_dma; - param.length = seg->mr_len; - param.send_flags = 0; - param.mw_access_flags = mem_priv; - - DECR_CQCOUNT(&r_xprt->rx_ep); - rc = ib_bind_mw(ia->ri_id->qp, - seg->mr_chunk.rl_mw->r.mw, ¶m); - if (rc) { - dprintk("RPC: %s: failed ib_bind_mw " - "%u@0x%llx status %i\n", - __func__, seg->mr_len, - (unsigned long long)seg->mr_dma, rc); - rpcrdma_unmap_one(ia, seg); - } else { - seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey; - seg->mr_base = param.addr; - seg->mr_nsegs = 1; - nsegs = 1; - } - } + rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt); break; /* Default registration each time */ default: - { - struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS]; - int len = 0; - if (nsegs > RPCRDMA_MAX_DATA_SEGS) - nsegs = RPCRDMA_MAX_DATA_SEGS; - for (i = 0; i < nsegs;) { - rpcrdma_map_one(ia, seg, writing); - ipb[i].addr = seg->mr_dma; - ipb[i].size = seg->mr_len; - len += seg->mr_len; - ++seg; - ++i; - /* Check for holes */ - if ((i < nsegs && offset_in_page(seg->mr_offset)) || - offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len)) - break; - } - nsegs = i; - seg1->mr_base = seg1->mr_dma; - seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd, - ipb, nsegs, mem_priv, &seg1->mr_base); - if (IS_ERR(seg1->mr_chunk.rl_mr)) { - rc = PTR_ERR(seg1->mr_chunk.rl_mr); - dprintk("RPC: %s: failed ib_reg_phys_mr " - "%u@0x%llx (%d)... status %i\n", - __func__, len, - (unsigned long long)seg1->mr_dma, nsegs, rc); - while (nsegs--) - rpcrdma_unmap_one(ia, --seg); - } else { - seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey; - seg1->mr_nsegs = nsegs; - seg1->mr_len = len; - } - } + rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia); break; } if (rc) @@ -1473,7 +1782,6 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, struct rpcrdma_xprt *r_xprt, void *r) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rpcrdma_mr_seg *seg1 = seg; int nsegs = seg->mr_nsegs, rc; switch (ia->ri_memreg_strategy) { @@ -1486,56 +1794,21 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg, break; #endif + case RPCRDMA_FRMR: + rc = rpcrdma_deregister_frmr_external(seg, ia, r_xprt); + break; + case RPCRDMA_MTHCAFMR: - { - LIST_HEAD(l); - list_add(&seg->mr_chunk.rl_mw->r.fmr->list, &l); - rc = ib_unmap_fmr(&l); - while (seg1->mr_nsegs--) - rpcrdma_unmap_one(ia, seg++); - } - if (rc) - dprintk("RPC: %s: failed ib_unmap_fmr," - " status %i\n", __func__, rc); + rc = rpcrdma_deregister_fmr_external(seg, ia); break; case RPCRDMA_MEMWINDOWS_ASYNC: case RPCRDMA_MEMWINDOWS: - { - struct ib_mw_bind param; - BUG_ON(nsegs != 1); - param.mr = ia->ri_bind_mem; - param.addr = 0ULL; /* unbind */ - param.length = 0; - param.mw_access_flags = 0; - if (r) { - param.wr_id = (u64) (unsigned long) r; - param.send_flags = IB_SEND_SIGNALED; - INIT_CQCOUNT(&r_xprt->rx_ep); - } else { - param.wr_id = 0ULL; - param.send_flags = 0; - DECR_CQCOUNT(&r_xprt->rx_ep); - } - rc = ib_bind_mw(ia->ri_id->qp, - seg->mr_chunk.rl_mw->r.mw, ¶m); - rpcrdma_unmap_one(ia, seg); - } - if (rc) - dprintk("RPC: %s: failed ib_(un)bind_mw," - " status %i\n", __func__, rc); - else - r = NULL; /* will upcall on completion */ + rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r); break; default: - rc = ib_dereg_mr(seg1->mr_chunk.rl_mr); - seg1->mr_chunk.rl_mr = NULL; - while (seg1->mr_nsegs--) - rpcrdma_unmap_one(ia, seg++); - if (rc) - dprintk("RPC: %s: failed ib_dereg_mr," - " status %i\n", __func__, rc); + rc = rpcrdma_deregister_default_external(seg, ia); break; } if (r) { diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 2427822f8bd4..c7a7eba991bc 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -51,6 +51,9 @@ #include <linux/sunrpc/rpc_rdma.h> /* RPC/RDMA protocol */ #include <linux/sunrpc/xprtrdma.h> /* xprt parameters */ +#define RDMA_RESOLVE_TIMEOUT (5000) /* 5 seconds */ +#define RDMA_CONNECT_RETRY_MAX (2) /* retries if no listener backlog */ + /* * Interface Adapter -- one per transport instance */ @@ -58,6 +61,8 @@ struct rpcrdma_ia { struct rdma_cm_id *ri_id; struct ib_pd *ri_pd; struct ib_mr *ri_bind_mem; + u32 ri_dma_lkey; + int ri_have_dma_lkey; struct completion ri_done; int ri_async_rc; enum rpcrdma_memreg ri_memreg_strategy; @@ -156,6 +161,10 @@ struct rpcrdma_mr_seg { /* chunk descriptors */ union { struct ib_mw *mw; struct ib_fmr *fmr; + struct { + struct ib_fast_reg_page_list *fr_pgl; + struct ib_mr *fr_mr; + } frmr; } r; struct list_head mw_list; } *rl_mw; @@ -175,6 +184,7 @@ struct rpcrdma_req { size_t rl_size; /* actual length of buffer */ unsigned int rl_niovs; /* 0, 2 or 4 */ unsigned int rl_nchunks; /* non-zero if chunks */ + unsigned int rl_connect_cookie; /* retry detection */ struct rpcrdma_buffer *rl_buffer; /* home base for this structure */ struct rpcrdma_rep *rl_reply;/* holder for reply buffer */ struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS];/* chunk segments */ @@ -198,7 +208,7 @@ struct rpcrdma_buffer { atomic_t rb_credits; /* most recent server credits */ unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */ int rb_max_requests;/* client max requests */ - struct list_head rb_mws; /* optional memory windows/fmrs */ + struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */ int rb_send_index; struct rpcrdma_req **rb_send_bufs; int rb_recv_index; @@ -273,6 +283,11 @@ struct rpcrdma_xprt { #define rpcx_to_rdmax(x) container_of(x, struct rpcrdma_xprt, xprt) #define rpcx_to_rdmad(x) (rpcx_to_rdmax(x)->rx_data) +/* Setting this to 0 ensures interoperability with early servers. + * Setting this to 1 enhances certain unaligned read/write performance. + * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ +extern int xprt_rdma_pad_optimize; + /* * Interface Adapter calls - xprtrdma/verbs.c */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 4486c59c3aca..0a50361e3d83 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -3,8 +3,8 @@ * * Client-side transport implementation for sockets. * - * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com> - * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com> + * TCP callback races fixes (C) 1998 Red Hat + * TCP send fixes (C) 1998 Red Hat * TCP NFS related read + write fixes * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie> * @@ -249,6 +249,7 @@ struct sock_xprt { void (*old_data_ready)(struct sock *, int); void (*old_state_change)(struct sock *); void (*old_write_space)(struct sock *); + void (*old_error_report)(struct sock *); }; /* @@ -698,8 +699,9 @@ static int xs_tcp_send_request(struct rpc_task *task) case -EAGAIN: xs_nospace(task); break; - case -ECONNREFUSED: case -ECONNRESET: + xs_tcp_shutdown(xprt); + case -ECONNREFUSED: case -ENOTCONN: case -EPIPE: status = -ENOTCONN; @@ -742,6 +744,22 @@ out_release: xprt_release_xprt(xprt, task); } +static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk) +{ + transport->old_data_ready = sk->sk_data_ready; + transport->old_state_change = sk->sk_state_change; + transport->old_write_space = sk->sk_write_space; + transport->old_error_report = sk->sk_error_report; +} + +static void xs_restore_old_callbacks(struct sock_xprt *transport, struct sock *sk) +{ + sk->sk_data_ready = transport->old_data_ready; + sk->sk_state_change = transport->old_state_change; + sk->sk_write_space = transport->old_write_space; + sk->sk_error_report = transport->old_error_report; +} + /** * xs_close - close a socket * @xprt: transport @@ -765,9 +783,8 @@ static void xs_close(struct rpc_xprt *xprt) transport->sock = NULL; sk->sk_user_data = NULL; - sk->sk_data_ready = transport->old_data_ready; - sk->sk_state_change = transport->old_state_change; - sk->sk_write_space = transport->old_write_space; + + xs_restore_old_callbacks(transport, sk); write_unlock_bh(&sk->sk_callback_lock); sk->sk_no_check = 0; @@ -1180,6 +1197,28 @@ static void xs_tcp_state_change(struct sock *sk) } /** + * xs_tcp_error_report - callback mainly for catching RST events + * @sk: socket + */ +static void xs_tcp_error_report(struct sock *sk) +{ + struct rpc_xprt *xprt; + + read_lock(&sk->sk_callback_lock); + if (sk->sk_err != ECONNRESET || sk->sk_state != TCP_ESTABLISHED) + goto out; + if (!(xprt = xprt_from_sock(sk))) + goto out; + dprintk("RPC: %s client %p...\n" + "RPC: error %d\n", + __func__, xprt, sk->sk_err); + + xprt_force_disconnect(xprt); +out: + read_unlock(&sk->sk_callback_lock); +} + +/** * xs_udp_write_space - callback invoked when socket buffer space * becomes available * @sk: socket whose state has changed @@ -1454,10 +1493,9 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) write_lock_bh(&sk->sk_callback_lock); + xs_save_old_callbacks(transport, sk); + sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; sk->sk_data_ready = xs_udp_data_ready; sk->sk_write_space = xs_udp_write_space; sk->sk_no_check = UDP_CSUM_NORCV; @@ -1589,13 +1627,13 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) write_lock_bh(&sk->sk_callback_lock); + xs_save_old_callbacks(transport, sk); + sk->sk_user_data = xprt; - transport->old_data_ready = sk->sk_data_ready; - transport->old_state_change = sk->sk_state_change; - transport->old_write_space = sk->sk_write_space; sk->sk_data_ready = xs_tcp_data_ready; sk->sk_state_change = xs_tcp_state_change; sk->sk_write_space = xs_tcp_write_space; + sk->sk_error_report = xs_tcp_error_report; sk->sk_allocation = GFP_ATOMIC; /* socket options */ diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 015606b54d9b..eb90f77bb0e2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1,7 +1,7 @@ /* * NET4: Implementation of BSD Unix domain sockets. * - * Authors: Alan Cox, <alan.cox@linux.org> + * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -711,28 +711,30 @@ static struct sock *unix_find_other(struct net *net, int type, unsigned hash, int *error) { struct sock *u; - struct nameidata nd; + struct path path; int err = 0; if (sunname->sun_path[0]) { - err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd); + struct inode *inode; + err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path); if (err) goto fail; - err = vfs_permission(&nd, MAY_WRITE); + inode = path.dentry->d_inode; + err = inode_permission(inode, MAY_WRITE); if (err) goto put_fail; err = -ECONNREFUSED; - if (!S_ISSOCK(nd.path.dentry->d_inode->i_mode)) + if (!S_ISSOCK(inode->i_mode)) goto put_fail; - u = unix_find_socket_byinode(net, nd.path.dentry->d_inode); + u = unix_find_socket_byinode(net, inode); if (!u) goto put_fail; if (u->sk_type == type) - touch_atime(nd.path.mnt, nd.path.dentry); + touch_atime(path.mnt, path.dentry); - path_put(&nd.path); + path_put(&path); err=-EPROTOTYPE; if (u->sk_type != type) { @@ -753,7 +755,7 @@ static struct sock *unix_find_other(struct net *net, return u; put_fail: - path_put(&nd.path); + path_put(&path); fail: *error=err; return NULL; @@ -1300,14 +1302,23 @@ static void unix_destruct_fds(struct sk_buff *skb) sock_wfree(skb); } -static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) +static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; + + /* + * Need to duplicate file references for the sake of garbage + * collection. Otherwise a socket in the fps might become a + * candidate for GC while the skb is not yet queued. + */ + UNIXCB(skb).fp = scm_fp_dup(scm->fp); + if (!UNIXCB(skb).fp) + return -ENOMEM; + for (i=scm->fp->count-1; i>=0; i--) unix_inflight(scm->fp->fp[i]); - UNIXCB(skb).fp = scm->fp; skb->destructor = unix_destruct_fds; - scm->fp = NULL; + return 0; } /* @@ -1366,8 +1377,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out; memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - if (siocb->scm->fp) - unix_attach_fds(siocb->scm, skb); + if (siocb->scm->fp) { + err = unix_attach_fds(siocb->scm, skb); + if (err) + goto out_free; + } unix_get_secdata(siocb->scm, skb); skb_reset_transport_header(skb); @@ -1536,8 +1550,13 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, size = min_t(int, size, skb_tailroom(skb)); memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - if (siocb->scm->fp) - unix_attach_fds(siocb->scm, skb); + if (siocb->scm->fp) { + err = unix_attach_fds(siocb->scm, skb); + if (err) { + kfree_skb(skb); + goto out_err; + } + } if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) { kfree_skb(skb); @@ -2211,7 +2230,7 @@ static int unix_net_init(struct net *net) #endif error = 0; out: - return 0; + return error; } static void unix_net_exit(struct net *net) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 2a27b84f740b..6d4a9a8de5ef 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -186,8 +186,17 @@ static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *), */ struct sock *sk = unix_get_socket(*fp++); if (sk) { - hit = true; - func(unix_sk(sk)); + struct unix_sock *u = unix_sk(sk); + + /* + * Ignore non-candidates, they could + * have been added to the queues after + * starting the garbage collection + */ + if (u->gc_candidate) { + hit = true; + func(u); + } } } if (hit && hitlist != NULL) { @@ -249,11 +258,11 @@ static void inc_inflight_move_tail(struct unix_sock *u) { atomic_long_inc(&u->inflight); /* - * If this is still a candidate, move it to the end of the - * list, so that it's checked even if it was already passed - * over + * If this still might be part of a cycle, move it to the end + * of the list, so that it's checked even if it was already + * passed over */ - if (u->gc_candidate) + if (u->gc_maybe_cycle) list_move_tail(&u->link, &gc_candidates); } @@ -267,6 +276,7 @@ void unix_gc(void) struct unix_sock *next; struct sk_buff_head hitlist; struct list_head cursor; + LIST_HEAD(not_cycle_list); spin_lock(&unix_gc_lock); @@ -282,10 +292,14 @@ void unix_gc(void) * * Holding unix_gc_lock will protect these candidates from * being detached, and hence from gaining an external - * reference. This also means, that since there are no - * possible receivers, the receive queues of these sockets are - * static during the GC, even though the dequeue is done - * before the detach without atomicity guarantees. + * reference. Since there are no possible receivers, all + * buffers currently on the candidates' queues stay there + * during the garbage collection. + * + * We also know that no new candidate can be added onto the + * receive queues. Other, non candidate sockets _can_ be + * added to queue, so we must make sure only to touch + * candidates. */ list_for_each_entry_safe(u, next, &gc_inflight_list, link) { long total_refs; @@ -299,6 +313,7 @@ void unix_gc(void) if (total_refs == inflight_refs) { list_move_tail(&u->link, &gc_candidates); u->gc_candidate = 1; + u->gc_maybe_cycle = 1; } } @@ -325,14 +340,24 @@ void unix_gc(void) list_move(&cursor, &u->link); if (atomic_long_read(&u->inflight) > 0) { - list_move_tail(&u->link, &gc_inflight_list); - u->gc_candidate = 0; + list_move_tail(&u->link, ¬_cycle_list); + u->gc_maybe_cycle = 0; scan_children(&u->sk, inc_inflight_move_tail, NULL); } } list_del(&cursor); /* + * not_cycle_list contains those sockets which do not make up a + * cycle. Restore these to the inflight list. + */ + while (!list_empty(¬_cycle_list)) { + u = list_entry(not_cycle_list.next, struct unix_sock, link); + u->gc_candidate = 0; + list_move_tail(&u->link, &gc_inflight_list); + } + + /* * Now gc_candidates contains only garbage. Restore original * inflight counters for these as well, and remove the skbuffs * which are creating the cycle(s). diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 833b024f8f66..646c7121dbc0 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -14,6 +14,37 @@ config NL80211 If unsure, say Y. +config WIRELESS_OLD_REGULATORY + bool "Old wireless static regulatory definitions" + default y + ---help--- + This option enables the old static regulatory information + and uses it within the new framework. This is available + temporarily as an option to help prevent immediate issues + due to the switch to the new regulatory framework which + does require a new userspace application which has the + database of regulatory information (CRDA) and another for + setting regulatory domains (iw). + + For more information see: + + http://wireless.kernel.org/en/developers/Regulatory/CRDA + http://wireless.kernel.org/en/users/Documentation/iw + + It is important to note though that if you *do* have CRDA present + and if this option is enabled CRDA *will* be called to update the + regulatory domain (for US and JP only). Support for letting the user + set the regulatory domain through iw is also supported. This option + mainly exists to leave around for a kernel release some old static + regulatory domains that were defined and to keep around the old + ieee80211_regdom module parameter. This is being phased out and you + should stop using them ASAP. + + Say Y unless you have installed a new userspace application. + Also say Y if have one currently depending on the ieee80211_regdom + module parameter and cannot port it to use the new userspace + interfaces. + config WIRELESS_EXT bool "Wireless extensions" default n diff --git a/net/wireless/core.c b/net/wireless/core.c index f1da0b93bc56..5031db7b275b 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1,7 +1,7 @@ /* * This is the linux wireless configuration interface. * - * Copyright 2006, 2007 Johannes Berg <johannes@sipsolutions.net> + * Copyright 2006-2008 Johannes Berg <johannes@sipsolutions.net> */ #include <linux/if.h> @@ -19,6 +19,7 @@ #include "nl80211.h" #include "core.h" #include "sysfs.h" +#include "reg.h" /* name for sysfs, %d is appended */ #define PHY_NAME "phy" @@ -32,7 +33,6 @@ MODULE_DESCRIPTION("wireless configuration support"); * often because we need to do it for each command */ LIST_HEAD(cfg80211_drv_list); DEFINE_MUTEX(cfg80211_drv_mutex); -static int wiphy_counter; /* for debugfs */ static struct dentry *ieee80211_debugfs_dir; @@ -184,7 +184,8 @@ int cfg80211_dev_rename(struct cfg80211_registered_device *rdev, if (result) goto out_unlock; - if (!debugfs_rename(rdev->wiphy.debugfsdir->d_parent, + if (rdev->wiphy.debugfsdir && + !debugfs_rename(rdev->wiphy.debugfsdir->d_parent, rdev->wiphy.debugfsdir, rdev->wiphy.debugfsdir->d_parent, newname)) @@ -204,6 +205,8 @@ out_unlock: struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) { + static int wiphy_counter; + struct cfg80211_registered_device *drv; int alloc_size; @@ -220,21 +223,18 @@ struct wiphy *wiphy_new(struct cfg80211_ops *ops, int sizeof_priv) mutex_lock(&cfg80211_drv_mutex); - drv->idx = wiphy_counter; - - /* now increase counter for the next device unless - * it has wrapped previously */ - if (wiphy_counter >= 0) - wiphy_counter++; - - mutex_unlock(&cfg80211_drv_mutex); + drv->idx = wiphy_counter++; if (unlikely(drv->idx < 0)) { + wiphy_counter--; + mutex_unlock(&cfg80211_drv_mutex); /* ugh, wrapped! */ kfree(drv); return NULL; } + mutex_unlock(&cfg80211_drv_mutex); + /* give it a proper name */ snprintf(drv->wiphy.dev.bus_id, BUS_ID_SIZE, PHY_NAME "%d", drv->idx); @@ -259,6 +259,13 @@ int wiphy_register(struct wiphy *wiphy) struct ieee80211_supported_band *sband; bool have_band = false; int i; + u16 ifmodes = wiphy->interface_modes; + + /* sanity check ifmodes */ + WARN_ON(!ifmodes); + ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; + if (WARN_ON(ifmodes != wiphy->interface_modes)) + wiphy->interface_modes = ifmodes; /* sanity check supported bands/channels */ for (band = 0; band < IEEE80211_NUM_BANDS; band++) { @@ -295,7 +302,9 @@ int wiphy_register(struct wiphy *wiphy) ieee80211_set_bitrate_flags(wiphy); /* set up regulatory info */ - wiphy_update_regulatory(wiphy); + mutex_lock(&cfg80211_reg_mutex); + wiphy_update_regulatory(wiphy, REGDOM_SET_BY_CORE); + mutex_unlock(&cfg80211_reg_mutex); mutex_lock(&cfg80211_drv_mutex); @@ -309,6 +318,8 @@ int wiphy_register(struct wiphy *wiphy) drv->wiphy.debugfsdir = debugfs_create_dir(wiphy_name(&drv->wiphy), ieee80211_debugfs_dir); + if (IS_ERR(drv->wiphy.debugfsdir)) + drv->wiphy.debugfsdir = NULL; res = 0; out_unlock: @@ -373,6 +384,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, rdev = wiphy_to_dev(dev->ieee80211_ptr->wiphy); + WARN_ON(dev->ieee80211_ptr->iftype == NL80211_IFTYPE_UNSPECIFIED); + switch (state) { case NETDEV_REGISTER: mutex_lock(&rdev->devlist_mtx); @@ -404,7 +417,9 @@ static struct notifier_block cfg80211_netdev_notifier = { static int cfg80211_init(void) { - int err = wiphy_sysfs_init(); + int err; + + err = wiphy_sysfs_init(); if (err) goto out_fail_sysfs; @@ -418,8 +433,14 @@ static int cfg80211_init(void) ieee80211_debugfs_dir = debugfs_create_dir("ieee80211", NULL); + err = regulatory_init(); + if (err) + goto out_fail_reg; + return 0; +out_fail_reg: + debugfs_remove(ieee80211_debugfs_dir); out_fail_nl80211: unregister_netdevice_notifier(&cfg80211_netdev_notifier); out_fail_notifier: @@ -427,6 +448,7 @@ out_fail_notifier: out_fail_sysfs: return err; } + subsys_initcall(cfg80211_init); static void cfg80211_exit(void) @@ -435,5 +457,6 @@ static void cfg80211_exit(void) nl80211_exit(); unregister_netdevice_notifier(&cfg80211_netdev_notifier); wiphy_sysfs_exit(); + regulatory_exit(); } module_exit(cfg80211_exit); diff --git a/net/wireless/core.h b/net/wireless/core.h index 7a02c356d63d..771cc5cc7658 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -79,6 +79,6 @@ extern int cfg80211_dev_rename(struct cfg80211_registered_device *drv, char *newname); void ieee80211_set_bitrate_flags(struct wiphy *wiphy); -void wiphy_update_regulatory(struct wiphy *wiphy); +void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby); #endif /* __NET_WIRELESS_CORE_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 59eb2cf42e5f..572793c8c7ab 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -18,6 +18,7 @@ #include <net/cfg80211.h> #include "core.h" #include "nl80211.h" +#include "reg.h" /* the netlink family */ static struct genl_family nl80211_fam = { @@ -87,6 +88,16 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_MESH_ID] = { .type = NLA_BINARY, .len = IEEE80211_MAX_MESH_ID_LEN }, [NL80211_ATTR_MPATH_NEXT_HOP] = { .type = NLA_U32 }, + + [NL80211_ATTR_REG_ALPHA2] = { .type = NLA_STRING, .len = 2 }, + [NL80211_ATTR_REG_RULES] = { .type = NLA_NESTED }, + + [NL80211_ATTR_BSS_CTS_PROT] = { .type = NLA_U8 }, + [NL80211_ATTR_BSS_SHORT_PREAMBLE] = { .type = NLA_U8 }, + [NL80211_ATTR_BSS_SHORT_SLOT_TIME] = { .type = NLA_U8 }, + + [NL80211_ATTR_HT_CAPABILITY] = { .type = NLA_BINARY, + .len = NL80211_HT_CAPABILITY_LEN }, }; /* message building helper */ @@ -106,10 +117,12 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct nlattr *nl_bands, *nl_band; struct nlattr *nl_freqs, *nl_freq; struct nlattr *nl_rates, *nl_rate; + struct nlattr *nl_modes; enum ieee80211_band band; struct ieee80211_channel *chan; struct ieee80211_rate *rate; int i; + u16 ifmodes = dev->wiphy.interface_modes; hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) @@ -118,6 +131,20 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, dev->idx); NLA_PUT_STRING(msg, NL80211_ATTR_WIPHY_NAME, wiphy_name(&dev->wiphy)); + nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); + if (!nl_modes) + goto nla_put_failure; + + i = 0; + while (ifmodes) { + if (ifmodes & 1) + NLA_PUT_FLAG(msg, i); + ifmodes >>= 1; + i++; + } + + nla_nest_end(msg, nl_modes); + nl_bands = nla_nest_start(msg, NL80211_ATTR_WIPHY_BANDS); if (!nl_bands) goto nla_put_failure; @@ -272,7 +299,7 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex); NLA_PUT_STRING(msg, NL80211_ATTR_IFNAME, dev->name); - /* TODO: interface type */ + NLA_PUT_U32(msg, NL80211_ATTR_IFTYPE, dev->ieee80211_ptr->iftype); return genlmsg_end(msg, hdr); nla_put_failure: @@ -391,40 +418,56 @@ static int nl80211_set_interface(struct sk_buff *skb, struct genl_info *info) int err, ifindex; enum nl80211_iftype type; struct net_device *dev; - u32 flags; + u32 _flags, *flags = NULL; memset(¶ms, 0, sizeof(params)); - if (info->attrs[NL80211_ATTR_IFTYPE]) { - type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); - if (type > NL80211_IFTYPE_MAX) - return -EINVAL; - } else - return -EINVAL; - err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); if (err) return err; ifindex = dev->ifindex; + type = dev->ieee80211_ptr->iftype; dev_put(dev); - if (!drv->ops->change_virtual_intf) { + err = -EINVAL; + if (info->attrs[NL80211_ATTR_IFTYPE]) { + type = nla_get_u32(info->attrs[NL80211_ATTR_IFTYPE]); + if (type > NL80211_IFTYPE_MAX) + goto unlock; + } + + if (!drv->ops->change_virtual_intf || + !(drv->wiphy.interface_modes & (1 << type))) { err = -EOPNOTSUPP; goto unlock; } - if (type == NL80211_IFTYPE_MESH_POINT && - info->attrs[NL80211_ATTR_MESH_ID]) { + if (info->attrs[NL80211_ATTR_MESH_ID]) { + if (type != NL80211_IFTYPE_MESH_POINT) { + err = -EINVAL; + goto unlock; + } params.mesh_id = nla_data(info->attrs[NL80211_ATTR_MESH_ID]); params.mesh_id_len = nla_len(info->attrs[NL80211_ATTR_MESH_ID]); } + if (info->attrs[NL80211_ATTR_MNTR_FLAGS]) { + if (type != NL80211_IFTYPE_MONITOR) { + err = -EINVAL; + goto unlock; + } + err = parse_monitor_flags(info->attrs[NL80211_ATTR_MNTR_FLAGS], + &_flags); + if (!err) + flags = &_flags; + } rtnl_lock(); - err = parse_monitor_flags(type == NL80211_IFTYPE_MONITOR ? - info->attrs[NL80211_ATTR_MNTR_FLAGS] : NULL, - &flags); err = drv->ops->change_virtual_intf(&drv->wiphy, ifindex, - type, err ? NULL : &flags, ¶ms); + type, flags, ¶ms); + + dev = __dev_get_by_index(&init_net, ifindex); + WARN_ON(!dev || (!err && dev->ieee80211_ptr->iftype != type)); + rtnl_unlock(); unlock: @@ -455,7 +498,8 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(drv)) return PTR_ERR(drv); - if (!drv->ops->add_virtual_intf) { + if (!drv->ops->add_virtual_intf || + !(drv->wiphy.interface_modes & (1 << type))) { err = -EOPNOTSUPP; goto unlock; } @@ -1125,6 +1169,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) + params.ht_capa = + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS], ¶ms.station_flags)) return -EINVAL; @@ -1188,6 +1236,9 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.listen_interval = nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); + if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) + params.ht_capa = + nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); if (parse_station_flags(info->attrs[NL80211_ATTR_STA_FLAGS], ¶ms.station_flags)) @@ -1525,6 +1576,183 @@ static int nl80211_del_mpath(struct sk_buff *skb, struct genl_info *info) return err; } +static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *drv; + int err; + struct net_device *dev; + struct bss_parameters params; + + memset(¶ms, 0, sizeof(params)); + /* default to not changing parameters */ + params.use_cts_prot = -1; + params.use_short_preamble = -1; + params.use_short_slot_time = -1; + + if (info->attrs[NL80211_ATTR_BSS_CTS_PROT]) + params.use_cts_prot = + nla_get_u8(info->attrs[NL80211_ATTR_BSS_CTS_PROT]); + if (info->attrs[NL80211_ATTR_BSS_SHORT_PREAMBLE]) + params.use_short_preamble = + nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_PREAMBLE]); + if (info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]) + params.use_short_slot_time = + nla_get_u8(info->attrs[NL80211_ATTR_BSS_SHORT_SLOT_TIME]); + + err = get_drv_dev_by_info_ifindex(info->attrs, &drv, &dev); + if (err) + return err; + + if (!drv->ops->change_bss) { + err = -EOPNOTSUPP; + goto out; + } + + rtnl_lock(); + err = drv->ops->change_bss(&drv->wiphy, dev, ¶ms); + rtnl_unlock(); + + out: + cfg80211_put_dev(drv); + dev_put(dev); + return err; +} + +static const struct nla_policy + reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = { + [NL80211_ATTR_REG_RULE_FLAGS] = { .type = NLA_U32 }, + [NL80211_ATTR_FREQ_RANGE_START] = { .type = NLA_U32 }, + [NL80211_ATTR_FREQ_RANGE_END] = { .type = NLA_U32 }, + [NL80211_ATTR_FREQ_RANGE_MAX_BW] = { .type = NLA_U32 }, + [NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN] = { .type = NLA_U32 }, + [NL80211_ATTR_POWER_RULE_MAX_EIRP] = { .type = NLA_U32 }, +}; + +static int parse_reg_rule(struct nlattr *tb[], + struct ieee80211_reg_rule *reg_rule) +{ + struct ieee80211_freq_range *freq_range = ®_rule->freq_range; + struct ieee80211_power_rule *power_rule = ®_rule->power_rule; + + if (!tb[NL80211_ATTR_REG_RULE_FLAGS]) + return -EINVAL; + if (!tb[NL80211_ATTR_FREQ_RANGE_START]) + return -EINVAL; + if (!tb[NL80211_ATTR_FREQ_RANGE_END]) + return -EINVAL; + if (!tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]) + return -EINVAL; + if (!tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]) + return -EINVAL; + + reg_rule->flags = nla_get_u32(tb[NL80211_ATTR_REG_RULE_FLAGS]); + + freq_range->start_freq_khz = + nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_START]); + freq_range->end_freq_khz = + nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_END]); + freq_range->max_bandwidth_khz = + nla_get_u32(tb[NL80211_ATTR_FREQ_RANGE_MAX_BW]); + + power_rule->max_eirp = + nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_EIRP]); + + if (tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]) + power_rule->max_antenna_gain = + nla_get_u32(tb[NL80211_ATTR_POWER_RULE_MAX_ANT_GAIN]); + + return 0; +} + +static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) +{ + int r; + char *data = NULL; + + if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) + return -EINVAL; + + data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY + /* We ignore world regdom requests with the old regdom setup */ + if (is_world_regdom(data)) + return -EINVAL; +#endif + mutex_lock(&cfg80211_drv_mutex); + r = __regulatory_hint(NULL, REGDOM_SET_BY_USER, data, NULL); + mutex_unlock(&cfg80211_drv_mutex); + return r; +} + +static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1]; + struct nlattr *nl_reg_rule; + char *alpha2 = NULL; + int rem_reg_rules = 0, r = 0; + u32 num_rules = 0, rule_idx = 0, size_of_regd; + struct ieee80211_regdomain *rd = NULL; + + if (!info->attrs[NL80211_ATTR_REG_ALPHA2]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_REG_RULES]) + return -EINVAL; + + alpha2 = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]); + + nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], + rem_reg_rules) { + num_rules++; + if (num_rules > NL80211_MAX_SUPP_REG_RULES) + goto bad_reg; + } + + if (!reg_is_valid_request(alpha2)) + return -EINVAL; + + size_of_regd = sizeof(struct ieee80211_regdomain) + + (num_rules * sizeof(struct ieee80211_reg_rule)); + + rd = kzalloc(size_of_regd, GFP_KERNEL); + if (!rd) + return -ENOMEM; + + rd->n_reg_rules = num_rules; + rd->alpha2[0] = alpha2[0]; + rd->alpha2[1] = alpha2[1]; + + nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES], + rem_reg_rules) { + nla_parse(tb, NL80211_REG_RULE_ATTR_MAX, + nla_data(nl_reg_rule), nla_len(nl_reg_rule), + reg_rule_policy); + r = parse_reg_rule(tb, &rd->reg_rules[rule_idx]); + if (r) + goto bad_reg; + + rule_idx++; + + if (rule_idx > NL80211_MAX_SUPP_REG_RULES) + goto bad_reg; + } + + BUG_ON(rule_idx != num_rules); + + mutex_lock(&cfg80211_drv_mutex); + r = set_regdom(rd); + mutex_unlock(&cfg80211_drv_mutex); + if (r) + goto bad_reg; + + return r; + +bad_reg: + kfree(rd); + return -EINVAL; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -1656,6 +1884,24 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_SET_BSS, + .doit = nl80211_set_bss, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_SET_REG, + .doit = nl80211_set_reg, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { + .cmd = NL80211_CMD_REQ_SET_REG, + .doit = nl80211_req_set_reg, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; /* multicast groups */ diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 855bff4b3250..626dbb688499 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2,179 +2,871 @@ * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> + * Copyright 2008 Luis R. Rodriguez <lrodriguz@atheros.com> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -/* - * This regulatory domain control implementation is highly incomplete, it - * only exists for the purpose of not regressing mac80211. - * - * For now, drivers can restrict the set of allowed channels by either - * not registering those channels or setting the IEEE80211_CHAN_DISABLED - * flag; that flag will only be *set* by this code, never *cleared. +/** + * DOC: Wireless regulatory infrastructure * * The usual implementation is for a driver to read a device EEPROM to * determine which regulatory domain it should be operating under, then * looking up the allowable channels in a driver-local table and finally * registering those channels in the wiphy structure. * - * Alternatively, drivers that trust the regulatory domain control here - * will register a complete set of capabilities and the control code - * will restrict the set by setting the IEEE80211_CHAN_* flags. + * Another set of compliance enforcement is for drivers to use their + * own compliance limits which can be stored on the EEPROM. The host + * driver or firmware may ensure these are used. + * + * In addition to all this we provide an extra layer of regulatory + * conformance. For drivers which do not have any regulatory + * information CRDA provides the complete regulatory solution. + * For others it provides a community effort on further restrictions + * to enhance compliance. + * + * Note: When number of rules --> infinity we will not be able to + * index on alpha2 any more, instead we'll probably have to + * rely on some SHA1 checksum of the regdomain for example. + * */ #include <linux/kernel.h> +#include <linux/list.h> +#include <linux/random.h> +#include <linux/nl80211.h> +#include <linux/platform_device.h> #include <net/wireless.h> +#include <net/cfg80211.h> #include "core.h" +#include "reg.h" -static char *ieee80211_regdom = "US"; -module_param(ieee80211_regdom, charp, 0444); -MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); - -struct ieee80211_channel_range { - short start_freq; - short end_freq; - int max_power; - int max_antenna_gain; - u32 flags; +/* wiphy is set if this request's initiator is REGDOM_SET_BY_DRIVER */ +struct regulatory_request { + struct list_head list; + struct wiphy *wiphy; + int granted; + enum reg_set_by initiator; + char alpha2[2]; }; -struct ieee80211_regdomain { - const char *code; - const struct ieee80211_channel_range *ranges; - int n_ranges; +static LIST_HEAD(regulatory_requests); +DEFINE_MUTEX(cfg80211_reg_mutex); + +/* To trigger userspace events */ +static struct platform_device *reg_pdev; + +/* Keep the ordering from large to small */ +static u32 supported_bandwidths[] = { + MHZ_TO_KHZ(40), + MHZ_TO_KHZ(20), }; -#define RANGE_PWR(_start, _end, _pwr, _ag, _flags) \ - { _start, _end, _pwr, _ag, _flags } +static struct list_head regulatory_requests; +/* Central wireless core regulatory domains, we only need two, + * the current one and a world regulatory domain in case we have no + * information to give us an alpha2 */ +static const struct ieee80211_regdomain *cfg80211_regdomain; -/* - * Ideally, in the future, these definitions will be loaded from a - * userspace table via some daemon. - */ -static const struct ieee80211_channel_range ieee80211_US_channels[] = { - /* IEEE 802.11b/g, channels 1..11 */ - RANGE_PWR(2412, 2462, 27, 6, 0), - /* IEEE 802.11a, channel 36*/ - RANGE_PWR(5180, 5180, 23, 6, 0), - /* IEEE 802.11a, channel 40*/ - RANGE_PWR(5200, 5200, 23, 6, 0), - /* IEEE 802.11a, channel 44*/ - RANGE_PWR(5220, 5220, 23, 6, 0), - /* IEEE 802.11a, channels 48..64 */ - RANGE_PWR(5240, 5320, 23, 6, 0), - /* IEEE 802.11a, channels 149..165, outdoor */ - RANGE_PWR(5745, 5825, 30, 6, 0), +/* We keep a static world regulatory domain in case of the absence of CRDA */ +static const struct ieee80211_regdomain world_regdom = { + .n_reg_rules = 1, + .alpha2 = "00", + .reg_rules = { + REG_RULE(2412-10, 2462+10, 40, 6, 20, + NL80211_RRF_PASSIVE_SCAN | + NL80211_RRF_NO_IBSS), + } }; -static const struct ieee80211_channel_range ieee80211_JP_channels[] = { - /* IEEE 802.11b/g, channels 1..14 */ - RANGE_PWR(2412, 2484, 20, 6, 0), - /* IEEE 802.11a, channels 34..48 */ - RANGE_PWR(5170, 5240, 20, 6, IEEE80211_CHAN_PASSIVE_SCAN), - /* IEEE 802.11a, channels 52..64 */ - RANGE_PWR(5260, 5320, 20, 6, IEEE80211_CHAN_NO_IBSS | - IEEE80211_CHAN_RADAR), -}; +static const struct ieee80211_regdomain *cfg80211_world_regdom = + &world_regdom; + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY +static char *ieee80211_regdom = "US"; +module_param(ieee80211_regdom, charp, 0444); +MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); + +/* We assume 40 MHz bandwidth for the old regulatory work. + * We make emphasis we are using the exact same frequencies + * as before */ -static const struct ieee80211_channel_range ieee80211_EU_channels[] = { - /* IEEE 802.11b/g, channels 1..13 */ - RANGE_PWR(2412, 2472, 20, 6, 0), - /* IEEE 802.11a, channel 36*/ - RANGE_PWR(5180, 5180, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), - /* IEEE 802.11a, channel 40*/ - RANGE_PWR(5200, 5200, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), - /* IEEE 802.11a, channel 44*/ - RANGE_PWR(5220, 5220, 23, 6, IEEE80211_CHAN_PASSIVE_SCAN), - /* IEEE 802.11a, channels 48..64 */ - RANGE_PWR(5240, 5320, 23, 6, IEEE80211_CHAN_NO_IBSS | - IEEE80211_CHAN_RADAR), - /* IEEE 802.11a, channels 100..140 */ - RANGE_PWR(5500, 5700, 30, 6, IEEE80211_CHAN_NO_IBSS | - IEEE80211_CHAN_RADAR), +static const struct ieee80211_regdomain us_regdom = { + .n_reg_rules = 6, + .alpha2 = "US", + .reg_rules = { + /* IEEE 802.11b/g, channels 1..11 */ + REG_RULE(2412-10, 2462+10, 40, 6, 27, 0), + /* IEEE 802.11a, channel 36 */ + REG_RULE(5180-10, 5180+10, 40, 6, 23, 0), + /* IEEE 802.11a, channel 40 */ + REG_RULE(5200-10, 5200+10, 40, 6, 23, 0), + /* IEEE 802.11a, channel 44 */ + REG_RULE(5220-10, 5220+10, 40, 6, 23, 0), + /* IEEE 802.11a, channels 48..64 */ + REG_RULE(5240-10, 5320+10, 40, 6, 23, 0), + /* IEEE 802.11a, channels 149..165, outdoor */ + REG_RULE(5745-10, 5825+10, 40, 6, 30, 0), + } }; -#define REGDOM(_code) \ - { \ - .code = __stringify(_code), \ - .ranges = ieee80211_ ##_code## _channels, \ - .n_ranges = ARRAY_SIZE(ieee80211_ ##_code## _channels), \ +static const struct ieee80211_regdomain jp_regdom = { + .n_reg_rules = 3, + .alpha2 = "JP", + .reg_rules = { + /* IEEE 802.11b/g, channels 1..14 */ + REG_RULE(2412-10, 2484+10, 40, 6, 20, 0), + /* IEEE 802.11a, channels 34..48 */ + REG_RULE(5170-10, 5240+10, 40, 6, 20, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channels 52..64 */ + REG_RULE(5260-10, 5320+10, 40, 6, 20, + NL80211_RRF_NO_IBSS | + NL80211_RRF_DFS), } +}; -static const struct ieee80211_regdomain ieee80211_regdoms[] = { - REGDOM(US), - REGDOM(JP), - REGDOM(EU), +static const struct ieee80211_regdomain eu_regdom = { + .n_reg_rules = 6, + /* This alpha2 is bogus, we leave it here just for stupid + * backward compatibility */ + .alpha2 = "EU", + .reg_rules = { + /* IEEE 802.11b/g, channels 1..13 */ + REG_RULE(2412-10, 2472+10, 40, 6, 20, 0), + /* IEEE 802.11a, channel 36 */ + REG_RULE(5180-10, 5180+10, 40, 6, 23, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channel 40 */ + REG_RULE(5200-10, 5200+10, 40, 6, 23, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channel 44 */ + REG_RULE(5220-10, 5220+10, 40, 6, 23, + NL80211_RRF_PASSIVE_SCAN), + /* IEEE 802.11a, channels 48..64 */ + REG_RULE(5240-10, 5320+10, 40, 6, 20, + NL80211_RRF_NO_IBSS | + NL80211_RRF_DFS), + /* IEEE 802.11a, channels 100..140 */ + REG_RULE(5500-10, 5700+10, 40, 6, 30, + NL80211_RRF_NO_IBSS | + NL80211_RRF_DFS), + } }; +static const struct ieee80211_regdomain *static_regdom(char *alpha2) +{ + if (alpha2[0] == 'U' && alpha2[1] == 'S') + return &us_regdom; + if (alpha2[0] == 'J' && alpha2[1] == 'P') + return &jp_regdom; + if (alpha2[0] == 'E' && alpha2[1] == 'U') + return &eu_regdom; + /* Default, as per the old rules */ + return &us_regdom; +} + +static bool is_old_static_regdom(const struct ieee80211_regdomain *rd) +{ + if (rd == &us_regdom || rd == &jp_regdom || rd == &eu_regdom) + return true; + return false; +} +#else +static inline bool is_old_static_regdom(const struct ieee80211_regdomain *rd) +{ + return false; +} +#endif -static const struct ieee80211_regdomain *get_regdom(void) +static void reset_regdomains(void) { - static const struct ieee80211_channel_range - ieee80211_world_channels[] = { - /* IEEE 802.11b/g, channels 1..11 */ - RANGE_PWR(2412, 2462, 27, 6, 0), + /* avoid freeing static information or freeing something twice */ + if (cfg80211_regdomain == cfg80211_world_regdom) + cfg80211_regdomain = NULL; + if (cfg80211_world_regdom == &world_regdom) + cfg80211_world_regdom = NULL; + if (cfg80211_regdomain == &world_regdom) + cfg80211_regdomain = NULL; + if (is_old_static_regdom(cfg80211_regdomain)) + cfg80211_regdomain = NULL; + + kfree(cfg80211_regdomain); + kfree(cfg80211_world_regdom); + + cfg80211_world_regdom = &world_regdom; + cfg80211_regdomain = NULL; +} + +/* Dynamic world regulatory domain requested by the wireless + * core upon initialization */ +static void update_world_regdomain(const struct ieee80211_regdomain *rd) +{ + BUG_ON(list_empty(®ulatory_requests)); + + reset_regdomains(); + + cfg80211_world_regdom = rd; + cfg80211_regdomain = rd; +} + +bool is_world_regdom(const char *alpha2) +{ + if (!alpha2) + return false; + if (alpha2[0] == '0' && alpha2[1] == '0') + return true; + return false; +} + +static bool is_alpha2_set(const char *alpha2) +{ + if (!alpha2) + return false; + if (alpha2[0] != 0 && alpha2[1] != 0) + return true; + return false; +} + +static bool is_alpha_upper(char letter) +{ + /* ASCII A - Z */ + if (letter >= 65 && letter <= 90) + return true; + return false; +} + +static bool is_unknown_alpha2(const char *alpha2) +{ + if (!alpha2) + return false; + /* Special case where regulatory domain was built by driver + * but a specific alpha2 cannot be determined */ + if (alpha2[0] == '9' && alpha2[1] == '9') + return true; + return false; +} + +static bool is_an_alpha2(const char *alpha2) +{ + if (!alpha2) + return false; + if (is_alpha_upper(alpha2[0]) && is_alpha_upper(alpha2[1])) + return true; + return false; +} + +static bool alpha2_equal(const char *alpha2_x, const char *alpha2_y) +{ + if (!alpha2_x || !alpha2_y) + return false; + if (alpha2_x[0] == alpha2_y[0] && + alpha2_x[1] == alpha2_y[1]) + return true; + return false; +} + +static bool regdom_changed(const char *alpha2) +{ + if (!cfg80211_regdomain) + return true; + if (alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) + return false; + return true; +} + +/* This lets us keep regulatory code which is updated on a regulatory + * basis in userspace. */ +static int call_crda(const char *alpha2) +{ + char country_env[9 + 2] = "COUNTRY="; + char *envp[] = { + country_env, + NULL }; - static const struct ieee80211_regdomain regdom_world = REGDOM(world); - int i; - for (i = 0; i < ARRAY_SIZE(ieee80211_regdoms); i++) - if (strcmp(ieee80211_regdom, ieee80211_regdoms[i].code) == 0) - return &ieee80211_regdoms[i]; + if (!is_world_regdom((char *) alpha2)) + printk(KERN_INFO "cfg80211: Calling CRDA for country: %c%c\n", + alpha2[0], alpha2[1]); + else + printk(KERN_INFO "cfg80211: Calling CRDA to update world " + "regulatory domain\n"); + + country_env[8] = alpha2[0]; + country_env[9] = alpha2[1]; - return ®dom_world; + return kobject_uevent_env(®_pdev->dev.kobj, KOBJ_CHANGE, envp); } +/* This has the logic which determines when a new request + * should be ignored. */ +static int ignore_request(struct wiphy *wiphy, enum reg_set_by set_by, + char *alpha2, struct ieee80211_regdomain *rd) +{ + struct regulatory_request *last_request = NULL; + + /* All initial requests are respected */ + if (list_empty(®ulatory_requests)) + return 0; + + last_request = list_first_entry(®ulatory_requests, + struct regulatory_request, list); -static void handle_channel(struct ieee80211_channel *chan, - const struct ieee80211_regdomain *rd) + switch (set_by) { + case REGDOM_SET_BY_INIT: + return -EINVAL; + case REGDOM_SET_BY_CORE: + /* Always respect new wireless core hints, should only + * come in for updating the world regulatory domain at init + * anyway */ + return 0; + case REGDOM_SET_BY_COUNTRY_IE: + if (last_request->initiator == set_by) { + if (last_request->wiphy != wiphy) { + /* Two cards with two APs claiming different + * different Country IE alpha2s! + * You're special!! */ + if (!alpha2_equal(last_request->alpha2, + cfg80211_regdomain->alpha2)) { + /* XXX: Deal with conflict, consider + * building a new one out of the + * intersection */ + WARN_ON(1); + return -EOPNOTSUPP; + } + return -EALREADY; + } + /* Two consecutive Country IE hints on the same wiphy */ + if (!alpha2_equal(cfg80211_regdomain->alpha2, alpha2)) + return 0; + return -EALREADY; + } + if (WARN_ON(!is_alpha2_set(alpha2) || !is_an_alpha2(alpha2)), + "Invalid Country IE regulatory hint passed " + "to the wireless core\n") + return -EINVAL; + /* We ignore Country IE hints for now, as we haven't yet + * added the dot11MultiDomainCapabilityEnabled flag + * for wiphys */ + return 1; + case REGDOM_SET_BY_DRIVER: + BUG_ON(!wiphy); + if (last_request->initiator == set_by) { + /* Two separate drivers hinting different things, + * this is possible if you have two devices present + * on a system with different EEPROM regulatory + * readings. XXX: Do intersection, we support only + * the first regulatory hint for now */ + if (last_request->wiphy != wiphy) + return -EALREADY; + if (rd) + return -EALREADY; + /* Driver should not be trying to hint different + * regulatory domains! */ + BUG_ON(!alpha2_equal(alpha2, + cfg80211_regdomain->alpha2)); + return -EALREADY; + } + if (last_request->initiator == REGDOM_SET_BY_CORE) + return 0; + /* XXX: Handle intersection, and add the + * dot11MultiDomainCapabilityEnabled flag to wiphy. For now + * we assume the driver has this set to false, following the + * 802.11d dot11MultiDomainCapabilityEnabled documentation */ + if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) + return 0; + return 0; + case REGDOM_SET_BY_USER: + if (last_request->initiator == set_by || + last_request->initiator == REGDOM_SET_BY_CORE) + return 0; + /* Drivers can use their wiphy's reg_notifier() + * to override any information */ + if (last_request->initiator == REGDOM_SET_BY_DRIVER) + return 0; + /* XXX: Handle intersection */ + if (last_request->initiator == REGDOM_SET_BY_COUNTRY_IE) + return -EOPNOTSUPP; + return 0; + default: + return -EINVAL; + } +} + +static bool __reg_is_valid_request(const char *alpha2, + struct regulatory_request **request) +{ + struct regulatory_request *req; + if (list_empty(®ulatory_requests)) + return false; + list_for_each_entry(req, ®ulatory_requests, list) { + if (alpha2_equal(req->alpha2, alpha2)) { + *request = req; + return true; + } + } + return false; +} + +/* Used by nl80211 before kmalloc'ing our regulatory domain */ +bool reg_is_valid_request(const char *alpha2) +{ + struct regulatory_request *request = NULL; + return __reg_is_valid_request(alpha2, &request); +} + +/* Sanity check on a regulatory rule */ +static bool is_valid_reg_rule(const struct ieee80211_reg_rule *rule) +{ + const struct ieee80211_freq_range *freq_range = &rule->freq_range; + u32 freq_diff; + + if (freq_range->start_freq_khz == 0 || freq_range->end_freq_khz == 0) + return false; + + if (freq_range->start_freq_khz > freq_range->end_freq_khz) + return false; + + freq_diff = freq_range->end_freq_khz - freq_range->start_freq_khz; + + if (freq_range->max_bandwidth_khz > freq_diff) + return false; + + return true; +} + +static bool is_valid_rd(const struct ieee80211_regdomain *rd) +{ + const struct ieee80211_reg_rule *reg_rule = NULL; + unsigned int i; + + if (!rd->n_reg_rules) + return false; + + for (i = 0; i < rd->n_reg_rules; i++) { + reg_rule = &rd->reg_rules[i]; + if (!is_valid_reg_rule(reg_rule)) + return false; + } + + return true; +} + +/* Returns value in KHz */ +static u32 freq_max_bandwidth(const struct ieee80211_freq_range *freq_range, + u32 freq) +{ + unsigned int i; + for (i = 0; i < ARRAY_SIZE(supported_bandwidths); i++) { + u32 start_freq_khz = freq - supported_bandwidths[i]/2; + u32 end_freq_khz = freq + supported_bandwidths[i]/2; + if (start_freq_khz >= freq_range->start_freq_khz && + end_freq_khz <= freq_range->end_freq_khz) + return supported_bandwidths[i]; + } + return 0; +} + +/* XXX: add support for the rest of enum nl80211_reg_rule_flags, we may + * want to just have the channel structure use these */ +static u32 map_regdom_flags(u32 rd_flags) +{ + u32 channel_flags = 0; + if (rd_flags & NL80211_RRF_PASSIVE_SCAN) + channel_flags |= IEEE80211_CHAN_PASSIVE_SCAN; + if (rd_flags & NL80211_RRF_NO_IBSS) + channel_flags |= IEEE80211_CHAN_NO_IBSS; + if (rd_flags & NL80211_RRF_DFS) + channel_flags |= IEEE80211_CHAN_RADAR; + return channel_flags; +} + +/** + * freq_reg_info - get regulatory information for the given frequency + * @center_freq: Frequency in KHz for which we want regulatory information for + * @bandwidth: the bandwidth requirement you have in KHz, if you do not have one + * you can set this to 0. If this frequency is allowed we then set + * this value to the maximum allowed bandwidth. + * @reg_rule: the regulatory rule which we have for this frequency + * + * Use this function to get the regulatory rule for a specific frequency. + */ +static int freq_reg_info(u32 center_freq, u32 *bandwidth, + const struct ieee80211_reg_rule **reg_rule) { int i; - u32 flags = chan->orig_flags; - const struct ieee80211_channel_range *rg = NULL; + u32 max_bandwidth = 0; - for (i = 0; i < rd->n_ranges; i++) { - if (rd->ranges[i].start_freq <= chan->center_freq && - chan->center_freq <= rd->ranges[i].end_freq) { - rg = &rd->ranges[i]; + if (!cfg80211_regdomain) + return -EINVAL; + + for (i = 0; i < cfg80211_regdomain->n_reg_rules; i++) { + const struct ieee80211_reg_rule *rr; + const struct ieee80211_freq_range *fr = NULL; + const struct ieee80211_power_rule *pr = NULL; + + rr = &cfg80211_regdomain->reg_rules[i]; + fr = &rr->freq_range; + pr = &rr->power_rule; + max_bandwidth = freq_max_bandwidth(fr, center_freq); + if (max_bandwidth && *bandwidth <= max_bandwidth) { + *reg_rule = rr; + *bandwidth = max_bandwidth; break; } } - if (!rg) { - /* not found */ + return !max_bandwidth; +} + +static void handle_channel(struct ieee80211_channel *chan) +{ + int r; + u32 flags = chan->orig_flags; + u32 max_bandwidth = 0; + const struct ieee80211_reg_rule *reg_rule = NULL; + const struct ieee80211_power_rule *power_rule = NULL; + + r = freq_reg_info(MHZ_TO_KHZ(chan->center_freq), + &max_bandwidth, ®_rule); + + if (r) { flags |= IEEE80211_CHAN_DISABLED; chan->flags = flags; return; } - chan->flags = flags; + power_rule = ®_rule->power_rule; + + chan->flags = flags | map_regdom_flags(reg_rule->flags); chan->max_antenna_gain = min(chan->orig_mag, - rg->max_antenna_gain); + (int) MBI_TO_DBI(power_rule->max_antenna_gain)); + chan->max_bandwidth = KHZ_TO_MHZ(max_bandwidth); if (chan->orig_mpwr) - chan->max_power = min(chan->orig_mpwr, rg->max_power); + chan->max_power = min(chan->orig_mpwr, + (int) MBM_TO_DBM(power_rule->max_eirp)); else - chan->max_power = rg->max_power; + chan->max_power = (int) MBM_TO_DBM(power_rule->max_eirp); } -static void handle_band(struct ieee80211_supported_band *sband, - const struct ieee80211_regdomain *rd) +static void handle_band(struct ieee80211_supported_band *sband) { int i; for (i = 0; i < sband->n_channels; i++) - handle_channel(&sband->channels[i], rd); + handle_channel(&sband->channels[i]); } -void wiphy_update_regulatory(struct wiphy *wiphy) +static void update_all_wiphy_regulatory(enum reg_set_by setby) { - enum ieee80211_band band; - const struct ieee80211_regdomain *rd = get_regdom(); + struct cfg80211_registered_device *drv; + + list_for_each_entry(drv, &cfg80211_drv_list, list) + wiphy_update_regulatory(&drv->wiphy, setby); +} - for (band = 0; band < IEEE80211_NUM_BANDS; band++) +void wiphy_update_regulatory(struct wiphy *wiphy, enum reg_set_by setby) +{ + enum ieee80211_band band; + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) - handle_band(wiphy->bands[band], rd); + handle_band(wiphy->bands[band]); + if (wiphy->reg_notifier) + wiphy->reg_notifier(wiphy, setby); + } +} + +/* Caller must hold &cfg80211_drv_mutex */ +int __regulatory_hint(struct wiphy *wiphy, enum reg_set_by set_by, + const char *alpha2, struct ieee80211_regdomain *rd) +{ + struct regulatory_request *request; + char *rd_alpha2; + int r = 0; + + r = ignore_request(wiphy, set_by, (char *) alpha2, rd); + if (r) + return r; + + if (rd) + rd_alpha2 = rd->alpha2; + else + rd_alpha2 = (char *) alpha2; + + switch (set_by) { + case REGDOM_SET_BY_CORE: + case REGDOM_SET_BY_COUNTRY_IE: + case REGDOM_SET_BY_DRIVER: + case REGDOM_SET_BY_USER: + request = kzalloc(sizeof(struct regulatory_request), + GFP_KERNEL); + if (!request) + return -ENOMEM; + + request->alpha2[0] = rd_alpha2[0]; + request->alpha2[1] = rd_alpha2[1]; + request->initiator = set_by; + request->wiphy = wiphy; + + list_add_tail(&request->list, ®ulatory_requests); + if (rd) + break; + r = call_crda(alpha2); +#ifndef CONFIG_WIRELESS_OLD_REGULATORY + if (r) + printk(KERN_ERR "cfg80211: Failed calling CRDA\n"); +#endif + break; + default: + r = -ENOTSUPP; + break; + } + + return r; +} + +/* If rd is not NULL and if this call fails the caller must free it */ +int regulatory_hint(struct wiphy *wiphy, const char *alpha2, + struct ieee80211_regdomain *rd) +{ + int r; + BUG_ON(!rd && !alpha2); + + mutex_lock(&cfg80211_drv_mutex); + + r = __regulatory_hint(wiphy, REGDOM_SET_BY_DRIVER, alpha2, rd); + if (r || !rd) + goto unlock_and_exit; + + /* If the driver passed a regulatory domain we skipped asking + * userspace for one so we can now go ahead and set it */ + r = set_regdom(rd); + +unlock_and_exit: + mutex_unlock(&cfg80211_drv_mutex); + return r; +} +EXPORT_SYMBOL(regulatory_hint); + + +static void print_rd_rules(const struct ieee80211_regdomain *rd) +{ + unsigned int i; + const struct ieee80211_reg_rule *reg_rule = NULL; + const struct ieee80211_freq_range *freq_range = NULL; + const struct ieee80211_power_rule *power_rule = NULL; + + printk(KERN_INFO "\t(start_freq - end_freq @ bandwidth), " + "(max_antenna_gain, max_eirp)\n"); + + for (i = 0; i < rd->n_reg_rules; i++) { + reg_rule = &rd->reg_rules[i]; + freq_range = ®_rule->freq_range; + power_rule = ®_rule->power_rule; + + /* There may not be documentation for max antenna gain + * in certain regions */ + if (power_rule->max_antenna_gain) + printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), " + "(%d mBi, %d mBm)\n", + freq_range->start_freq_khz, + freq_range->end_freq_khz, + freq_range->max_bandwidth_khz, + power_rule->max_antenna_gain, + power_rule->max_eirp); + else + printk(KERN_INFO "\t(%d KHz - %d KHz @ %d KHz), " + "(N/A, %d mBm)\n", + freq_range->start_freq_khz, + freq_range->end_freq_khz, + freq_range->max_bandwidth_khz, + power_rule->max_eirp); + } +} + +static void print_regdomain(const struct ieee80211_regdomain *rd) +{ + + if (is_world_regdom(rd->alpha2)) + printk(KERN_INFO "cfg80211: World regulatory " + "domain updated:\n"); + else { + if (is_unknown_alpha2(rd->alpha2)) + printk(KERN_INFO "cfg80211: Regulatory domain " + "changed to driver built-in settings " + "(unknown country)\n"); + else + printk(KERN_INFO "cfg80211: Regulatory domain " + "changed to country: %c%c\n", + rd->alpha2[0], rd->alpha2[1]); + } + print_rd_rules(rd); +} + +void print_regdomain_info(const struct ieee80211_regdomain *rd) +{ + printk(KERN_INFO "cfg80211: Regulatory domain: %c%c\n", + rd->alpha2[0], rd->alpha2[1]); + print_rd_rules(rd); +} + +static int __set_regdom(const struct ieee80211_regdomain *rd) +{ + struct regulatory_request *request = NULL; + + /* Some basic sanity checks first */ + + if (is_world_regdom(rd->alpha2)) { + if (WARN_ON(!__reg_is_valid_request(rd->alpha2, &request))) + return -EINVAL; + update_world_regdomain(rd); + return 0; + } + + if (!is_alpha2_set(rd->alpha2) && !is_an_alpha2(rd->alpha2) && + !is_unknown_alpha2(rd->alpha2)) + return -EINVAL; + + if (list_empty(®ulatory_requests)) + return -EINVAL; + + /* allow overriding the static definitions if CRDA is present */ + if (!is_old_static_regdom(cfg80211_regdomain) && + !regdom_changed(rd->alpha2)) + return -EINVAL; + + /* Now lets set the regulatory domain, update all driver channels + * and finally inform them of what we have done, in case they want + * to review or adjust their own settings based on their own + * internal EEPROM data */ + + if (WARN_ON(!__reg_is_valid_request(rd->alpha2, &request))) + return -EINVAL; + + reset_regdomains(); + + /* Country IE parsing coming soon */ + switch (request->initiator) { + case REGDOM_SET_BY_CORE: + case REGDOM_SET_BY_DRIVER: + case REGDOM_SET_BY_USER: + if (!is_valid_rd(rd)) { + printk(KERN_ERR "cfg80211: Invalid " + "regulatory domain detected:\n"); + print_regdomain_info(rd); + return -EINVAL; + } + break; + case REGDOM_SET_BY_COUNTRY_IE: /* Not yet */ + WARN_ON(1); + default: + return -EOPNOTSUPP; + } + + /* Tada! */ + cfg80211_regdomain = rd; + request->granted = 1; + + return 0; +} + + +/* Use this call to set the current regulatory domain. Conflicts with + * multiple drivers can be ironed out later. Caller must've already + * kmalloc'd the rd structure. If this calls fails you should kfree() + * the passed rd. Caller must hold cfg80211_drv_mutex */ +int set_regdom(const struct ieee80211_regdomain *rd) +{ + struct regulatory_request *this_request = NULL, *prev_request = NULL; + int r; + + if (!list_empty(®ulatory_requests)) + prev_request = list_first_entry(®ulatory_requests, + struct regulatory_request, list); + + /* Note that this doesn't update the wiphys, this is done below */ + r = __set_regdom(rd); + if (r) + return r; + + BUG_ON((!__reg_is_valid_request(rd->alpha2, &this_request))); + + /* The initial standard core update of the world regulatory domain, no + * need to keep that request info around if it didn't fail. */ + if (is_world_regdom(rd->alpha2) && + this_request->initiator == REGDOM_SET_BY_CORE && + this_request->granted) { + list_del(&this_request->list); + kfree(this_request); + this_request = NULL; + } + + /* Remove old requests, we only leave behind the last one */ + if (prev_request) { + list_del(&prev_request->list); + kfree(prev_request); + prev_request = NULL; + } + + /* This would make this whole thing pointless */ + BUG_ON(rd != cfg80211_regdomain); + + /* update all wiphys now with the new established regulatory domain */ + update_all_wiphy_regulatory(this_request->initiator); + + print_regdomain(rd); + + return r; +} + +int regulatory_init(void) +{ + int err; + + reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0); + if (IS_ERR(reg_pdev)) + return PTR_ERR(reg_pdev); + +#ifdef CONFIG_WIRELESS_OLD_REGULATORY + cfg80211_regdomain = static_regdom(ieee80211_regdom); + + printk(KERN_INFO "cfg80211: Using static regulatory domain info\n"); + print_regdomain_info(cfg80211_regdomain); + /* The old code still requests for a new regdomain and if + * you have CRDA you get it updated, otherwise you get + * stuck with the static values. We ignore "EU" code as + * that is not a valid ISO / IEC 3166 alpha2 */ + if (ieee80211_regdom[0] != 'E' && ieee80211_regdom[1] != 'U') + err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, + ieee80211_regdom, NULL); +#else + cfg80211_regdomain = cfg80211_world_regdom; + + err = __regulatory_hint(NULL, REGDOM_SET_BY_CORE, "00", NULL); + if (err) + printk(KERN_ERR "cfg80211: calling CRDA failed - " + "unable to update world regulatory domain, " + "using static definition\n"); +#endif + + return 0; +} + +void regulatory_exit(void) +{ + struct regulatory_request *req, *req_tmp; + + mutex_lock(&cfg80211_drv_mutex); + + reset_regdomains(); + + list_for_each_entry_safe(req, req_tmp, ®ulatory_requests, list) { + list_del(&req->list); + kfree(req); + } + platform_device_unregister(reg_pdev); + + mutex_unlock(&cfg80211_drv_mutex); } diff --git a/net/wireless/reg.h b/net/wireless/reg.h new file mode 100644 index 000000000000..a33362872f3c --- /dev/null +++ b/net/wireless/reg.h @@ -0,0 +1,13 @@ +#ifndef __NET_WIRELESS_REG_H +#define __NET_WIRELESS_REG_H + +extern struct mutex cfg80211_reg_mutex; +bool is_world_regdom(const char *alpha2); +bool reg_is_valid_request(const char *alpha2); + +int regulatory_init(void); +void regulatory_exit(void); + +int set_regdom(const struct ieee80211_regdomain *rd); + +#endif /* __NET_WIRELESS_REG_H */ diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index ac25b4c0e982..dc50f1e71f76 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -27,10 +27,14 @@ static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb) - skb_headroom(skb); int ntail = dst->dev->needed_tailroom - skb_tailroom(skb); - if (nhead > 0 || ntail > 0) - return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); - - return 0; + if (nhead <= 0) { + if (ntail <= 0) + return 0; + nhead = 0; + } else if (ntail < 0) + ntail = 0; + + return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); } static int xfrm_output_one(struct sk_buff *skb, int err) diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index b7754b1b73a4..058f04f54b90 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -34,7 +34,7 @@ #include "xfrm_hash.h" -int sysctl_xfrm_larval_drop __read_mostly; +int sysctl_xfrm_larval_drop __read_mostly = 1; #ifdef CONFIG_XFRM_STATISTICS DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly; @@ -46,7 +46,7 @@ EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); -static struct list_head xfrm_policy_bytype[XFRM_POLICY_TYPE_MAX]; +static struct list_head xfrm_policy_all; unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; EXPORT_SYMBOL(xfrm_policy_count); @@ -164,7 +164,7 @@ static void xfrm_policy_timer(unsigned long data) read_lock(&xp->lock); - if (xp->dead) + if (xp->walk.dead) goto out; dir = xfrm_policy_id2dir(xp->index); @@ -236,7 +236,7 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { - INIT_LIST_HEAD(&policy->bytype); + INIT_LIST_HEAD(&policy->walk.all); INIT_HLIST_NODE(&policy->bydst); INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); @@ -252,17 +252,13 @@ EXPORT_SYMBOL(xfrm_policy_alloc); void xfrm_policy_destroy(struct xfrm_policy *policy) { - BUG_ON(!policy->dead); + BUG_ON(!policy->walk.dead); BUG_ON(policy->bundles); if (del_timer(&policy->timer)) BUG(); - write_lock_bh(&xfrm_policy_lock); - list_del(&policy->bytype); - write_unlock_bh(&xfrm_policy_lock); - security_xfrm_policy_free(policy->security); kfree(policy); } @@ -310,8 +306,8 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) int dead; write_lock_bh(&policy->lock); - dead = policy->dead; - policy->dead = 1; + dead = policy->walk.dead; + policy->walk.dead = 1; write_unlock_bh(&policy->lock); if (unlikely(dead)) { @@ -319,9 +315,9 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) return; } - spin_lock(&xfrm_policy_gc_lock); + spin_lock_bh(&xfrm_policy_gc_lock); hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); - spin_unlock(&xfrm_policy_gc_lock); + spin_unlock_bh(&xfrm_policy_gc_lock); schedule_work(&xfrm_policy_gc_work); } @@ -609,6 +605,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) if (delpol) { hlist_del(&delpol->bydst); hlist_del(&delpol->byidx); + list_del(&delpol->walk.all); xfrm_policy_count[dir]--; } policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); @@ -617,7 +614,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - list_add_tail(&policy->bytype, &xfrm_policy_bytype[policy->type]); + list_add(&policy->walk.all, &xfrm_policy_all); write_unlock_bh(&xfrm_policy_lock); if (delpol) @@ -684,6 +681,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, } hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); xfrm_policy_count[dir]--; } ret = pol; @@ -727,6 +725,7 @@ struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete, } hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); xfrm_policy_count[dir]--; } ret = pol; @@ -840,6 +839,7 @@ int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info) continue; hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); write_unlock_bh(&xfrm_policy_lock); xfrm_audit_policy_delete(pol, 1, @@ -867,60 +867,68 @@ int xfrm_policy_walk(struct xfrm_policy_walk *walk, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *old, *pol, *last = NULL; + struct xfrm_policy *pol; + struct xfrm_policy_walk_entry *x; int error = 0; if (walk->type >= XFRM_POLICY_TYPE_MAX && walk->type != XFRM_POLICY_TYPE_ANY) return -EINVAL; - if (walk->policy == NULL && walk->count != 0) + if (list_empty(&walk->walk.all) && walk->seq != 0) return 0; - old = pol = walk->policy; - walk->policy = NULL; - read_lock_bh(&xfrm_policy_lock); - - for (; walk->cur_type < XFRM_POLICY_TYPE_MAX; walk->cur_type++) { - if (walk->type != walk->cur_type && - walk->type != XFRM_POLICY_TYPE_ANY) + write_lock_bh(&xfrm_policy_lock); + if (list_empty(&walk->walk.all)) + x = list_first_entry(&xfrm_policy_all, struct xfrm_policy_walk_entry, all); + else + x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); + list_for_each_entry_from(x, &xfrm_policy_all, all) { + if (x->dead) continue; - - if (pol == NULL) { - pol = list_first_entry(&xfrm_policy_bytype[walk->cur_type], - struct xfrm_policy, bytype); - } - list_for_each_entry_from(pol, &xfrm_policy_bytype[walk->cur_type], bytype) { - if (pol->dead) - continue; - if (last) { - error = func(last, xfrm_policy_id2dir(last->index), - walk->count, data); - if (error) { - xfrm_pol_hold(last); - walk->policy = last; - goto out; - } - } - last = pol; - walk->count++; + pol = container_of(x, struct xfrm_policy, walk); + if (walk->type != XFRM_POLICY_TYPE_ANY && + walk->type != pol->type) + continue; + error = func(pol, xfrm_policy_id2dir(pol->index), + walk->seq, data); + if (error) { + list_move_tail(&walk->walk.all, &x->all); + goto out; } - pol = NULL; + walk->seq++; } - if (walk->count == 0) { + if (walk->seq == 0) { error = -ENOENT; goto out; } - if (last) - error = func(last, xfrm_policy_id2dir(last->index), 0, data); + list_del_init(&walk->walk.all); out: - read_unlock_bh(&xfrm_policy_lock); - if (old != NULL) - xfrm_pol_put(old); + write_unlock_bh(&xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); +void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type) +{ + INIT_LIST_HEAD(&walk->walk.all); + walk->walk.dead = 1; + walk->type = type; + walk->seq = 0; +} +EXPORT_SYMBOL(xfrm_policy_walk_init); + +void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) +{ + if (list_empty(&walk->walk.all)) + return; + + write_lock_bh(&xfrm_policy_lock); + list_del(&walk->walk.all); + write_unlock_bh(&xfrm_policy_lock); +} +EXPORT_SYMBOL(xfrm_policy_walk_done); + /* * Find policy to apply to this flow. * @@ -1077,7 +1085,7 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) struct hlist_head *chain = policy_hash_bysel(&pol->selector, pol->family, dir); - list_add_tail(&pol->bytype, &xfrm_policy_bytype[pol->type]); + list_add(&pol->walk.all, &xfrm_policy_all); hlist_add_head(&pol->bydst, chain); hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); xfrm_policy_count[dir]++; @@ -1095,6 +1103,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, hlist_del(&pol->bydst); hlist_del(&pol->byidx); + list_del(&pol->walk.all); xfrm_policy_count[dir]--; return pol; @@ -1242,6 +1251,8 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, -EINVAL : -EAGAIN); xfrm_state_put(x); } + else if (error == -ESRCH) + error = -EAGAIN; if (!tmpl->optional) goto fail; @@ -1720,7 +1731,7 @@ restart: for (pi = 0; pi < npols; pi++) { read_lock_bh(&pols[pi]->lock); - pol_dead |= pols[pi]->dead; + pol_dead |= pols[pi]->walk.dead; read_unlock_bh(&pols[pi]->lock); } @@ -2415,9 +2426,7 @@ static void __init xfrm_policy_init(void) panic("XFRM: failed to allocate bydst hash\n"); } - for (dir = 0; dir < XFRM_POLICY_TYPE_MAX; dir++) - INIT_LIST_HEAD(&xfrm_policy_bytype[dir]); - + INIT_LIST_HEAD(&xfrm_policy_all); INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task); register_netdevice_notifier(&xfrm_dev_notifier); } @@ -2601,7 +2610,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol, int i, j, n = 0; write_lock_bh(&pol->lock); - if (unlikely(pol->dead)) { + if (unlikely(pol->walk.dead)) { /* target policy has been deleted */ write_unlock_bh(&pol->lock); return -ENOENT; @@ -2672,7 +2681,8 @@ static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate) } int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -2716,7 +2726,7 @@ int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type, } /* Stage 5 - announce */ - km_migrate(sel, dir, type, m, num_migrate); + km_migrate(sel, dir, type, m, num_migrate, k); xfrm_pol_put(pol); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0a8f09c3144c..508337f97249 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -408,11 +408,10 @@ static void xfrm_state_gc_task(struct work_struct *data) struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); - gc_list.first = xfrm_state_gc_list.first; - INIT_HLIST_HEAD(&xfrm_state_gc_list); + hlist_move_list(&xfrm_state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst) + hlist_for_each_entry_safe(x, entry, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); wake_up(&km_waitq); @@ -514,7 +513,7 @@ struct xfrm_state *xfrm_state_alloc(void) if (x) { atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); - INIT_LIST_HEAD(&x->all); + INIT_LIST_HEAD(&x->km.all); INIT_HLIST_NODE(&x->bydst); INIT_HLIST_NODE(&x->bysrc); INIT_HLIST_NODE(&x->byspi); @@ -540,12 +539,8 @@ void __xfrm_state_destroy(struct xfrm_state *x) { WARN_ON(x->km.state != XFRM_STATE_DEAD); - spin_lock_bh(&xfrm_state_lock); - list_del(&x->all); - spin_unlock_bh(&xfrm_state_lock); - spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->bydst, &xfrm_state_gc_list); + hlist_add_head(&x->gclist, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); schedule_work(&xfrm_state_gc_work); } @@ -558,6 +553,7 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); + list_del(&x->km.all); hlist_del(&x->bydst); hlist_del(&x->bysrc); if (x->id.spi) @@ -858,7 +854,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - list_add_tail(&x->all, &xfrm_state_all); + list_add(&x->km.all, &xfrm_state_all); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); @@ -927,7 +923,7 @@ static void __xfrm_state_insert(struct xfrm_state *x) x->genid = ++xfrm_state_genid; - list_add_tail(&x->all, &xfrm_state_all); + list_add(&x->km.all, &xfrm_state_all); h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); @@ -1056,7 +1052,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re xfrm_state_hold(x); x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ; add_timer(&x->timer); - list_add_tail(&x->all, &xfrm_state_all); + list_add(&x->km.all, &xfrm_state_all); hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(daddr, saddr, family); hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); @@ -1553,47 +1549,62 @@ int xfrm_state_walk(struct xfrm_state_walk *walk, int (*func)(struct xfrm_state *, int, void*), void *data) { - struct xfrm_state *old, *x, *last = NULL; + struct xfrm_state *state; + struct xfrm_state_walk *x; int err = 0; - if (walk->state == NULL && walk->count != 0) + if (walk->seq != 0 && list_empty(&walk->all)) return 0; - old = x = walk->state; - walk->state = NULL; spin_lock_bh(&xfrm_state_lock); - if (x == NULL) - x = list_first_entry(&xfrm_state_all, struct xfrm_state, all); + if (list_empty(&walk->all)) + x = list_first_entry(&xfrm_state_all, struct xfrm_state_walk, all); + else + x = list_entry(&walk->all, struct xfrm_state_walk, all); list_for_each_entry_from(x, &xfrm_state_all, all) { - if (x->km.state == XFRM_STATE_DEAD) + if (x->state == XFRM_STATE_DEAD) continue; - if (!xfrm_id_proto_match(x->id.proto, walk->proto)) + state = container_of(x, struct xfrm_state, km); + if (!xfrm_id_proto_match(state->id.proto, walk->proto)) continue; - if (last) { - err = func(last, walk->count, data); - if (err) { - xfrm_state_hold(last); - walk->state = last; - goto out; - } + err = func(state, walk->seq, data); + if (err) { + list_move_tail(&walk->all, &x->all); + goto out; } - last = x; - walk->count++; + walk->seq++; } - if (walk->count == 0) { + if (walk->seq == 0) { err = -ENOENT; goto out; } - if (last) - err = func(last, 0, data); + list_del_init(&walk->all); out: spin_unlock_bh(&xfrm_state_lock); - if (old != NULL) - xfrm_state_put(old); return err; } EXPORT_SYMBOL(xfrm_state_walk); +void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto) +{ + INIT_LIST_HEAD(&walk->all); + walk->proto = proto; + walk->state = XFRM_STATE_DEAD; + walk->seq = 0; +} +EXPORT_SYMBOL(xfrm_state_walk_init); + +void xfrm_state_walk_done(struct xfrm_state_walk *walk) +{ + if (list_empty(&walk->all)) + return; + + spin_lock_bh(&xfrm_state_lock); + list_del(&walk->all); + spin_lock_bh(&xfrm_state_lock); +} +EXPORT_SYMBOL(xfrm_state_walk_done); + void xfrm_replay_notify(struct xfrm_state *x, int event) { @@ -1803,7 +1814,8 @@ EXPORT_SYMBOL(km_policy_expired); #ifdef CONFIG_XFRM_MIGRATE int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { int err = -EINVAL; int ret; @@ -1812,7 +1824,7 @@ int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { if (km->migrate) { - ret = km->migrate(sel, dir, type, m, num_migrate); + ret = km->migrate(sel, dir, type, m, num_migrate, k); if (!ret) err = ret; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 04c41504f84c..a278a6f3b991 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1102,7 +1102,7 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, return xp; error: *errp = err; - xp->dead = 1; + xp->walk.dead = 1; xfrm_policy_destroy(xp); return NULL; } @@ -1595,7 +1595,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENOENT; read_lock(&xp->lock); - if (xp->dead) { + if (xp->walk.dead) { read_unlock(&xp->lock); goto out; } @@ -1710,12 +1710,23 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, #ifdef CONFIG_XFRM_MIGRATE static int copy_from_user_migrate(struct xfrm_migrate *ma, + struct xfrm_kmaddress *k, struct nlattr **attrs, int *num) { struct nlattr *rt = attrs[XFRMA_MIGRATE]; struct xfrm_user_migrate *um; int i, num_migrate; + if (k != NULL) { + struct xfrm_user_kmaddress *uk; + + uk = nla_data(attrs[XFRMA_KMADDRESS]); + memcpy(&k->local, &uk->local, sizeof(k->local)); + memcpy(&k->remote, &uk->remote, sizeof(k->remote)); + k->family = uk->family; + k->reserved = uk->reserved; + } + um = nla_data(rt); num_migrate = nla_len(rt) / sizeof(*um); @@ -1745,6 +1756,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, { struct xfrm_userpolicy_id *pi = nlmsg_data(nlh); struct xfrm_migrate m[XFRM_MAX_DEPTH]; + struct xfrm_kmaddress km, *kmp; u8 type; int err; int n = 0; @@ -1752,19 +1764,20 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_MIGRATE] == NULL) return -EINVAL; + kmp = attrs[XFRMA_KMADDRESS] ? &km : NULL; + err = copy_from_user_policy_type(&type, attrs); if (err) return err; - err = copy_from_user_migrate((struct xfrm_migrate *)m, - attrs, &n); + err = copy_from_user_migrate((struct xfrm_migrate *)m, kmp, attrs, &n); if (err) return err; if (!n) return 0; - xfrm_migrate(&pi->sel, pi->dir, type, m, n); + xfrm_migrate(&pi->sel, pi->dir, type, m, n, kmp); return 0; } @@ -1795,16 +1808,30 @@ static int copy_to_user_migrate(struct xfrm_migrate *m, struct sk_buff *skb) return nla_put(skb, XFRMA_MIGRATE, sizeof(um), &um); } -static inline size_t xfrm_migrate_msgsize(int num_migrate) +static int copy_to_user_kmaddress(struct xfrm_kmaddress *k, struct sk_buff *skb) +{ + struct xfrm_user_kmaddress uk; + + memset(&uk, 0, sizeof(uk)); + uk.family = k->family; + uk.reserved = k->reserved; + memcpy(&uk.local, &k->local, sizeof(uk.local)); + memcpy(&uk.remote, &k->remote, sizeof(uk.remote)); + + return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); +} + +static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma) { return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) - + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) - + userpolicy_type_attrsize(); + + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0) + + nla_total_size(sizeof(struct xfrm_user_migrate) * num_migrate) + + userpolicy_type_attrsize(); } static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, - int num_migrate, struct xfrm_selector *sel, - u8 dir, u8 type) + int num_migrate, struct xfrm_kmaddress *k, + struct xfrm_selector *sel, u8 dir, u8 type) { struct xfrm_migrate *mp; struct xfrm_userpolicy_id *pol_id; @@ -1821,6 +1848,9 @@ static int build_migrate(struct sk_buff *skb, struct xfrm_migrate *m, memcpy(&pol_id->sel, sel, sizeof(pol_id->sel)); pol_id->dir = dir; + if (k != NULL && (copy_to_user_kmaddress(k, skb) < 0)) + goto nlmsg_failure; + if (copy_to_user_policy_type(type, skb) < 0) goto nlmsg_failure; @@ -1836,23 +1866,25 @@ nlmsg_failure: } static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { struct sk_buff *skb; - skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate), GFP_ATOMIC); + skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; /* build migrate */ - if (build_migrate(skb, m, num_migrate, sel, dir, type) < 0) + if (build_migrate(skb, m, num_migrate, k, sel, dir, type) < 0) BUG(); return nlmsg_multicast(xfrm_nl, skb, 0, XFRMNLGRP_MIGRATE, GFP_ATOMIC); } #else static int xfrm_send_migrate(struct xfrm_selector *sel, u8 dir, u8 type, - struct xfrm_migrate *m, int num_migrate) + struct xfrm_migrate *m, int num_migrate, + struct xfrm_kmaddress *k) { return -ENOPROTOOPT; } @@ -1901,6 +1933,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_COADDR] = { .len = sizeof(xfrm_address_t) }, [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)}, [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, + [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) }, }; static struct xfrm_link { |