| field | value | date |
|---|---|---|
| author | Dmitry Torokhov <dtor_core@ameritech.net> | 2006-06-26 07:31:38 +0200 |
| committer | Dmitry Torokhov <dtor_core@ameritech.net> | 2006-06-26 07:31:38 +0200 |
| commit | 4854c7b27f0975a2b629f35ea3996d2968eb7c4f (patch) | |
| tree | 4102bdb70289764a2058aff0f907b13d7cf0e0d1 /net/core | |
| parent | Input: fix accuracy of fixp-arith.h (diff) | |
| parent | [PATCH] uclinux: use PER_LINUX_32BIT in binfmt_flat (diff) | |
| download | linux-4854c7b27f0975a2b629f35ea3996d2968eb7c4f.tar.xz, linux-4854c7b27f0975a2b629f35ea3996d2968eb7c4f.zip | |
Merge rsync://rsync.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'net/core')

| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | net/core/Makefile | 1 |
| -rw-r--r-- | net/core/dev.c | 294 |
| -rw-r--r-- | net/core/dev_mcast.c | 28 |
| -rw-r--r-- | net/core/ethtool.c | 38 |
| -rw-r--r-- | net/core/link_watch.c | 5 |
| -rw-r--r-- | net/core/netpoll.c | 9 |
| -rw-r--r-- | net/core/pktgen.c | 4 |
| -rw-r--r-- | net/core/skbuff.c | 188 |
| -rw-r--r-- | net/core/sock.c | 6 |
| -rw-r--r-- | net/core/user_dma.c | 131 |

10 files changed, 583 insertions, 121 deletions
```diff
diff --git a/net/core/Makefile b/net/core/Makefile
index 79fe12cced27..e9bd2467d5a9 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -16,3 +16,4 @@ obj-$(CONFIG_NET_DIVERT) += dv.o
 obj-$(CONFIG_NET_PKTGEN) += pktgen.o
 obj-$(CONFIG_WIRELESS_EXT) += wireless.o
 obj-$(CONFIG_NETPOLL) += netpoll.o
+obj-$(CONFIG_NET_DMA) += user_dma.o
```

```diff
diff --git a/net/core/dev.c b/net/core/dev.c
index 4fba549caf29..ea2469398bd5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -115,6 +115,8 @@
 #include <net/iw_handler.h>
 #include <asm/current.h>
 #include <linux/audit.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
 
 /*
  * The list of packet types we will receive (as opposed to discard)
@@ -148,6 +150,12 @@ static DEFINE_SPINLOCK(ptype_lock);
 static struct list_head ptype_base[16];	/* 16 way hashed list */
 static struct list_head ptype_all;	/* Taps */
 
+#ifdef CONFIG_NET_DMA
+static struct dma_client *net_dma_client;
+static unsigned int net_dma_count;
+static spinlock_t net_dma_event_lock;
+#endif
+
 /*
  * The @dev_base list is protected by @dev_base_lock and the rtnl
  * semaphore.
@@ -1041,7 +1049,7 @@ static inline void net_timestamp(struct sk_buff *skb)
  *	taps currently in use.
  */
 
-void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct packet_type *ptype;
 
@@ -1179,6 +1187,40 @@ out:
 	return ret;
 }
 
+/**
+ *	skb_gso_segment - Perform segmentation on skb.
+ *	@skb: buffer to segment
+ *	@sg: whether scatter-gather is supported on the target.
+ *
+ *	This function segments the given skb and returns a list of segments.
+ */
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, int sg)
+{
+	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+	struct packet_type *ptype;
+	int type = skb->protocol;
+
+	BUG_ON(skb_shinfo(skb)->frag_list);
+	BUG_ON(skb->ip_summed != CHECKSUM_HW);
+
+	skb->mac.raw = skb->data;
+	skb->mac_len = skb->nh.raw - skb->data;
+	__skb_pull(skb, skb->mac_len);
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
+		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+			segs = ptype->gso_segment(skb, sg);
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return segs;
+}
+
+EXPORT_SYMBOL(skb_gso_segment);
+
```
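skb_gso_segment() dispatches on the L2 protocol type: a protocol opts in by filling the new gso_segment hook in its struct packet_type. A minimal sketch of that wiring; inet_gso_segment() and the exact initializer come from the companion net/ipv4/af_inet.c changes in this merge, not from the hunks shown here, so treat the details as illustrative:

```c
/* Sketch (assumption): a protocol advertising a GSO handler through the
 * packet_type hook added above.  skb_gso_segment() finds this entry in
 * ptype_base[] and calls ->gso_segment() to split one oversized skb
 * into a list of MTU-sized segments. */
static struct packet_type ip_packet_type = {
	.type = __constant_htons(ETH_P_IP),
	.func = ip_rcv,
	.gso_segment = inet_gso_segment,	/* defined in the af_inet.c side of this merge */
};
```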
```diff
 /* Take action when hardware reception checksum errors are detected. */
 #ifdef CONFIG_BUG
 void netdev_rx_csum_fault(struct net_device *dev)
@@ -1215,75 +1257,95 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 #define illegal_highdma(dev, skb)	(0)
 #endif
 
-/* Keep head the same: replace data */
-int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
+struct dev_gso_cb {
+	void (*destructor)(struct sk_buff *skb);
+};
+
+#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
+
+static void dev_gso_skb_destructor(struct sk_buff *skb)
 {
-	unsigned int size;
-	u8 *data;
-	long offset;
-	struct skb_shared_info *ninfo;
-	int headerlen = skb->data - skb->head;
-	int expand = (skb->tail + skb->data_len) - skb->end;
+	struct dev_gso_cb *cb;
 
-	if (skb_shared(skb))
-		BUG();
+	do {
+		struct sk_buff *nskb = skb->next;
 
-	if (expand <= 0)
-		expand = 0;
+		skb->next = nskb->next;
+		nskb->next = NULL;
+		kfree_skb(nskb);
+	} while (skb->next);
 
-	size = skb->end - skb->head + expand;
-	size = SKB_DATA_ALIGN(size);
-	data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
-	if (!data)
-		return -ENOMEM;
+	cb = DEV_GSO_CB(skb);
+	if (cb->destructor)
+		cb->destructor(skb);
+}
+
+/**
+ *	dev_gso_segment - Perform emulated hardware segmentation on skb.
+ *	@skb: buffer to segment
+ *
+ *	This function segments the given skb and stores the list of segments
+ *	in skb->next.
+ */
+static int dev_gso_segment(struct sk_buff *skb)
+{
+	struct net_device *dev = skb->dev;
+	struct sk_buff *segs;
 
-	/* Copy entire thing */
-	if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
-		BUG();
+	segs = skb_gso_segment(skb, dev->features & NETIF_F_SG &&
+				    !illegal_highdma(dev, skb));
+	if (unlikely(IS_ERR(segs)))
+		return PTR_ERR(segs);
 
-	/* Set up shinfo */
-	ninfo = (struct skb_shared_info*)(data + size);
-	atomic_set(&ninfo->dataref, 1);
-	ninfo->tso_size = skb_shinfo(skb)->tso_size;
-	ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
-	ninfo->nr_frags = 0;
-	ninfo->frag_list = NULL;
+	skb->next = segs;
+	DEV_GSO_CB(skb)->destructor = skb->destructor;
+	skb->destructor = dev_gso_skb_destructor;
 
-	/* Offset between the two in bytes */
-	offset = data - skb->head;
+	return 0;
+}
 
-	/* Free old data. */
-	skb_release_data(skb);
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	if (likely(!skb->next)) {
+		if (netdev_nit)
+			dev_queue_xmit_nit(skb, dev);
 
-	skb->head = data;
-	skb->end  = data + size;
+		if (!netif_needs_gso(dev, skb))
+			return dev->hard_start_xmit(skb, dev);
+
+		if (unlikely(dev_gso_segment(skb)))
+			goto out_kfree_skb;
+	}
 
-	/* Set up new pointers */
-	skb->h.raw += offset;
-	skb->nh.raw += offset;
-	skb->mac.raw += offset;
-	skb->tail += offset;
-	skb->data += offset;
+	do {
+		struct sk_buff *nskb = skb->next;
+		int rc;
 
-	/* We are no longer a clone, even if we were. */
-	skb->cloned = 0;
+		skb->next = nskb->next;
+		nskb->next = NULL;
+		rc = dev->hard_start_xmit(nskb, dev);
+		if (unlikely(rc)) {
+			skb->next = nskb;
+			return rc;
+		}
+	} while (skb->next);
+
+	skb->destructor = DEV_GSO_CB(skb)->destructor;
 
-	skb->tail += skb->data_len;
-	skb->data_len = 0;
+out_kfree_skb:
+	kfree_skb(skb);
 	return 0;
 }
 
 #define HARD_TX_LOCK(dev, cpu) {			\
 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
-		spin_lock(&dev->xmit_lock);		\
-		dev->xmit_lock_owner = cpu;		\
+		netif_tx_lock(dev);			\
 	}						\
 }
 
 #define HARD_TX_UNLOCK(dev) {				\
 	if ((dev->features & NETIF_F_LLTX) == 0) {	\
-		dev->xmit_lock_owner = -1;		\
-		spin_unlock(&dev->xmit_lock);		\
+		netif_tx_unlock(dev);			\
 	}						\
 }
 
@@ -1319,9 +1381,13 @@ int dev_queue_xmit(struct sk_buff *skb)
 	struct Qdisc *q;
 	int rc = -ENOMEM;
 
+	/* GSO will handle the following emulations directly. */
+	if (netif_needs_gso(dev, skb))
+		goto gso;
+
 	if (skb_shinfo(skb)->frag_list &&
 	    !(dev->features & NETIF_F_FRAGLIST) &&
-	    __skb_linearize(skb, GFP_ATOMIC))
+	    __skb_linearize(skb))
 		goto out_kfree_skb;
 
 	/* Fragmented skb is linearized if device does not support SG,
@@ -1330,25 +1396,26 @@ int dev_queue_xmit(struct sk_buff *skb)
 	 */
 	if (skb_shinfo(skb)->nr_frags &&
 	    (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
-	    __skb_linearize(skb, GFP_ATOMIC))
+	    __skb_linearize(skb))
 		goto out_kfree_skb;
 
 	/* If packet is not checksummed and device does not support
 	 * checksumming for this protocol, complete checksumming here.
 	 */
 	if (skb->ip_summed == CHECKSUM_HW &&
-	    (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
+	    (!(dev->features & NETIF_F_GEN_CSUM) &&
 	     (!(dev->features & NETIF_F_IP_CSUM) ||
 	      skb->protocol != htons(ETH_P_IP))))
 		if (skb_checksum_help(skb, 0))
 			goto out_kfree_skb;
 
+gso:
 	spin_lock_prefetch(&dev->queue_lock);
 
 	/* Disable soft irqs for various locks below. Also
 	 * stops preemption for RCU.
 	 */
-	local_bh_disable();
+	rcu_read_lock_bh();
 
 	/* Updates of qdisc are serialized by queue_lock.
 	 * The struct Qdisc which is pointed to by qdisc is now a
@@ -1382,8 +1449,8 @@ int dev_queue_xmit(struct sk_buff *skb)
 	   The device has no queue. Common case for software devices:
 	   loopback, all the sorts of tunnels...
 
-	   Really, it is unlikely that xmit_lock protection is necessary here.
-	   (f.e. loopback and IP tunnels are clean ignoring statistics
+	   Really, it is unlikely that netif_tx_lock protection is necessary
+	   here.  (f.e. loopback and IP tunnels are clean ignoring statistics
 	   counters.)
 	   However, it is possible, that they rely on protection
 	   made by us here.
@@ -1399,11 +1466,8 @@ int dev_queue_xmit(struct sk_buff *skb)
 			HARD_TX_LOCK(dev, cpu);
 
 			if (!netif_queue_stopped(dev)) {
-				if (netdev_nit)
-					dev_queue_xmit_nit(skb, dev);
-
 				rc = 0;
-				if (!dev->hard_start_xmit(skb, dev)) {
+				if (!dev_hard_start_xmit(skb, dev)) {
 					HARD_TX_UNLOCK(dev);
 					goto out;
 				}
@@ -1422,13 +1486,13 @@ int dev_queue_xmit(struct sk_buff *skb)
 	}
 
 	rc = -ENETDOWN;
-	local_bh_enable();
+	rcu_read_unlock_bh();
 
 out_kfree_skb:
 	kfree_skb(skb);
 	return rc;
 out:
-	local_bh_enable();
+	rcu_read_unlock_bh();
 	return rc;
 }
 
@@ -1846,6 +1910,19 @@ static void net_rx_action(struct softirq_action *h)
 		}
 	}
 out:
+#ifdef CONFIG_NET_DMA
+	/*
+	 * There may not be any more sk_buffs coming right now, so push
+	 * any pending DMA copies to hardware
+	 */
+	if (net_dma_client) {
+		struct dma_chan *chan;
+		rcu_read_lock();
+		list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
+			dma_async_memcpy_issue_pending(chan);
+		rcu_read_unlock();
+	}
+#endif
 	local_irq_enable();
 	return;
 
@@ -2785,7 +2862,7 @@ int register_netdevice(struct net_device *dev)
 	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
 
 	spin_lock_init(&dev->queue_lock);
-	spin_lock_init(&dev->xmit_lock);
+	spin_lock_init(&dev->_xmit_lock);
 	dev->xmit_lock_owner = -1;
 #ifdef CONFIG_NET_CLS_ACT
 	spin_lock_init(&dev->ingress_lock);
@@ -2829,9 +2906,7 @@ int register_netdevice(struct net_device *dev)
 
 	/* Fix illegal SG+CSUM combinations. */
 	if ((dev->features & NETIF_F_SG) &&
-	    !(dev->features & (NETIF_F_IP_CSUM |
-			       NETIF_F_NO_CSUM |
-			       NETIF_F_HW_CSUM))) {
+	    !(dev->features & NETIF_F_ALL_CSUM)) {
 		printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
 		       dev->name);
 		dev->features &= ~NETIF_F_SG;
@@ -3022,7 +3097,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
 static DEFINE_MUTEX(net_todo_run_mutex);
 void netdev_run_todo(void)
 {
-	struct list_head list = LIST_HEAD_INIT(list);
+	struct list_head list;
 
 	/* Need to guard against multiple cpu's getting out of order. */
 	mutex_lock(&net_todo_run_mutex);
@@ -3037,9 +3112,9 @@ void netdev_run_todo(void)
 
 	/* Snapshot list, allow later requests */
 	spin_lock(&net_todo_list_lock);
-	list_splice_init(&net_todo_list, &list);
+	list_replace_init(&net_todo_list, &list);
 	spin_unlock(&net_todo_list_lock);
 
 	while (!list_empty(&list)) {
 		struct net_device *dev
 			= list_entry(list.next, struct net_device, todo_list);
@@ -3300,6 +3375,88 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
+#ifdef CONFIG_NET_DMA
+/**
+ * net_dma_rebalance -
+ * This is called when the number of channels allocated to the
+ * net_dma_client changes.  The net_dma_client tries to have one DMA
+ * channel per CPU.
+ */
+static void net_dma_rebalance(void)
+{
+	unsigned int cpu, i, n;
+	struct dma_chan *chan;
+
+	lock_cpu_hotplug();
+
+	if (net_dma_count == 0) {
+		for_each_online_cpu(cpu)
+			rcu_assign_pointer(per_cpu(softnet_data.net_dma, cpu), NULL);
+		unlock_cpu_hotplug();
+		return;
+	}
+
+	i = 0;
+	cpu = first_cpu(cpu_online_map);
+
+	rcu_read_lock();
+	list_for_each_entry(chan, &net_dma_client->channels, client_node) {
+		n = ((num_online_cpus() / net_dma_count)
+		   + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
+
+		while(n) {
+			per_cpu(softnet_data.net_dma, cpu) = chan;
+			cpu = next_cpu(cpu, cpu_online_map);
+			n--;
+		}
+		i++;
+	}
+	rcu_read_unlock();
+
+	unlock_cpu_hotplug();
+}
+
+/**
+ * netdev_dma_event - event callback for the net_dma_client
+ * @client: should always be net_dma_client
+ * @chan: DMA channel for the event
+ * @event: event type
+ */
+static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
+	enum dma_event event)
+{
+	spin_lock(&net_dma_event_lock);
+	switch (event) {
+	case DMA_RESOURCE_ADDED:
+		net_dma_count++;
+		net_dma_rebalance();
+		break;
+	case DMA_RESOURCE_REMOVED:
+		net_dma_count--;
+		net_dma_rebalance();
+		break;
+	default:
+		break;
+	}
+	spin_unlock(&net_dma_event_lock);
+}
+
+/**
+ * netdev_dma_register - register the networking subsystem as a DMA client
+ */
+static int __init netdev_dma_register(void)
+{
+	spin_lock_init(&net_dma_event_lock);
+	net_dma_client = dma_async_client_register(netdev_dma_event);
+	if (net_dma_client == NULL)
+		return -ENOMEM;
+
+	dma_async_client_chan_request(net_dma_client, num_online_cpus());
+	return 0;
+}
+
+#else
+static int __init netdev_dma_register(void) { return -ENODEV; }
+#endif /* CONFIG_NET_DMA */
 
 /*
  *	Initialize the DEV module. At boot time this walks the device list and
@@ -3353,6 +3510,8 @@ static int __init net_dev_init(void)
 		atomic_set(&queue->backlog_dev.refcnt, 1);
 	}
 
+	netdev_dma_register();
+
 	dev_boot_phase = 0;
 
 	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
@@ -3371,7 +3530,6 @@ subsys_initcall(net_dev_init);
 EXPORT_SYMBOL(__dev_get_by_index);
 EXPORT_SYMBOL(__dev_get_by_name);
 EXPORT_SYMBOL(__dev_remove_pack);
-EXPORT_SYMBOL(__skb_linearize);
 EXPORT_SYMBOL(dev_valid_name);
 EXPORT_SYMBOL(dev_add_pack);
 EXPORT_SYMBOL(dev_alloc_name);
```
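Throughout this merge the open-coded pair spin_lock(&dev->xmit_lock) / dev->xmit_lock_owner = cpu gives way to netif_tx_lock() helpers, and register_netdevice() now initializes the renamed dev->_xmit_lock. For reference, a sketch of what those helpers presumably expand to in include/linux/netdevice.h at this point in history; the bodies below are an assumption reconstructed from the locking behavior visible in these hunks, not a quote of that header:

```c
/* Sketch (assumption): the netif_tx_lock helpers this diff switches to.
 * They wrap the renamed dev->_xmit_lock and record the owning CPU so
 * dev_queue_xmit() can still detect recursive transmission on the same
 * CPU (the "dead loop on virtual device" check). */
static inline void netif_tx_lock(struct net_device *dev)
{
	spin_lock(&dev->_xmit_lock);
	dev->xmit_lock_owner = smp_processor_id();
}

static inline void netif_tx_lock_bh(struct net_device *dev)
{
	spin_lock_bh(&dev->_xmit_lock);
	dev->xmit_lock_owner = smp_processor_id();
}

static inline void netif_tx_unlock(struct net_device *dev)
{
	dev->xmit_lock_owner = -1;	/* clear owner before dropping the lock */
	spin_unlock(&dev->_xmit_lock);
}

static inline void netif_tx_unlock_bh(struct net_device *dev)
{
	dev->xmit_lock_owner = -1;
	spin_unlock_bh(&dev->_xmit_lock);
}
```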
```diff
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 05d60850840e..c57d887da2ef 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -62,7 +62,7 @@
  *	Device mc lists are changed by bh at least if IPv6 is enabled,
  *	so that it must be bh protected.
  *
- *	We block accesses to device mc filters with dev->xmit_lock.
+ *	We block accesses to device mc filters with netif_tx_lock.
  */
 
 /*
@@ -93,9 +93,9 @@ static void __dev_mc_upload(struct net_device *dev)
 
 void dev_mc_upload(struct net_device *dev)
 {
-	spin_lock_bh(&dev->xmit_lock);
+	netif_tx_lock_bh(dev);
 	__dev_mc_upload(dev);
-	spin_unlock_bh(&dev->xmit_lock);
+	netif_tx_unlock_bh(dev);
 }
 
 /*
@@ -107,7 +107,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
 	int err = 0;
 	struct dev_mc_list *dmi, **dmip;
 
-	spin_lock_bh(&dev->xmit_lock);
+	netif_tx_lock_bh(dev);
 
 	for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
 		/*
@@ -139,13 +139,13 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
 			 */
 			__dev_mc_upload(dev);
 
-			spin_unlock_bh(&dev->xmit_lock);
+			netif_tx_unlock_bh(dev);
 			return 0;
 		}
 	}
 	err = -ENOENT;
 done:
-	spin_unlock_bh(&dev->xmit_lock);
+	netif_tx_unlock_bh(dev);
 	return err;
 }
 
@@ -160,7 +160,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
 
 	dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC);
 
-	spin_lock_bh(&dev->xmit_lock);
+	netif_tx_lock_bh(dev);
 	for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
 		if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
 		    dmi->dmi_addrlen == alen) {
@@ -176,7 +176,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
 	}
 
 	if ((dmi = dmi1) == NULL) {
-		spin_unlock_bh(&dev->xmit_lock);
+		netif_tx_unlock_bh(dev);
 		return -ENOMEM;
 	}
 	memcpy(dmi->dmi_addr, addr, alen);
@@ -189,11 +189,11 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
 
 	__dev_mc_upload(dev);
 
-	spin_unlock_bh(&dev->xmit_lock);
+	netif_tx_unlock_bh(dev);
 	return 0;
 
 done:
-	spin_unlock_bh(&dev->xmit_lock);
+	netif_tx_unlock_bh(dev);
 	kfree(dmi1);
 	return err;
 }
@@ -204,7 +204,7 @@ done:
 
 void dev_mc_discard(struct net_device *dev)
 {
-	spin_lock_bh(&dev->xmit_lock);
+	netif_tx_lock_bh(dev);
 
 	while (dev->mc_list != NULL) {
 		struct dev_mc_list *tmp = dev->mc_list;
@@ -215,7 +215,7 @@ void dev_mc_discard(struct net_device *dev)
 	}
 	dev->mc_count = 0;
 
-	spin_unlock_bh(&dev->xmit_lock);
+	netif_tx_unlock_bh(dev);
 }
 
 #ifdef CONFIG_PROC_FS
@@ -250,7 +250,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
 	struct dev_mc_list *m;
 	struct net_device *dev = v;
 
-	spin_lock_bh(&dev->xmit_lock);
+	netif_tx_lock_bh(dev);
 	for (m = dev->mc_list; m; m = m->next) {
 		int i;
 
@@ -262,7 +262,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
 		seq_putc(seq, '\n');
 	}
 
-	spin_unlock_bh(&dev->xmit_lock);
+	netif_tx_unlock_bh(dev);
 	return 0;
 }
```
```diff
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index e6f76106a99b..27ce1683caf5 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -30,7 +30,7 @@ u32 ethtool_op_get_link(struct net_device *dev)
 
 u32 ethtool_op_get_tx_csum(struct net_device *dev)
 {
-	return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0;
+	return (dev->features & NETIF_F_ALL_CSUM) != 0;
 }
 
 int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
@@ -551,9 +551,7 @@ static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
 		return -EFAULT;
 
 	if (edata.data &&
-	    !(dev->features & (NETIF_F_IP_CSUM |
-			       NETIF_F_NO_CSUM |
-			       NETIF_F_HW_CSUM)))
+	    !(dev->features & NETIF_F_ALL_CSUM))
 		return -EINVAL;
 
 	return __ethtool_set_sg(dev, edata.data);
@@ -591,7 +589,7 @@ static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
 
 static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr)
 {
-	struct ethtool_value edata = { ETHTOOL_GTSO };
+	struct ethtool_value edata = { ETHTOOL_GUFO };
 
 	if (!dev->ethtool_ops->get_ufo)
 		return -EOPNOTSUPP;
@@ -600,6 +598,7 @@ static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr)
 		return -EFAULT;
 	return 0;
 }
+
 static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_value edata;
@@ -615,6 +614,29 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
 	return dev->ethtool_ops->set_ufo(dev, edata.data);
 }
 
+static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata = { ETHTOOL_GGSO };
+
+	edata.data = dev->features & NETIF_F_GSO;
+	if (copy_to_user(useraddr, &edata, sizeof(edata)))
+		return -EFAULT;
+	return 0;
+}
+
+static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
+{
+	struct ethtool_value edata;
+
+	if (copy_from_user(&edata, useraddr, sizeof(edata)))
+		return -EFAULT;
+	if (edata.data)
+		dev->features |= NETIF_F_GSO;
+	else
+		dev->features &= ~NETIF_F_GSO;
+	return 0;
+}
+
 static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_test test;
@@ -906,6 +928,12 @@ int dev_ethtool(struct ifreq *ifr)
 	case ETHTOOL_SUFO:
 		rc = ethtool_set_ufo(dev, useraddr);
 		break;
+	case ETHTOOL_GGSO:
+		rc = ethtool_get_gso(dev, useraddr);
+		break;
+	case ETHTOOL_SGSO:
+		rc = ethtool_set_gso(dev, useraddr);
+		break;
 	default:
 		rc = -EOPNOTSUPP;
 	}
```
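The two new sub-commands are ordinary ethtool_value get/set pairs, so userspace can flip GSO per device through the standard SIOCETHTOOL ioctl. A minimal sketch, assuming an AF_INET datagram socket for the ioctl and "eth0" as a placeholder interface name:

```c
/* Sketch: toggling the new generic-segmentation-offload flag from
 * userspace via ETHTOOL_SGSO.  Error handling is minimal; call as e.g.
 * set_gso(socket(AF_INET, SOCK_DGRAM, 0), "eth0", 1). */
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>

static int set_gso(int fd, const char *ifname, __u32 on)
{
	struct ethtool_value eval = { .cmd = ETHTOOL_SGSO, .data = on };
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
	ifr.ifr_data = (char *)&eval;	/* kernel copies the ethtool_value from here */

	return ioctl(fd, SIOCETHTOOL, &ifr);	/* 0 on success, -1 with errno set */
}
```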
```diff
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 646937cc2d84..0f37266411b5 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -91,11 +91,10 @@ static void rfc2863_policy(struct net_device *dev)
 /* Must be called with the rtnl semaphore held */
 void linkwatch_run_queue(void)
 {
-	LIST_HEAD(head);
-	struct list_head *n, *next;
+	struct list_head head, *n, *next;
 
 	spin_lock_irq(&lweventlist_lock);
-	list_splice_init(&lweventlist, &head);
+	list_replace_init(&lweventlist, &head);
 	spin_unlock_irq(&lweventlist_lock);
 
 	list_for_each_safe(n, next, &head) {
```

```diff
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index e8e05cebd95a..9cb781830380 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -273,24 +273,21 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 
 	do {
 		npinfo->tries--;
-		spin_lock(&np->dev->xmit_lock);
-		np->dev->xmit_lock_owner = smp_processor_id();
+		netif_tx_lock(np->dev);
 
 		/*
 		 * network drivers do not expect to be called if the queue is
 		 * stopped.
 		 */
 		if (netif_queue_stopped(np->dev)) {
-			np->dev->xmit_lock_owner = -1;
-			spin_unlock(&np->dev->xmit_lock);
+			netif_tx_unlock(np->dev);
 			netpoll_poll(np);
 			udelay(50);
 			continue;
 		}
 
 		status = np->dev->hard_start_xmit(skb, np->dev);
-		np->dev->xmit_lock_owner = -1;
-		spin_unlock(&np->dev->xmit_lock);
+		netif_tx_unlock(np->dev);
 
 		/* success */
 		if(!status) {
```
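Both linkwatch_run_queue() above and netdev_run_todo() in the dev.c hunks swap list_splice_init() for list_replace_init(), which lets the on-stack snapshot head stay uninitialized. A sketch of the helper's presumed definition in <linux/list.h>, introduced alongside this merge; treat the exact bodies as an assumption reconstructed from standard list semantics:

```c
/* Sketch (assumption): list_replace_init() as added to <linux/list.h>.
 * "new" takes over the elements of "old" in O(1) and "old" is
 * reinitialized to empty.  The pointer fix-ups below also leave "new"
 * correctly self-linked when "old" was empty, which is why callers can
 * pass a completely uninitialized stack list_head. */
static inline void list_replace(struct list_head *old,
				struct list_head *new)
{
	new->next = old->next;
	new->next->prev = new;
	new->prev = old->prev;
	new->prev->next = new;
}

static inline void list_replace_init(struct list_head *old,
				     struct list_head *new)
{
	list_replace(old, new);
	INIT_LIST_HEAD(old);	/* source list is empty again */
}
```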
```diff
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index c23e9c06ee23..67ed14ddabd2 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2897,7 +2897,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		}
 	}
 
-	spin_lock_bh(&odev->xmit_lock);
+	netif_tx_lock_bh(odev);
 	if (!netif_queue_stopped(odev)) {
 
 		atomic_inc(&(pkt_dev->skb->users));
@@ -2942,7 +2942,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
 		pkt_dev->next_tx_ns = 0;
 	}
 
-	spin_unlock_bh(&odev->xmit_lock);
+	netif_tx_unlock_bh(odev);
 
 	/* If pkt_dev->count is zero, then run forever */
 	if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
```

```diff
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fb3770f9c094..8e5044ba3ab6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -172,9 +172,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	shinfo = skb_shinfo(skb);
 	atomic_set(&shinfo->dataref, 1);
 	shinfo->nr_frags  = 0;
-	shinfo->tso_size = 0;
-	shinfo->tso_segs = 0;
-	shinfo->ufo_size = 0;
+	shinfo->gso_size = 0;
+	shinfo->gso_segs = 0;
+	shinfo->gso_type = 0;
 	shinfo->ip6_frag_id = 0;
 	shinfo->frag_list = NULL;
 
@@ -238,8 +238,9 @@ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
 
 	atomic_set(&(skb_shinfo(skb)->dataref), 1);
 	skb_shinfo(skb)->nr_frags  = 0;
-	skb_shinfo(skb)->tso_size = 0;
-	skb_shinfo(skb)->tso_segs = 0;
+	skb_shinfo(skb)->gso_size = 0;
+	skb_shinfo(skb)->gso_segs = 0;
+	skb_shinfo(skb)->gso_type = 0;
 	skb_shinfo(skb)->frag_list = NULL;
 out:
 	return skb;
@@ -464,7 +465,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
 	C(input_dev);
 #endif
-
+	skb_copy_secmark(n, skb);
 #endif
 	C(truesize);
 	atomic_set(&n->users, 1);
@@ -526,9 +527,11 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 #endif
 	new->tc_index	= old->tc_index;
 #endif
+	skb_copy_secmark(new, old);
 	atomic_set(&new->users, 1);
-	skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
-	skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
+	skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
+	skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
+	skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
 }
 
 /**
@@ -780,32 +783,46 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
 *	filled. Used by network drivers which may DMA or transfer data
 *	beyond the buffer end onto the wire.
 *
-*	May return NULL in out of memory cases.
+*	May return error in out of memory cases. The skb is freed on error.
 */
 
-struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
+int skb_pad(struct sk_buff *skb, int pad)
 {
-	struct sk_buff *nskb;
+	int err;
+	int ntail;
 
 	/* If the skbuff is non linear tailroom is always zero.. */
-	if (skb_tailroom(skb) >= pad) {
+	if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
 		memset(skb->data+skb->len, 0, pad);
-		return skb;
+		return 0;
 	}
-
-	nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
+
+	ntail = skb->data_len + pad - (skb->end - skb->tail);
+	if (likely(skb_cloned(skb) || ntail > 0)) {
+		err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
+		if (unlikely(err))
+			goto free_skb;
+	}
+
+	/* FIXME: The use of this function with non-linear skb's really needs
+	 * to be audited.
+	 */
+	err = skb_linearize(skb);
+	if (unlikely(err))
+		goto free_skb;
+
+	memset(skb->data + skb->len, 0, pad);
+	return 0;
+
+free_skb:
 	kfree_skb(skb);
-	if (nskb)
-		memset(nskb->data+nskb->len, 0, pad);
-	return nskb;
+	return err;
 }
 
-/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
- * If realloc==0 and trimming is impossible without change of data,
- * it is BUG().
+/* Trims skb to length len. It can change skb pointers.
 */
-int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
+int ___pskb_trim(struct sk_buff *skb, unsigned int len)
 {
 	int offset = skb_headlen(skb);
 	int nfrags = skb_shinfo(skb)->nr_frags;
@@ -815,7 +832,6 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len)
 		int end = offset + skb_shinfo(skb)->frags[i].size;
 		if (end > len) {
 			if (skb_cloned(skb)) {
-				BUG_ON(!realloc);
 				if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
 					return -ENOMEM;
 			}
@@ -1826,6 +1842,132 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
 
 EXPORT_SYMBOL_GPL(skb_pull_rcsum);
 
+/**
+ *	skb_segment - Perform protocol segmentation on skb.
+ *	@skb: buffer to segment
+ *	@sg: whether scatter-gather can be used for generated segments
+ *
+ *	This function performs segmentation on the given skb.  It returns
+ *	the segment at the given position.  It returns NULL if there are
+ *	no more segments to generate, or when an error is encountered.
+ */
+struct sk_buff *skb_segment(struct sk_buff *skb, int sg)
+{
+	struct sk_buff *segs = NULL;
+	struct sk_buff *tail = NULL;
+	unsigned int mss = skb_shinfo(skb)->gso_size;
+	unsigned int doffset = skb->data - skb->mac.raw;
+	unsigned int offset = doffset;
+	unsigned int headroom;
+	unsigned int len;
+	int nfrags = skb_shinfo(skb)->nr_frags;
+	int err = -ENOMEM;
+	int i = 0;
+	int pos;
+
+	__skb_push(skb, doffset);
+	headroom = skb_headroom(skb);
+	pos = skb_headlen(skb);
+
+	do {
+		struct sk_buff *nskb;
+		skb_frag_t *frag;
+		int hsize, nsize;
+		int k;
+		int size;
+
+		len = skb->len - offset;
+		if (len > mss)
+			len = mss;
+
+		hsize = skb_headlen(skb) - offset;
+		if (hsize < 0)
+			hsize = 0;
+		nsize = hsize + doffset;
+		if (nsize > len + doffset || !sg)
+			nsize = len + doffset;
+
+		nskb = alloc_skb(nsize + headroom, GFP_ATOMIC);
+		if (unlikely(!nskb))
+			goto err;
+
+		if (segs)
+			tail->next = nskb;
+		else
+			segs = nskb;
+		tail = nskb;
+
+		nskb->dev = skb->dev;
+		nskb->priority = skb->priority;
+		nskb->protocol = skb->protocol;
+		nskb->dst = dst_clone(skb->dst);
+		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
+		nskb->pkt_type = skb->pkt_type;
+		nskb->mac_len = skb->mac_len;
+
+		skb_reserve(nskb, headroom);
+		nskb->mac.raw = nskb->data;
+		nskb->nh.raw = nskb->data + skb->mac_len;
+		nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
+		memcpy(skb_put(nskb, doffset), skb->data, doffset);
+
+		if (!sg) {
+			nskb->csum = skb_copy_and_csum_bits(skb, offset,
+							    skb_put(nskb, len),
+							    len, 0);
+			continue;
+		}
+
+		frag = skb_shinfo(nskb)->frags;
+		k = 0;
+
+		nskb->ip_summed = CHECKSUM_HW;
+		nskb->csum = skb->csum;
+		memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
+
+		while (pos < offset + len) {
+			BUG_ON(i >= nfrags);
+
+			*frag = skb_shinfo(skb)->frags[i];
+			get_page(frag->page);
+			size = frag->size;
+
+			if (pos < offset) {
+				frag->page_offset += offset - pos;
+				frag->size -= offset - pos;
+			}
+
+			k++;
+
+			if (pos + size <= offset + len) {
+				i++;
+				pos += size;
+			} else {
+				frag->size -= pos + size - (offset + len);
+				break;
+			}
+
+			frag++;
+		}
+
+		skb_shinfo(nskb)->nr_frags = k;
+		nskb->data_len = len - hsize;
+		nskb->len += nskb->data_len;
+		nskb->truesize += nskb->data_len;
+	} while ((offset += len) < skb->len);
+
+	return segs;
+
+err:
+	while ((skb = segs)) {
+		segs = skb->next;
+		kfree(skb);
+	}
+	return ERR_PTR(err);
+}
+
+EXPORT_SYMBOL_GPL(skb_segment);
+
 void __init skb_init(void)
 {
 	skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
```
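The skb_pad() change above is an API break for drivers: it now returns an error code instead of a replacement sk_buff pointer, and it frees the skb itself on failure. A minimal sketch of the new caller convention; hypothetical_xmit() is a made-up driver helper, while ETH_ZLEN is the real 60-byte Ethernet minimum:

```c
/* Sketch: padding a short frame under the new skb_pad() contract.
 * Old API:  skb = skb_pad(skb, pad); if (!skb) ... (skb already freed)
 * New API:  nonzero return means failure, and skb_pad() has already
 * freed the skb, so the caller must not touch it again. */
static int hypothetical_xmit(struct sk_buff *skb, struct net_device *dev)
{
	if (skb->len < ETH_ZLEN) {
		if (skb_pad(skb, ETH_ZLEN - skb->len))
			return 0;	/* frame dropped inside skb_pad() */
	}

	/* safe to keep using skb here: the padding is zeroed tailroom */
	return dev->hard_start_xmit(skb, dev);
}
```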
```diff
diff --git a/net/core/sock.c b/net/core/sock.c
index ed2afdb9ea2d..5d820c376653 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -832,6 +832,9 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		atomic_set(&newsk->sk_omem_alloc, 0);
 		skb_queue_head_init(&newsk->sk_receive_queue);
 		skb_queue_head_init(&newsk->sk_write_queue);
+#ifdef CONFIG_NET_DMA
+		skb_queue_head_init(&newsk->sk_async_wait_queue);
+#endif
 
 		rwlock_init(&newsk->sk_dst_lock);
 		rwlock_init(&newsk->sk_callback_lock);
@@ -1383,6 +1386,9 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	skb_queue_head_init(&sk->sk_receive_queue);
 	skb_queue_head_init(&sk->sk_write_queue);
 	skb_queue_head_init(&sk->sk_error_queue);
+#ifdef CONFIG_NET_DMA
+	skb_queue_head_init(&sk->sk_async_wait_queue);
+#endif
 
 	sk->sk_send_head	= NULL;
```
```diff
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
new file mode 100644
index 000000000000..b7c98dbcdb81
--- /dev/null
+++ b/net/core/user_dma.c
@@ -0,0 +1,131 @@
+/*
+ *	Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ *	Portions based on net/core/datagram.c and copyrighted by their authors.
+ *
+ *	This program is free software; you can redistribute it and/or modify it
+ *	under the terms of the GNU General Public License as published by the Free
+ *	Software Foundation; either version 2 of the License, or (at your option)
+ *	any later version.
+ *
+ *	This program is distributed in the hope that it will be useful, but WITHOUT
+ *	ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ *	FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ *	more details.
+ *
+ *	You should have received a copy of the GNU General Public License along with
+ *	this program; if not, write to the Free Software Foundation, Inc., 59
+ *	Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ *	The full GNU General Public License is included in this distribution in the
+ *	file called COPYING.
+ */
+
+/*
+ * This code allows the net stack to make use of a DMA engine for
+ * skb to iovec copies.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/socket.h>
+#include <linux/rtnetlink.h> /* for BUG_TRAP */
+#include <net/tcp.h>
+
+#define NET_DMA_DEFAULT_COPYBREAK 4096
+
+int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
+
+/**
+ *	dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.
+ *	@skb - buffer to copy
+ *	@offset - offset in the buffer to start copying from
+ *	@iovec - io vector to copy to
+ *	@len - amount of data to copy from buffer to iovec
+ *	@pinned_list - locked iovec buffer data
+ *
+ *	Note: the iovec is modified during the copy.
+ */
+int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
+			struct sk_buff *skb, int offset, struct iovec *to,
+			size_t len, struct dma_pinned_list *pinned_list)
+{
+	int start = skb_headlen(skb);
+	int i, copy = start - offset;
+	dma_cookie_t cookie = 0;
+
+	/* Copy header. */
+	if (copy > 0) {
+		if (copy > len)
+			copy = len;
+		cookie = dma_memcpy_to_iovec(chan, to, pinned_list,
+					    skb->data + offset, copy);
+		if (cookie < 0)
+			goto fault;
+		len -= copy;
+		if (len == 0)
+			goto end;
+		offset += copy;
+	}
+
+	/* Copy paged appendix. Hmm... why does this look so complicated? */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		int end;
+
+		BUG_TRAP(start <= offset + len);
+
+		end = start + skb_shinfo(skb)->frags[i].size;
+		copy = end - offset;
+		if ((copy = end - offset) > 0) {
+			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+			struct page *page = frag->page;
+
+			if (copy > len)
+				copy = len;
+
+			cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page,
+					frag->page_offset + offset - start, copy);
+			if (cookie < 0)
+				goto fault;
+			len -= copy;
+			if (len == 0)
+				goto end;
+			offset += copy;
+		}
+		start = end;
+	}
+
+	if (skb_shinfo(skb)->frag_list) {
+		struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+		for (; list; list = list->next) {
+			int end;
+
+			BUG_TRAP(start <= offset + len);
+
+			end = start + list->len;
+			copy = end - offset;
+			if (copy > 0) {
+				if (copy > len)
+					copy = len;
+				cookie = dma_skb_copy_datagram_iovec(chan, list,
+						offset - start, to, copy,
+						pinned_list);
+				if (cookie < 0)
+					goto fault;
+				len -= copy;
+				if (len == 0)
+					goto end;
+				offset += copy;
+			}
+			start = end;
+		}
+	}
+
+end:
+	if (!len) {
+		skb->dma_cookie = cookie;
+		return cookie;
+	}
+
+fault:
+	return -EFAULT;
+}
```
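A rough sketch of the intended consumer, pieced together from this file plus the rest of the I/OAT series: dma_pin_iovec_pages() and get_softnet_dma() are assumed to come from those companion patches and are not defined in this diff, so every name below other than dma_skb_copy_datagram_iovec() should be read as illustrative.

```c
/* Sketch (assumptions throughout): how a receive path might offload the
 * skb-to-user copy.  Real code must also unpin the iovec pages and fall
 * back to a CPU copy when no channel or pinned list is available. */
static int hypothetical_recv_to_user(struct sk_buff *skb,
				     struct iovec *iov, size_t len)
{
	struct dma_pinned_list *pinned;
	struct dma_chan *chan;
	dma_cookie_t cookie;

	pinned = dma_pin_iovec_pages(iov, len);	/* lock user pages for DMA (assumed helper) */
	chan = get_softnet_dma();		/* per-CPU channel set up by net_dma_rebalance() */
	if (!pinned || !chan)
		return -ENODEV;

	cookie = dma_skb_copy_datagram_iovec(chan, skb, 0, iov, len, pinned);
	if (cookie < 0)
		return cookie;

	/* Copies complete asynchronously; net_rx_action() kicks the engine
	 * via dma_async_memcpy_issue_pending(), as added in the dev.c hunks. */
	return 0;
}
```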