summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
authorDmitry Torokhov <dtor_core@ameritech.net>2006-06-26 07:31:38 +0200
committerDmitry Torokhov <dtor_core@ameritech.net>2006-06-26 07:31:38 +0200
commit4854c7b27f0975a2b629f35ea3996d2968eb7c4f (patch)
tree4102bdb70289764a2058aff0f907b13d7cf0e0d1 /net/core
parentInput: fix accuracy of fixp-arith.h (diff)
parent[PATCH] uclinux: use PER_LINUX_32BIT in binfmt_flat (diff)
downloadlinux-4854c7b27f0975a2b629f35ea3996d2968eb7c4f.tar.xz
linux-4854c7b27f0975a2b629f35ea3996d2968eb7c4f.zip
Merge rsync://rsync.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile1
-rw-r--r--net/core/dev.c294
-rw-r--r--net/core/dev_mcast.c28
-rw-r--r--net/core/ethtool.c38
-rw-r--r--net/core/link_watch.c5
-rw-r--r--net/core/netpoll.c9
-rw-r--r--net/core/pktgen.c4
-rw-r--r--net/core/skbuff.c188
-rw-r--r--net/core/sock.c6
-rw-r--r--net/core/user_dma.c131
10 files changed, 583 insertions, 121 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 79fe12cced27..e9bd2467d5a9 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -16,3 +16,4 @@ obj-$(CONFIG_NET_DIVERT) += dv.o
obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_WIRELESS_EXT) += wireless.o
obj-$(CONFIG_NETPOLL) += netpoll.o
+obj-$(CONFIG_NET_DMA) += user_dma.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 4fba549caf29..ea2469398bd5 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -115,6 +115,8 @@
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
+#include <linux/dmaengine.h>
+#include <linux/err.h>
/*
* The list of packet types we will receive (as opposed to discard)
@@ -148,6 +150,12 @@ static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[16]; /* 16 way hashed list */
static struct list_head ptype_all; /* Taps */
+#ifdef CONFIG_NET_DMA
+static struct dma_client *net_dma_client;
+static unsigned int net_dma_count;
+static spinlock_t net_dma_event_lock;
+#endif
+
/*
* The @dev_base list is protected by @dev_base_lock and the rtnl
* semaphore.
@@ -1041,7 +1049,7 @@ static inline void net_timestamp(struct sk_buff *skb)
* taps currently in use.
*/
-void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
+static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
struct packet_type *ptype;
@@ -1179,6 +1187,40 @@ out:
return ret;
}
+/**
+ * skb_gso_segment - Perform segmentation on skb.
+ * @skb: buffer to segment
+ * @sg: whether scatter-gather is supported on the target.
+ *
+ * This function segments the given skb and returns a list of segments.
+ */
+struct sk_buff *skb_gso_segment(struct sk_buff *skb, int sg)
+{
+ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
+ struct packet_type *ptype;
+ int type = skb->protocol;
+
+ BUG_ON(skb_shinfo(skb)->frag_list);
+ BUG_ON(skb->ip_summed != CHECKSUM_HW);
+
+ skb->mac.raw = skb->data;
+ skb->mac_len = skb->nh.raw - skb->data;
+ __skb_pull(skb, skb->mac_len);
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
+ if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
+ segs = ptype->gso_segment(skb, sg);
+ break;
+ }
+ }
+ rcu_read_unlock();
+
+ return segs;
+}
+
+EXPORT_SYMBOL(skb_gso_segment);
+
/* Take action when hardware reception checksum errors are detected. */
#ifdef CONFIG_BUG
void netdev_rx_csum_fault(struct net_device *dev)
@@ -1215,75 +1257,95 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
#define illegal_highdma(dev, skb) (0)
#endif
-/* Keep head the same: replace data */
-int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask)
+struct dev_gso_cb {
+ void (*destructor)(struct sk_buff *skb);
+};
+
+#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
+
+static void dev_gso_skb_destructor(struct sk_buff *skb)
{
- unsigned int size;
- u8 *data;
- long offset;
- struct skb_shared_info *ninfo;
- int headerlen = skb->data - skb->head;
- int expand = (skb->tail + skb->data_len) - skb->end;
+ struct dev_gso_cb *cb;
- if (skb_shared(skb))
- BUG();
+ do {
+ struct sk_buff *nskb = skb->next;
- if (expand <= 0)
- expand = 0;
+ skb->next = nskb->next;
+ nskb->next = NULL;
+ kfree_skb(nskb);
+ } while (skb->next);
- size = skb->end - skb->head + expand;
- size = SKB_DATA_ALIGN(size);
- data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask);
- if (!data)
- return -ENOMEM;
+ cb = DEV_GSO_CB(skb);
+ if (cb->destructor)
+ cb->destructor(skb);
+}
+
+/**
+ * dev_gso_segment - Perform emulated hardware segmentation on skb.
+ * @skb: buffer to segment
+ *
+ * This function segments the given skb and stores the list of segments
+ * in skb->next.
+ */
+static int dev_gso_segment(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+ struct sk_buff *segs;
- /* Copy entire thing */
- if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len))
- BUG();
+ segs = skb_gso_segment(skb, dev->features & NETIF_F_SG &&
+ !illegal_highdma(dev, skb));
+ if (unlikely(IS_ERR(segs)))
+ return PTR_ERR(segs);
- /* Set up shinfo */
- ninfo = (struct skb_shared_info*)(data + size);
- atomic_set(&ninfo->dataref, 1);
- ninfo->tso_size = skb_shinfo(skb)->tso_size;
- ninfo->tso_segs = skb_shinfo(skb)->tso_segs;
- ninfo->nr_frags = 0;
- ninfo->frag_list = NULL;
+ skb->next = segs;
+ DEV_GSO_CB(skb)->destructor = skb->destructor;
+ skb->destructor = dev_gso_skb_destructor;
- /* Offset between the two in bytes */
- offset = data - skb->head;
+ return 0;
+}
- /* Free old data. */
- skb_release_data(skb);
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ if (likely(!skb->next)) {
+ if (netdev_nit)
+ dev_queue_xmit_nit(skb, dev);
- skb->head = data;
- skb->end = data + size;
+ if (!netif_needs_gso(dev, skb))
+ return dev->hard_start_xmit(skb, dev);
+
+ if (unlikely(dev_gso_segment(skb)))
+ goto out_kfree_skb;
+ }
- /* Set up new pointers */
- skb->h.raw += offset;
- skb->nh.raw += offset;
- skb->mac.raw += offset;
- skb->tail += offset;
- skb->data += offset;
+ do {
+ struct sk_buff *nskb = skb->next;
+ int rc;
- /* We are no longer a clone, even if we were. */
- skb->cloned = 0;
+ skb->next = nskb->next;
+ nskb->next = NULL;
+ rc = dev->hard_start_xmit(nskb, dev);
+ if (unlikely(rc)) {
+ skb->next = nskb;
+ return rc;
+ }
+ } while (skb->next);
+
+ skb->destructor = DEV_GSO_CB(skb)->destructor;
- skb->tail += skb->data_len;
- skb->data_len = 0;
+out_kfree_skb:
+ kfree_skb(skb);
return 0;
}
#define HARD_TX_LOCK(dev, cpu) { \
if ((dev->features & NETIF_F_LLTX) == 0) { \
- spin_lock(&dev->xmit_lock); \
- dev->xmit_lock_owner = cpu; \
+ netif_tx_lock(dev); \
} \
}
#define HARD_TX_UNLOCK(dev) { \
if ((dev->features & NETIF_F_LLTX) == 0) { \
- dev->xmit_lock_owner = -1; \
- spin_unlock(&dev->xmit_lock); \
+ netif_tx_unlock(dev); \
} \
}
@@ -1319,9 +1381,13 @@ int dev_queue_xmit(struct sk_buff *skb)
struct Qdisc *q;
int rc = -ENOMEM;
+ /* GSO will handle the following emulations directly. */
+ if (netif_needs_gso(dev, skb))
+ goto gso;
+
if (skb_shinfo(skb)->frag_list &&
!(dev->features & NETIF_F_FRAGLIST) &&
- __skb_linearize(skb, GFP_ATOMIC))
+ __skb_linearize(skb))
goto out_kfree_skb;
/* Fragmented skb is linearized if device does not support SG,
@@ -1330,25 +1396,26 @@ int dev_queue_xmit(struct sk_buff *skb)
*/
if (skb_shinfo(skb)->nr_frags &&
(!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
- __skb_linearize(skb, GFP_ATOMIC))
+ __skb_linearize(skb))
goto out_kfree_skb;
/* If packet is not checksummed and device does not support
* checksumming for this protocol, complete checksumming here.
*/
if (skb->ip_summed == CHECKSUM_HW &&
- (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) &&
+ (!(dev->features & NETIF_F_GEN_CSUM) &&
(!(dev->features & NETIF_F_IP_CSUM) ||
skb->protocol != htons(ETH_P_IP))))
if (skb_checksum_help(skb, 0))
goto out_kfree_skb;
+gso:
spin_lock_prefetch(&dev->queue_lock);
/* Disable soft irqs for various locks below. Also
* stops preemption for RCU.
*/
- local_bh_disable();
+ rcu_read_lock_bh();
/* Updates of qdisc are serialized by queue_lock.
* The struct Qdisc which is pointed to by qdisc is now a
@@ -1382,8 +1449,8 @@ int dev_queue_xmit(struct sk_buff *skb)
/* The device has no queue. Common case for software devices:
loopback, all the sorts of tunnels...
- Really, it is unlikely that xmit_lock protection is necessary here.
- (f.e. loopback and IP tunnels are clean ignoring statistics
+ Really, it is unlikely that netif_tx_lock protection is necessary
+ here. (f.e. loopback and IP tunnels are clean ignoring statistics
counters.)
However, it is possible, that they rely on protection
made by us here.
@@ -1399,11 +1466,8 @@ int dev_queue_xmit(struct sk_buff *skb)
HARD_TX_LOCK(dev, cpu);
if (!netif_queue_stopped(dev)) {
- if (netdev_nit)
- dev_queue_xmit_nit(skb, dev);
-
rc = 0;
- if (!dev->hard_start_xmit(skb, dev)) {
+ if (!dev_hard_start_xmit(skb, dev)) {
HARD_TX_UNLOCK(dev);
goto out;
}
@@ -1422,13 +1486,13 @@ int dev_queue_xmit(struct sk_buff *skb)
}
rc = -ENETDOWN;
- local_bh_enable();
+ rcu_read_unlock_bh();
out_kfree_skb:
kfree_skb(skb);
return rc;
out:
- local_bh_enable();
+ rcu_read_unlock_bh();
return rc;
}
@@ -1846,6 +1910,19 @@ static void net_rx_action(struct softirq_action *h)
}
}
out:
+#ifdef CONFIG_NET_DMA
+ /*
+ * There may not be any more sk_buffs coming right now, so push
+ * any pending DMA copies to hardware
+ */
+ if (net_dma_client) {
+ struct dma_chan *chan;
+ rcu_read_lock();
+ list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
+ dma_async_memcpy_issue_pending(chan);
+ rcu_read_unlock();
+ }
+#endif
local_irq_enable();
return;
@@ -2785,7 +2862,7 @@ int register_netdevice(struct net_device *dev)
BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
spin_lock_init(&dev->queue_lock);
- spin_lock_init(&dev->xmit_lock);
+ spin_lock_init(&dev->_xmit_lock);
dev->xmit_lock_owner = -1;
#ifdef CONFIG_NET_CLS_ACT
spin_lock_init(&dev->ingress_lock);
@@ -2829,9 +2906,7 @@ int register_netdevice(struct net_device *dev)
/* Fix illegal SG+CSUM combinations. */
if ((dev->features & NETIF_F_SG) &&
- !(dev->features & (NETIF_F_IP_CSUM |
- NETIF_F_NO_CSUM |
- NETIF_F_HW_CSUM))) {
+ !(dev->features & NETIF_F_ALL_CSUM)) {
printk("%s: Dropping NETIF_F_SG since no checksum feature.\n",
dev->name);
dev->features &= ~NETIF_F_SG;
@@ -3022,7 +3097,7 @@ static void netdev_wait_allrefs(struct net_device *dev)
static DEFINE_MUTEX(net_todo_run_mutex);
void netdev_run_todo(void)
{
- struct list_head list = LIST_HEAD_INIT(list);
+ struct list_head list;
/* Need to guard against multiple cpu's getting out of order. */
mutex_lock(&net_todo_run_mutex);
@@ -3037,9 +3112,9 @@ void netdev_run_todo(void)
/* Snapshot list, allow later requests */
spin_lock(&net_todo_list_lock);
- list_splice_init(&net_todo_list, &list);
+ list_replace_init(&net_todo_list, &list);
spin_unlock(&net_todo_list_lock);
-
+
while (!list_empty(&list)) {
struct net_device *dev
= list_entry(list.next, struct net_device, todo_list);
@@ -3300,6 +3375,88 @@ static int dev_cpu_callback(struct notifier_block *nfb,
}
#endif /* CONFIG_HOTPLUG_CPU */
+#ifdef CONFIG_NET_DMA
+/**
+ * net_dma_rebalance -
+ * This is called when the number of channels allocated to the net_dma_client
+ * changes. The net_dma_client tries to have one DMA channel per CPU.
+ */
+static void net_dma_rebalance(void)
+{
+ unsigned int cpu, i, n;
+ struct dma_chan *chan;
+
+ lock_cpu_hotplug();
+
+ if (net_dma_count == 0) {
+ for_each_online_cpu(cpu)
+ rcu_assign_pointer(per_cpu(softnet_data.net_dma, cpu), NULL);
+ unlock_cpu_hotplug();
+ return;
+ }
+
+ i = 0;
+ cpu = first_cpu(cpu_online_map);
+
+ rcu_read_lock();
+ list_for_each_entry(chan, &net_dma_client->channels, client_node) {
+ n = ((num_online_cpus() / net_dma_count)
+ + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
+
+ while(n) {
+ per_cpu(softnet_data.net_dma, cpu) = chan;
+ cpu = next_cpu(cpu, cpu_online_map);
+ n--;
+ }
+ i++;
+ }
+ rcu_read_unlock();
+
+ unlock_cpu_hotplug();
+}
+
+/**
+ * netdev_dma_event - event callback for the net_dma_client
+ * @client: should always be net_dma_client
+ * @chan: DMA channel for the event
+ * @event: event type
+ */
+static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
+ enum dma_event event)
+{
+ spin_lock(&net_dma_event_lock);
+ switch (event) {
+ case DMA_RESOURCE_ADDED:
+ net_dma_count++;
+ net_dma_rebalance();
+ break;
+ case DMA_RESOURCE_REMOVED:
+ net_dma_count--;
+ net_dma_rebalance();
+ break;
+ default:
+ break;
+ }
+ spin_unlock(&net_dma_event_lock);
+}
+
+/**
+ * netdev_dma_regiser - register the networking subsystem as a DMA client
+ */
+static int __init netdev_dma_register(void)
+{
+ spin_lock_init(&net_dma_event_lock);
+ net_dma_client = dma_async_client_register(netdev_dma_event);
+ if (net_dma_client == NULL)
+ return -ENOMEM;
+
+ dma_async_client_chan_request(net_dma_client, num_online_cpus());
+ return 0;
+}
+
+#else
+static int __init netdev_dma_register(void) { return -ENODEV; }
+#endif /* CONFIG_NET_DMA */
/*
* Initialize the DEV module. At boot time this walks the device list and
@@ -3353,6 +3510,8 @@ static int __init net_dev_init(void)
atomic_set(&queue->backlog_dev.refcnt, 1);
}
+ netdev_dma_register();
+
dev_boot_phase = 0;
open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
@@ -3371,7 +3530,6 @@ subsys_initcall(net_dev_init);
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
-EXPORT_SYMBOL(__skb_linearize);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index 05d60850840e..c57d887da2ef 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -62,7 +62,7 @@
* Device mc lists are changed by bh at least if IPv6 is enabled,
* so that it must be bh protected.
*
- * We block accesses to device mc filters with dev->xmit_lock.
+ * We block accesses to device mc filters with netif_tx_lock.
*/
/*
@@ -93,9 +93,9 @@ static void __dev_mc_upload(struct net_device *dev)
void dev_mc_upload(struct net_device *dev)
{
- spin_lock_bh(&dev->xmit_lock);
+ netif_tx_lock_bh(dev);
__dev_mc_upload(dev);
- spin_unlock_bh(&dev->xmit_lock);
+ netif_tx_unlock_bh(dev);
}
/*
@@ -107,7 +107,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
int err = 0;
struct dev_mc_list *dmi, **dmip;
- spin_lock_bh(&dev->xmit_lock);
+ netif_tx_lock_bh(dev);
for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
/*
@@ -139,13 +139,13 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
*/
__dev_mc_upload(dev);
- spin_unlock_bh(&dev->xmit_lock);
+ netif_tx_unlock_bh(dev);
return 0;
}
}
err = -ENOENT;
done:
- spin_unlock_bh(&dev->xmit_lock);
+ netif_tx_unlock_bh(dev);
return err;
}
@@ -160,7 +160,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
dmi1 = kmalloc(sizeof(*dmi), GFP_ATOMIC);
- spin_lock_bh(&dev->xmit_lock);
+ netif_tx_lock_bh(dev);
for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
dmi->dmi_addrlen == alen) {
@@ -176,7 +176,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
}
if ((dmi = dmi1) == NULL) {
- spin_unlock_bh(&dev->xmit_lock);
+ netif_tx_unlock_bh(dev);
return -ENOMEM;
}
memcpy(dmi->dmi_addr, addr, alen);
@@ -189,11 +189,11 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
__dev_mc_upload(dev);
- spin_unlock_bh(&dev->xmit_lock);
+ netif_tx_unlock_bh(dev);
return 0;
done:
- spin_unlock_bh(&dev->xmit_lock);
+ netif_tx_unlock_bh(dev);
kfree(dmi1);
return err;
}
@@ -204,7 +204,7 @@ done:
void dev_mc_discard(struct net_device *dev)
{
- spin_lock_bh(&dev->xmit_lock);
+ netif_tx_lock_bh(dev);
while (dev->mc_list != NULL) {
struct dev_mc_list *tmp = dev->mc_list;
@@ -215,7 +215,7 @@ void dev_mc_discard(struct net_device *dev)
}
dev->mc_count = 0;
- spin_unlock_bh(&dev->xmit_lock);
+ netif_tx_unlock_bh(dev);
}
#ifdef CONFIG_PROC_FS
@@ -250,7 +250,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
struct dev_mc_list *m;
struct net_device *dev = v;
- spin_lock_bh(&dev->xmit_lock);
+ netif_tx_lock_bh(dev);
for (m = dev->mc_list; m; m = m->next) {
int i;
@@ -262,7 +262,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
seq_putc(seq, '\n');
}
- spin_unlock_bh(&dev->xmit_lock);
+ netif_tx_unlock_bh(dev);
return 0;
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index e6f76106a99b..27ce1683caf5 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -30,7 +30,7 @@ u32 ethtool_op_get_link(struct net_device *dev)
u32 ethtool_op_get_tx_csum(struct net_device *dev)
{
- return (dev->features & (NETIF_F_IP_CSUM | NETIF_F_HW_CSUM)) != 0;
+ return (dev->features & NETIF_F_ALL_CSUM) != 0;
}
int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
@@ -551,9 +551,7 @@ static int ethtool_set_sg(struct net_device *dev, char __user *useraddr)
return -EFAULT;
if (edata.data &&
- !(dev->features & (NETIF_F_IP_CSUM |
- NETIF_F_NO_CSUM |
- NETIF_F_HW_CSUM)))
+ !(dev->features & NETIF_F_ALL_CSUM))
return -EINVAL;
return __ethtool_set_sg(dev, edata.data);
@@ -591,7 +589,7 @@ static int ethtool_set_tso(struct net_device *dev, char __user *useraddr)
static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr)
{
- struct ethtool_value edata = { ETHTOOL_GTSO };
+ struct ethtool_value edata = { ETHTOOL_GUFO };
if (!dev->ethtool_ops->get_ufo)
return -EOPNOTSUPP;
@@ -600,6 +598,7 @@ static int ethtool_get_ufo(struct net_device *dev, char __user *useraddr)
return -EFAULT;
return 0;
}
+
static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
{
struct ethtool_value edata;
@@ -615,6 +614,29 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr)
return dev->ethtool_ops->set_ufo(dev, edata.data);
}
+static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
+{
+ struct ethtool_value edata = { ETHTOOL_GGSO };
+
+ edata.data = dev->features & NETIF_F_GSO;
+ if (copy_to_user(useraddr, &edata, sizeof(edata)))
+ return -EFAULT;
+ return 0;
+}
+
+static int ethtool_set_gso(struct net_device *dev, char __user *useraddr)
+{
+ struct ethtool_value edata;
+
+ if (copy_from_user(&edata, useraddr, sizeof(edata)))
+ return -EFAULT;
+ if (edata.data)
+ dev->features |= NETIF_F_GSO;
+ else
+ dev->features &= ~NETIF_F_GSO;
+ return 0;
+}
+
static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
{
struct ethtool_test test;
@@ -906,6 +928,12 @@ int dev_ethtool(struct ifreq *ifr)
case ETHTOOL_SUFO:
rc = ethtool_set_ufo(dev, useraddr);
break;
+ case ETHTOOL_GGSO:
+ rc = ethtool_get_gso(dev, useraddr);
+ break;
+ case ETHTOOL_SGSO:
+ rc = ethtool_set_gso(dev, useraddr);
+ break;
default:
rc = -EOPNOTSUPP;
}
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index 646937cc2d84..0f37266411b5 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -91,11 +91,10 @@ static void rfc2863_policy(struct net_device *dev)
/* Must be called with the rtnl semaphore held */
void linkwatch_run_queue(void)
{
- LIST_HEAD(head);
- struct list_head *n, *next;
+ struct list_head head, *n, *next;
spin_lock_irq(&lweventlist_lock);
- list_splice_init(&lweventlist, &head);
+ list_replace_init(&lweventlist, &head);
spin_unlock_irq(&lweventlist_lock);
list_for_each_safe(n, next, &head) {
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index e8e05cebd95a..9cb781830380 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -273,24 +273,21 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
do {
npinfo->tries--;
- spin_lock(&np->dev->xmit_lock);
- np->dev->xmit_lock_owner = smp_processor_id();
+ netif_tx_lock(np->dev);
/*
* network drivers do not expect to be called if the queue is
* stopped.
*/
if (netif_queue_stopped(np->dev)) {
- np->dev->xmit_lock_owner = -1;
- spin_unlock(&np->dev->xmit_lock);
+ netif_tx_unlock(np->dev);
netpoll_poll(np);
udelay(50);
continue;
}
status = np->dev->hard_start_xmit(skb, np->dev);
- np->dev->xmit_lock_owner = -1;
- spin_unlock(&np->dev->xmit_lock);
+ netif_tx_unlock(np->dev);
/* success */
if(!status) {
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index c23e9c06ee23..67ed14ddabd2 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2897,7 +2897,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
}
}
- spin_lock_bh(&odev->xmit_lock);
+ netif_tx_lock_bh(odev);
if (!netif_queue_stopped(odev)) {
atomic_inc(&(pkt_dev->skb->users));
@@ -2942,7 +2942,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
pkt_dev->next_tx_ns = 0;
}
- spin_unlock_bh(&odev->xmit_lock);
+ netif_tx_unlock_bh(odev);
/* If pkt_dev->count is zero, then run forever */
if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index fb3770f9c094..8e5044ba3ab6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -172,9 +172,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
shinfo = skb_shinfo(skb);
atomic_set(&shinfo->dataref, 1);
shinfo->nr_frags = 0;
- shinfo->tso_size = 0;
- shinfo->tso_segs = 0;
- shinfo->ufo_size = 0;
+ shinfo->gso_size = 0;
+ shinfo->gso_segs = 0;
+ shinfo->gso_type = 0;
shinfo->ip6_frag_id = 0;
shinfo->frag_list = NULL;
@@ -238,8 +238,9 @@ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp,
atomic_set(&(skb_shinfo(skb)->dataref), 1);
skb_shinfo(skb)->nr_frags = 0;
- skb_shinfo(skb)->tso_size = 0;
- skb_shinfo(skb)->tso_segs = 0;
+ skb_shinfo(skb)->gso_size = 0;
+ skb_shinfo(skb)->gso_segs = 0;
+ skb_shinfo(skb)->gso_type = 0;
skb_shinfo(skb)->frag_list = NULL;
out:
return skb;
@@ -464,7 +465,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
C(input_dev);
#endif
-
+ skb_copy_secmark(n, skb);
#endif
C(truesize);
atomic_set(&n->users, 1);
@@ -526,9 +527,11 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
#endif
new->tc_index = old->tc_index;
#endif
+ skb_copy_secmark(new, old);
atomic_set(&new->users, 1);
- skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size;
- skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs;
+ skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size;
+ skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs;
+ skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type;
}
/**
@@ -780,32 +783,46 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
* filled. Used by network drivers which may DMA or transfer data
* beyond the buffer end onto the wire.
*
- * May return NULL in out of memory cases.
+ * May return error in out of memory cases. The skb is freed on error.
*/
-struct sk_buff *skb_pad(struct sk_buff *skb, int pad)
+int skb_pad(struct sk_buff *skb, int pad)
{
- struct sk_buff *nskb;
+ int err;
+ int ntail;
/* If the skbuff is non linear tailroom is always zero.. */
- if (skb_tailroom(skb) >= pad) {
+ if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) {
memset(skb->data+skb->len, 0, pad);
- return skb;
+ return 0;
}
-
- nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC);
+
+ ntail = skb->data_len + pad - (skb->end - skb->tail);
+ if (likely(skb_cloned(skb) || ntail > 0)) {
+ err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC);
+ if (unlikely(err))
+ goto free_skb;
+ }
+
+ /* FIXME: The use of this function with non-linear skb's really needs
+ * to be audited.
+ */
+ err = skb_linearize(skb);
+ if (unlikely(err))
+ goto free_skb;
+
+ memset(skb->data + skb->len, 0, pad);
+ return 0;
+
+free_skb:
kfree_skb(skb);
- if (nskb)
- memset(nskb->data+nskb->len, 0, pad);
- return nskb;
+ return err;
}
-/* Trims skb to length len. It can change skb pointers, if "realloc" is 1.
- * If realloc==0 and trimming is impossible without change of data,
- * it is BUG().
+/* Trims skb to length len. It can change skb pointers.
*/
-int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
+int ___pskb_trim(struct sk_buff *skb, unsigned int len)
{
int offset = skb_headlen(skb);
int nfrags = skb_shinfo(skb)->nr_frags;
@@ -815,7 +832,6 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc)
int end = offset + skb_shinfo(skb)->frags[i].size;
if (end > len) {
if (skb_cloned(skb)) {
- BUG_ON(!realloc);
if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
return -ENOMEM;
}
@@ -1826,6 +1842,132 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
EXPORT_SYMBOL_GPL(skb_pull_rcsum);
+/**
+ * skb_segment - Perform protocol segmentation on skb.
+ * @skb: buffer to segment
+ * @sg: whether scatter-gather can be used for generated segments
+ *
+ * This function performs segmentation on the given skb. It returns
+ * the segment at the given position. It returns NULL if there are
+ * no more segments to generate, or when an error is encountered.
+ */
+struct sk_buff *skb_segment(struct sk_buff *skb, int sg)
+{
+ struct sk_buff *segs = NULL;
+ struct sk_buff *tail = NULL;
+ unsigned int mss = skb_shinfo(skb)->gso_size;
+ unsigned int doffset = skb->data - skb->mac.raw;
+ unsigned int offset = doffset;
+ unsigned int headroom;
+ unsigned int len;
+ int nfrags = skb_shinfo(skb)->nr_frags;
+ int err = -ENOMEM;
+ int i = 0;
+ int pos;
+
+ __skb_push(skb, doffset);
+ headroom = skb_headroom(skb);
+ pos = skb_headlen(skb);
+
+ do {
+ struct sk_buff *nskb;
+ skb_frag_t *frag;
+ int hsize, nsize;
+ int k;
+ int size;
+
+ len = skb->len - offset;
+ if (len > mss)
+ len = mss;
+
+ hsize = skb_headlen(skb) - offset;
+ if (hsize < 0)
+ hsize = 0;
+ nsize = hsize + doffset;
+ if (nsize > len + doffset || !sg)
+ nsize = len + doffset;
+
+ nskb = alloc_skb(nsize + headroom, GFP_ATOMIC);
+ if (unlikely(!nskb))
+ goto err;
+
+ if (segs)
+ tail->next = nskb;
+ else
+ segs = nskb;
+ tail = nskb;
+
+ nskb->dev = skb->dev;
+ nskb->priority = skb->priority;
+ nskb->protocol = skb->protocol;
+ nskb->dst = dst_clone(skb->dst);
+ memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
+ nskb->pkt_type = skb->pkt_type;
+ nskb->mac_len = skb->mac_len;
+
+ skb_reserve(nskb, headroom);
+ nskb->mac.raw = nskb->data;
+ nskb->nh.raw = nskb->data + skb->mac_len;
+ nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw);
+ memcpy(skb_put(nskb, doffset), skb->data, doffset);
+
+ if (!sg) {
+ nskb->csum = skb_copy_and_csum_bits(skb, offset,
+ skb_put(nskb, len),
+ len, 0);
+ continue;
+ }
+
+ frag = skb_shinfo(nskb)->frags;
+ k = 0;
+
+ nskb->ip_summed = CHECKSUM_HW;
+ nskb->csum = skb->csum;
+ memcpy(skb_put(nskb, hsize), skb->data + offset, hsize);
+
+ while (pos < offset + len) {
+ BUG_ON(i >= nfrags);
+
+ *frag = skb_shinfo(skb)->frags[i];
+ get_page(frag->page);
+ size = frag->size;
+
+ if (pos < offset) {
+ frag->page_offset += offset - pos;
+ frag->size -= offset - pos;
+ }
+
+ k++;
+
+ if (pos + size <= offset + len) {
+ i++;
+ pos += size;
+ } else {
+ frag->size -= pos + size - (offset + len);
+ break;
+ }
+
+ frag++;
+ }
+
+ skb_shinfo(nskb)->nr_frags = k;
+ nskb->data_len = len - hsize;
+ nskb->len += nskb->data_len;
+ nskb->truesize += nskb->data_len;
+ } while ((offset += len) < skb->len);
+
+ return segs;
+
+err:
+ while ((skb = segs)) {
+ segs = skb->next;
+ kfree(skb);
+ }
+ return ERR_PTR(err);
+}
+
+EXPORT_SYMBOL_GPL(skb_segment);
+
void __init skb_init(void)
{
skbuff_head_cache = kmem_cache_create("skbuff_head_cache",
diff --git a/net/core/sock.c b/net/core/sock.c
index ed2afdb9ea2d..5d820c376653 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -832,6 +832,9 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
atomic_set(&newsk->sk_omem_alloc, 0);
skb_queue_head_init(&newsk->sk_receive_queue);
skb_queue_head_init(&newsk->sk_write_queue);
+#ifdef CONFIG_NET_DMA
+ skb_queue_head_init(&newsk->sk_async_wait_queue);
+#endif
rwlock_init(&newsk->sk_dst_lock);
rwlock_init(&newsk->sk_callback_lock);
@@ -1383,6 +1386,9 @@ void sock_init_data(struct socket *sock, struct sock *sk)
skb_queue_head_init(&sk->sk_receive_queue);
skb_queue_head_init(&sk->sk_write_queue);
skb_queue_head_init(&sk->sk_error_queue);
+#ifdef CONFIG_NET_DMA
+ skb_queue_head_init(&sk->sk_async_wait_queue);
+#endif
sk->sk_send_head = NULL;
diff --git a/net/core/user_dma.c b/net/core/user_dma.c
new file mode 100644
index 000000000000..b7c98dbcdb81
--- /dev/null
+++ b/net/core/user_dma.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ * Portions based on net/core/datagram.c and copyrighted by their authors.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This code allows the net stack to make use of a DMA engine for
+ * skb to iovec copies.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/socket.h>
+#include <linux/rtnetlink.h> /* for BUG_TRAP */
+#include <net/tcp.h>
+
+#define NET_DMA_DEFAULT_COPYBREAK 4096
+
+int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK;
+
+/**
+ * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec.
+ * @skb - buffer to copy
+ * @offset - offset in the buffer to start copying from
+ * @iovec - io vector to copy to
+ * @len - amount of data to copy from buffer to iovec
+ * @pinned_list - locked iovec buffer data
+ *
+ * Note: the iovec is modified during the copy.
+ */
+int dma_skb_copy_datagram_iovec(struct dma_chan *chan,
+ struct sk_buff *skb, int offset, struct iovec *to,
+ size_t len, struct dma_pinned_list *pinned_list)
+{
+ int start = skb_headlen(skb);
+ int i, copy = start - offset;
+ dma_cookie_t cookie = 0;
+
+ /* Copy header. */
+ if (copy > 0) {
+ if (copy > len)
+ copy = len;
+ cookie = dma_memcpy_to_iovec(chan, to, pinned_list,
+ skb->data + offset, copy);
+ if (cookie < 0)
+ goto fault;
+ len -= copy;
+ if (len == 0)
+ goto end;
+ offset += copy;
+ }
+
+ /* Copy paged appendix. Hmm... why does this look so complicated? */
+ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + skb_shinfo(skb)->frags[i].size;
+ copy = end - offset;
+ if ((copy = end - offset) > 0) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+ struct page *page = frag->page;
+
+ if (copy > len)
+ copy = len;
+
+ cookie = dma_memcpy_pg_to_iovec(chan, to, pinned_list, page,
+ frag->page_offset + offset - start, copy);
+ if (cookie < 0)
+ goto fault;
+ len -= copy;
+ if (len == 0)
+ goto end;
+ offset += copy;
+ }
+ start = end;
+ }
+
+ if (skb_shinfo(skb)->frag_list) {
+ struct sk_buff *list = skb_shinfo(skb)->frag_list;
+
+ for (; list; list = list->next) {
+ int end;
+
+ BUG_TRAP(start <= offset + len);
+
+ end = start + list->len;
+ copy = end - offset;
+ if (copy > 0) {
+ if (copy > len)
+ copy = len;
+ cookie = dma_skb_copy_datagram_iovec(chan, list,
+ offset - start, to, copy,
+ pinned_list);
+ if (cookie < 0)
+ goto fault;
+ len -= copy;
+ if (len == 0)
+ goto end;
+ offset += copy;
+ }
+ start = end;
+ }
+ }
+
+end:
+ if (!len) {
+ skb->dma_cookie = cookie;
+ return cookie;
+ }
+
+fault:
+ return -EFAULT;
+}