summaryrefslogtreecommitdiffstats
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/Makefile3
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c258
-rw-r--r--net/core/drop_monitor.c263
-rw-r--r--net/core/ethtool.c58
-rw-r--r--net/core/fib_rules.c3
-rw-r--r--net/core/neighbour.c15
-rw-r--r--net/core/net-sysfs.c2
-rw-r--r--net/core/net-traces.c29
-rw-r--r--net/core/pktgen.c18
-rw-r--r--net/core/rtnetlink.c9
-rw-r--r--net/core/skbuff.c235
-rw-r--r--net/core/sock.c95
-rw-r--r--net/core/sysctl_net_core.c1
-rw-r--r--net/core/utils.c1
15 files changed, 769 insertions, 223 deletions
diff --git a/net/core/Makefile b/net/core/Makefile
index 26a37cb31923..796f46eece5f 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -17,3 +17,6 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o
obj-$(CONFIG_NETPOLL) += netpoll.o
obj-$(CONFIG_NET_DMA) += user_dma.o
obj-$(CONFIG_FIB_RULES) += fib_rules.o
+obj-$(CONFIG_TRACEPOINTS) += net-traces.o
+obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
+
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 5e2ac0c4b07c..d0de644b378d 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -208,7 +208,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
{
- kfree_skb(skb);
+ consume_skb(skb);
sk_mem_reclaim_partial(sk);
}
diff --git a/net/core/dev.c b/net/core/dev.c
index e3fe5c705606..052dd478d3e1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1668,8 +1668,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
{
const struct net_device_ops *ops = dev->netdev_ops;
+ int rc;
- prefetch(&dev->netdev_ops->ndo_start_xmit);
if (likely(!skb->next)) {
if (!list_empty(&ptype_all))
dev_queue_xmit_nit(skb, dev);
@@ -1681,13 +1681,27 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
goto gso;
}
- return ops->ndo_start_xmit(skb, dev);
+ rc = ops->ndo_start_xmit(skb, dev);
+ /*
+ * TODO: if skb_orphan() was called by
+ * dev->hard_start_xmit() (for example, the unmodified
+ * igb driver does that; bnx2 doesn't), then
+ * skb_tx_software_timestamp() will be unable to send
+ * back the time stamp.
+ *
+ * How can this be prevented? Always create another
+ * reference to the socket before calling
+ * dev->hard_start_xmit()? Prevent that skb_orphan()
+ * does anything in dev->hard_start_xmit() by clearing
+ * the skb destructor before the call and restoring it
+ * afterwards, then doing the skb_orphan() ourselves?
+ */
+ return rc;
}
gso:
do {
struct sk_buff *nskb = skb->next;
- int rc;
skb->next = nskb->next;
nskb->next = NULL;
@@ -1708,59 +1722,24 @@ out_kfree_skb:
return 0;
}
-static u32 simple_tx_hashrnd;
-static int simple_tx_hashrnd_initialized = 0;
+static u32 skb_tx_hashrnd;
-static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
+u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
{
- u32 addr1, addr2, ports;
- u32 hash, ihl;
- u8 ip_proto = 0;
-
- if (unlikely(!simple_tx_hashrnd_initialized)) {
- get_random_bytes(&simple_tx_hashrnd, 4);
- simple_tx_hashrnd_initialized = 1;
- }
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
- ip_proto = ip_hdr(skb)->protocol;
- addr1 = ip_hdr(skb)->saddr;
- addr2 = ip_hdr(skb)->daddr;
- ihl = ip_hdr(skb)->ihl;
- break;
- case htons(ETH_P_IPV6):
- ip_proto = ipv6_hdr(skb)->nexthdr;
- addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
- addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
- ihl = (40 >> 2);
- break;
- default:
- return 0;
- }
-
+ u32 hash;
- switch (ip_proto) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_DCCP:
- case IPPROTO_ESP:
- case IPPROTO_AH:
- case IPPROTO_SCTP:
- case IPPROTO_UDPLITE:
- ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
- break;
-
- default:
- ports = 0;
- break;
- }
+ if (skb_rx_queue_recorded(skb)) {
+ hash = skb_get_rx_queue(skb);
+ } else if (skb->sk && skb->sk->sk_hash) {
+ hash = skb->sk->sk_hash;
+ } else
+ hash = skb->protocol;
- hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
+ hash = jhash_1word(hash, skb_tx_hashrnd);
return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
}
+EXPORT_SYMBOL(skb_tx_hash);
static struct netdev_queue *dev_pick_tx(struct net_device *dev,
struct sk_buff *skb)
@@ -1771,7 +1750,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
if (ops->ndo_select_queue)
queue_index = ops->ndo_select_queue(dev, skb);
else if (dev->real_num_tx_queues > 1)
- queue_index = simple_tx_hash(dev, skb);
+ queue_index = skb_tx_hash(dev, skb);
skb_set_queue_mapping(skb, queue_index);
return netdev_get_tx_queue(dev, queue_index);
@@ -2297,6 +2276,8 @@ ncls:
if (!skb)
goto out;
+ skb_orphan(skb);
+
type = skb->protocol;
list_for_each_entry_rcu(ptype,
&ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
@@ -2366,7 +2347,6 @@ static int napi_gro_complete(struct sk_buff *skb)
out:
skb_shinfo(skb)->gso_size = 0;
- __skb_push(skb, -skb_network_offset(skb));
return netif_receive_skb(skb);
}
@@ -2380,20 +2360,40 @@ void napi_gro_flush(struct napi_struct *napi)
napi_gro_complete(skb);
}
+ napi->gro_count = 0;
napi->gro_list = NULL;
}
EXPORT_SYMBOL(napi_gro_flush);
+void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
+{
+ unsigned int offset = skb_gro_offset(skb);
+
+ hlen += offset;
+ if (hlen <= skb_headlen(skb))
+ return skb->data + offset;
+
+ if (unlikely(!skb_shinfo(skb)->nr_frags ||
+ skb_shinfo(skb)->frags[0].size <=
+ hlen - skb_headlen(skb) ||
+ PageHighMem(skb_shinfo(skb)->frags[0].page)))
+ return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
+
+ return page_address(skb_shinfo(skb)->frags[0].page) +
+ skb_shinfo(skb)->frags[0].page_offset +
+ offset - skb_headlen(skb);
+}
+EXPORT_SYMBOL(skb_gro_header);
+
int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff **pp = NULL;
struct packet_type *ptype;
__be16 type = skb->protocol;
struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
- int count = 0;
int same_flow;
int mac_len;
- int free;
+ int ret;
if (!(skb->dev->features & NETIF_F_GRO))
goto normal;
@@ -2403,30 +2403,16 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
rcu_read_lock();
list_for_each_entry_rcu(ptype, head, list) {
- struct sk_buff *p;
-
if (ptype->type != type || ptype->dev || !ptype->gro_receive)
continue;
- skb_reset_network_header(skb);
+ skb_set_network_header(skb, skb_gro_offset(skb));
mac_len = skb->network_header - skb->mac_header;
skb->mac_len = mac_len;
NAPI_GRO_CB(skb)->same_flow = 0;
NAPI_GRO_CB(skb)->flush = 0;
NAPI_GRO_CB(skb)->free = 0;
- for (p = napi->gro_list; p; p = p->next) {
- count++;
-
- if (!NAPI_GRO_CB(p)->same_flow)
- continue;
-
- if (p->mac_len != mac_len ||
- memcmp(skb_mac_header(p), skb_mac_header(skb),
- mac_len))
- NAPI_GRO_CB(p)->same_flow = 0;
- }
-
pp = ptype->gro_receive(&napi->gro_list, skb);
break;
}
@@ -2436,7 +2422,7 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
goto normal;
same_flow = NAPI_GRO_CB(skb)->same_flow;
- free = NAPI_GRO_CB(skb)->free;
+ ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
if (pp) {
struct sk_buff *nskb = *pp;
@@ -2444,27 +2430,35 @@ int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
*pp = nskb->next;
nskb->next = NULL;
napi_gro_complete(nskb);
- count--;
+ napi->gro_count--;
}
if (same_flow)
goto ok;
- if (NAPI_GRO_CB(skb)->flush || count >= MAX_GRO_SKBS) {
- __skb_push(skb, -skb_network_offset(skb));
+ if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
goto normal;
- }
+ napi->gro_count++;
NAPI_GRO_CB(skb)->count = 1;
- skb_shinfo(skb)->gso_size = skb->len;
+ skb_shinfo(skb)->gso_size = skb_gro_len(skb);
skb->next = napi->gro_list;
napi->gro_list = skb;
+ ret = GRO_HELD;
+
+pull:
+ if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
+ if (napi->gro_list == skb)
+ napi->gro_list = skb->next;
+ ret = GRO_DROP;
+ }
ok:
- return free;
+ return ret;
normal:
- return -1;
+ ret = GRO_NORMAL;
+ goto pull;
}
EXPORT_SYMBOL(dev_gro_receive);
@@ -2472,29 +2466,44 @@ static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
{
struct sk_buff *p;
+ if (netpoll_rx_on(skb))
+ return GRO_NORMAL;
+
for (p = napi->gro_list; p; p = p->next) {
- NAPI_GRO_CB(p)->same_flow = 1;
+ NAPI_GRO_CB(p)->same_flow = !compare_ether_header(
+ skb_mac_header(p), skb_gro_mac_header(skb));
NAPI_GRO_CB(p)->flush = 0;
}
return dev_gro_receive(napi, skb);
}
-int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+int napi_skb_finish(int ret, struct sk_buff *skb)
{
- if (netpoll_receive_skb(skb))
- return NET_RX_DROP;
+ int err = NET_RX_SUCCESS;
- switch (__napi_gro_receive(napi, skb)) {
- case -1:
+ switch (ret) {
+ case GRO_NORMAL:
return netif_receive_skb(skb);
- case 1:
+ case GRO_DROP:
+ err = NET_RX_DROP;
+ /* fall through */
+
+ case GRO_MERGED_FREE:
kfree_skb(skb);
break;
}
- return NET_RX_SUCCESS;
+ return err;
+}
+EXPORT_SYMBOL(napi_skb_finish);
+
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+ skb_gro_reset_offset(skb);
+
+ return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
}
EXPORT_SYMBOL(napi_gro_receive);
@@ -2512,6 +2521,9 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
{
struct net_device *dev = napi->dev;
struct sk_buff *skb = napi->skb;
+ struct ethhdr *eth;
+ skb_frag_t *frag;
+ int i;
napi->skb = NULL;
@@ -2524,20 +2536,36 @@ struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
}
BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
+ frag = &info->frags[info->nr_frags - 1];
+
+ for (i = skb_shinfo(skb)->nr_frags; i < info->nr_frags; i++) {
+ skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
+ frag->size);
+ frag++;
+ }
skb_shinfo(skb)->nr_frags = info->nr_frags;
- memcpy(skb_shinfo(skb)->frags, info->frags, sizeof(info->frags));
skb->data_len = info->len;
skb->len += info->len;
skb->truesize += info->len;
- if (!pskb_may_pull(skb, ETH_HLEN)) {
+ skb_reset_mac_header(skb);
+ skb_gro_reset_offset(skb);
+
+ eth = skb_gro_header(skb, sizeof(*eth));
+ if (!eth) {
napi_reuse_skb(napi, skb);
skb = NULL;
goto out;
}
- skb->protocol = eth_type_trans(skb, dev);
+ skb_gro_pull(skb, sizeof(*eth));
+
+ /*
+ * This works because the only protocols we care about don't require
+ * special handling. We'll fix it up properly at the end.
+ */
+ skb->protocol = eth->h_proto;
skb->ip_summed = info->ip_summed;
skb->csum = info->csum;
@@ -2547,32 +2575,43 @@ out:
}
EXPORT_SYMBOL(napi_fraginfo_skb);
-int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
{
- struct sk_buff *skb = napi_fraginfo_skb(napi, info);
- int err = NET_RX_DROP;
+ int err = NET_RX_SUCCESS;
- if (!skb)
- goto out;
+ switch (ret) {
+ case GRO_NORMAL:
+ case GRO_HELD:
+ skb->protocol = eth_type_trans(skb, napi->dev);
- if (netpoll_receive_skb(skb))
- goto out;
+ if (ret == GRO_NORMAL)
+ return netif_receive_skb(skb);
- err = NET_RX_SUCCESS;
+ skb_gro_pull(skb, -ETH_HLEN);
+ break;
- switch (__napi_gro_receive(napi, skb)) {
- case -1:
- return netif_receive_skb(skb);
+ case GRO_DROP:
+ err = NET_RX_DROP;
+ /* fall through */
- case 0:
- goto out;
+ case GRO_MERGED_FREE:
+ napi_reuse_skb(napi, skb);
+ break;
}
- napi_reuse_skb(napi, skb);
-
-out:
return err;
}
+EXPORT_SYMBOL(napi_frags_finish);
+
+int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
+{
+ struct sk_buff *skb = napi_fraginfo_skb(napi, info);
+
+ if (!skb)
+ return NET_RX_DROP;
+
+ return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
+}
EXPORT_SYMBOL(napi_gro_frags);
static int process_backlog(struct napi_struct *napi, int quota)
@@ -2653,6 +2692,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
int (*poll)(struct napi_struct *, int), int weight)
{
INIT_LIST_HEAD(&napi->poll_list);
+ napi->gro_count = 0;
napi->gro_list = NULL;
napi->skb = NULL;
napi->poll = poll;
@@ -2681,6 +2721,7 @@ void netif_napi_del(struct napi_struct *napi)
}
napi->gro_list = NULL;
+ napi->gro_count = 0;
}
EXPORT_SYMBOL(netif_napi_del);
@@ -3949,6 +3990,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
cmd == SIOCSMIIREG ||
cmd == SIOCBRADDIF ||
cmd == SIOCBRDELIF ||
+ cmd == SIOCSHWTSTAMP ||
cmd == SIOCWANDEV) {
err = -EOPNOTSUPP;
if (ops->ndo_do_ioctl) {
@@ -4103,6 +4145,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
case SIOCBONDCHANGEACTIVE:
case SIOCBRADDIF:
case SIOCBRDELIF:
+ case SIOCSHWTSTAMP:
if (!capable(CAP_NET_ADMIN))
return -EPERM;
/* fall through */
@@ -5199,6 +5242,7 @@ static int __init net_dev_init(void)
queue->backlog.poll = process_backlog;
queue->backlog.weight = weight_p;
queue->backlog.gro_list = NULL;
+ queue->backlog.gro_count = 0;
}
dev_boot_phase = 0;
@@ -5231,6 +5275,14 @@ out:
subsys_initcall(net_dev_init);
+static int __init initialize_hashrnd(void)
+{
+ get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
+ return 0;
+}
+
+late_initcall_sync(initialize_hashrnd);
+
EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
new file mode 100644
index 000000000000..9fd0dc3cca99
--- /dev/null
+++ b/net/core/drop_monitor.c
@@ -0,0 +1,263 @@
+/*
+ * Monitoring code for network dropped packet alerts
+ *
+ * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/string.h>
+#include <linux/if_arp.h>
+#include <linux/inetdevice.h>
+#include <linux/inet.h>
+#include <linux/interrupt.h>
+#include <linux/netpoll.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/netlink.h>
+#include <linux/net_dropmon.h>
+#include <linux/percpu.h>
+#include <linux/timer.h>
+#include <linux/bitops.h>
+#include <net/genetlink.h>
+
+#include <trace/skb.h>
+
+#include <asm/unaligned.h>
+
+#define TRACE_ON 1
+#define TRACE_OFF 0
+
+static void send_dm_alert(struct work_struct *unused);
+
+
+/*
+ * Globals, our netlink socket pointer
+ * and the work handle that will send up
+ * netlink alerts
+ */
+struct sock *dm_sock;
+
+struct per_cpu_dm_data {
+ struct work_struct dm_alert_work;
+ struct sk_buff *skb;
+ atomic_t dm_hit_count;
+ struct timer_list send_timer;
+};
+
+static struct genl_family net_drop_monitor_family = {
+ .id = GENL_ID_GENERATE,
+ .hdrsize = 0,
+ .name = "NET_DM",
+ .version = 1,
+ .maxattr = NET_DM_CMD_MAX,
+};
+
+static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
+
+static int dm_hit_limit = 64;
+static int dm_delay = 1;
+
+
+static void reset_per_cpu_data(struct per_cpu_dm_data *data)
+{
+ size_t al;
+ struct net_dm_alert_msg *msg;
+
+ al = sizeof(struct net_dm_alert_msg);
+ al += dm_hit_limit * sizeof(struct net_dm_drop_point);
+ data->skb = genlmsg_new(al, GFP_KERNEL);
+ genlmsg_put(data->skb, 0, 0, &net_drop_monitor_family,
+ 0, NET_DM_CMD_ALERT);
+ msg = __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_alert_msg));
+ memset(msg, 0, al);
+ atomic_set(&data->dm_hit_count, dm_hit_limit);
+}
+
+static void send_dm_alert(struct work_struct *unused)
+{
+ struct sk_buff *skb;
+ struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+
+ /*
+ * Grab the skb we're about to send
+ */
+ skb = data->skb;
+
+ /*
+ * Replace it with a new one
+ */
+ reset_per_cpu_data(data);
+
+ /*
+ * Ship it!
+ */
+ genlmsg_multicast(skb, 0, NET_DM_GRP_ALERT, GFP_KERNEL);
+
+}
+
+/*
+ * This is the timer function to delay the sending of an alert
+ * in the event that more drops will arrive during the
+ * hysteresis period. Note that it operates under the timer interrupt
+ * so we don't need to disable preemption here
+ */
+static void sched_send_work(unsigned long unused)
+{
+ struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+
+ schedule_work(&data->dm_alert_work);
+}
+
+static void trace_kfree_skb_hit(struct sk_buff *skb, void *location)
+{
+ struct net_dm_alert_msg *msg;
+ struct nlmsghdr *nlh;
+ int i;
+ struct per_cpu_dm_data *data = &__get_cpu_var(dm_cpu_data);
+
+
+ if (!atomic_add_unless(&data->dm_hit_count, -1, 0)) {
+ /*
+ * we're already at zero, discard this hit
+ */
+ goto out;
+ }
+
+ nlh = (struct nlmsghdr *)data->skb->data;
+ msg = genlmsg_data(nlmsg_data(nlh));
+ for (i = 0; i < msg->entries; i++) {
+ if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) {
+ msg->points[i].count++;
+ goto out;
+ }
+ }
+
+ /*
+ * We need to create a new entry
+ */
+ __nla_reserve_nohdr(data->skb, sizeof(struct net_dm_drop_point));
+ memcpy(msg->points[msg->entries].pc, &location, sizeof(void *));
+ msg->points[msg->entries].count = 1;
+ msg->entries++;
+
+ if (!timer_pending(&data->send_timer)) {
+ data->send_timer.expires = jiffies + dm_delay * HZ;
+ add_timer_on(&data->send_timer, smp_processor_id());
+ }
+
+out:
+ return;
+}
+
+static int set_all_monitor_traces(int state)
+{
+ int rc = 0;
+
+ switch (state) {
+ case TRACE_ON:
+ rc |= register_trace_kfree_skb(trace_kfree_skb_hit);
+ break;
+ case TRACE_OFF:
+ rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit);
+
+ tracepoint_synchronize_unregister();
+ break;
+ default:
+ rc = 1;
+ break;
+ }
+
+ if (rc)
+ return -EINPROGRESS;
+ return rc;
+}
+
+
+static int net_dm_cmd_config(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ return -ENOTSUPP;
+}
+
+static int net_dm_cmd_trace(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ switch (info->genlhdr->cmd) {
+ case NET_DM_CMD_START:
+ return set_all_monitor_traces(TRACE_ON);
+ break;
+ case NET_DM_CMD_STOP:
+ return set_all_monitor_traces(TRACE_OFF);
+ break;
+ }
+
+ return -ENOTSUPP;
+}
+
+
+static struct genl_ops dropmon_ops[] = {
+ {
+ .cmd = NET_DM_CMD_CONFIG,
+ .doit = net_dm_cmd_config,
+ },
+ {
+ .cmd = NET_DM_CMD_START,
+ .doit = net_dm_cmd_trace,
+ },
+ {
+ .cmd = NET_DM_CMD_STOP,
+ .doit = net_dm_cmd_trace,
+ },
+};
+
+static int __init init_net_drop_monitor(void)
+{
+ int cpu;
+ int rc, i, ret;
+ struct per_cpu_dm_data *data;
+ printk(KERN_INFO "Initalizing network drop monitor service\n");
+
+ if (sizeof(void *) > 8) {
+ printk(KERN_ERR "Unable to store program counters on this arch, Drop monitor failed\n");
+ return -ENOSPC;
+ }
+
+ if (genl_register_family(&net_drop_monitor_family) < 0) {
+ printk(KERN_ERR "Could not create drop monitor netlink family\n");
+ return -EFAULT;
+ }
+
+ rc = -EFAULT;
+
+ for (i = 0; i < ARRAY_SIZE(dropmon_ops); i++) {
+ ret = genl_register_ops(&net_drop_monitor_family,
+ &dropmon_ops[i]);
+ if (ret) {
+ printk(KERN_CRIT "failed to register operation %d\n",
+ dropmon_ops[i].cmd);
+ goto out_unreg;
+ }
+ }
+
+ rc = 0;
+
+ for_each_present_cpu(cpu) {
+ data = &per_cpu(dm_cpu_data, cpu);
+ reset_per_cpu_data(data);
+ INIT_WORK(&data->dm_alert_work, send_dm_alert);
+ init_timer(&data->send_timer);
+ data->send_timer.data = cpu;
+ data->send_timer.function = sched_send_work;
+ }
+ goto out;
+
+out_unreg:
+ genl_unregister_family(&net_drop_monitor_family);
+out:
+ return rc;
+}
+
+late_initcall(init_net_drop_monitor);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 947710a36ced..244ca56dffac 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -209,34 +209,62 @@ static int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
return 0;
}
-static int ethtool_set_rxhash(struct net_device *dev, void __user *useraddr)
+static int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
{
struct ethtool_rxnfc cmd;
- if (!dev->ethtool_ops->set_rxhash)
+ if (!dev->ethtool_ops->set_rxnfc)
return -EOPNOTSUPP;
if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
return -EFAULT;
- return dev->ethtool_ops->set_rxhash(dev, &cmd);
+ return dev->ethtool_ops->set_rxnfc(dev, &cmd);
}
-static int ethtool_get_rxhash(struct net_device *dev, void __user *useraddr)
+static int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
{
struct ethtool_rxnfc info;
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ int ret;
+ void *rule_buf = NULL;
- if (!dev->ethtool_ops->get_rxhash)
+ if (!ops->get_rxnfc)
return -EOPNOTSUPP;
if (copy_from_user(&info, useraddr, sizeof(info)))
return -EFAULT;
- dev->ethtool_ops->get_rxhash(dev, &info);
+ if (info.cmd == ETHTOOL_GRXCLSRLALL) {
+ if (info.rule_cnt > 0) {
+ rule_buf = kmalloc(info.rule_cnt * sizeof(u32),
+ GFP_USER);
+ if (!rule_buf)
+ return -ENOMEM;
+ }
+ }
+ ret = ops->get_rxnfc(dev, &info, rule_buf);
+ if (ret < 0)
+ goto err_out;
+
+ ret = -EFAULT;
if (copy_to_user(useraddr, &info, sizeof(info)))
- return -EFAULT;
- return 0;
+ goto err_out;
+
+ if (rule_buf) {
+ useraddr += offsetof(struct ethtool_rxnfc, rule_locs);
+ if (copy_to_user(useraddr, rule_buf,
+ info.rule_cnt * sizeof(u32)))
+ goto err_out;
+ }
+ ret = 0;
+
+err_out:
+ if (rule_buf)
+ kfree(rule_buf);
+
+ return ret;
}
static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
@@ -901,6 +929,10 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
case ETHTOOL_GFLAGS:
case ETHTOOL_GPFLAGS:
case ETHTOOL_GRXFH:
+ case ETHTOOL_GRXRINGS:
+ case ETHTOOL_GRXCLSRLCNT:
+ case ETHTOOL_GRXCLSRULE:
+ case ETHTOOL_GRXCLSRLALL:
break;
default:
if (!capable(CAP_NET_ADMIN))
@@ -1052,10 +1084,16 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
dev->ethtool_ops->set_priv_flags);
break;
case ETHTOOL_GRXFH:
- rc = ethtool_get_rxhash(dev, useraddr);
+ case ETHTOOL_GRXRINGS:
+ case ETHTOOL_GRXCLSRLCNT:
+ case ETHTOOL_GRXCLSRULE:
+ case ETHTOOL_GRXCLSRLALL:
+ rc = ethtool_get_rxnfc(dev, useraddr);
break;
case ETHTOOL_SRXFH:
- rc = ethtool_set_rxhash(dev, useraddr);
+ case ETHTOOL_SRXCLSRLDEL:
+ case ETHTOOL_SRXCLSRLINS:
+ rc = ethtool_set_rxnfc(dev, useraddr);
break;
case ETHTOOL_GGRO:
rc = ethtool_get_gro(dev, useraddr);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 32b3a0152d7a..98691e1466b8 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -588,7 +588,8 @@ static void notify_rule_change(int event, struct fib_rule *rule,
goto errout;
}
- err = rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
+ rtnl_notify(skb, net, pid, ops->nlgroup, nlh, GFP_KERNEL);
+ return;
errout:
if (err < 0)
rtnl_set_sk_err(net, ops->nlgroup, err);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 278a142d1047..a1cbce7fdae5 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -871,8 +871,7 @@ static void neigh_timer_handler(unsigned long arg)
write_unlock(&neigh->lock);
neigh->ops->solicit(neigh, skb);
atomic_inc(&neigh->probes);
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
} else {
out:
write_unlock(&neigh->lock);
@@ -908,8 +907,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
neigh->updated = jiffies;
write_unlock_bh(&neigh->lock);
- if (skb)
- kfree_skb(skb);
+ kfree_skb(skb);
return 1;
}
} else if (neigh->nud_state & NUD_STALE) {
@@ -1656,7 +1654,11 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
flags &= ~NEIGH_UPDATE_F_OVERRIDE;
}
- err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
+ if (ndm->ndm_flags & NTF_USE) {
+ neigh_event_send(neigh, NULL);
+ err = 0;
+ } else
+ err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
neigh_release(neigh);
goto out_dev_put;
}
@@ -2534,7 +2536,8 @@ static void __neigh_notify(struct neighbour *n, int type, int flags)
kfree_skb(skb);
goto errout;
}
- err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+ rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+ return;
errout:
if (err < 0)
rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 484f58750eba..2da59a0ac4ac 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -498,7 +498,7 @@ int netdev_register_kobject(struct net_device *net)
dev->groups = groups;
BUILD_BUG_ON(BUS_ID_SIZE < IFNAMSIZ);
- dev_set_name(dev, net->name);
+ dev_set_name(dev, "%s", net->name);
#ifdef CONFIG_SYSFS
*groups++ = &netstat_group;
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
new file mode 100644
index 000000000000..c8fb45665e4f
--- /dev/null
+++ b/net/core/net-traces.c
@@ -0,0 +1,29 @@
+/*
+ * consolidates trace point definitions
+ *
+ * Copyright (C) 2009 Neil Horman <nhorman@tuxdriver.com>
+ */
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/string.h>
+#include <linux/if_arp.h>
+#include <linux/inetdevice.h>
+#include <linux/inet.h>
+#include <linux/interrupt.h>
+#include <linux/netpoll.h>
+#include <linux/sched.h>
+#include <linux/delay.h>
+#include <linux/rcupdate.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/netlink.h>
+#include <linux/net_dropmon.h>
+#include <trace/skb.h>
+
+#include <asm/unaligned.h>
+#include <asm/bitops.h>
+
+
+DEFINE_TRACE(kfree_skb);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 65498483325a..32d419f5ac98 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3275,8 +3275,7 @@ static void pktgen_stop(struct pktgen_thread *t)
list_for_each_entry(pkt_dev, &t->if_list, list) {
pktgen_stop_device(pkt_dev);
- if (pkt_dev->skb)
- kfree_skb(pkt_dev->skb);
+ kfree_skb(pkt_dev->skb);
pkt_dev->skb = NULL;
}
@@ -3303,8 +3302,7 @@ static void pktgen_rem_one_if(struct pktgen_thread *t)
if (!cur->removal_mark)
continue;
- if (cur->skb)
- kfree_skb(cur->skb);
+ kfree_skb(cur->skb);
cur->skb = NULL;
pktgen_remove_device(t, cur);
@@ -3328,8 +3326,7 @@ static void pktgen_rem_all_ifs(struct pktgen_thread *t)
list_for_each_safe(q, n, &t->if_list) {
cur = list_entry(q, struct pktgen_dev, list);
- if (cur->skb)
- kfree_skb(cur->skb);
+ kfree_skb(cur->skb);
cur->skb = NULL;
pktgen_remove_device(t, cur);
@@ -3393,8 +3390,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
if (!netif_running(odev)) {
pktgen_stop_device(pkt_dev);
- if (pkt_dev->skb)
- kfree_skb(pkt_dev->skb);
+ kfree_skb(pkt_dev->skb);
pkt_dev->skb = NULL;
goto out;
}
@@ -3415,8 +3411,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
if ((++pkt_dev->clone_count >= pkt_dev->clone_skb)
|| (!pkt_dev->skb)) {
/* build a new pkt */
- if (pkt_dev->skb)
- kfree_skb(pkt_dev->skb);
+ kfree_skb(pkt_dev->skb);
pkt_dev->skb = fill_packet(odev, pkt_dev);
if (pkt_dev->skb == NULL) {
@@ -3498,8 +3493,7 @@ static __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
/* Done with this */
pktgen_stop_device(pkt_dev);
- if (pkt_dev->skb)
- kfree_skb(pkt_dev->skb);
+ kfree_skb(pkt_dev->skb);
pkt_dev->skb = NULL;
}
out:;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 790dd205bb5d..d78030f88bd0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -455,8 +455,8 @@ int rtnl_unicast(struct sk_buff *skb, struct net *net, u32 pid)
return nlmsg_unicast(rtnl, skb, pid);
}
-int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
- struct nlmsghdr *nlh, gfp_t flags)
+void rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
+ struct nlmsghdr *nlh, gfp_t flags)
{
struct sock *rtnl = net->rtnl;
int report = 0;
@@ -464,7 +464,7 @@ int rtnl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group,
if (nlh)
report = nlmsg_report(nlh);
- return nlmsg_notify(rtnl, skb, pid, group, report, flags);
+ nlmsg_notify(rtnl, skb, pid, group, report, flags);
}
void rtnl_set_sk_err(struct net *net, u32 group, int error)
@@ -1246,7 +1246,8 @@ void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change)
kfree_skb(skb);
goto errout;
}
- err = rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+ rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_KERNEL);
+ return;
errout:
if (err < 0)
rtnl_set_sk_err(net, RTNLGRP_LINK, err);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c6a6b166f8d6..6acbf9e79eb1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -55,6 +55,7 @@
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/scatterlist.h>
+#include <linux/errqueue.h>
#include <net/protocol.h>
#include <net/dst.h>
@@ -64,6 +65,7 @@
#include <asm/uaccess.h>
#include <asm/system.h>
+#include <trace/skb.h>
#include "kmap_skb.h"
@@ -123,6 +125,7 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
skb->dev ? skb->dev->name : "<NULL>");
BUG();
}
+EXPORT_SYMBOL(skb_over_panic);
/**
* skb_under_panic - private function
@@ -142,6 +145,7 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
skb->dev ? skb->dev->name : "<NULL>");
BUG();
}
+EXPORT_SYMBOL(skb_under_panic);
/* Allocate a new skbuff. We do this ourselves so we can fill in a few
* 'private' fields and also do memory statistics to find all the
@@ -205,7 +209,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
shinfo->gso_segs = 0;
shinfo->gso_type = 0;
shinfo->ip6_frag_id = 0;
+ shinfo->tx_flags.flags = 0;
shinfo->frag_list = NULL;
+ memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
if (fclone) {
struct sk_buff *child = skb + 1;
@@ -223,6 +229,7 @@ nodata:
skb = NULL;
goto out;
}
+EXPORT_SYMBOL(__alloc_skb);
/**
* __netdev_alloc_skb - allocate an skbuff for rx on a specific device
@@ -250,6 +257,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
}
return skb;
}
+EXPORT_SYMBOL(__netdev_alloc_skb);
struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
{
@@ -418,6 +426,7 @@ void __kfree_skb(struct sk_buff *skb)
skb_release_all(skb);
kfree_skbmem(skb);
}
+EXPORT_SYMBOL(__kfree_skb);
/**
* kfree_skb - free an sk_buff
@@ -434,8 +443,30 @@ void kfree_skb(struct sk_buff *skb)
smp_rmb();
else if (likely(!atomic_dec_and_test(&skb->users)))
return;
+ trace_kfree_skb(skb, __builtin_return_address(0));
+ __kfree_skb(skb);
+}
+EXPORT_SYMBOL(kfree_skb);
+
+/**
+ * consume_skb - free an skbuff
+ * @skb: buffer to free
+ *
+ * Drop a ref to the buffer and free it if the usage count has hit zero
+ * Functions identically to kfree_skb, but kfree_skb assumes that the frame
+ * is being dropped after a failure and notes that
+ */
+void consume_skb(struct sk_buff *skb)
+{
+ if (unlikely(!skb))
+ return;
+ if (likely(atomic_read(&skb->users) == 1))
+ smp_rmb();
+ else if (likely(!atomic_dec_and_test(&skb->users)))
+ return;
__kfree_skb(skb);
}
+EXPORT_SYMBOL(consume_skb);
/**
* skb_recycle_check - check if skb can be reused for receive
@@ -605,6 +636,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
return __skb_clone(n, skb);
}
+EXPORT_SYMBOL(skb_clone);
static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
{
@@ -671,7 +703,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
copy_skb_header(n, skb);
return n;
}
-
+EXPORT_SYMBOL(skb_copy);
/**
* pskb_copy - create copy of an sk_buff with private head.
@@ -730,6 +762,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
out:
return n;
}
+EXPORT_SYMBOL(pskb_copy);
/**
* pskb_expand_head - reallocate header of &sk_buff
@@ -813,6 +846,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
nodata:
return -ENOMEM;
}
+EXPORT_SYMBOL(pskb_expand_head);
/* Make private copy of skb with writable head and some headroom */
@@ -833,7 +867,7 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
}
return skb2;
}
-
+EXPORT_SYMBOL(skb_realloc_headroom);
/**
* skb_copy_expand - copy and expand sk_buff
@@ -898,6 +932,7 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
return n;
}
+EXPORT_SYMBOL(skb_copy_expand);
/**
* skb_pad - zero pad the tail of an skb
@@ -943,6 +978,7 @@ free_skb:
kfree_skb(skb);
return err;
}
+EXPORT_SYMBOL(skb_pad);
/**
* skb_put - add data to a buffer
@@ -1100,6 +1136,7 @@ done:
return 0;
}
+EXPORT_SYMBOL(___pskb_trim);
/**
* __pskb_pull_tail - advance tail of skb header
@@ -1193,8 +1230,7 @@ unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta)
insp = list;
}
if (!pskb_pull(list, eat)) {
- if (clone)
- kfree_skb(clone);
+ kfree_skb(clone);
return NULL;
}
break;
@@ -1238,6 +1274,7 @@ pull_pages:
return skb_tail_pointer(skb);
}
+EXPORT_SYMBOL(__pskb_pull_tail);
/* Copy some data bits from skb to kernel buffer. */
@@ -1315,6 +1352,7 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len)
fault:
return -EFAULT;
}
+EXPORT_SYMBOL(skb_copy_bits);
/*
* Callback from splice_to_pipe(), if we need to release some pages
@@ -1325,14 +1363,39 @@ static void sock_spd_release(struct splice_pipe_desc *spd, unsigned int i)
put_page(spd->pages[i]);
}
-static inline struct page *linear_to_page(struct page *page, unsigned int len,
- unsigned int offset)
+static inline struct page *linear_to_page(struct page *page, unsigned int *len,
+ unsigned int *offset,
+ struct sk_buff *skb)
{
- struct page *p = alloc_pages(GFP_KERNEL, 0);
+ struct sock *sk = skb->sk;
+ struct page *p = sk->sk_sndmsg_page;
+ unsigned int off;
- if (!p)
- return NULL;
- memcpy(page_address(p) + offset, page_address(page) + offset, len);
+ if (!p) {
+new_page:
+ p = sk->sk_sndmsg_page = alloc_pages(sk->sk_allocation, 0);
+ if (!p)
+ return NULL;
+
+ off = sk->sk_sndmsg_off = 0;
+ /* hold one ref to this page until it's full */
+ } else {
+ unsigned int mlen;
+
+ off = sk->sk_sndmsg_off;
+ mlen = PAGE_SIZE - off;
+ if (mlen < 64 && mlen < *len) {
+ put_page(p);
+ goto new_page;
+ }
+
+ *len = min_t(unsigned int, *len, mlen);
+ }
+
+ memcpy(page_address(p) + off, page_address(page) + *offset, *len);
+ sk->sk_sndmsg_off += *len;
+ *offset = off;
+ get_page(p);
return p;
}
@@ -1341,21 +1404,21 @@ static inline struct page *linear_to_page(struct page *page, unsigned int len,
* Fill page/offset/length into spd, if it can hold more pages.
*/
static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
- unsigned int len, unsigned int offset,
+ unsigned int *len, unsigned int offset,
struct sk_buff *skb, int linear)
{
if (unlikely(spd->nr_pages == PIPE_BUFFERS))
return 1;
if (linear) {
- page = linear_to_page(page, len, offset);
+ page = linear_to_page(page, len, &offset, skb);
if (!page)
return 1;
} else
get_page(page);
spd->pages[spd->nr_pages] = page;
- spd->partial[spd->nr_pages].len = len;
+ spd->partial[spd->nr_pages].len = *len;
spd->partial[spd->nr_pages].offset = offset;
spd->nr_pages++;
@@ -1365,8 +1428,13 @@ static inline int spd_fill_page(struct splice_pipe_desc *spd, struct page *page,
static inline void __segment_seek(struct page **page, unsigned int *poff,
unsigned int *plen, unsigned int off)
{
+ unsigned long n;
+
*poff += off;
- *page += *poff / PAGE_SIZE;
+ n = *poff / PAGE_SIZE;
+ if (n)
+ *page = nth_page(*page, n);
+
*poff = *poff % PAGE_SIZE;
*plen -= off;
}
@@ -1397,7 +1465,7 @@ static inline int __splice_segment(struct page *page, unsigned int poff,
/* the linear region may spread across several pages */
flen = min_t(unsigned int, flen, PAGE_SIZE - poff);
- if (spd_fill_page(spd, page, flen, poff, skb, linear))
+ if (spd_fill_page(spd, page, &flen, poff, skb, linear))
return 1;
__segment_seek(&page, &poff, &plen, flen);
@@ -1590,7 +1658,6 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len)
fault:
return -EFAULT;
}
-
EXPORT_SYMBOL(skb_store_bits);
/* Checksum skb data. */
@@ -1667,6 +1734,7 @@ __wsum skb_checksum(const struct sk_buff *skb, int offset,
return csum;
}
+EXPORT_SYMBOL(skb_checksum);
/* Both of above in one bottle. */
@@ -1748,6 +1816,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
BUG_ON(len);
return csum;
}
+EXPORT_SYMBOL(skb_copy_and_csum_bits);
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
{
@@ -1774,6 +1843,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to)
*((__sum16 *)(to + csstuff)) = csum_fold(csum);
}
}
+EXPORT_SYMBOL(skb_copy_and_csum_dev);
/**
* skb_dequeue - remove from the head of the queue
@@ -1794,6 +1864,7 @@ struct sk_buff *skb_dequeue(struct sk_buff_head *list)
spin_unlock_irqrestore(&list->lock, flags);
return result;
}
+EXPORT_SYMBOL(skb_dequeue);
/**
* skb_dequeue_tail - remove from the tail of the queue
@@ -1813,6 +1884,7 @@ struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list)
spin_unlock_irqrestore(&list->lock, flags);
return result;
}
+EXPORT_SYMBOL(skb_dequeue_tail);
/**
* skb_queue_purge - empty a list
@@ -1828,6 +1900,7 @@ void skb_queue_purge(struct sk_buff_head *list)
while ((skb = skb_dequeue(list)) != NULL)
kfree_skb(skb);
}
+EXPORT_SYMBOL(skb_queue_purge);
/**
* skb_queue_head - queue a buffer at the list head
@@ -1848,6 +1921,7 @@ void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk)
__skb_queue_head(list, newsk);
spin_unlock_irqrestore(&list->lock, flags);
}
+EXPORT_SYMBOL(skb_queue_head);
/**
* skb_queue_tail - queue a buffer at the list tail
@@ -1868,6 +1942,7 @@ void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk)
__skb_queue_tail(list, newsk);
spin_unlock_irqrestore(&list->lock, flags);
}
+EXPORT_SYMBOL(skb_queue_tail);
/**
* skb_unlink - remove a buffer from a list
@@ -1887,6 +1962,7 @@ void skb_unlink(struct sk_buff *skb, struct sk_buff_head *list)
__skb_unlink(skb, list);
spin_unlock_irqrestore(&list->lock, flags);
}
+EXPORT_SYMBOL(skb_unlink);
/**
* skb_append - append a buffer
@@ -1906,7 +1982,7 @@ void skb_append(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head
__skb_queue_after(list, old, newsk);
spin_unlock_irqrestore(&list->lock, flags);
}
-
+EXPORT_SYMBOL(skb_append);
/**
* skb_insert - insert a buffer
@@ -1928,6 +2004,7 @@ void skb_insert(struct sk_buff *old, struct sk_buff *newsk, struct sk_buff_head
__skb_insert(newsk, old->prev, old, list);
spin_unlock_irqrestore(&list->lock, flags);
}
+EXPORT_SYMBOL(skb_insert);
static inline void skb_split_inside_header(struct sk_buff *skb,
struct sk_buff* skb1,
@@ -2006,6 +2083,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len)
else /* Second chunk has no header, nothing to copy. */
skb_split_no_header(skb, skb1, len, pos);
}
+EXPORT_SYMBOL(skb_split);
/* Shifting from/to a cloned skb is a no-go.
*
@@ -2168,6 +2246,7 @@ void skb_prepare_seq_read(struct sk_buff *skb, unsigned int from,
st->frag_idx = st->stepped_offset = 0;
st->frag_data = NULL;
}
+EXPORT_SYMBOL(skb_prepare_seq_read);
/**
* skb_seq_read - Sequentially read skb data
@@ -2255,6 +2334,7 @@ next_skb:
return 0;
}
+EXPORT_SYMBOL(skb_seq_read);
/**
* skb_abort_seq_read - Abort a sequential read of skb data
@@ -2268,6 +2348,7 @@ void skb_abort_seq_read(struct skb_seq_state *st)
if (st->frag_data)
kunmap_skb_frag(st->frag_data);
}
+EXPORT_SYMBOL(skb_abort_seq_read);
#define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb))
@@ -2310,6 +2391,7 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
ret = textsearch_find(config, state);
return (ret <= to - from ? ret : UINT_MAX);
}
+EXPORT_SYMBOL(skb_find_text);
/**
* skb_append_datato_frags: - append the user data to a skb
@@ -2382,6 +2464,7 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
return 0;
}
+EXPORT_SYMBOL(skb_append_datato_frags);
/**
* skb_pull_rcsum - pull skb and update receive checksum
@@ -2569,7 +2652,6 @@ err:
}
return ERR_PTR(err);
}
-
EXPORT_SYMBOL_GPL(skb_segment);
int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
@@ -2577,17 +2659,23 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
struct sk_buff *p = *head;
struct sk_buff *nskb;
unsigned int headroom;
- unsigned int hlen = p->data - skb_mac_header(p);
- unsigned int len = skb->len;
+ unsigned int len = skb_gro_len(skb);
- if (hlen + p->len + len >= 65536)
+ if (p->len + len >= 65536)
return -E2BIG;
if (skb_shinfo(p)->frag_list)
goto merge;
- else if (!skb_headlen(p) && !skb_headlen(skb) &&
- skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags <
- MAX_SKB_FRAGS) {
+ else if (skb_headlen(skb) <= skb_gro_offset(skb)) {
+ if (skb_shinfo(p)->nr_frags + skb_shinfo(skb)->nr_frags >
+ MAX_SKB_FRAGS)
+ return -E2BIG;
+
+ skb_shinfo(skb)->frags[0].page_offset +=
+ skb_gro_offset(skb) - skb_headlen(skb);
+ skb_shinfo(skb)->frags[0].size -=
+ skb_gro_offset(skb) - skb_headlen(skb);
+
memcpy(skb_shinfo(p)->frags + skb_shinfo(p)->nr_frags,
skb_shinfo(skb)->frags,
skb_shinfo(skb)->nr_frags * sizeof(skb_frag_t));
@@ -2604,7 +2692,7 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
}
headroom = skb_headroom(p);
- nskb = netdev_alloc_skb(p->dev, headroom);
+ nskb = netdev_alloc_skb(p->dev, headroom + skb_gro_offset(p));
if (unlikely(!nskb))
return -ENOMEM;
@@ -2612,12 +2700,15 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
nskb->mac_len = p->mac_len;
skb_reserve(nskb, headroom);
+ __skb_put(nskb, skb_gro_offset(p));
- skb_set_mac_header(nskb, -hlen);
+ skb_set_mac_header(nskb, skb_mac_header(p) - p->data);
skb_set_network_header(nskb, skb_network_offset(p));
skb_set_transport_header(nskb, skb_transport_offset(p));
- memcpy(skb_mac_header(nskb), skb_mac_header(p), hlen);
+ __skb_pull(p, skb_gro_offset(p));
+ memcpy(skb_mac_header(nskb), skb_mac_header(p),
+ p->data - skb_mac_header(p));
*NAPI_GRO_CB(nskb) = *NAPI_GRO_CB(p);
skb_shinfo(nskb)->frag_list = p;
@@ -2636,6 +2727,17 @@ int skb_gro_receive(struct sk_buff **head, struct sk_buff *skb)
p = nskb;
merge:
+ if (skb_gro_offset(skb) > skb_headlen(skb)) {
+ skb_shinfo(skb)->frags[0].page_offset +=
+ skb_gro_offset(skb) - skb_headlen(skb);
+ skb_shinfo(skb)->frags[0].size -=
+ skb_gro_offset(skb) - skb_headlen(skb);
+ skb_gro_reset_offset(skb);
+ skb_gro_pull(skb, skb_headlen(skb));
+ }
+
+ __skb_pull(skb, skb_gro_offset(skb));
+
p->prev->next = skb;
p->prev = skb;
skb_header_release(skb);
@@ -2747,6 +2849,7 @@ int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int le
return nsg;
}
+EXPORT_SYMBOL_GPL(skb_to_sgvec);
/**
* skb_cow_data - Check that a socket buffer's data buffers are writable
@@ -2856,6 +2959,45 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
return elt;
}
+EXPORT_SYMBOL_GPL(skb_cow_data);
+
+void skb_tstamp_tx(struct sk_buff *orig_skb,
+ struct skb_shared_hwtstamps *hwtstamps)
+{
+ struct sock *sk = orig_skb->sk;
+ struct sock_exterr_skb *serr;
+ struct sk_buff *skb;
+ int err;
+
+ if (!sk)
+ return;
+
+ skb = skb_clone(orig_skb, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ if (hwtstamps) {
+ *skb_hwtstamps(skb) =
+ *hwtstamps;
+ } else {
+ /*
+ * no hardware time stamps available,
+ * so keep the skb_shared_tx and only
+ * store software time stamp
+ */
+ skb->tstamp = ktime_get_real();
+ }
+
+ serr = SKB_EXT_ERR(skb);
+ memset(serr, 0, sizeof(*serr));
+ serr->ee.ee_errno = ENOMSG;
+ serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+ err = sock_queue_err_skb(sk, skb);
+ if (err)
+ kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(skb_tstamp_tx);
+
/**
* skb_partial_csum_set - set up and verify partial csum values for packet
@@ -2884,6 +3026,7 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off)
skb->csum_offset = off;
return true;
}
+EXPORT_SYMBOL_GPL(skb_partial_csum_set);
void __skb_warn_lro_forwarding(const struct sk_buff *skb)
{
@@ -2891,42 +3034,4 @@ void __skb_warn_lro_forwarding(const struct sk_buff *skb)
pr_warning("%s: received packets cannot be forwarded"
" while LRO is enabled\n", skb->dev->name);
}
-
-EXPORT_SYMBOL(___pskb_trim);
-EXPORT_SYMBOL(__kfree_skb);
-EXPORT_SYMBOL(kfree_skb);
-EXPORT_SYMBOL(__pskb_pull_tail);
-EXPORT_SYMBOL(__alloc_skb);
-EXPORT_SYMBOL(__netdev_alloc_skb);
-EXPORT_SYMBOL(pskb_copy);
-EXPORT_SYMBOL(pskb_expand_head);
-EXPORT_SYMBOL(skb_checksum);
-EXPORT_SYMBOL(skb_clone);
-EXPORT_SYMBOL(skb_copy);
-EXPORT_SYMBOL(skb_copy_and_csum_bits);
-EXPORT_SYMBOL(skb_copy_and_csum_dev);
-EXPORT_SYMBOL(skb_copy_bits);
-EXPORT_SYMBOL(skb_copy_expand);
-EXPORT_SYMBOL(skb_over_panic);
-EXPORT_SYMBOL(skb_pad);
-EXPORT_SYMBOL(skb_realloc_headroom);
-EXPORT_SYMBOL(skb_under_panic);
-EXPORT_SYMBOL(skb_dequeue);
-EXPORT_SYMBOL(skb_dequeue_tail);
-EXPORT_SYMBOL(skb_insert);
-EXPORT_SYMBOL(skb_queue_purge);
-EXPORT_SYMBOL(skb_queue_head);
-EXPORT_SYMBOL(skb_queue_tail);
-EXPORT_SYMBOL(skb_unlink);
-EXPORT_SYMBOL(skb_append);
-EXPORT_SYMBOL(skb_split);
-EXPORT_SYMBOL(skb_prepare_seq_read);
-EXPORT_SYMBOL(skb_seq_read);
-EXPORT_SYMBOL(skb_abort_seq_read);
-EXPORT_SYMBOL(skb_find_text);
-EXPORT_SYMBOL(skb_append_datato_frags);
EXPORT_SYMBOL(__skb_warn_lro_forwarding);
-
-EXPORT_SYMBOL_GPL(skb_to_sgvec);
-EXPORT_SYMBOL_GPL(skb_cow_data);
-EXPORT_SYMBOL_GPL(skb_partial_csum_set);
diff --git a/net/core/sock.c b/net/core/sock.c
index 5f97caa158e8..0620046e4eba 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -120,6 +120,7 @@
#include <net/net_namespace.h>
#include <net/request_sock.h>
#include <net/sock.h>
+#include <linux/net_tstamp.h>
#include <net/xfrm.h>
#include <linux/ipsec.h>
@@ -149,7 +150,7 @@ static const char *af_family_key_strings[AF_MAX+1] = {
"sk_lock-AF_DECnet", "sk_lock-AF_NETBEUI" , "sk_lock-AF_SECURITY" ,
"sk_lock-AF_KEY" , "sk_lock-AF_NETLINK" , "sk_lock-AF_PACKET" ,
"sk_lock-AF_ASH" , "sk_lock-AF_ECONET" , "sk_lock-AF_ATMSVC" ,
- "sk_lock-21" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
+ "sk_lock-AF_RDS" , "sk_lock-AF_SNA" , "sk_lock-AF_IRDA" ,
"sk_lock-AF_PPPOX" , "sk_lock-AF_WANPIPE" , "sk_lock-AF_LLC" ,
"sk_lock-27" , "sk_lock-28" , "sk_lock-AF_CAN" ,
"sk_lock-AF_TIPC" , "sk_lock-AF_BLUETOOTH", "sk_lock-IUCV" ,
@@ -164,7 +165,7 @@ static const char *af_family_slock_key_strings[AF_MAX+1] = {
"slock-AF_DECnet", "slock-AF_NETBEUI" , "slock-AF_SECURITY" ,
"slock-AF_KEY" , "slock-AF_NETLINK" , "slock-AF_PACKET" ,
"slock-AF_ASH" , "slock-AF_ECONET" , "slock-AF_ATMSVC" ,
- "slock-21" , "slock-AF_SNA" , "slock-AF_IRDA" ,
+ "slock-AF_RDS" , "slock-AF_SNA" , "slock-AF_IRDA" ,
"slock-AF_PPPOX" , "slock-AF_WANPIPE" , "slock-AF_LLC" ,
"slock-27" , "slock-28" , "slock-AF_CAN" ,
"slock-AF_TIPC" , "slock-AF_BLUETOOTH", "slock-AF_IUCV" ,
@@ -179,7 +180,7 @@ static const char *af_family_clock_key_strings[AF_MAX+1] = {
"clock-AF_DECnet", "clock-AF_NETBEUI" , "clock-AF_SECURITY" ,
"clock-AF_KEY" , "clock-AF_NETLINK" , "clock-AF_PACKET" ,
"clock-AF_ASH" , "clock-AF_ECONET" , "clock-AF_ATMSVC" ,
- "clock-21" , "clock-AF_SNA" , "clock-AF_IRDA" ,
+ "clock-AF_RDS" , "clock-AF_SNA" , "clock-AF_IRDA" ,
"clock-AF_PPPOX" , "clock-AF_WANPIPE" , "clock-AF_LLC" ,
"clock-27" , "clock-28" , "clock-AF_CAN" ,
"clock-AF_TIPC" , "clock-AF_BLUETOOTH", "clock-AF_IUCV" ,
@@ -255,11 +256,14 @@ static void sock_warn_obsolete_bsdism(const char *name)
}
}
-static void sock_disable_timestamp(struct sock *sk)
+static void sock_disable_timestamp(struct sock *sk, int flag)
{
- if (sock_flag(sk, SOCK_TIMESTAMP)) {
- sock_reset_flag(sk, SOCK_TIMESTAMP);
- net_disable_timestamp();
+ if (sock_flag(sk, flag)) {
+ sock_reset_flag(sk, flag);
+ if (!sock_flag(sk, SOCK_TIMESTAMP) &&
+ !sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE)) {
+ net_disable_timestamp();
+ }
}
}
@@ -614,13 +618,38 @@ set_rcvbuf:
else
sock_set_flag(sk, SOCK_RCVTSTAMPNS);
sock_set_flag(sk, SOCK_RCVTSTAMP);
- sock_enable_timestamp(sk);
+ sock_enable_timestamp(sk, SOCK_TIMESTAMP);
} else {
sock_reset_flag(sk, SOCK_RCVTSTAMP);
sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
}
break;
+ case SO_TIMESTAMPING:
+ if (val & ~SOF_TIMESTAMPING_MASK) {
+ ret = EINVAL;
+ break;
+ }
+ sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE,
+ val & SOF_TIMESTAMPING_TX_HARDWARE);
+ sock_valbool_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE,
+ val & SOF_TIMESTAMPING_TX_SOFTWARE);
+ sock_valbool_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE,
+ val & SOF_TIMESTAMPING_RX_HARDWARE);
+ if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
+ sock_enable_timestamp(sk,
+ SOCK_TIMESTAMPING_RX_SOFTWARE);
+ else
+ sock_disable_timestamp(sk,
+ SOCK_TIMESTAMPING_RX_SOFTWARE);
+ sock_valbool_flag(sk, SOCK_TIMESTAMPING_SOFTWARE,
+ val & SOF_TIMESTAMPING_SOFTWARE);
+ sock_valbool_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE,
+ val & SOF_TIMESTAMPING_SYS_HARDWARE);
+ sock_valbool_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE,
+ val & SOF_TIMESTAMPING_RAW_HARDWARE);
+ break;
+
case SO_RCVLOWAT:
if (val < 0)
val = INT_MAX;
@@ -768,6 +797,24 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
break;
+ case SO_TIMESTAMPING:
+ v.val = 0;
+ if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
+ v.val |= SOF_TIMESTAMPING_TX_HARDWARE;
+ if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
+ v.val |= SOF_TIMESTAMPING_TX_SOFTWARE;
+ if (sock_flag(sk, SOCK_TIMESTAMPING_RX_HARDWARE))
+ v.val |= SOF_TIMESTAMPING_RX_HARDWARE;
+ if (sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE))
+ v.val |= SOF_TIMESTAMPING_RX_SOFTWARE;
+ if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE))
+ v.val |= SOF_TIMESTAMPING_SOFTWARE;
+ if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE))
+ v.val |= SOF_TIMESTAMPING_SYS_HARDWARE;
+ if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE))
+ v.val |= SOF_TIMESTAMPING_RAW_HARDWARE;
+ break;
+
case SO_RCVTIMEO:
lv=sizeof(struct timeval);
if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
@@ -969,7 +1016,8 @@ void sk_free(struct sock *sk)
rcu_assign_pointer(sk->sk_filter, NULL);
}
- sock_disable_timestamp(sk);
+ sock_disable_timestamp(sk, SOCK_TIMESTAMP);
+ sock_disable_timestamp(sk, SOCK_TIMESTAMPING_RX_SOFTWARE);
if (atomic_read(&sk->sk_omem_alloc))
printk(KERN_DEBUG "%s: optmem leakage (%d bytes) detected.\n",
@@ -1255,10 +1303,9 @@ static long sock_wait_for_wmem(struct sock * sk, long timeo)
* Generic send/receive buffer handlers
*/
-static struct sk_buff *sock_alloc_send_pskb(struct sock *sk,
- unsigned long header_len,
- unsigned long data_len,
- int noblock, int *errcode)
+struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
+ unsigned long data_len, int noblock,
+ int *errcode)
{
struct sk_buff *skb;
gfp_t gfp_mask;
@@ -1338,6 +1385,7 @@ failure:
*errcode = err;
return NULL;
}
+EXPORT_SYMBOL(sock_alloc_send_pskb);
struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
int noblock, int *errcode)
@@ -1786,7 +1834,7 @@ int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
{
struct timeval tv;
if (!sock_flag(sk, SOCK_TIMESTAMP))
- sock_enable_timestamp(sk);
+ sock_enable_timestamp(sk, SOCK_TIMESTAMP);
tv = ktime_to_timeval(sk->sk_stamp);
if (tv.tv_sec == -1)
return -ENOENT;
@@ -1802,7 +1850,7 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
{
struct timespec ts;
if (!sock_flag(sk, SOCK_TIMESTAMP))
- sock_enable_timestamp(sk);
+ sock_enable_timestamp(sk, SOCK_TIMESTAMP);
ts = ktime_to_timespec(sk->sk_stamp);
if (ts.tv_sec == -1)
return -ENOENT;
@@ -1814,11 +1862,20 @@ int sock_get_timestampns(struct sock *sk, struct timespec __user *userstamp)
}
EXPORT_SYMBOL(sock_get_timestampns);
-void sock_enable_timestamp(struct sock *sk)
+void sock_enable_timestamp(struct sock *sk, int flag)
{
- if (!sock_flag(sk, SOCK_TIMESTAMP)) {
- sock_set_flag(sk, SOCK_TIMESTAMP);
- net_enable_timestamp();
+ if (!sock_flag(sk, flag)) {
+ sock_set_flag(sk, flag);
+ /*
+ * we just set one of the two flags which require net
+ * time stamping, but time stamping might have been on
+ * already because of the other one
+ */
+ if (!sock_flag(sk,
+ flag == SOCK_TIMESTAMP ?
+ SOCK_TIMESTAMPING_RX_SOFTWARE :
+ SOCK_TIMESTAMP))
+ net_enable_timestamp();
}
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 83d3398559ea..7db1de0497c6 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -11,6 +11,7 @@
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/init.h>
+#include <net/ip.h>
#include <net/sock.h>
static struct ctl_table net_core_table[] = {
diff --git a/net/core/utils.c b/net/core/utils.c
index 72e0ebe964a0..83221aee7084 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -22,7 +22,6 @@
#include <linux/net.h>
#include <linux/string.h>
#include <linux/types.h>
-#include <linux/random.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <net/sock.h>