From 7bd6b91800c996da328bd57e40e62b3f73760fbe Mon Sep 17 00:00:00 2001 From: Larry Finger Date: Thu, 8 Jun 2006 09:47:42 -0500 Subject: [PATCH] wireless: correct dump of WPA IE In net/ieee80211/softmac/ieee80211softmac_wx.c, there is a bug that prints extended sign information whenever the byte value exceeds 0x7f. The following patch changes the printk to use a u8 cast to limit the output to 2 digits. This bug was first noticed by Dan Williams . This patch applies to the current master branch of the Linville tree. Signed-Off-By: Larry Finger Signed-off-by: John W. Linville --- net/ieee80211/softmac/ieee80211softmac_wx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c index 22aa6199185b..0e65ff4e33fc 100644 --- a/net/ieee80211/softmac/ieee80211softmac_wx.c +++ b/net/ieee80211/softmac/ieee80211softmac_wx.c @@ -388,7 +388,7 @@ ieee80211softmac_wx_set_genie(struct net_device *dev, memcpy(mac->wpa.IE, extra, wrqu->data.length); dprintk(KERN_INFO PFX "generic IE set to "); for (i=0;idata.length;i++) - dprintk("%.2x", mac->wpa.IE[i]); + dprintk("%.2x", (u8)mac->wpa.IE[i]); dprintk("\n"); mac->wpa.IElen = wrqu->data.length; } else { -- cgit v1.2.3 From d3dcd4efe2ad1ad1865b2fe5c863c1ebd9482a84 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 19 Jun 2006 23:39:45 -0700 Subject: [NETFILTER]: xt_sctp: fix endless loop caused by 0 chunk length Fix endless loop in the SCTP match similar to those already fixed in the SCTP conntrack helper (was CVE-2006-1527). Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_sctp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index b5110e5b54b0..9316c753692f 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -62,7 +62,7 @@ match_packet(const struct sk_buff *skb, do { sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch); - if (sch == NULL) { + if (sch == NULL || sch->length == 0) { duprintf("Dropping invalid SCTP packet.\n"); *hotdrop = 1; return 0; -- cgit v1.2.3 From 48d83325b61043e3bbd24dd37b9fe433744cf330 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 19 Jun 2006 23:57:59 -0700 Subject: [NET]: Prevent multiple qdisc runs Having two or more qdisc_run's contend against each other is bad because it can induce packet reordering if the packets have to be requeued. It appears that this is an unintended consequence of relinquinshing the queue lock while transmitting. That in turn is needed for devices that spend a lot of time in their transmit routine. There are no advantages to be had as devices with queues are inherently single-threaded (the loopback device is not but then it doesn't have a queue). Even if you were to add a queue to a parallel virtual device (e.g., bolt a tbf filter in front of an ipip tunnel device), you would still want to process the queue in sequence to ensure that the packets are ordered correctly. The solution here is to steal a bit from net_device to prevent this. BTW, as qdisc_restart is no longer used by anyone as a module inside the kernel (IIRC it used to with netif_wake_queue), I have not exported the new __qdisc_run function. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + include/net/pkt_sched.h | 7 ++++--- net/sched/sch_generic.c | 11 +++++++++-- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e432b743dda2..39919c882a25 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -233,6 +233,7 @@ enum netdev_state_t __LINK_STATE_RX_SCHED, __LINK_STATE_LINKWATCH_PENDING, __LINK_STATE_DORMANT, + __LINK_STATE_QDISC_RUNNING, }; diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index b94d1ad92c4d..75b5b9333fc7 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -218,12 +218,13 @@ extern struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab); extern void qdisc_put_rtab(struct qdisc_rate_table *tab); -extern int qdisc_restart(struct net_device *dev); +extern void __qdisc_run(struct net_device *dev); static inline void qdisc_run(struct net_device *dev) { - while (!netif_queue_stopped(dev) && qdisc_restart(dev) < 0) - /* NOTHING */; + if (!netif_queue_stopped(dev) && + !test_and_set_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) + __qdisc_run(dev); } extern int tc_classify(struct sk_buff *skb, struct tcf_proto *tp, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index b1e4c5e20ac7..d7aca8ef524a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -90,7 +90,7 @@ void qdisc_unlock_tree(struct net_device *dev) NOTE: Called under dev->queue_lock with locally disabled BH. */ -int qdisc_restart(struct net_device *dev) +static inline int qdisc_restart(struct net_device *dev) { struct Qdisc *q = dev->qdisc; struct sk_buff *skb; @@ -179,6 +179,14 @@ requeue: return q->q.qlen; } +void __qdisc_run(struct net_device *dev) +{ + while (qdisc_restart(dev) < 0 && !netif_queue_stopped(dev)) + /* NOTHING */; + + clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); +} + static void dev_watchdog(unsigned long arg) { struct net_device *dev = (struct net_device *)arg; @@ -620,6 +628,5 @@ EXPORT_SYMBOL(qdisc_create_dflt); EXPORT_SYMBOL(qdisc_alloc); EXPORT_SYMBOL(qdisc_destroy); EXPORT_SYMBOL(qdisc_reset); -EXPORT_SYMBOL(qdisc_restart); EXPORT_SYMBOL(qdisc_lock_tree); EXPORT_SYMBOL(qdisc_unlock_tree); -- cgit v1.2.3 From 8ca84481b69513f7bf341c7dd9897023a04d7d1d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 20 Jun 2006 03:26:14 -0700 Subject: [SCTP]: sctp_unpack_cookie() fix sizeof(pointer) != sizeof(array)... Signed-off-by: Al Viro Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/sctp/sm_make_chunk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 5e0de3c0eead..2a8773691695 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1402,14 +1402,14 @@ struct sctp_association *sctp_unpack_cookie( sg.length = bodysize; key = (char *)ep->secret_key[ep->current_key]; - memset(digest, 0x00, sizeof(digest)); + memset(digest, 0x00, SCTP_SIGNATURE_SIZE); sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, &sg, 1, digest); if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { /* Try the previous key. */ key = (char *)ep->secret_key[ep->last_key]; - memset(digest, 0x00, sizeof(digest)); + memset(digest, 0x00, SCTP_SIGNATURE_SIZE); sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, &sg, 1, digest); -- cgit v1.2.3 From ff7512e1a2a3504649d3716a757f43807b6d26ef Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 20 Jun 2006 03:27:27 -0700 Subject: [ATM]: fix broken uses of NIPQUAD in net/atm NIPQUAD expects an l-value of type __be32, _NOT_ a pointer to __be32. Signed-off-by: Al Viro Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- net/atm/mpc.c | 13 +++---------- net/atm/mpoa_caches.c | 12 ++++-------- 2 files changed, 7 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/atm/mpc.c b/net/atm/mpc.c index c304ef1513b9..a48a5d580408 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -229,20 +229,15 @@ int atm_mpoa_delete_qos(struct atm_mpoa_qos *entry) /* this is buggered - we need locking for qos_head */ void atm_mpoa_disp_qos(struct seq_file *m) { - unsigned char *ip; - char ipaddr[16]; struct atm_mpoa_qos *qos; qos = qos_head; seq_printf(m, "QoS entries for shortcuts:\n"); seq_printf(m, "IP address\n TX:max_pcr pcr min_pcr max_cdv max_sdu\n RX:max_pcr pcr min_pcr max_cdv max_sdu\n"); - ipaddr[sizeof(ipaddr)-1] = '\0'; while (qos != NULL) { - ip = (unsigned char *)&qos->ipaddr; - sprintf(ipaddr, "%u.%u.%u.%u", NIPQUAD(ip)); seq_printf(m, "%u.%u.%u.%u\n %-7d %-7d %-7d %-7d %-7d\n %-7d %-7d %-7d %-7d %-7d\n", - NIPQUAD(ipaddr), + NIPQUAD(qos->ipaddr), qos->qos.txtp.max_pcr, qos->qos.txtp.pcr, qos->qos.txtp.min_pcr, qos->qos.txtp.max_cdv, qos->qos.txtp.max_sdu, qos->qos.rxtp.max_pcr, qos->qos.rxtp.pcr, qos->qos.rxtp.min_pcr, qos->qos.rxtp.max_cdv, qos->qos.rxtp.max_sdu); qos = qos->next; @@ -1083,7 +1078,6 @@ static void MPOA_trigger_rcvd(struct k_message *msg, struct mpoa_client *mpc) static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_client *client, in_cache_entry *entry) { uint32_t dst_ip = msg->content.in_info.in_dst_ip; - unsigned char *ip __attribute__ ((unused)) = (unsigned char *)&dst_ip; struct atm_mpoa_qos *qos = atm_mpoa_search_qos(dst_ip); eg_cache_entry *eg_entry = client->eg_ops->get_by_src_ip(dst_ip, client); @@ -1097,7 +1091,7 @@ static void check_qos_and_open_shortcut(struct k_message *msg, struct mpoa_clien entry->shortcut = eg_entry->shortcut; } if(entry->shortcut){ - dprintk("mpoa: (%s) using egress SVC to reach %u.%u.%u.%u\n",client->dev->name, NIPQUAD(ip)); + dprintk("mpoa: (%s) using egress SVC to reach %u.%u.%u.%u\n",client->dev->name, NIPQUAD(dst_ip)); client->eg_ops->put(eg_entry); return; } @@ -1123,8 +1117,7 @@ static void MPOA_res_reply_rcvd(struct k_message *msg, struct mpoa_client *mpc) uint32_t dst_ip = msg->content.in_info.in_dst_ip; in_cache_entry *entry = mpc->in_ops->get(dst_ip, mpc); - ip = (unsigned char *)&dst_ip; - dprintk("mpoa: (%s) MPOA_res_reply_rcvd: ip %u.%u.%u.%u\n", mpc->dev->name, NIPQUAD(ip)); + dprintk("mpoa: (%s) MPOA_res_reply_rcvd: ip %u.%u.%u.%u\n", mpc->dev->name, NIPQUAD(dst_ip)); ddprintk("mpoa: (%s) MPOA_res_reply_rcvd() entry = %p", mpc->dev->name, entry); if(entry == NULL){ printk("\nmpoa: (%s) ARGH, received res. reply for an entry that doesn't exist.\n", mpc->dev->name); diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index 64ddebb64060..781ed1b9329d 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -223,7 +223,6 @@ static void in_cache_remove_entry(in_cache_entry *entry, but an easy one... */ static void clear_count_and_expired(struct mpoa_client *client) { - unsigned char *ip; in_cache_entry *entry, *next_entry; struct timeval now; @@ -236,8 +235,7 @@ static void clear_count_and_expired(struct mpoa_client *client) next_entry = entry->next; if((now.tv_sec - entry->tv.tv_sec) > entry->ctrl_info.holding_time){ - ip = (unsigned char*)&entry->ctrl_info.in_dst_ip; - dprintk("mpoa: mpoa_caches.c: holding time expired, ip = %u.%u.%u.%u\n", NIPQUAD(ip)); + dprintk("mpoa: mpoa_caches.c: holding time expired, ip = %u.%u.%u.%u\n", NIPQUAD(entry->ctrl_info.in_dst_ip)); client->in_ops->remove_entry(entry, client); } entry = next_entry; @@ -455,7 +453,6 @@ static void eg_cache_remove_entry(eg_cache_entry *entry, static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_client *client) { - unsigned char *ip; eg_cache_entry *entry = kmalloc(sizeof(eg_cache_entry), GFP_KERNEL); if (entry == NULL) { @@ -463,8 +460,7 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_cli return NULL; } - ip = (unsigned char *)&msg->content.eg_info.eg_dst_ip; - dprintk("mpoa: mpoa_caches.c: adding an egress entry, ip = %u.%u.%u.%u, this should be our IP\n", NIPQUAD(ip)); + dprintk("mpoa: mpoa_caches.c: adding an egress entry, ip = %u.%u.%u.%u, this should be our IP\n", NIPQUAD(msg->content.eg_info.eg_dst_ip)); memset(entry, 0, sizeof(eg_cache_entry)); atomic_set(&entry->use, 1); @@ -481,8 +477,8 @@ static eg_cache_entry *eg_cache_add_entry(struct k_message *msg, struct mpoa_cli do_gettimeofday(&(entry->tv)); entry->entry_state = EGRESS_RESOLVED; dprintk("mpoa: mpoa_caches.c: new_eg_cache_entry cache_id %lu\n", ntohl(entry->ctrl_info.cache_id)); - ip = (unsigned char *)&entry->ctrl_info.mps_ip; - dprintk("mpoa: mpoa_caches.c: mps_ip = %u.%u.%u.%u\n", NIPQUAD(ip)); + dprintk("mpoa: mpoa_caches.c: mps_ip = %u.%u.%u.%u\n", + NIPQUAD(entry->ctrl_info.mps_ip)); atomic_inc(&entry->use); write_unlock_irq(&client->egress_lock); -- cgit v1.2.3 From 5b057c6b1a25d57edf2b4d1e956e50936480a9ff Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 23 Jun 2006 02:06:41 -0700 Subject: [NET]: Avoid allocating skb in skb_pad First of all it is unnecessary to allocate a new skb in skb_pad since the existing one is not shared. More importantly, our hard_start_xmit interface does not allow a new skb to be allocated since that breaks requeueing. This patch uses pskb_expand_head to expand the existing skb and linearize it if needed. Actually, someone should sift through every instance of skb_pad on a non-linear skb as they do not fit the reasons why this was originally created. Incidentally, this fixes a minor bug when the skb is cloned (tcpdump, TCP, etc.). As it is skb_pad will simply write over a cloned skb. Because of the position of the write it is unlikely to cause problems but still it's best if we don't do it. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/3c527.c | 3 +-- drivers/net/82596.c | 3 +-- drivers/net/a2065.c | 3 +-- drivers/net/ariadne.c | 3 +-- drivers/net/arm/ether1.c | 3 +-- drivers/net/arm/ether3.c | 3 +-- drivers/net/atarilance.c | 3 +-- drivers/net/cassini.c | 3 +-- drivers/net/declance.c | 3 +-- drivers/net/depca.c | 7 ++----- drivers/net/eepro.c | 3 +-- drivers/net/eexpress.c | 3 +-- drivers/net/epic100.c | 7 ++----- drivers/net/eth16i.c | 3 +-- drivers/net/hp100.c | 7 ++----- drivers/net/lance.c | 3 +-- drivers/net/lasi_82596.c | 3 +-- drivers/net/lp486e.c | 3 +-- drivers/net/myri10ge/myri10ge.c | 3 +-- drivers/net/pcmcia/fmvj18x_cs.c | 3 +-- drivers/net/pcmcia/xirc2ps_cs.c | 3 +-- drivers/net/r8169.c | 3 +-- drivers/net/seeq8005.c | 3 +-- drivers/net/sis190.c | 3 +-- drivers/net/sk98lin/skge.c | 2 +- drivers/net/skge.c | 3 +-- drivers/net/smc9194.c | 3 +-- drivers/net/sonic.c | 3 +-- drivers/net/starfire.c | 3 +-- drivers/net/via-rhine.c | 7 ++----- drivers/net/wireless/ray_cs.c | 3 +-- drivers/net/wireless/wavelan_cs.c | 7 ++----- drivers/net/yellowfin.c | 12 +++++------- drivers/net/znet.c | 3 +-- include/linux/skbuff.h | 11 +++++------ net/core/skbuff.c | 36 ++++++++++++++++++++++++++---------- 36 files changed, 74 insertions(+), 103 deletions(-) (limited to 'net') diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c index 1b1cb0026072..157eda573925 100644 --- a/drivers/net/3c527.c +++ b/drivers/net/3c527.c @@ -1031,8 +1031,7 @@ static int mc32_send_packet(struct sk_buff *skb, struct net_device *dev) return 1; } - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) { + if (skb_padto(skb, ETH_ZLEN)) { netif_wake_queue(dev); return 0; } diff --git a/drivers/net/82596.c b/drivers/net/82596.c index da0c878dcba8..8a9f7d61b9b1 100644 --- a/drivers/net/82596.c +++ b/drivers/net/82596.c @@ -1070,8 +1070,7 @@ static int i596_start_xmit(struct sk_buff *skb, struct net_device *dev) skb->len, (unsigned int)skb->data)); if (skb->len < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) return 0; length = ETH_ZLEN; } diff --git a/drivers/net/a2065.c b/drivers/net/a2065.c index 79bb56b8dcef..71165ac0257a 100644 --- a/drivers/net/a2065.c +++ b/drivers/net/a2065.c @@ -573,8 +573,7 @@ static int lance_start_xmit (struct sk_buff *skb, struct net_device *dev) if (len < ETH_ZLEN) { len = ETH_ZLEN; - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) return 0; } diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c index d1b6b1f794e2..a9bb7a4aff98 100644 --- a/drivers/net/ariadne.c +++ b/drivers/net/ariadne.c @@ -607,8 +607,7 @@ static int ariadne_start_xmit(struct sk_buff *skb, struct net_device *dev) /* FIXME: is the 79C960 new enough to do its own padding right ? */ if (skb->len < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) return 0; len = ETH_ZLEN; } diff --git a/drivers/net/arm/ether1.c b/drivers/net/arm/ether1.c index 36475eb2727f..312955d07b28 100644 --- a/drivers/net/arm/ether1.c +++ b/drivers/net/arm/ether1.c @@ -700,8 +700,7 @@ ether1_sendpacket (struct sk_buff *skb, struct net_device *dev) } if (skb->len < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) goto out; } diff --git a/drivers/net/arm/ether3.c b/drivers/net/arm/ether3.c index f1d5b1027ff7..081074180e62 100644 --- a/drivers/net/arm/ether3.c +++ b/drivers/net/arm/ether3.c @@ -518,8 +518,7 @@ ether3_sendpacket(struct sk_buff *skb, struct net_device *dev) length = (length + 1) & ~1; if (length != skb->len) { - skb = skb_padto(skb, length); - if (skb == NULL) + if (skb_padto(skb, length)) goto out; } diff --git a/drivers/net/atarilance.c b/drivers/net/atarilance.c index 442b2cbeb58a..91783a8008be 100644 --- a/drivers/net/atarilance.c +++ b/drivers/net/atarilance.c @@ -804,8 +804,7 @@ static int lance_start_xmit( struct sk_buff *skb, struct net_device *dev ) ++len; if (len > skb->len) { - skb = skb_padto(skb, len); - if (skb == NULL) + if (skb_padto(skb, len)) return 0; } diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c index 39f36aa05aa8..565a54f1d06a 100644 --- a/drivers/net/cassini.c +++ b/drivers/net/cassini.c @@ -2915,8 +2915,7 @@ static int cas_start_xmit(struct sk_buff *skb, struct net_device *dev) */ static int ring; - skb = skb_padto(skb, cp->min_frame_size); - if (!skb) + if (skb_padto(skb, cp->min_frame_size)) return 0; /* XXX: we need some higher-level QoS hooks to steer packets to diff --git a/drivers/net/declance.c b/drivers/net/declance.c index f130bdab3fd3..d3d958e7ac56 100644 --- a/drivers/net/declance.c +++ b/drivers/net/declance.c @@ -885,8 +885,7 @@ static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev) len = skblen; if (len < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) return 0; len = ETH_ZLEN; } diff --git a/drivers/net/depca.c b/drivers/net/depca.c index 0941d40f046f..e946c43d3b10 100644 --- a/drivers/net/depca.c +++ b/drivers/net/depca.c @@ -938,11 +938,8 @@ static int depca_start_xmit(struct sk_buff *skb, struct net_device *dev) if (skb->len < 1) goto out; - if (skb->len < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) - goto out; - } + if (skb_padto(skb, ETH_ZLEN)) + goto out; netif_stop_queue(dev); diff --git a/drivers/net/eepro.c b/drivers/net/eepro.c index a806dfe54d23..e70f172699db 100644 --- a/drivers/net/eepro.c +++ b/drivers/net/eepro.c @@ -1154,8 +1154,7 @@ static int eepro_send_packet(struct sk_buff *skb, struct net_device *dev) printk(KERN_DEBUG "%s: entering eepro_send_packet routine.\n", dev->name); if (length < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) return 0; length = ETH_ZLEN; } diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c index 82bd356e4f3a..a74b20715755 100644 --- a/drivers/net/eexpress.c +++ b/drivers/net/eexpress.c @@ -677,8 +677,7 @@ static int eexp_xmit(struct sk_buff *buf, struct net_device *dev) #endif if (buf->len < ETH_ZLEN) { - buf = skb_padto(buf, ETH_ZLEN); - if (buf == NULL) + if (skb_padto(buf, ETH_ZLEN)) return 0; length = ETH_ZLEN; } diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c index 8d680ce600d7..724d7dc35fa3 100644 --- a/drivers/net/epic100.c +++ b/drivers/net/epic100.c @@ -1027,11 +1027,8 @@ static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev) u32 ctrl_word; unsigned long flags; - if (skb->len < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) - return 0; - } + if (skb_padto(skb, ETH_ZLEN)) + return 0; /* Caution: the write order is important here, set the field with the "ownership" bit last. */ diff --git a/drivers/net/eth16i.c b/drivers/net/eth16i.c index b67545be2caa..4bf76f86d8e9 100644 --- a/drivers/net/eth16i.c +++ b/drivers/net/eth16i.c @@ -1064,8 +1064,7 @@ static int eth16i_tx(struct sk_buff *skb, struct net_device *dev) unsigned long flags; if (length < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) return 0; length = ETH_ZLEN; } diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c index 247c8ca86033..dd1dc32dc98d 100644 --- a/drivers/net/hp100.c +++ b/drivers/net/hp100.c @@ -1487,11 +1487,8 @@ static int hp100_start_xmit_bm(struct sk_buff *skb, struct net_device *dev) if (skb->len <= 0) return 0; - if (skb->len < ETH_ZLEN && lp->chip == HP100_CHIPID_SHASTA) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) - return 0; - } + if (lp->chip == HP100_CHIPID_SHASTA && skb_padto(skb, ETH_ZLEN)) + return 0; /* Get Tx ring tail pointer */ if (lp->txrtail->next == lp->txrhead) { diff --git a/drivers/net/lance.c b/drivers/net/lance.c index bb5ad479210b..c1c3452c90ca 100644 --- a/drivers/net/lance.c +++ b/drivers/net/lance.c @@ -968,8 +968,7 @@ static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev) /* The old LANCE chips doesn't automatically pad buffers to min. size. */ if (chip_table[lp->chip_version].flags & LANCE_MUST_PAD) { if (skb->len < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) goto out; lp->tx_ring[entry].length = -ETH_ZLEN; } diff --git a/drivers/net/lasi_82596.c b/drivers/net/lasi_82596.c index 957888de3d7e..1ab09447baa5 100644 --- a/drivers/net/lasi_82596.c +++ b/drivers/net/lasi_82596.c @@ -1083,8 +1083,7 @@ static int i596_start_xmit(struct sk_buff *skb, struct net_device *dev) skb->len, skb->data)); if (length < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) return 0; length = ETH_ZLEN; } diff --git a/drivers/net/lp486e.c b/drivers/net/lp486e.c index 94d5ea1ce8bd..bf3f343ae715 100644 --- a/drivers/net/lp486e.c +++ b/drivers/net/lp486e.c @@ -877,8 +877,7 @@ static int i596_start_xmit (struct sk_buff *skb, struct net_device *dev) { length = skb->len; if (length < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) return 0; length = ETH_ZLEN; } diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 5a74f63618bc..b983e1e04348 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -1939,8 +1939,7 @@ again: /* pad frames to at least ETH_ZLEN bytes */ if (unlikely(skb->len < ETH_ZLEN)) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) { + if (skb_padto(skb, ETH_ZLEN)) { /* The packet is gone, so we must * return 0 */ mgp->stats.tx_dropped += 1; diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c index 09b11761cdfa..ea93b8f18605 100644 --- a/drivers/net/pcmcia/fmvj18x_cs.c +++ b/drivers/net/pcmcia/fmvj18x_cs.c @@ -831,8 +831,7 @@ static int fjn_start_xmit(struct sk_buff *skb, struct net_device *dev) if (length < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) return 0; length = ETH_ZLEN; } diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c index e80d1e3aec68..9bae77ce1314 100644 --- a/drivers/net/pcmcia/xirc2ps_cs.c +++ b/drivers/net/pcmcia/xirc2ps_cs.c @@ -1374,8 +1374,7 @@ do_start_xmit(struct sk_buff *skb, struct net_device *dev) */ if (pktlen < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) return 0; pktlen = ETH_ZLEN; } diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 9945cc6b8d90..985afe0e6273 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -2222,8 +2222,7 @@ static int rtl8169_start_xmit(struct sk_buff *skb, struct net_device *dev) len = skb->len; if (unlikely(len < ETH_ZLEN)) { - skb = skb_padto(skb, ETH_ZLEN); - if (!skb) + if (skb_padto(skb, ETH_ZLEN)) goto err_update_stats; len = ETH_ZLEN; } diff --git a/drivers/net/seeq8005.c b/drivers/net/seeq8005.c index bcef03feb2fc..efd0f235020f 100644 --- a/drivers/net/seeq8005.c +++ b/drivers/net/seeq8005.c @@ -396,8 +396,7 @@ static int seeq8005_send_packet(struct sk_buff *skb, struct net_device *dev) unsigned char *buf; if (length < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) return 0; length = ETH_ZLEN; } diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c index 31dd3f036fa8..df39f3447655 100644 --- a/drivers/net/sis190.c +++ b/drivers/net/sis190.c @@ -1156,8 +1156,7 @@ static int sis190_start_xmit(struct sk_buff *skb, struct net_device *dev) dma_addr_t mapping; if (unlikely(skb->len < ETH_ZLEN)) { - skb = skb_padto(skb, ETH_ZLEN); - if (!skb) { + if (skb_padto(skb, ETH_ZLEN)) { tp->stats.tx_dropped++; goto out; } diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c index 38a26df4095f..f3efbd177ae7 100644 --- a/drivers/net/sk98lin/skge.c +++ b/drivers/net/sk98lin/skge.c @@ -1525,7 +1525,7 @@ struct sk_buff *pMessage) /* pointer to send-message */ ** This is to resolve faulty padding by the HW with 0xaa bytes. */ if (BytesSend < C_LEN_ETHERNET_MINSIZE) { - if ((pMessage = skb_padto(pMessage, C_LEN_ETHERNET_MINSIZE)) == NULL) { + if (skb_padto(pMessage, C_LEN_ETHERNET_MINSIZE)) { spin_unlock_irqrestore(&pTxPort->TxDesRingLock, Flags); return 0; } diff --git a/drivers/net/skge.c b/drivers/net/skge.c index 536dd1cf7f79..19a4a16055dc 100644 --- a/drivers/net/skge.c +++ b/drivers/net/skge.c @@ -2310,8 +2310,7 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev) u64 map; unsigned long flags; - skb = skb_padto(skb, ETH_ZLEN); - if (!skb) + if (skb_padto(skb, ETH_ZLEN)) return NETDEV_TX_OK; if (!spin_trylock_irqsave(&skge->tx_lock, flags)) diff --git a/drivers/net/smc9194.c b/drivers/net/smc9194.c index 6cf16f322ad5..8b0321f1976c 100644 --- a/drivers/net/smc9194.c +++ b/drivers/net/smc9194.c @@ -523,8 +523,7 @@ static int smc_wait_to_send_packet( struct sk_buff * skb, struct net_device * de length = skb->len; if (length < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) { + if (skb_padto(skb, ETH_ZLEN)) { netif_wake_queue(dev); return 0; } diff --git a/drivers/net/sonic.c b/drivers/net/sonic.c index 90b818a8de6e..cab0dd958492 100644 --- a/drivers/net/sonic.c +++ b/drivers/net/sonic.c @@ -231,8 +231,7 @@ static int sonic_send_packet(struct sk_buff *skb, struct net_device *dev) length = skb->len; if (length < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) return 0; length = ETH_ZLEN; } diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c index 9b7805be21da..c158eedc7813 100644 --- a/drivers/net/starfire.c +++ b/drivers/net/starfire.c @@ -1349,8 +1349,7 @@ static int start_tx(struct sk_buff *skb, struct net_device *dev) #if defined(ZEROCOPY) && defined(HAS_BROKEN_FIRMWARE) if (skb->ip_summed == CHECKSUM_HW) { - skb = skb_padto(skb, (skb->len + PADDING_MASK) & ~PADDING_MASK); - if (skb == NULL) + if (skb_padto(skb, (skb->len + PADDING_MASK) & ~PADDING_MASK)) return NETDEV_TX_OK; } #endif /* ZEROCOPY && HAS_BROKEN_FIRMWARE */ diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c index fdc21037f6dc..c80a4f1d5f7a 100644 --- a/drivers/net/via-rhine.c +++ b/drivers/net/via-rhine.c @@ -1284,11 +1284,8 @@ static int rhine_start_tx(struct sk_buff *skb, struct net_device *dev) /* Calculate the next Tx descriptor entry. */ entry = rp->cur_tx % TX_RING_SIZE; - if (skb->len < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) - return 0; - } + if (skb_padto(skb, ETH_ZLEN)) + return 0; rp->tx_skbuff[entry] = skb; diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c index 879eb427607c..a915fe6c6aa5 100644 --- a/drivers/net/wireless/ray_cs.c +++ b/drivers/net/wireless/ray_cs.c @@ -924,8 +924,7 @@ static int ray_dev_start_xmit(struct sk_buff *skb, struct net_device *dev) if (length < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) return 0; length = ETH_ZLEN; } diff --git a/drivers/net/wireless/wavelan_cs.c b/drivers/net/wireless/wavelan_cs.c index f7724eb2fa7e..561250f73fd3 100644 --- a/drivers/net/wireless/wavelan_cs.c +++ b/drivers/net/wireless/wavelan_cs.c @@ -3194,11 +3194,8 @@ wavelan_packet_xmit(struct sk_buff * skb, * and we don't have the Ethernet specific requirement of beeing * able to detect collisions, therefore in theory we don't really * need to pad. Jean II */ - if (skb->len < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) - return 0; - } + if (skb_padto(skb, ETH_ZLEN)) + return 0; wv_packet_write(dev, skb->data, skb->len); diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c index fd0f43b7db5b..ecec8e5db786 100644 --- a/drivers/net/yellowfin.c +++ b/drivers/net/yellowfin.c @@ -862,13 +862,11 @@ static int yellowfin_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Fix GX chipset errata. */ if (cacheline_end > 24 || cacheline_end == 0) { len = skb->len + 32 - cacheline_end + 1; - if (len != skb->len) - skb = skb_padto(skb, len); - } - if (skb == NULL) { - yp->tx_skbuff[entry] = NULL; - netif_wake_queue(dev); - return 0; + if (skb_padto(skb, len)) { + yp->tx_skbuff[entry] = NULL; + netif_wake_queue(dev); + return 0; + } } } yp->tx_skbuff[entry] = skb; diff --git a/drivers/net/znet.c b/drivers/net/znet.c index 3ac047bc727d..a7c089df66e6 100644 --- a/drivers/net/znet.c +++ b/drivers/net/znet.c @@ -544,8 +544,7 @@ static int znet_send_packet(struct sk_buff *skb, struct net_device *dev) printk(KERN_DEBUG "%s: ZNet_send_packet.\n", dev->name); if (length < ETH_ZLEN) { - skb = skb_padto(skb, ETH_ZLEN); - if (skb == NULL) + if (skb_padto(skb, ETH_ZLEN)) return 0; length = ETH_ZLEN; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 66f8819f9568..f8c7eb79a27f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -345,7 +345,7 @@ extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom, int newtailroom, gfp_t priority); -extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad); +extern int skb_pad(struct sk_buff *skb, int pad); #define dev_kfree_skb(a) kfree_skb(a) extern void skb_over_panic(struct sk_buff *skb, int len, void *here); @@ -1122,16 +1122,15 @@ static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) * * Pads up a buffer to ensure the trailing bytes exist and are * blanked. If the buffer already contains sufficient data it - * is untouched. Returns the buffer, which may be a replacement - * for the original, or NULL for out of memory - in which case - * the original buffer is still freed. + * is untouched. Otherwise it is extended. Returns zero on + * success. The skb is freed on error. */ -static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len) +static inline int skb_padto(struct sk_buff *skb, unsigned int len) { unsigned int size = skb->len; if (likely(size >= len)) - return skb; + return 0; return skb_pad(skb, len-size); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index bb7210f4005e..fe63d4efbd4d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -781,24 +781,40 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, * filled. Used by network drivers which may DMA or transfer data * beyond the buffer end onto the wire. * - * May return NULL in out of memory cases. + * May return error in out of memory cases. The skb is freed on error. */ -struct sk_buff *skb_pad(struct sk_buff *skb, int pad) +int skb_pad(struct sk_buff *skb, int pad) { - struct sk_buff *nskb; + int err; + int ntail; /* If the skbuff is non linear tailroom is always zero.. */ - if (skb_tailroom(skb) >= pad) { + if (!skb_cloned(skb) && skb_tailroom(skb) >= pad) { memset(skb->data+skb->len, 0, pad); - return skb; + return 0; } - - nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC); + + ntail = skb->data_len + pad - (skb->end - skb->tail); + if (likely(skb_cloned(skb) || ntail > 0)) { + err = pskb_expand_head(skb, 0, ntail, GFP_ATOMIC); + if (unlikely(err)) + goto free_skb; + } + + /* FIXME: The use of this function with non-linear skb's really needs + * to be audited. + */ + err = skb_linearize(skb); + if (unlikely(err)) + goto free_skb; + + memset(skb->data + skb->len, 0, pad); + return 0; + +free_skb: kfree_skb(skb); - if (nskb) - memset(nskb->data+nskb->len, 0, pad); - return nskb; + return err; } /* Trims skb to length len. It can change skb pointers. -- cgit v1.2.3 From 102128e3a27821bdcbacb10f4f2bba253f587ba4 Mon Sep 17 00:00:00 2001 From: Łukasz Stelmach Date: Thu, 22 Jun 2006 01:37:19 -0700 Subject: [IPV6]: Fix source address selection. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two additional labels (RFC 3484, sec. 10.3) for IPv6 addreses are defined to make a distinction between global unicast addresses and Unique Local Addresses (fc00::/7, RFC 4193) and Teredo (2001::/32, RFC 4380). It is necessary to avoid attempts of connection that would either fail (eg. fec0:: to 2001:feed::) or be sub-optimal (2001:0:: to 2001:feed::). Signed-off-by: Łukasz Stelmach Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c2c26fa0943d..6b361fc3e14d 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -862,6 +862,8 @@ static int inline ipv6_saddr_label(const struct in6_addr *addr, int type) * 2002::/16 2 * ::/96 3 * ::ffff:0:0/96 4 + * fc00::/7 5 + * 2001::/32 6 */ if (type & IPV6_ADDR_LOOPBACK) return 0; @@ -869,8 +871,12 @@ static int inline ipv6_saddr_label(const struct in6_addr *addr, int type) return 3; else if (type & IPV6_ADDR_MAPPED) return 4; + else if (addr->s6_addr32[0] == htonl(0x20010000)) + return 6; else if (addr->s6_addr16[0] == htons(0x2002)) return 2; + else if ((addr->s6_addr[0] & 0xfe) == 0xfc) + return 5; return 1; } -- cgit v1.2.3 From 5e2707fa3aed8c24075087cbaea2628725adbe55 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 22 Jun 2006 01:41:18 -0700 Subject: [IPV6] ADDRCONF: Fix default source address selection without CONFIG_IPV6_PRIVACY We need to update hiscore.rule even if we don't enable CONFIG_IPV6_PRIVACY, because we have more less significant rule; longest match. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6b361fc3e14d..4da664538f52 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1075,6 +1075,9 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, if (hiscore.attrs & IPV6_SADDR_SCORE_PRIVACY) continue; } +#else + if (hiscore.rule < 7) + hiscore.rule++; #endif /* Rule 8: Use longest matching prefix */ if (hiscore.rule < 8) { -- cgit v1.2.3 From d4828d85d188dc70ed172802e798d3978bb6e29e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 22 Jun 2006 02:28:18 -0700 Subject: [NET]: Prevent transmission after dev_deactivate The dev_deactivate function has bit-rotted since the introduction of lockless drivers. In particular, the spin_unlock_wait call at the end has no effect on the xmit routine of lockless drivers. With a little bit of work, we can make it much more useful by providing the guarantee that when it returns, no more calls to the xmit routine of the underlying driver will be made. The idea is simple. There are two entry points in to the xmit routine. The first comes from dev_queue_xmit. That one is easily stopped by using synchronize_rcu. This works because we set the qdisc to noop_qdisc before the synchronize_rcu call. That in turn causes all subsequent packets sent to dev_queue_xmit to be dropped. The synchronize_rcu call also ensures all outstanding calls leave their critical section. The other entry point is from qdisc_run. Since we now have a bit that indicates whether it's running, all we have to do is to wait until the bit is off. I've removed the loop to wait for __LINK_STATE_SCHED to clear. This is useless because netif_wake_queue can cause it to be set again. It is also harmless because we've disarmed qdisc_run. I've also removed the spin_unlock_wait on xmit_lock because its only purpose of making sure that all outstanding xmit_lock holders have exited is also given by dev_watchdog_down. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/core/dev.c | 6 +++--- net/sched/sch_generic.c | 12 +++++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/core/dev.c b/net/core/dev.c index ab39fe17cb58..29e3888102bc 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1295,7 +1295,7 @@ int dev_queue_xmit(struct sk_buff *skb) /* Disable soft irqs for various locks below. Also * stops preemption for RCU. */ - local_bh_disable(); + rcu_read_lock_bh(); /* Updates of qdisc are serialized by queue_lock. * The struct Qdisc which is pointed to by qdisc is now a @@ -1369,13 +1369,13 @@ int dev_queue_xmit(struct sk_buff *skb) } rc = -ENETDOWN; - local_bh_enable(); + rcu_read_unlock_bh(); out_kfree_skb: kfree_skb(skb); return rc; out: - local_bh_enable(); + rcu_read_unlock_bh(); return rc; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index d7aca8ef524a..7aad0121232c 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -181,9 +181,13 @@ requeue: void __qdisc_run(struct net_device *dev) { + if (unlikely(dev->qdisc == &noop_qdisc)) + goto out; + while (qdisc_restart(dev) < 0 && !netif_queue_stopped(dev)) /* NOTHING */; +out: clear_bit(__LINK_STATE_QDISC_RUNNING, &dev->state); } @@ -583,10 +587,12 @@ void dev_deactivate(struct net_device *dev) dev_watchdog_down(dev); - while (test_bit(__LINK_STATE_SCHED, &dev->state)) - yield(); + /* Wait for outstanding dev_queue_xmit calls. */ + synchronize_rcu(); - spin_unlock_wait(&dev->_xmit_lock); + /* Wait for outstanding qdisc_run calls. */ + while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) + yield(); } void dev_init_scheduler(struct net_device *dev) -- cgit v1.2.3 From 7967168cefdbc63bf332d6b1548eca7cd65ebbcc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 22 Jun 2006 02:40:14 -0700 Subject: [NET]: Merge TSO/UFO fields in sk_buff Having separate fields in sk_buff for TSO/UFO (tso_size/ufo_size) is not going to scale if we add any more segmentation methods (e.g., DCCP). So let's merge them. They were used to tell the protocol of a packet. This function has been subsumed by the new gso_type field. This is essentially a set of netdev feature bits (shifted by 16 bits) that are required to process a specific skb. As such it's easy to tell whether a given device can process a GSO skb: you just have to and the gso_type field and the netdev's features field. I've made gso_type a conjunction. The idea is that you have a base type (e.g., SKB_GSO_TCPV4) that can be modified further to support new features. For example, if we add a hardware TSO type that supports ECN, they would declare NETIF_F_TSO | NETIF_F_TSO_ECN. All TSO packets with CWR set would have a gso_type of SKB_GSO_TCPV4 | SKB_GSO_TCPV4_ECN while all other TSO packets would be SKB_GSO_TCPV4. This means that only the CWR packets need to be emulated in software. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/net/8139cp.c | 2 +- drivers/net/bnx2.c | 4 ++-- drivers/net/chelsio/sge.c | 4 ++-- drivers/net/e1000/e1000_main.c | 10 ++++----- drivers/net/forcedeth.c | 4 ++-- drivers/net/ixgb/ixgb_main.c | 4 ++-- drivers/net/loopback.c | 4 ++-- drivers/net/myri10ge/myri10ge.c | 4 ++-- drivers/net/r8169.c | 2 +- drivers/net/s2io.c | 16 +++++++------- drivers/net/sky2.c | 4 ++-- drivers/net/tg3.c | 4 ++-- drivers/net/typhoon.c | 2 +- drivers/s390/net/qeth_eddp.c | 12 +++++------ drivers/s390/net/qeth_main.c | 4 ++-- drivers/s390/net/qeth_tso.h | 2 +- include/linux/netdevice.h | 14 ++++++++++-- include/linux/skbuff.h | 12 ++++++++--- include/net/tcp.h | 4 ++-- net/bridge/br_forward.c | 4 ++-- net/bridge/br_netfilter.c | 2 +- net/core/skbuff.c | 16 ++++++++------ net/ipv4/ip_output.c | 16 ++++++++------ net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 47 ++++++++++++++++++++++++----------------- net/ipv6/ip6_output.c | 7 +++--- 27 files changed, 120 insertions(+), 90 deletions(-) (limited to 'net') diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c index a26077a175ad..0cdc830449d8 100644 --- a/drivers/net/8139cp.c +++ b/drivers/net/8139cp.c @@ -797,7 +797,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev) entry = cp->tx_head; eor = (entry == (CP_TX_RING_SIZE - 1)) ? RingEnd : 0; if (dev->features & NETIF_F_TSO) - mss = skb_shinfo(skb)->tso_size; + mss = skb_shinfo(skb)->gso_size; if (skb_shinfo(skb)->nr_frags == 0) { struct cp_desc *txd = &cp->tx_ring[entry]; diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index 702d546567ad..7635736cc791 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -1640,7 +1640,7 @@ bnx2_tx_int(struct bnx2 *bp) skb = tx_buf->skb; #ifdef BCM_TSO /* partial BD completions possible with TSO packets */ - if (skb_shinfo(skb)->tso_size) { + if (skb_shinfo(skb)->gso_size) { u16 last_idx, last_ring_idx; last_idx = sw_cons + @@ -4428,7 +4428,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) (TX_BD_FLAGS_VLAN_TAG | (vlan_tx_tag_get(skb) << 16)); } #ifdef BCM_TSO - if ((mss = skb_shinfo(skb)->tso_size) && + if ((mss = skb_shinfo(skb)->gso_size) && (skb->len > (bp->dev->mtu + ETH_HLEN))) { u32 tcp_opt_len, ip_tcp_len; diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c index 4391bf4bf573..53efff6da784 100644 --- a/drivers/net/chelsio/sge.c +++ b/drivers/net/chelsio/sge.c @@ -1418,7 +1418,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) struct cpl_tx_pkt *cpl; #ifdef NETIF_F_TSO - if (skb_shinfo(skb)->tso_size) { + if (skb_shinfo(skb)->gso_size) { int eth_type; struct cpl_tx_pkt_lso *hdr; @@ -1433,7 +1433,7 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) hdr->ip_hdr_words = skb->nh.iph->ihl; hdr->tcp_hdr_words = skb->h.th->doff; hdr->eth_type_mss = htons(MK_ETH_TYPE_MSS(eth_type, - skb_shinfo(skb)->tso_size)); + skb_shinfo(skb)->gso_size)); hdr->len = htonl(skb->len - sizeof(*hdr)); cpl = (struct cpl_tx_pkt *)hdr; sge->stats.tx_lso_pkts++; diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index a373ccb308d8..32b7d444b374 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -2394,7 +2394,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, uint8_t ipcss, ipcso, tucss, tucso, hdr_len; int err; - if (skb_shinfo(skb)->tso_size) { + if (skb_shinfo(skb)->gso_size) { if (skb_header_cloned(skb)) { err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); if (err) @@ -2402,7 +2402,7 @@ e1000_tso(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, } hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); - mss = skb_shinfo(skb)->tso_size; + mss = skb_shinfo(skb)->gso_size; if (skb->protocol == htons(ETH_P_IP)) { skb->nh.iph->tot_len = 0; skb->nh.iph->check = 0; @@ -2519,7 +2519,7 @@ e1000_tx_map(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, * tso gets written back prematurely before the data is fully * DMA'd to the controller */ if (!skb->data_len && tx_ring->last_tx_tso && - !skb_shinfo(skb)->tso_size) { + !skb_shinfo(skb)->gso_size) { tx_ring->last_tx_tso = 0; size -= 4; } @@ -2757,7 +2757,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } #ifdef NETIF_F_TSO - mss = skb_shinfo(skb)->tso_size; + mss = skb_shinfo(skb)->gso_size; /* The controller does a simple calculation to * make sure there is enough room in the FIFO before * initiating the DMA for each buffer. The calc is: @@ -2807,7 +2807,7 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) #ifdef NETIF_F_TSO /* Controller Erratum workaround */ if (!skb->data_len && tx_ring->last_tx_tso && - !skb_shinfo(skb)->tso_size) + !skb_shinfo(skb)->gso_size) count++; #endif diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index 191383d461d7..21be4fa071b5 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -1495,8 +1495,8 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) np->tx_skbuff[nr] = skb; #ifdef NETIF_F_TSO - if (skb_shinfo(skb)->tso_size) - tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->tso_size << NV_TX2_TSO_SHIFT); + if (skb_shinfo(skb)->gso_size) + tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT); else #endif tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0); diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c index 57006fb8840e..8bb32f946993 100644 --- a/drivers/net/ixgb/ixgb_main.c +++ b/drivers/net/ixgb/ixgb_main.c @@ -1173,7 +1173,7 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb) uint16_t ipcse, tucse, mss; int err; - if(likely(skb_shinfo(skb)->tso_size)) { + if(likely(skb_shinfo(skb)->gso_size)) { if (skb_header_cloned(skb)) { err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); if (err) @@ -1181,7 +1181,7 @@ ixgb_tso(struct ixgb_adapter *adapter, struct sk_buff *skb) } hdr_len = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2)); - mss = skb_shinfo(skb)->tso_size; + mss = skb_shinfo(skb)->gso_size; skb->nh.iph->tot_len = 0; skb->nh.iph->check = 0; skb->h.th->check = ~csum_tcpudp_magic(skb->nh.iph->saddr, diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index b79d6e8d3045..43fef7de8cb9 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -74,7 +74,7 @@ static void emulate_large_send_offload(struct sk_buff *skb) struct iphdr *iph = skb->nh.iph; struct tcphdr *th = (struct tcphdr*)(skb->nh.raw + (iph->ihl * 4)); unsigned int doffset = (iph->ihl + th->doff) * 4; - unsigned int mtu = skb_shinfo(skb)->tso_size + doffset; + unsigned int mtu = skb_shinfo(skb)->gso_size + doffset; unsigned int offset = 0; u32 seq = ntohl(th->seq); u16 id = ntohs(iph->id); @@ -139,7 +139,7 @@ static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) #endif #ifdef LOOPBACK_TSO - if (skb_shinfo(skb)->tso_size) { + if (skb_shinfo(skb)->gso_size) { BUG_ON(skb->protocol != htons(ETH_P_IP)); BUG_ON(skb->nh.iph->protocol != IPPROTO_TCP); diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index b983e1e04348..dbdf189436fa 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -1879,7 +1879,7 @@ again: #ifdef NETIF_F_TSO if (skb->len > (dev->mtu + ETH_HLEN)) { - mss = skb_shinfo(skb)->tso_size; + mss = skb_shinfo(skb)->gso_size; if (mss != 0) max_segments = MYRI10GE_MAX_SEND_DESC_TSO; } @@ -2112,7 +2112,7 @@ abort_linearize: } idx = (idx + 1) & tx->mask; } while (idx != last_idx); - if (skb_shinfo(skb)->tso_size) { + if (skb_shinfo(skb)->gso_size) { printk(KERN_ERR "myri10ge: %s: TSO but wanted to linearize?!?!?\n", mgp->dev->name); diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 985afe0e6273..12d1cb289bb0 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -2172,7 +2172,7 @@ static int rtl8169_xmit_frags(struct rtl8169_private *tp, struct sk_buff *skb, static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev) { if (dev->features & NETIF_F_TSO) { - u32 mss = skb_shinfo(skb)->tso_size; + u32 mss = skb_shinfo(skb)->gso_size; if (mss) return LargeSend | ((mss & MSSMask) << MSSShift); diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c index 11daed495b97..3defe5d4f7d3 100644 --- a/drivers/net/s2io.c +++ b/drivers/net/s2io.c @@ -3959,8 +3959,8 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) txdp->Control_1 = 0; txdp->Control_2 = 0; #ifdef NETIF_F_TSO - mss = skb_shinfo(skb)->tso_size; - if (mss) { + mss = skb_shinfo(skb)->gso_size; + if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV4) { txdp->Control_1 |= TXD_TCP_LSO_EN; txdp->Control_1 |= TXD_TCP_LSO_MSS(mss); } @@ -3980,10 +3980,10 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) } frg_len = skb->len - skb->data_len; - if (skb_shinfo(skb)->ufo_size) { + if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) { int ufo_size; - ufo_size = skb_shinfo(skb)->ufo_size; + ufo_size = skb_shinfo(skb)->gso_size; ufo_size &= ~7; txdp->Control_1 |= TXD_UFO_EN; txdp->Control_1 |= TXD_UFO_MSS(ufo_size); @@ -4009,7 +4009,7 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) txdp->Host_Control = (unsigned long) skb; txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len); - if (skb_shinfo(skb)->ufo_size) + if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) txdp->Control_1 |= TXD_UFO_EN; frg_cnt = skb_shinfo(skb)->nr_frags; @@ -4024,12 +4024,12 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) (sp->pdev, frag->page, frag->page_offset, frag->size, PCI_DMA_TODEVICE); txdp->Control_1 = TXD_BUFFER0_SIZE(frag->size); - if (skb_shinfo(skb)->ufo_size) + if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) txdp->Control_1 |= TXD_UFO_EN; } txdp->Control_1 |= TXD_GATHER_CODE_LAST; - if (skb_shinfo(skb)->ufo_size) + if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) frg_cnt++; /* as Txd0 was used for inband header */ tx_fifo = mac_control->tx_FIFO_start[queue]; @@ -4043,7 +4043,7 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) if (mss) val64 |= TX_FIFO_SPECIAL_FUNC; #endif - if (skb_shinfo(skb)->ufo_size) + if (skb_shinfo(skb)->gso_type == SKB_GSO_UDPV4) val64 |= TX_FIFO_SPECIAL_FUNC; writeq(val64, &tx_fifo->List_Control); diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index fba1e4d4d83d..d3577871be28 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -1160,7 +1160,7 @@ static unsigned tx_le_req(const struct sk_buff *skb) count = sizeof(dma_addr_t) / sizeof(u32); count += skb_shinfo(skb)->nr_frags * count; - if (skb_shinfo(skb)->tso_size) + if (skb_shinfo(skb)->gso_size) ++count; if (skb->ip_summed == CHECKSUM_HW) @@ -1232,7 +1232,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev) } /* Check for TCP Segmentation Offload */ - mss = skb_shinfo(skb)->tso_size; + mss = skb_shinfo(skb)->gso_size; if (mss != 0) { /* just drop the packet if non-linear expansion fails */ if (skb_header_cloned(skb) && diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index b2ddd4522a87..e3e380f90f86 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -3780,7 +3780,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) #if TG3_TSO_SUPPORT != 0 mss = 0; if (skb->len > (tp->dev->mtu + ETH_HLEN) && - (mss = skb_shinfo(skb)->tso_size) != 0) { + (mss = skb_shinfo(skb)->gso_size) != 0) { int tcp_opt_len, ip_tcp_len; if (skb_header_cloned(skb) && @@ -3905,7 +3905,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) #if TG3_TSO_SUPPORT != 0 mss = 0; if (skb->len > (tp->dev->mtu + ETH_HLEN) && - (mss = skb_shinfo(skb)->tso_size) != 0) { + (mss = skb_shinfo(skb)->gso_size) != 0) { int tcp_opt_len, ip_tcp_len; if (skb_header_cloned(skb) && diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c index d9258d42090c..e49e8b520c28 100644 --- a/drivers/net/typhoon.c +++ b/drivers/net/typhoon.c @@ -340,7 +340,7 @@ enum state_values { #endif #if defined(NETIF_F_TSO) -#define skb_tso_size(x) (skb_shinfo(x)->tso_size) +#define skb_tso_size(x) (skb_shinfo(x)->gso_size) #define TSO_NUM_DESCRIPTORS 2 #define TSO_OFFLOAD_ON TYPHOON_OFFLOAD_TCP_SEGMENT #else diff --git a/drivers/s390/net/qeth_eddp.c b/drivers/s390/net/qeth_eddp.c index 0bab60a20309..38aad8321456 100644 --- a/drivers/s390/net/qeth_eddp.c +++ b/drivers/s390/net/qeth_eddp.c @@ -420,7 +420,7 @@ __qeth_eddp_fill_context_tcp(struct qeth_eddp_context *ctx, } tcph = eddp->skb->h.th; while (eddp->skb_offset < eddp->skb->len) { - data_len = min((int)skb_shinfo(eddp->skb)->tso_size, + data_len = min((int)skb_shinfo(eddp->skb)->gso_size, (int)(eddp->skb->len - eddp->skb_offset)); /* prepare qdio hdr */ if (eddp->qh.hdr.l2.id == QETH_HEADER_TYPE_LAYER2){ @@ -515,20 +515,20 @@ qeth_eddp_calc_num_pages(struct qeth_eddp_context *ctx, struct sk_buff *skb, QETH_DBF_TEXT(trace, 5, "eddpcanp"); /* can we put multiple skbs in one page? */ - skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->tso_size + hdr_len); + skbs_per_page = PAGE_SIZE / (skb_shinfo(skb)->gso_size + hdr_len); if (skbs_per_page > 1){ - ctx->num_pages = (skb_shinfo(skb)->tso_segs + 1) / + ctx->num_pages = (skb_shinfo(skb)->gso_segs + 1) / skbs_per_page + 1; ctx->elements_per_skb = 1; } else { /* no -> how many elements per skb? */ - ctx->elements_per_skb = (skb_shinfo(skb)->tso_size + hdr_len + + ctx->elements_per_skb = (skb_shinfo(skb)->gso_size + hdr_len + PAGE_SIZE) >> PAGE_SHIFT; ctx->num_pages = ctx->elements_per_skb * - (skb_shinfo(skb)->tso_segs + 1); + (skb_shinfo(skb)->gso_segs + 1); } ctx->num_elements = ctx->elements_per_skb * - (skb_shinfo(skb)->tso_segs + 1); + (skb_shinfo(skb)->gso_segs + 1); } static inline struct qeth_eddp_context * diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index 9e671a48cd2f..56009d768326 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -4417,7 +4417,7 @@ qeth_send_packet(struct qeth_card *card, struct sk_buff *skb) struct qeth_eddp_context *ctx = NULL; int tx_bytes = skb->len; unsigned short nr_frags = skb_shinfo(skb)->nr_frags; - unsigned short tso_size = skb_shinfo(skb)->tso_size; + unsigned short tso_size = skb_shinfo(skb)->gso_size; int rc; QETH_DBF_TEXT(trace, 6, "sendpkt"); @@ -4453,7 +4453,7 @@ qeth_send_packet(struct qeth_card *card, struct sk_buff *skb) queue = card->qdio.out_qs [qeth_get_priority_queue(card, skb, ipv, cast_type)]; - if (skb_shinfo(skb)->tso_size) + if (skb_shinfo(skb)->gso_size) large_send = card->options.large_send; /*are we able to do TSO ? If so ,prepare and send it from here */ diff --git a/drivers/s390/net/qeth_tso.h b/drivers/s390/net/qeth_tso.h index 24ef40ca9562..593f298142c1 100644 --- a/drivers/s390/net/qeth_tso.h +++ b/drivers/s390/net/qeth_tso.h @@ -51,7 +51,7 @@ qeth_tso_fill_header(struct qeth_card *card, struct sk_buff *skb) hdr->ext.hdr_version = 1; hdr->ext.hdr_len = 28; /*insert non-fix values */ - hdr->ext.mss = skb_shinfo(skb)->tso_size; + hdr->ext.mss = skb_shinfo(skb)->gso_size; hdr->ext.dg_hdr_len = (__u16)(iph->ihl*4 + tcph->doff*4); hdr->ext.payload_len = (__u16)(skb->len - hdr->ext.dg_hdr_len - sizeof(struct qeth_hdr_tso)); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cead6be467ed..fa5671307b90 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -308,9 +308,12 @@ struct net_device #define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ #define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ -#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ #define NETIF_F_LLTX 4096 /* LockLess TX */ -#define NETIF_F_UFO 8192 /* Can offload UDP Large Send*/ + + /* Segmentation offload features */ +#define NETIF_F_GSO_SHIFT 16 +#define NETIF_F_TSO (SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT) +#define NETIF_F_UFO (SKB_GSO_UDPV4 << NETIF_F_GSO_SHIFT) #define NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM | NETIF_F_HW_CSUM) #define NETIF_F_ALL_CSUM (NETIF_F_IP_CSUM | NETIF_F_GEN_CSUM) @@ -979,6 +982,13 @@ extern void dev_seq_stop(struct seq_file *seq, void *v); extern void linkwatch_run_queue(void); +static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) +{ + int feature = skb_shinfo(skb)->gso_type << NETIF_F_GSO_SHIFT; + return skb_shinfo(skb)->gso_size && + (dev->features & feature) != feature; +} + #endif /* __KERNEL__ */ #endif /* _LINUX_DEV_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f8c7eb79a27f..97b0d2d1a6b0 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -134,9 +134,10 @@ struct skb_frag_struct { struct skb_shared_info { atomic_t dataref; unsigned short nr_frags; - unsigned short tso_size; - unsigned short tso_segs; - unsigned short ufo_size; + unsigned short gso_size; + /* Warning: this field is not always filled in (UFO)! */ + unsigned short gso_segs; + unsigned short gso_type; unsigned int ip6_frag_id; struct sk_buff *frag_list; skb_frag_t frags[MAX_SKB_FRAGS]; @@ -168,6 +169,11 @@ enum { SKB_FCLONE_CLONE, }; +enum { + SKB_GSO_TCPV4 = 1 << 0, + SKB_GSO_UDPV4 = 1 << 1, +}; + /** * struct sk_buff - socket buffer * @next: Next buffer in list diff --git a/include/net/tcp.h b/include/net/tcp.h index 5f4eb5c79689..b197a9e615c1 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -569,13 +569,13 @@ struct tcp_skb_cb { */ static inline int tcp_skb_pcount(const struct sk_buff *skb) { - return skb_shinfo(skb)->tso_segs; + return skb_shinfo(skb)->gso_segs; } /* This is valid iff tcp_skb_pcount() > 1. */ static inline int tcp_skb_mss(const struct sk_buff *skb) { - return skb_shinfo(skb)->tso_size; + return skb_shinfo(skb)->gso_size; } static inline void tcp_dec_pcount_approx(__u32 *count, diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 0dca027ceb80..8be9f2123e54 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -34,8 +34,8 @@ static inline unsigned packet_length(const struct sk_buff *skb) int br_dev_queue_push_xmit(struct sk_buff *skb) { - /* drop mtu oversized packets except tso */ - if (packet_length(skb) > skb->dev->mtu && !skb_shinfo(skb)->tso_size) + /* drop mtu oversized packets except gso */ + if (packet_length(skb) > skb->dev->mtu && !skb_shinfo(skb)->gso_size) kfree_skb(skb); else { #ifdef CONFIG_BRIDGE_NETFILTER diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 3e41f9d6d51c..8298a5179aef 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -761,7 +761,7 @@ static int br_nf_dev_queue_xmit(struct sk_buff *skb) { if (skb->protocol == htons(ETH_P_IP) && skb->len > skb->dev->mtu && - !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) + !skb_shinfo(skb)->gso_size) return ip_fragment(skb, br_dev_queue_push_xmit); else return br_dev_queue_push_xmit(skb); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fe63d4efbd4d..368d98578c14 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -172,9 +172,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, shinfo = skb_shinfo(skb); atomic_set(&shinfo->dataref, 1); shinfo->nr_frags = 0; - shinfo->tso_size = 0; - shinfo->tso_segs = 0; - shinfo->ufo_size = 0; + shinfo->gso_size = 0; + shinfo->gso_segs = 0; + shinfo->gso_type = 0; shinfo->ip6_frag_id = 0; shinfo->frag_list = NULL; @@ -238,8 +238,9 @@ struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, atomic_set(&(skb_shinfo(skb)->dataref), 1); skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->tso_size = 0; - skb_shinfo(skb)->tso_segs = 0; + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_segs = 0; + skb_shinfo(skb)->gso_type = 0; skb_shinfo(skb)->frag_list = NULL; out: return skb; @@ -528,8 +529,9 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #endif skb_copy_secmark(new, old); atomic_set(&new->users, 1); - skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size; - skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs; + skb_shinfo(new)->gso_size = skb_shinfo(old)->gso_size; + skb_shinfo(new)->gso_segs = skb_shinfo(old)->gso_segs; + skb_shinfo(new)->gso_type = skb_shinfo(old)->gso_type; } /** diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8538aac3d148..7624fd1d8f9f 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -210,8 +210,7 @@ static inline int ip_finish_output(struct sk_buff *skb) return dst_output(skb); } #endif - if (skb->len > dst_mtu(skb->dst) && - !(skb_shinfo(skb)->ufo_size || skb_shinfo(skb)->tso_size)) + if (skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) return ip_fragment(skb, ip_finish_output2); else return ip_finish_output2(skb); @@ -362,7 +361,7 @@ packet_routed: } ip_select_ident_more(iph, &rt->u.dst, sk, - (skb_shinfo(skb)->tso_segs ?: 1) - 1); + (skb_shinfo(skb)->gso_segs ?: 1) - 1); /* Add an IP checksum. */ ip_send_check(iph); @@ -744,7 +743,8 @@ static inline int ip_ufo_append_data(struct sock *sk, (length - transhdrlen)); if (!err) { /* specify the length of each IP datagram fragment*/ - skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); + skb_shinfo(skb)->gso_size = mtu - fragheaderlen; + skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4; __skb_queue_tail(&sk->sk_write_queue, skb); return 0; @@ -1087,14 +1087,16 @@ ssize_t ip_append_page(struct sock *sk, struct page *page, inet->cork.length += size; if ((sk->sk_protocol == IPPROTO_UDP) && - (rt->u.dst.dev->features & NETIF_F_UFO)) - skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen); + (rt->u.dst.dev->features & NETIF_F_UFO)) { + skb_shinfo(skb)->gso_size = mtu - fragheaderlen; + skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4; + } while (size > 0) { int i; - if (skb_shinfo(skb)->ufo_size) + if (skb_shinfo(skb)->gso_size) len = size; else { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 74998f250071..062dd1a0d8a8 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -571,7 +571,7 @@ new_segment: skb->ip_summed = CHECKSUM_HW; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; - skb_shinfo(skb)->tso_segs = 0; + skb_shinfo(skb)->gso_segs = 0; if (!copied) TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH; @@ -818,7 +818,7 @@ new_segment: tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; - skb_shinfo(skb)->tso_segs = 0; + skb_shinfo(skb)->gso_segs = 0; from += copy; copied += copy; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e08245bdda3a..94fe5b1f9dcb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1073,7 +1073,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ else pkt_len = (end_seq - TCP_SKB_CB(skb)->seq); - if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->tso_size)) + if (tcp_fragment(sk, skb, pkt_len, skb_shinfo(skb)->gso_size)) break; pcount = tcp_skb_pcount(skb); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 07bb5a2b375e..bdd71db8bf90 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -515,15 +515,17 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned /* Avoid the costly divide in the normal * non-TSO case. */ - skb_shinfo(skb)->tso_segs = 1; - skb_shinfo(skb)->tso_size = 0; + skb_shinfo(skb)->gso_segs = 1; + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; } else { unsigned int factor; factor = skb->len + (mss_now - 1); factor /= mss_now; - skb_shinfo(skb)->tso_segs = factor; - skb_shinfo(skb)->tso_size = mss_now; + skb_shinfo(skb)->gso_segs = factor; + skb_shinfo(skb)->gso_size = mss_now; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; } } @@ -914,7 +916,7 @@ static int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int if (!tso_segs || (tso_segs > 1 && - skb_shinfo(skb)->tso_size != mss_now)) { + tcp_skb_mss(skb) != mss_now)) { tcp_set_skb_tso_segs(sk, skb, mss_now); tso_segs = tcp_skb_pcount(skb); } @@ -1724,8 +1726,9 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) { if (!pskb_trim(skb, 0)) { TCP_SKB_CB(skb)->seq = TCP_SKB_CB(skb)->end_seq - 1; - skb_shinfo(skb)->tso_segs = 1; - skb_shinfo(skb)->tso_size = 0; + skb_shinfo(skb)->gso_segs = 1; + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; skb->ip_summed = CHECKSUM_NONE; skb->csum = 0; } @@ -1930,8 +1933,9 @@ void tcp_send_fin(struct sock *sk) skb->csum = 0; TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); TCP_SKB_CB(skb)->sacked = 0; - skb_shinfo(skb)->tso_segs = 1; - skb_shinfo(skb)->tso_size = 0; + skb_shinfo(skb)->gso_segs = 1; + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ TCP_SKB_CB(skb)->seq = tp->write_seq; @@ -1963,8 +1967,9 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) skb->csum = 0; TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST); TCP_SKB_CB(skb)->sacked = 0; - skb_shinfo(skb)->tso_segs = 1; - skb_shinfo(skb)->tso_size = 0; + skb_shinfo(skb)->gso_segs = 1; + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; /* Send it off. */ TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); @@ -2047,8 +2052,9 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, TCP_SKB_CB(skb)->seq = tcp_rsk(req)->snt_isn; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; TCP_SKB_CB(skb)->sacked = 0; - skb_shinfo(skb)->tso_segs = 1; - skb_shinfo(skb)->tso_size = 0; + skb_shinfo(skb)->gso_segs = 1; + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; th->seq = htonl(TCP_SKB_CB(skb)->seq); th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ @@ -2152,8 +2158,9 @@ int tcp_connect(struct sock *sk) TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; TCP_ECN_send_syn(sk, tp, buff); TCP_SKB_CB(buff)->sacked = 0; - skb_shinfo(buff)->tso_segs = 1; - skb_shinfo(buff)->tso_size = 0; + skb_shinfo(buff)->gso_segs = 1; + skb_shinfo(buff)->gso_size = 0; + skb_shinfo(buff)->gso_type = 0; buff->csum = 0; TCP_SKB_CB(buff)->seq = tp->write_seq++; TCP_SKB_CB(buff)->end_seq = tp->write_seq; @@ -2257,8 +2264,9 @@ void tcp_send_ack(struct sock *sk) buff->csum = 0; TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(buff)->sacked = 0; - skb_shinfo(buff)->tso_segs = 1; - skb_shinfo(buff)->tso_size = 0; + skb_shinfo(buff)->gso_segs = 1; + skb_shinfo(buff)->gso_size = 0; + skb_shinfo(buff)->gso_type = 0; /* Send it off, this clears delayed acks for us. */ TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); @@ -2293,8 +2301,9 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) skb->csum = 0; TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(skb)->sacked = urgent; - skb_shinfo(skb)->tso_segs = 1; - skb_shinfo(skb)->tso_size = 0; + skb_shinfo(skb)->gso_segs = 1; + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_type = 0; /* Use a previous sequence. This should cause the other * end to send an ack. Don't queue or clone SKB, just diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d29620f4910e..abb94de33768 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -148,7 +148,7 @@ static int ip6_output2(struct sk_buff *skb) int ip6_output(struct sk_buff *skb) { - if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->ufo_size) || + if ((skb->len > dst_mtu(skb->dst) && !skb_shinfo(skb)->gso_size) || dst_allfrag(skb->dst)) return ip6_fragment(skb, ip6_output2); else @@ -833,8 +833,9 @@ static inline int ip6_ufo_append_data(struct sock *sk, struct frag_hdr fhdr; /* specify the length of each IP datagram fragment*/ - skb_shinfo(skb)->ufo_size = (mtu - fragheaderlen) - - sizeof(struct frag_hdr); + skb_shinfo(skb)->gso_size = mtu - fragheaderlen - + sizeof(struct frag_hdr); + skb_shinfo(skb)->gso_type = SKB_GSO_UDPV4; ipv6_select_ident(skb, &fhdr); skb_shinfo(skb)->ip6_frag_id = fhdr.identification; __skb_queue_tail(&sk->sk_write_queue, skb); -- cgit v1.2.3 From f6a78bfcb141f963187464bac838d46a81c3882a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 22 Jun 2006 02:57:17 -0700 Subject: [NET]: Add generic segmentation offload This patch adds the infrastructure for generic segmentation offload. The idea is to tap into the potential savings of TSO without hardware support by postponing the allocation of segmented skb's until just before the entry point into the NIC driver. The same structure can be used to support software IPv6 TSO, as well as UFO and segmentation offload for other relevant protocols, e.g., DCCP. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++- net/core/dev.c | 127 ++++++++++++++++++++++++++++++++++++++++++++-- net/sched/sch_generic.c | 19 +++++-- 3 files changed, 143 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fa5671307b90..b4eae18390cc 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -405,6 +405,9 @@ struct net_device struct list_head qdisc_list; unsigned long tx_queue_len; /* Max frames per queue allowed */ + /* Partially transmitted GSO packet. */ + struct sk_buff *gso_skb; + /* ingress path synchronizer */ spinlock_t ingress_lock; struct Qdisc *qdisc_ingress; @@ -539,6 +542,7 @@ struct packet_type { struct net_device *, struct packet_type *, struct net_device *); + struct sk_buff *(*gso_segment)(struct sk_buff *skb, int sg); void *af_packet_priv; struct list_head list; }; @@ -689,7 +693,8 @@ extern int dev_change_name(struct net_device *, char *); extern int dev_set_mtu(struct net_device *, int); extern int dev_set_mac_address(struct net_device *, struct sockaddr *); -extern void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); +extern int dev_hard_start_xmit(struct sk_buff *skb, + struct net_device *dev); extern void dev_init(void); @@ -963,6 +968,7 @@ extern int netdev_max_backlog; extern int weight_p; extern int netdev_set_master(struct net_device *dev, struct net_device *master); extern int skb_checksum_help(struct sk_buff *skb, int inward); +extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int sg); #ifdef CONFIG_BUG extern void netdev_rx_csum_fault(struct net_device *dev); #else diff --git a/net/core/dev.c b/net/core/dev.c index 29e3888102bc..d293e0f90a0c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -116,6 +116,7 @@ #include #include #include +#include /* * The list of packet types we will receive (as opposed to discard) @@ -1048,7 +1049,7 @@ static inline void net_timestamp(struct sk_buff *skb) * taps currently in use. */ -void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) +static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) { struct packet_type *ptype; @@ -1186,6 +1187,40 @@ out: return ret; } +/** + * skb_gso_segment - Perform segmentation on skb. + * @skb: buffer to segment + * @sg: whether scatter-gather is supported on the target. + * + * This function segments the given skb and returns a list of segments. + */ +struct sk_buff *skb_gso_segment(struct sk_buff *skb, int sg) +{ + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); + struct packet_type *ptype; + int type = skb->protocol; + + BUG_ON(skb_shinfo(skb)->frag_list); + BUG_ON(skb->ip_summed != CHECKSUM_HW); + + skb->mac.raw = skb->data; + skb->mac_len = skb->nh.raw - skb->data; + __skb_pull(skb, skb->mac_len); + + rcu_read_lock(); + list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { + if (ptype->type == type && !ptype->dev && ptype->gso_segment) { + segs = ptype->gso_segment(skb, sg); + break; + } + } + rcu_read_unlock(); + + return segs; +} + +EXPORT_SYMBOL(skb_gso_segment); + /* Take action when hardware reception checksum errors are detected. */ #ifdef CONFIG_BUG void netdev_rx_csum_fault(struct net_device *dev) @@ -1222,6 +1257,86 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) #define illegal_highdma(dev, skb) (0) #endif +struct dev_gso_cb { + void (*destructor)(struct sk_buff *skb); +}; + +#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) + +static void dev_gso_skb_destructor(struct sk_buff *skb) +{ + struct dev_gso_cb *cb; + + do { + struct sk_buff *nskb = skb->next; + + skb->next = nskb->next; + nskb->next = NULL; + kfree_skb(nskb); + } while (skb->next); + + cb = DEV_GSO_CB(skb); + if (cb->destructor) + cb->destructor(skb); +} + +/** + * dev_gso_segment - Perform emulated hardware segmentation on skb. + * @skb: buffer to segment + * + * This function segments the given skb and stores the list of segments + * in skb->next. + */ +static int dev_gso_segment(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct sk_buff *segs; + + segs = skb_gso_segment(skb, dev->features & NETIF_F_SG && + !illegal_highdma(dev, skb)); + if (unlikely(IS_ERR(segs))) + return PTR_ERR(segs); + + skb->next = segs; + DEV_GSO_CB(skb)->destructor = skb->destructor; + skb->destructor = dev_gso_skb_destructor; + + return 0; +} + +int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + if (likely(!skb->next)) { + if (netdev_nit) + dev_queue_xmit_nit(skb, dev); + + if (!netif_needs_gso(dev, skb)) + return dev->hard_start_xmit(skb, dev); + + if (unlikely(dev_gso_segment(skb))) + goto out_kfree_skb; + } + + do { + struct sk_buff *nskb = skb->next; + int rc; + + skb->next = nskb->next; + nskb->next = NULL; + rc = dev->hard_start_xmit(nskb, dev); + if (unlikely(rc)) { + skb->next = nskb; + return rc; + } + } while (skb->next); + + skb->destructor = DEV_GSO_CB(skb)->destructor; + +out_kfree_skb: + kfree_skb(skb); + return 0; +} + #define HARD_TX_LOCK(dev, cpu) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ netif_tx_lock(dev); \ @@ -1266,6 +1381,10 @@ int dev_queue_xmit(struct sk_buff *skb) struct Qdisc *q; int rc = -ENOMEM; + /* GSO will handle the following emulations directly. */ + if (netif_needs_gso(dev, skb)) + goto gso; + if (skb_shinfo(skb)->frag_list && !(dev->features & NETIF_F_FRAGLIST) && __skb_linearize(skb)) @@ -1290,6 +1409,7 @@ int dev_queue_xmit(struct sk_buff *skb) if (skb_checksum_help(skb, 0)) goto out_kfree_skb; +gso: spin_lock_prefetch(&dev->queue_lock); /* Disable soft irqs for various locks below. Also @@ -1346,11 +1466,8 @@ int dev_queue_xmit(struct sk_buff *skb) HARD_TX_LOCK(dev, cpu); if (!netif_queue_stopped(dev)) { - if (netdev_nit) - dev_queue_xmit_nit(skb, dev); - rc = 0; - if (!dev->hard_start_xmit(skb, dev)) { + if (!dev_hard_start_xmit(skb, dev)) { HARD_TX_UNLOCK(dev); goto out; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 7aad0121232c..74d4a1dceeec 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -96,8 +96,11 @@ static inline int qdisc_restart(struct net_device *dev) struct sk_buff *skb; /* Dequeue packet */ - if ((skb = q->dequeue(q)) != NULL) { + if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) { unsigned nolock = (dev->features & NETIF_F_LLTX); + + dev->gso_skb = NULL; + /* * When the driver has LLTX set it does its own locking * in start_xmit. No need to add additional overhead by @@ -134,10 +137,8 @@ static inline int qdisc_restart(struct net_device *dev) if (!netif_queue_stopped(dev)) { int ret; - if (netdev_nit) - dev_queue_xmit_nit(skb, dev); - ret = dev->hard_start_xmit(skb, dev); + ret = dev_hard_start_xmit(skb, dev); if (ret == NETDEV_TX_OK) { if (!nolock) { netif_tx_unlock(dev); @@ -171,7 +172,10 @@ static inline int qdisc_restart(struct net_device *dev) */ requeue: - q->ops->requeue(skb, q); + if (skb->next) + dev->gso_skb = skb; + else + q->ops->requeue(skb, q); netif_schedule(dev); return 1; } @@ -593,6 +597,11 @@ void dev_deactivate(struct net_device *dev) /* Wait for outstanding qdisc_run calls. */ while (test_bit(__LINK_STATE_QDISC_RUNNING, &dev->state)) yield(); + + if (dev->gso_skb) { + kfree_skb(dev->gso_skb); + dev->gso_skb = NULL; + } } void dev_init_scheduler(struct net_device *dev) -- cgit v1.2.3 From f4c50d990dcf11a296679dc05de3873783236711 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 22 Jun 2006 03:02:40 -0700 Subject: [NET]: Add software TSOv4 This patch adds the GSO implementation for IPv4 TCP. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + include/net/protocol.h | 1 + include/net/tcp.h | 2 + net/core/skbuff.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/af_inet.c | 51 ++++++++++++++++++++ net/ipv4/tcp.c | 62 ++++++++++++++++++++++++ 6 files changed, 243 insertions(+) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 97b0d2d1a6b0..a45bba9b8cbd 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1297,6 +1297,7 @@ extern void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len); extern void skb_release_data(struct sk_buff *skb); +extern struct sk_buff *skb_segment(struct sk_buff *skb, int sg); static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) diff --git a/include/net/protocol.h b/include/net/protocol.h index bcaee39bd2ff..3b6dc15c68a5 100644 --- a/include/net/protocol.h +++ b/include/net/protocol.h @@ -36,6 +36,7 @@ struct net_protocol { int (*handler)(struct sk_buff *skb); void (*err_handler)(struct sk_buff *skb, u32 info); + struct sk_buff *(*gso_segment)(struct sk_buff *skb, int sg); int no_policy; }; diff --git a/include/net/tcp.h b/include/net/tcp.h index b197a9e615c1..ca3d38dfc00b 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1086,6 +1086,8 @@ extern struct request_sock_ops tcp_request_sock_ops; extern int tcp_v4_destroy_sock(struct sock *sk); +extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg); + #ifdef CONFIG_PROC_FS extern int tcp4_proc_init(void); extern void tcp4_proc_exit(void); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 368d98578c14..8e5044ba3ab6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1842,6 +1842,132 @@ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) EXPORT_SYMBOL_GPL(skb_pull_rcsum); +/** + * skb_segment - Perform protocol segmentation on skb. + * @skb: buffer to segment + * @sg: whether scatter-gather can be used for generated segments + * + * This function performs segmentation on the given skb. It returns + * the segment at the given position. It returns NULL if there are + * no more segments to generate, or when an error is encountered. + */ +struct sk_buff *skb_segment(struct sk_buff *skb, int sg) +{ + struct sk_buff *segs = NULL; + struct sk_buff *tail = NULL; + unsigned int mss = skb_shinfo(skb)->gso_size; + unsigned int doffset = skb->data - skb->mac.raw; + unsigned int offset = doffset; + unsigned int headroom; + unsigned int len; + int nfrags = skb_shinfo(skb)->nr_frags; + int err = -ENOMEM; + int i = 0; + int pos; + + __skb_push(skb, doffset); + headroom = skb_headroom(skb); + pos = skb_headlen(skb); + + do { + struct sk_buff *nskb; + skb_frag_t *frag; + int hsize, nsize; + int k; + int size; + + len = skb->len - offset; + if (len > mss) + len = mss; + + hsize = skb_headlen(skb) - offset; + if (hsize < 0) + hsize = 0; + nsize = hsize + doffset; + if (nsize > len + doffset || !sg) + nsize = len + doffset; + + nskb = alloc_skb(nsize + headroom, GFP_ATOMIC); + if (unlikely(!nskb)) + goto err; + + if (segs) + tail->next = nskb; + else + segs = nskb; + tail = nskb; + + nskb->dev = skb->dev; + nskb->priority = skb->priority; + nskb->protocol = skb->protocol; + nskb->dst = dst_clone(skb->dst); + memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); + nskb->pkt_type = skb->pkt_type; + nskb->mac_len = skb->mac_len; + + skb_reserve(nskb, headroom); + nskb->mac.raw = nskb->data; + nskb->nh.raw = nskb->data + skb->mac_len; + nskb->h.raw = nskb->nh.raw + (skb->h.raw - skb->nh.raw); + memcpy(skb_put(nskb, doffset), skb->data, doffset); + + if (!sg) { + nskb->csum = skb_copy_and_csum_bits(skb, offset, + skb_put(nskb, len), + len, 0); + continue; + } + + frag = skb_shinfo(nskb)->frags; + k = 0; + + nskb->ip_summed = CHECKSUM_HW; + nskb->csum = skb->csum; + memcpy(skb_put(nskb, hsize), skb->data + offset, hsize); + + while (pos < offset + len) { + BUG_ON(i >= nfrags); + + *frag = skb_shinfo(skb)->frags[i]; + get_page(frag->page); + size = frag->size; + + if (pos < offset) { + frag->page_offset += offset - pos; + frag->size -= offset - pos; + } + + k++; + + if (pos + size <= offset + len) { + i++; + pos += size; + } else { + frag->size -= pos + size - (offset + len); + break; + } + + frag++; + } + + skb_shinfo(nskb)->nr_frags = k; + nskb->data_len = len - hsize; + nskb->len += nskb->data_len; + nskb->truesize += nskb->data_len; + } while ((offset += len) < skb->len); + + return segs; + +err: + while ((skb = segs)) { + segs = skb->next; + kfree(skb); + } + return ERR_PTR(err); +} + +EXPORT_SYMBOL_GPL(skb_segment); + void __init skb_init(void) { skbuff_head_cache = kmem_cache_create("skbuff_head_cache", diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 0a277453526b..461216b47948 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -68,6 +68,7 @@ */ #include +#include #include #include #include @@ -1096,6 +1097,54 @@ int inet_sk_rebuild_header(struct sock *sk) EXPORT_SYMBOL(inet_sk_rebuild_header); +static struct sk_buff *inet_gso_segment(struct sk_buff *skb, int sg) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct iphdr *iph; + struct net_protocol *ops; + int proto; + int ihl; + int id; + + if (!pskb_may_pull(skb, sizeof(*iph))) + goto out; + + iph = skb->nh.iph; + ihl = iph->ihl * 4; + if (ihl < sizeof(*iph)) + goto out; + + if (!pskb_may_pull(skb, ihl)) + goto out; + + skb->h.raw = __skb_pull(skb, ihl); + iph = skb->nh.iph; + id = ntohs(iph->id); + proto = iph->protocol & (MAX_INET_PROTOS - 1); + segs = ERR_PTR(-EPROTONOSUPPORT); + + rcu_read_lock(); + ops = rcu_dereference(inet_protos[proto]); + if (ops && ops->gso_segment) + segs = ops->gso_segment(skb, sg); + rcu_read_unlock(); + + if (IS_ERR(segs)) + goto out; + + skb = segs; + do { + iph = skb->nh.iph; + iph->id = htons(id++); + iph->tot_len = htons(skb->len - skb->mac_len); + iph->check = 0; + iph->check = ip_fast_csum(skb->nh.raw, iph->ihl); + } while ((skb = skb->next)); + +out: + return segs; +} + #ifdef CONFIG_IP_MULTICAST static struct net_protocol igmp_protocol = { .handler = igmp_rcv, @@ -1105,6 +1154,7 @@ static struct net_protocol igmp_protocol = { static struct net_protocol tcp_protocol = { .handler = tcp_v4_rcv, .err_handler = tcp_v4_err, + .gso_segment = tcp_tso_segment, .no_policy = 1, }; @@ -1150,6 +1200,7 @@ static int ipv4_proc_init(void); static struct packet_type ip_packet_type = { .type = __constant_htons(ETH_P_IP), .func = ip_rcv, + .gso_segment = inet_gso_segment, }; static int __init inet_init(void) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 062dd1a0d8a8..0e029c4e2903 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -258,6 +258,7 @@ #include #include #include +#include #include #include @@ -2144,6 +2145,67 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int sg) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + struct tcphdr *th; + unsigned thlen; + unsigned int seq; + unsigned int delta; + unsigned int oldlen; + unsigned int len; + + if (!pskb_may_pull(skb, sizeof(*th))) + goto out; + + th = skb->h.th; + thlen = th->doff * 4; + if (thlen < sizeof(*th)) + goto out; + + if (!pskb_may_pull(skb, thlen)) + goto out; + + oldlen = ~htonl(skb->len); + __skb_pull(skb, thlen); + + segs = skb_segment(skb, sg); + if (IS_ERR(segs)) + goto out; + + len = skb_shinfo(skb)->gso_size; + delta = csum_add(oldlen, htonl(thlen + len)); + + skb = segs; + th = skb->h.th; + seq = ntohl(th->seq); + + do { + th->fin = th->psh = 0; + + if (skb->ip_summed == CHECKSUM_NONE) { + th->check = csum_fold(csum_partial( + skb->h.raw, thlen, csum_add(skb->csum, delta))); + } + + seq += len; + skb = skb->next; + th = skb->h.th; + + th->seq = htonl(seq); + th->cwr = 0; + } while (skb->next); + + if (skb->ip_summed == CHECKSUM_NONE) { + delta = csum_add(oldlen, htonl(skb->tail - skb->h.raw)); + th->check = csum_fold(csum_partial( + skb->h.raw, thlen, csum_add(skb->csum, delta))); + } + +out: + return segs; +} + extern void __skb_cb_too_small_for_tcp(int, int); extern struct tcp_congestion_ops tcp_reno; -- cgit v1.2.3 From 37c3185a02d4b85fbe134bf5204535405dd2c957 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 22 Jun 2006 03:07:29 -0700 Subject: [NET]: Added GSO toggle This patch adds a generic segmentation offload toggle that can be turned on/off for each net device. For now it only supports in TCPv4. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/ethtool.h | 2 ++ include/linux/netdevice.h | 1 + include/net/sock.h | 4 ++++ net/bridge/br_if.c | 17 +++++++++++------ net/core/ethtool.c | 29 +++++++++++++++++++++++++++++ 5 files changed, 47 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index cf2abeca92a0..c6310aef5ab0 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -411,6 +411,8 @@ struct ethtool_ops { #define ETHTOOL_GPERMADDR 0x00000020 /* Get permanent hardware address */ #define ETHTOOL_GUFO 0x00000021 /* Get UFO enable (ethtool_value) */ #define ETHTOOL_SUFO 0x00000022 /* Set UFO enable (ethtool_value) */ +#define ETHTOOL_GGSO 0x00000023 /* Get GSO enable (ethtool_value) */ +#define ETHTOOL_SGSO 0x00000024 /* Set GSO enable (ethtool_value) */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b4eae18390cc..bc747e5d7138 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -308,6 +308,7 @@ struct net_device #define NETIF_F_HW_VLAN_RX 256 /* Receive VLAN hw acceleration */ #define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ +#define NETIF_F_GSO 2048 /* Enable software GSO. */ #define NETIF_F_LLTX 4096 /* LockLess TX */ /* Segmentation offload features */ diff --git a/include/net/sock.h b/include/net/sock.h index d10dfecb6cbd..a897f05de3b5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1030,9 +1030,13 @@ static inline void sk_setup_caps(struct sock *sk, struct dst_entry *dst) { __sk_dst_set(sk, dst); sk->sk_route_caps = dst->dev->features; + if (sk->sk_route_caps & NETIF_F_GSO) + sk->sk_route_caps |= NETIF_F_TSO; if (sk->sk_route_caps & NETIF_F_TSO) { if (sock_flag(sk, SOCK_NO_LARGESEND) || dst->header_len) sk->sk_route_caps &= ~NETIF_F_TSO; + else + sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; } } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index fdec773f5b52..07956ecf545e 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -376,15 +376,20 @@ void br_features_recompute(struct net_bridge *br) features = br->feature_mask & ~NETIF_F_ALL_CSUM; list_for_each_entry(p, &br->port_list, list) { - if (checksum & NETIF_F_NO_CSUM && - !(p->dev->features & NETIF_F_NO_CSUM)) + unsigned long feature = p->dev->features; + + if (checksum & NETIF_F_NO_CSUM && !(feature & NETIF_F_NO_CSUM)) checksum ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM; - if (checksum & NETIF_F_HW_CSUM && - !(p->dev->features & NETIF_F_HW_CSUM)) + if (checksum & NETIF_F_HW_CSUM && !(feature & NETIF_F_HW_CSUM)) checksum ^= NETIF_F_HW_CSUM | NETIF_F_IP_CSUM; - if (!(p->dev->features & NETIF_F_IP_CSUM)) + if (!(feature & NETIF_F_IP_CSUM)) checksum = 0; - features &= p->dev->features; + + if (feature & NETIF_F_GSO) + feature |= NETIF_F_TSO; + feature |= NETIF_F_GSO; + + features &= feature; } br->dev->features = features | checksum | NETIF_F_LLTX; diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 33ce7ed6afc6..27ce1683caf5 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -614,6 +614,29 @@ static int ethtool_set_ufo(struct net_device *dev, char __user *useraddr) return dev->ethtool_ops->set_ufo(dev, edata.data); } +static int ethtool_get_gso(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GGSO }; + + edata.data = dev->features & NETIF_F_GSO; + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_gso(struct net_device *dev, char __user *useraddr) +{ + struct ethtool_value edata; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + if (edata.data) + dev->features |= NETIF_F_GSO; + else + dev->features &= ~NETIF_F_GSO; + return 0; +} + static int ethtool_self_test(struct net_device *dev, char __user *useraddr) { struct ethtool_test test; @@ -905,6 +928,12 @@ int dev_ethtool(struct ifreq *ifr) case ETHTOOL_SUFO: rc = ethtool_set_ufo(dev, useraddr); break; + case ETHTOOL_GGSO: + rc = ethtool_get_gso(dev, useraddr); + break; + case ETHTOOL_SGSO: + rc = ethtool_set_gso(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3 From 09b8f7a93efd4b2c4ef391e2fbf076f28c6d36d6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 22 Jun 2006 03:08:03 -0700 Subject: [IPSEC]: Handle GSO packets This patch segments GSO packets received by the IPsec stack. This can happen when a NIC driver injects GSO packets into the stack which are then forwarded to another host. The primary application of this is going to be Xen where its backend driver may inject GSO packets into dom0. Of course this also can be used by other virtualisation schemes such as VMWare or UML since the tap device could be modified to inject GSO packets received through splice. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- net/ipv4/xfrm4_output.c | 54 +++++++++++++++++++++++++++++++++++++++++-------- net/ipv6/xfrm6_output.c | 39 +++++++++++++++++++++++++++++++++-- 2 files changed, 83 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index ac9d91d4bb05..193363e22932 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -9,6 +9,8 @@ */ #include +#include +#include #include #include #include @@ -97,16 +99,10 @@ error_nolock: goto out_exit; } -static int xfrm4_output_finish(struct sk_buff *skb) +static int xfrm4_output_finish2(struct sk_buff *skb) { int err; -#ifdef CONFIG_NETFILTER - if (!skb->dst->xfrm) { - IPCB(skb)->flags |= IPSKB_REROUTED; - return dst_output(skb); - } -#endif while (likely((err = xfrm4_output_one(skb)) == 0)) { nf_reset(skb); @@ -119,7 +115,7 @@ static int xfrm4_output_finish(struct sk_buff *skb) return dst_output(skb); err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL, - skb->dst->dev, xfrm4_output_finish); + skb->dst->dev, xfrm4_output_finish2); if (unlikely(err != 1)) break; } @@ -127,6 +123,48 @@ static int xfrm4_output_finish(struct sk_buff *skb) return err; } +static int xfrm4_output_finish(struct sk_buff *skb) +{ + struct sk_buff *segs; + +#ifdef CONFIG_NETFILTER + if (!skb->dst->xfrm) { + IPCB(skb)->flags |= IPSKB_REROUTED; + return dst_output(skb); + } +#endif + + if (!skb_shinfo(skb)->gso_size) + return xfrm4_output_finish2(skb); + + skb->protocol = htons(ETH_P_IP); + segs = skb_gso_segment(skb, 0); + kfree_skb(skb); + if (unlikely(IS_ERR(segs))) + return PTR_ERR(segs); + + do { + struct sk_buff *nskb = segs->next; + int err; + + segs->next = NULL; + err = xfrm4_output_finish2(segs); + + if (unlikely(err)) { + while ((segs = nskb)) { + nskb = segs->next; + segs->next = NULL; + kfree_skb(segs); + } + return err; + } + + segs = nskb; + } while (segs); + + return 0; +} + int xfrm4_output(struct sk_buff *skb) { return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev, diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 16e84254a252..48fccb1eca08 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -94,7 +94,7 @@ error_nolock: goto out_exit; } -static int xfrm6_output_finish(struct sk_buff *skb) +static int xfrm6_output_finish2(struct sk_buff *skb) { int err; @@ -110,7 +110,7 @@ static int xfrm6_output_finish(struct sk_buff *skb) return dst_output(skb); err = nf_hook(PF_INET6, NF_IP6_POST_ROUTING, &skb, NULL, - skb->dst->dev, xfrm6_output_finish); + skb->dst->dev, xfrm6_output_finish2); if (unlikely(err != 1)) break; } @@ -118,6 +118,41 @@ static int xfrm6_output_finish(struct sk_buff *skb) return err; } +static int xfrm6_output_finish(struct sk_buff *skb) +{ + struct sk_buff *segs; + + if (!skb_shinfo(skb)->gso_size) + return xfrm6_output_finish2(skb); + + skb->protocol = htons(ETH_P_IP); + segs = skb_gso_segment(skb, 0); + kfree_skb(skb); + if (unlikely(IS_ERR(segs))) + return PTR_ERR(segs); + + do { + struct sk_buff *nskb = segs->next; + int err; + + segs->next = NULL; + err = xfrm6_output_finish2(segs); + + if (unlikely(err)) { + while ((segs = nskb)) { + nskb = segs->next; + segs->next = NULL; + kfree_skb(segs); + } + return err; + } + + segs = nskb; + } while (segs); + + return 0; +} + int xfrm6_output(struct sk_buff *skb) { return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dst->dev, -- cgit v1.2.3 From f4b8ea7849544114e9d3d682df4d400180854677 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 22 Jun 2006 16:00:11 -0700 Subject: [NET]: fix net-core kernel-doc Warning(/var/linsrc/linux-2617-g4//include/linux/skbuff.h:304): No description found for parameter 'dma_cookie' Warning(/var/linsrc/linux-2617-g4//include/net/sock.h:1274): No description found for parameter 'copied_early' Warning(/var/linsrc/linux-2617-g4//net/core/dev.c:3309): No description found for parameter 'chan' Warning(/var/linsrc/linux-2617-g4//net/core/dev.c:3309): No description found for parameter 'event' Signed-off-by: Randy Dunlap Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 ++ include/net/sock.h | 1 + net/core/dev.c | 4 ++-- 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index a45bba9b8cbd..16eef03ce0eb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -215,6 +215,8 @@ enum { * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @tc_index: Traffic control index * @tc_verd: traffic control verdict + * @dma_cookie: a cookie to one of several possible DMA operations + * done by skb DMA functions * @secmark: security marking */ diff --git a/include/net/sock.h b/include/net/sock.h index a897f05de3b5..2d8d6adf1616 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1269,6 +1269,7 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) * sk_eat_skb - Release a skb if it is no longer needed * @sk: socket to eat this skb from * @skb: socket buffer to eat + * @copied_early: flag indicating whether DMA operations copied this data early * * This routine must be called with interrupts disabled or with the socket * locked so that the sk_buff queue operation is ok. diff --git a/net/core/dev.c b/net/core/dev.c index d293e0f90a0c..9b8f0f22c81d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3418,8 +3418,8 @@ static void net_dma_rebalance(void) /** * netdev_dma_event - event callback for the net_dma_client * @client: should always be net_dma_client - * @chan: - * @event: + * @chan: DMA channel for the event + * @event: event type */ static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, enum dma_event event) -- cgit v1.2.3 From 454e2398be9b9fa30433fccc548db34d19aa9958 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 23 Jun 2006 02:02:57 -0700 Subject: [PATCH] VFS: Permit filesystem to override root dentry on mount Extend the get_sb() filesystem operation to take an extra argument that permits the VFS to pass in the target vfsmount that defines the mountpoint. The filesystem is then required to manually set the superblock and root dentry pointers. For most filesystems, this should be done with simple_set_mnt() which will set the superblock pointer and then set the root dentry to the superblock's s_root (as per the old default behaviour). The get_sb() op now returns an integer as there's now no need to return the superblock pointer. This patch permits a superblock to be implicitly shared amongst several mount points, such as can be done with NFS to avoid potential inode aliasing. In such a case, simple_set_mnt() would not be called, and instead the mnt_root and mnt_sb would be set directly. The patch also makes the following changes: (*) the get_sb_*() convenience functions in the core kernel now take a vfsmount pointer argument and return an integer, so most filesystems have to change very little. (*) If one of the convenience function is not used, then get_sb() should normally call simple_set_mnt() to instantiate the vfsmount. This will always return 0, and so can be tail-called from get_sb(). (*) generic_shutdown_super() now calls shrink_dcache_sb() to clean up the dcache upon superblock destruction rather than shrink_dcache_anon(). This is required because the superblock may now have multiple trees that aren't actually bound to s_root, but that still need to be cleaned up. The currently called functions assume that the whole tree is rooted at s_root, and that anonymous dentries are not the roots of trees which results in dentries being left unculled. However, with the way NFS superblock sharing are currently set to be implemented, these assumptions are violated: the root of the filesystem is simply a dummy dentry and inode (the real inode for '/' may well be inaccessible), and all the vfsmounts are rooted on anonymous[*] dentries with child trees. [*] Anonymous until discovered from another tree. (*) The documentation has been adjusted, including the additional bit of changing ext2_* into foo_* in the documentation. [akpm@osdl.org: convert ipath_fs, do other stuff] Signed-off-by: David Howells Acked-by: Al Viro Cc: Nathan Scott Cc: Roland Dreier Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 7 +- Documentation/filesystems/porting | 7 +- Documentation/filesystems/vfs.txt | 4 +- arch/ia64/kernel/perfmon.c | 7 +- arch/powerpc/platforms/cell/spufs/inode.c | 6 +- drivers/infiniband/core/uverbs_main.c | 7 +- drivers/infiniband/hw/ipath/ipath_fs.c | 13 ++-- drivers/isdn/capi/capifs.c | 6 +- drivers/misc/ibmasm/ibmasmfs.c | 7 +- drivers/oprofile/oprofilefs.c | 6 +- drivers/usb/core/inode.c | 6 +- drivers/usb/gadget/inode.c | 6 +- fs/9p/vfs_super.c | 21 +++--- fs/adfs/super.c | 7 +- fs/affs/super.c | 7 +- fs/afs/super.c | 24 ++++--- fs/autofs/init.c | 6 +- fs/autofs4/init.c | 6 +- fs/befs/linuxvfs.c | 7 +- fs/bfs/inode.c | 6 +- fs/binfmt_misc.c | 6 +- fs/block_dev.c | 6 +- fs/cifs/cifsfs.c | 10 +-- fs/coda/inode.c | 6 +- fs/configfs/mount.c | 6 +- fs/cramfs/inode.c | 7 +- fs/dcache.c | 40 ----------- fs/debugfs/inode.c | 8 +-- fs/devfs/base.c | 8 +-- fs/devpts/inode.c | 6 +- fs/efs/super.c | 6 +- fs/eventpoll.c | 13 ++-- fs/ext2/super.c | 6 +- fs/ext3/super.c | 6 +- fs/freevxfs/vxfs_super.c | 7 +- fs/fuse/inode.c | 8 +-- fs/hfs/super.c | 7 +- fs/hfsplus/super.c | 8 ++- fs/hostfs/hostfs_kern.c | 8 +-- fs/hpfs/super.c | 7 +- fs/hppfs/hppfs_kern.c | 8 +-- fs/hugetlbfs/inode.c | 6 +- fs/inotify_user.c | 6 +- fs/isofs/inode.c | 7 +- fs/jffs/inode-v23.c | 7 +- fs/jffs2/super.c | 49 ++++++++------ fs/jfs/super.c | 7 +- fs/libfs.c | 12 ++-- fs/minix/inode.c | 7 +- fs/msdos/namei.c | 9 +-- fs/namespace.c | 9 +++ fs/ncpfs/inode.c | 6 +- fs/nfs/inode.c | 96 +++++++++++++++----------- fs/nfsd/nfsctl.c | 6 +- fs/ntfs/super.c | 7 +- fs/ocfs2/dlm/dlmfs.c | 6 +- fs/ocfs2/super.c | 12 ++-- fs/openpromfs/inode.c | 6 +- fs/pipe.c | 9 ++- fs/proc/root.c | 6 +- fs/qnx4/inode.c | 7 +- fs/ramfs/inode.c | 13 ++-- fs/reiserfs/super.c | 9 +-- fs/romfs/inode.c | 7 +- fs/smbfs/inode.c | 6 +- fs/super.c | 109 +++++++++++++++++------------- fs/sysfs/mount.c | 6 +- fs/sysv/super.c | 13 ++-- fs/udf/super.c | 6 +- fs/ufs/super.c | 6 +- fs/vfat/namei.c | 9 +-- fs/xfs/linux-2.6/xfs_super.c | 8 ++- include/linux/dcache.h | 1 - include/linux/fs.h | 25 ++++--- include/linux/ramfs.h | 4 +- ipc/mqueue.c | 8 +-- kernel/cpuset.c | 8 +-- kernel/futex.c | 8 +-- mm/shmem.c | 6 +- net/socket.c | 7 +- net/sunrpc/rpc_pipe.c | 6 +- security/inode.c | 8 +-- security/selinux/selinuxfs.c | 7 +- 83 files changed, 482 insertions(+), 431 deletions(-) (limited to 'net') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 1045da582b9b..3abf08f1b14a 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -142,15 +142,16 @@ see also dquot_operations section. --------------------------- file_system_type --------------------------- prototypes: - struct super_block *(*get_sb) (struct file_system_type *, int, - const char *, void *); + struct int (*get_sb) (struct file_system_type *, int, + const char *, void *, struct vfsmount *); void (*kill_sb) (struct super_block *); locking rules: may block BKL get_sb yes yes kill_sb yes yes -->get_sb() returns error or a locked superblock (exclusive on ->s_umount). +->get_sb() returns error or 0 with locked superblock attached to the vfsmount +(exclusive on ->s_umount). ->kill_sb() takes a write-locked superblock, does all shutdown work on it, unlocks and drops the reference. diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 2f388460cbe7..5531694059ab 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -50,10 +50,11 @@ Turn your foo_read_super() into a function that would return 0 in case of success and negative number in case of error (-EINVAL unless you have more informative error value to report). Call it foo_fill_super(). Now declare -struct super_block foo_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +int foo_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, foo_fill_super, + mnt); } (or similar with s/bdev/nodev/ or s/bdev/single/, depending on the kind of diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 3a2e5520c1e3..dd7d0dcedc87 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -113,8 +113,8 @@ members are defined: struct file_system_type { const char *name; int fs_flags; - struct super_block *(*get_sb) (struct file_system_type *, int, - const char *, void *); + struct int (*get_sb) (struct file_system_type *, int, + const char *, void *, struct vfsmount *); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 077f21216b65..2359e2809f50 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -595,10 +595,11 @@ pfm_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, } -static struct super_block * -pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) +static int +pfmfs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, + struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC); + return get_sb_pseudo(fs_type, "pfm:", NULL, PFMFS_MAGIC, mnt); } static struct file_system_type pfm_fs_type = { diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index 1987697b23a0..7b4572805db9 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -436,11 +436,11 @@ spufs_fill_super(struct super_block *sb, void *data, int silent) return spufs_create_root(sb, data); } -static struct super_block * +static int spufs_get_sb(struct file_system_type *fstype, int flags, - const char *name, void *data) + const char *name, void *data, struct vfsmount *mnt) { - return get_sb_single(fstype, flags, data, spufs_fill_super); + return get_sb_single(fstype, flags, data, spufs_fill_super, mnt); } static struct file_system_type spufs_type = { diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 5ec2d49e9bb6..e57d3c50f75f 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -821,11 +821,12 @@ static void ib_uverbs_remove_one(struct ib_device *device) kref_put(&uverbs_dev->ref, ib_uverbs_release_dev); } -static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) +static int uverbs_event_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, + struct vfsmount *mnt) { return get_sb_pseudo(fs_type, "infinibandevent:", NULL, - INFINIBANDEVENTFS_MAGIC); + INFINIBANDEVENTFS_MAGIC, mnt); } static struct file_system_type uverbs_event_fs = { diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index e274120567e1..63de3046aff3 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -542,13 +542,14 @@ bail: return ret; } -static struct super_block *ipathfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int ipathfs_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, struct vfsmount *mnt) { - ipath_super = get_sb_single(fs_type, flags, data, - ipathfs_fill_super); - return ipath_super; + int ret = get_sb_single(fs_type, flags, data, + ipathfs_fill_super, mnt); + if (ret >= 0) + ipath_super = mnt->mnt_sb; + return ret; } static void ipathfs_kill_super(struct super_block *s) diff --git a/drivers/isdn/capi/capifs.c b/drivers/isdn/capi/capifs.c index 0a37aded4b54..9ea6bd0ddc35 100644 --- a/drivers/isdn/capi/capifs.c +++ b/drivers/isdn/capi/capifs.c @@ -121,10 +121,10 @@ fail: return -ENOMEM; } -static struct super_block *capifs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int capifs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, capifs_fill_super); + return get_sb_single(fs_type, flags, data, capifs_fill_super, mnt); } static struct file_system_type capifs_fs_type = { diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index 26a230b6ff80..4a35caff5d02 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -90,10 +90,11 @@ static void ibmasmfs_create_files (struct super_block *sb, struct dentry *root); static int ibmasmfs_fill_super (struct super_block *sb, void *data, int silent); -static struct super_block *ibmasmfs_get_super(struct file_system_type *fst, - int flags, const char *name, void *data) +static int ibmasmfs_get_super(struct file_system_type *fst, + int flags, const char *name, void *data, + struct vfsmount *mnt) { - return get_sb_single(fst, flags, data, ibmasmfs_fill_super); + return get_sb_single(fst, flags, data, ibmasmfs_fill_super, mnt); } static struct super_operations ibmasmfs_s_ops = { diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index b62da9b0cbf0..71c2da277d6e 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -272,10 +272,10 @@ static int oprofilefs_fill_super(struct super_block * sb, void * data, int silen } -static struct super_block *oprofilefs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int oprofilefs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, oprofilefs_fill_super); + return get_sb_single(fs_type, flags, data, oprofilefs_fill_super, mnt); } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 3cf945cc5b9a..95f5ad923b0f 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -543,10 +543,10 @@ static void fs_remove_file (struct dentry *dentry) /* --------------------------------------------------------------------- */ -static struct super_block *usb_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int usb_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, usbfs_fill_super); + return get_sb_single(fs_type, flags, data, usbfs_fill_super, mnt); } static struct file_system_type usb_fs_type = { diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index aef0722b8f17..3bdc5e3ba234 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -2070,11 +2070,11 @@ enomem0: } /* "mount -t gadgetfs path /dev/gadget" ends up here */ -static struct super_block * +static int gadgetfs_get_sb (struct file_system_type *t, int flags, - const char *path, void *opts) + const char *path, void *opts, struct vfsmount *mnt) { - return get_sb_single (t, flags, opts, gadgetfs_fill_super); + return get_sb_single (t, flags, opts, gadgetfs_fill_super, mnt); } static void diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 61c599b4a1e3..872943004e59 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -99,12 +99,13 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses, * @flags: mount flags * @dev_name: device name that was mounted * @data: mount options + * @mnt: mountpoint record to be instantiated * */ -static struct super_block *v9fs_get_sb(struct file_system_type - *fs_type, int flags, - const char *dev_name, void *data) +static int v9fs_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data, + struct vfsmount *mnt) { struct super_block *sb = NULL; struct v9fs_fcall *fcall = NULL; @@ -123,17 +124,19 @@ static struct super_block *v9fs_get_sb(struct file_system_type v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL); if (!v9ses) - return ERR_PTR(-ENOMEM); + return -ENOMEM; if ((newfid = v9fs_session_init(v9ses, dev_name, data)) < 0) { dprintk(DEBUG_ERROR, "problem initiating session\n"); - sb = ERR_PTR(newfid); + retval = newfid; goto out_free_session; } sb = sget(fs_type, NULL, v9fs_set_super, v9ses); - if (IS_ERR(sb)) + if (IS_ERR(sb)) { + retval = PTR_ERR(sb); goto out_close_session; + } v9fs_fill_super(sb, v9ses, flags); inode = v9fs_get_inode(sb, S_IFDIR | mode); @@ -184,19 +187,19 @@ static struct super_block *v9fs_get_sb(struct file_system_type goto put_back_sb; } - return sb; + return simple_set_mnt(mnt, sb); out_close_session: v9fs_session_close(v9ses); out_free_session: kfree(v9ses); - return sb; + return retval; put_back_sb: /* deactivate_super calls v9fs_kill_super which will frees the rest */ up_write(&sb->s_umount); deactivate_super(sb); - return ERR_PTR(retval); + return retval; } /** diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 252abda0d200..1b58a9b7f6aa 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c @@ -470,10 +470,11 @@ error: return -EINVAL; } -static struct super_block *adfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int adfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super, + mnt); } static struct file_system_type adfs_fs_type = { diff --git a/fs/affs/super.c b/fs/affs/super.c index 4d7e5b19e5cd..6a52e7875403 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -524,10 +524,11 @@ affs_statfs(struct super_block *sb, struct kstatfs *buf) return 0; } -static struct super_block *affs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int affs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super, + mnt); } static struct file_system_type affs_fs_type = { diff --git a/fs/afs/super.c b/fs/afs/super.c index 53c56e7231ab..82468df0ba54 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -38,9 +38,9 @@ struct afs_mount_params { static void afs_i_init_once(void *foo, kmem_cache_t *cachep, unsigned long flags); -static struct super_block *afs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data); +static int afs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt); static struct inode *afs_alloc_inode(struct super_block *sb); @@ -294,10 +294,11 @@ static int afs_fill_super(struct super_block *sb, void *data, int silent) * get an AFS superblock * - TODO: don't use get_sb_nodev(), but rather call sget() directly */ -static struct super_block *afs_get_sb(struct file_system_type *fs_type, - int flags, - const char *dev_name, - void *options) +static int afs_get_sb(struct file_system_type *fs_type, + int flags, + const char *dev_name, + void *options, + struct vfsmount *mnt) { struct afs_mount_params params; struct super_block *sb; @@ -311,7 +312,7 @@ static struct super_block *afs_get_sb(struct file_system_type *fs_type, ret = afscm_start(); if (ret < 0) { _leave(" = %d", ret); - return ERR_PTR(ret); + return ret; } /* parse the options */ @@ -348,18 +349,19 @@ static struct super_block *afs_get_sb(struct file_system_type *fs_type, goto error; } sb->s_flags |= MS_ACTIVE; + simple_set_mnt(mnt, sb); afs_put_volume(params.volume); afs_put_cell(params.default_cell); - _leave(" = %p", sb); - return sb; + _leave(" = 0 [%p]", 0, sb); + return 0; error: afs_put_volume(params.volume); afs_put_cell(params.default_cell); afscm_stop(); _leave(" = %d", ret); - return ERR_PTR(ret); + return ret; } /* end afs_get_sb() */ /*****************************************************************************/ diff --git a/fs/autofs/init.c b/fs/autofs/init.c index b977ece69f0c..aca123752406 100644 --- a/fs/autofs/init.c +++ b/fs/autofs/init.c @@ -14,10 +14,10 @@ #include #include "autofs_i.h" -static struct super_block *autofs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int autofs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, autofs_fill_super); + return get_sb_nodev(fs_type, flags, data, autofs_fill_super, mnt); } static struct file_system_type autofs_fs_type = { diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index acecec8578ce..5d9193332bef 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c @@ -14,10 +14,10 @@ #include #include "autofs_i.h" -static struct super_block *autofs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int autofs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, autofs4_fill_super); + return get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt); } static struct file_system_type autofs_fs_type = { diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 68ebd10f345d..6ed07a5f10c6 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -899,11 +899,12 @@ befs_statfs(struct super_block *sb, struct kstatfs *buf) return 0; } -static struct super_block * +static int befs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super, + mnt); } static struct file_system_type befs_fs_type = { diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 55a7a78332f8..e7da03f63a5a 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c @@ -410,10 +410,10 @@ out: return -EINVAL; } -static struct super_block *bfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int bfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super, mnt); } static struct file_system_type bfs_fs_type = { diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 599f36fd0f67..07a4996cca3f 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -739,10 +739,10 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent) return err; } -static struct super_block *bm_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int bm_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, bm_fill_super); + return get_sb_single(fs_type, flags, data, bm_fill_super, mnt); } static struct linux_binfmt misc_format = { diff --git a/fs/block_dev.c b/fs/block_dev.c index 44aaba202f78..028d9fb9c2d5 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -300,10 +300,10 @@ static struct super_operations bdev_sops = { .clear_inode = bdev_clear_inode, }; -static struct super_block *bd_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int bd_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576); + return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt); } static struct file_system_type bd_type = { diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index c262d8874ce9..08b35801dfed 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -460,9 +460,9 @@ struct super_operations cifs_super_ops = { .remount_fs = cifs_remount, }; -static struct super_block * +static int cifs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { int rc; struct super_block *sb = sget(fs_type, NULL, set_anon_super, NULL); @@ -470,7 +470,7 @@ cifs_get_sb(struct file_system_type *fs_type, cFYI(1, ("Devname: %s flags: %d ", dev_name, flags)); if (IS_ERR(sb)) - return sb; + return PTR_ERR(sb); sb->s_flags = flags; @@ -478,10 +478,10 @@ cifs_get_sb(struct file_system_type *fs_type, if (rc) { up_write(&sb->s_umount); deactivate_super(sb); - return ERR_PTR(rc); + return rc; } sb->s_flags |= MS_ACTIVE; - return sb; + return simple_set_mnt(mnt, sb); } static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov, diff --git a/fs/coda/inode.c b/fs/coda/inode.c index ada1a81df6bd..cba70201567d 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -307,10 +307,10 @@ static int coda_statfs(struct super_block *sb, struct kstatfs *buf) /* init_coda: used by filesystems.c to register coda */ -static struct super_block *coda_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int coda_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, coda_fill_super); + return get_sb_nodev(fs_type, flags, data, coda_fill_super, mnt); } struct file_system_type coda_fs_type = { diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index f920d30478e5..94dab7bdd851 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -103,10 +103,10 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static struct super_block *configfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int configfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, configfs_fill_super); + return get_sb_single(fs_type, flags, data, configfs_fill_super, mnt); } static struct file_system_type configfs_fs_type = { diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 9efcc3a164e8..37a91a153aa5 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c @@ -528,10 +528,11 @@ static struct super_operations cramfs_ops = { .statfs = cramfs_statfs, }; -static struct super_block *cramfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int cramfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super, + mnt); } static struct file_system_type cramfs_fs_type = { diff --git a/fs/dcache.c b/fs/dcache.c index 59dbc92c2079..313b54b2b8f2 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -687,46 +687,6 @@ void shrink_dcache_parent(struct dentry * parent) prune_dcache(found, parent->d_sb); } -/** - * shrink_dcache_anon - further prune the cache - * @head: head of d_hash list of dentries to prune - * - * Prune the dentries that are anonymous - * - * parsing d_hash list does not hlist_for_each_entry_rcu() as it - * done under dcache_lock. - * - */ -void shrink_dcache_anon(struct super_block *sb) -{ - struct hlist_node *lp; - struct hlist_head *head = &sb->s_anon; - int found; - do { - found = 0; - spin_lock(&dcache_lock); - hlist_for_each(lp, head) { - struct dentry *this = hlist_entry(lp, struct dentry, d_hash); - if (!list_empty(&this->d_lru)) { - dentry_stat.nr_unused--; - list_del_init(&this->d_lru); - } - - /* - * move only zero ref count dentries to the end - * of the unused list for prune_dcache - */ - if (!atomic_read(&this->d_count)) { - list_add_tail(&this->d_lru, &dentry_unused); - dentry_stat.nr_unused++; - found++; - } - } - spin_unlock(&dcache_lock); - prune_dcache(found, sb); - } while(found); -} - /* * Scan `nr' dentries and return the number which remain. * diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b55b4ea9a676..440128ebef3b 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -111,11 +111,11 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent) return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files); } -static struct super_block *debug_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int debug_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, debug_fill_super); + return get_sb_single(fs_type, flags, data, debug_fill_super, mnt); } static struct file_system_type debug_fs_type = { diff --git a/fs/devfs/base.c b/fs/devfs/base.c index 52f5059c4f31..51a97f132745 100644 --- a/fs/devfs/base.c +++ b/fs/devfs/base.c @@ -2549,11 +2549,11 @@ static int devfs_fill_super(struct super_block *sb, void *data, int silent) return -EINVAL; } /* End Function devfs_fill_super */ -static struct super_block *devfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int devfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, devfs_fill_super); + return get_sb_single(fs_type, flags, data, devfs_fill_super, mnt); } static struct file_system_type devfs_fs_type = { diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 14c5620b5cab..f7aef5bb584a 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -130,10 +130,10 @@ fail: return -ENOMEM; } -static struct super_block *devpts_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int devpts_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, devpts_fill_super); + return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt); } static struct file_system_type devpts_fs_type = { diff --git a/fs/efs/super.c b/fs/efs/super.c index dff623e3ddbf..1ba5e14f879f 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c @@ -18,10 +18,10 @@ static int efs_statfs(struct super_block *s, struct kstatfs *buf); static int efs_fill_super(struct super_block *s, void *d, int silent); -static struct super_block *efs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int efs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super, mnt); } static struct file_system_type efs_fs_type = { diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 2695337d4d64..08e7e6a555ca 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -268,9 +268,9 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, int maxevents, long timeout); static int eventpollfs_delete_dentry(struct dentry *dentry); static struct inode *ep_eventpoll_inode(void); -static struct super_block *eventpollfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data); +static int eventpollfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt); /* * This semaphore is used to serialize ep_free() and eventpoll_release_file(). @@ -1595,11 +1595,12 @@ eexit_1: } -static struct super_block * +static int eventpollfs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) + const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "eventpoll:", NULL, EVENTPOLLFS_MAGIC); + return get_sb_pseudo(fs_type, "eventpoll:", NULL, EVENTPOLLFS_MAGIC, + mnt); } diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 7e30bae174ed..a4dfffac5967 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1087,10 +1087,10 @@ static int ext2_statfs (struct super_block * sb, struct kstatfs * buf) return 0; } -static struct super_block *ext2_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int ext2_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt); } #ifdef CONFIG_QUOTA diff --git a/fs/ext3/super.c b/fs/ext3/super.c index f8a5266ea1ff..657f8e73b62f 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2646,10 +2646,10 @@ out: #endif -static struct super_block *ext3_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int ext3_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt); } static struct file_system_type ext3_fs_type = { diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index b44c916d24a1..d76eeaafbde2 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c @@ -241,10 +241,11 @@ out: /* * The usual module blurb. */ -static struct super_block *vxfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int vxfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super, + mnt); } static struct file_system_type vxfs_fs_type = { diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 7627022446b2..c91f0a50aadb 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -569,11 +569,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) return err; } -static struct super_block *fuse_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *raw_data) +static int fuse_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *raw_data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super); + return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt); } static struct file_system_type fuse_fs_type = { diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 1181d116117d..ee5b80a409e8 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c @@ -413,10 +413,11 @@ bail: return res; } -static struct super_block *hfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int hfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super, mnt); } static struct file_system_type hfs_fs_type = { diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 7843f792a4b7..0ed8b7e8e87f 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -450,10 +450,12 @@ static void hfsplus_destroy_inode(struct inode *inode) #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) -static struct super_block *hfsplus_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int hfsplus_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super, + mnt); } static struct file_system_type hfsplus_fs_type = { diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index bf0f8e16e433..04035e08f5c1 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -993,11 +993,11 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) return(err); } -static struct super_block *hostfs_read_sb(struct file_system_type *type, - int flags, const char *dev_name, - void *data) +static int hostfs_read_sb(struct file_system_type *type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return(get_sb_nodev(type, flags, data, hostfs_fill_sb_common)); + return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt); } static struct file_system_type hostfs_type = { diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index d72d8c87c996..3b25cf3e2e65 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -662,10 +662,11 @@ bail0: return -EINVAL; } -static struct super_block *hpfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int hpfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super, + mnt); } static struct file_system_type hpfs_fs_type = { diff --git a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c index 5e6363be246f..ec43c22bc9c0 100644 --- a/fs/hppfs/hppfs_kern.c +++ b/fs/hppfs/hppfs_kern.c @@ -769,11 +769,11 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent) return(err); } -static struct super_block *hppfs_read_super(struct file_system_type *type, - int flags, const char *dev_name, - void *data) +static int hppfs_read_super(struct file_system_type *type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return(get_sb_nodev(type, flags, data, hppfs_fill_super)); + return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt); } static struct file_system_type hppfs_type = { diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 3a5b4e923455..4665c26171f7 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -723,10 +723,10 @@ void hugetlb_put_quota(struct address_space *mapping) } } -static struct super_block *hugetlbfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int hugetlbfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super); + return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt); } static struct file_system_type hugetlbfs_fs_type = { diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 9e9931e2badd..f2386442adee 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c @@ -672,11 +672,11 @@ out: return ret; } -static struct super_block * +static int inotify_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) + const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA); + return get_sb_pseudo(fs_type, "inotify", NULL, 0xBAD1DEA, mnt); } static struct file_system_type inotify_fs_type = { diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 70adbb98bad1..17268da63a49 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -1399,10 +1399,11 @@ struct inode *isofs_iget(struct super_block *sb, return inode; } -static struct super_block *isofs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int isofs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super, + mnt); } static struct file_system_type iso9660_fs_type = { diff --git a/fs/jffs/inode-v23.c b/fs/jffs/inode-v23.c index 020cc097c539..dd93a091ad67 100644 --- a/fs/jffs/inode-v23.c +++ b/fs/jffs/inode-v23.c @@ -1785,10 +1785,11 @@ static struct super_operations jffs_ops = .remount_fs = jffs_remount, }; -static struct super_block *jffs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int jffs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, jffs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, jffs_fill_super, + mnt); } static struct file_system_type jffs_fs_type = { diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 9d0521451f59..2378a662c256 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -111,9 +111,10 @@ static int jffs2_sb_set(struct super_block *sb, void *data) return 0; } -static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data, struct mtd_info *mtd) +static int jffs2_get_sb_mtd(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct mtd_info *mtd, + struct vfsmount *mnt) { struct super_block *sb; struct jffs2_sb_info *c; @@ -121,19 +122,20 @@ static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type, c = kmalloc(sizeof(*c), GFP_KERNEL); if (!c) - return ERR_PTR(-ENOMEM); + return -ENOMEM; memset(c, 0, sizeof(*c)); c->mtd = mtd; sb = sget(fs_type, jffs2_sb_compare, jffs2_sb_set, c); if (IS_ERR(sb)) - goto out_put; + goto out_error; if (sb->s_root) { /* New mountpoint for JFFS2 which is already mounted */ D1(printk(KERN_DEBUG "jffs2_get_sb_mtd(): Device %d (\"%s\") is already mounted\n", mtd->index, mtd->name)); + ret = simple_set_mnt(mnt, sb); goto out_put; } @@ -161,44 +163,47 @@ static struct super_block *jffs2_get_sb_mtd(struct file_system_type *fs_type, /* Failure case... */ up_write(&sb->s_umount); deactivate_super(sb); - return ERR_PTR(ret); + return ret; } sb->s_flags |= MS_ACTIVE; - return sb; + return simple_set_mnt(mnt, sb); +out_error: + ret = PTR_ERR(sb); out_put: kfree(c); put_mtd_device(mtd); - return sb; + return ret; } -static struct super_block *jffs2_get_sb_mtdnr(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data, int mtdnr) +static int jffs2_get_sb_mtdnr(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, int mtdnr, + struct vfsmount *mnt) { struct mtd_info *mtd; mtd = get_mtd_device(NULL, mtdnr); if (!mtd) { D1(printk(KERN_DEBUG "jffs2: MTD device #%u doesn't appear to exist\n", mtdnr)); - return ERR_PTR(-EINVAL); + return -EINVAL; } - return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd); + return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt); } -static struct super_block *jffs2_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int jffs2_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { int err; struct nameidata nd; int mtdnr; if (!dev_name) - return ERR_PTR(-EINVAL); + return -EINVAL; D1(printk(KERN_DEBUG "jffs2_get_sb(): dev_name \"%s\"\n", dev_name)); @@ -220,7 +225,7 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type, mtd = get_mtd_device(NULL, mtdnr); if (mtd) { if (!strcmp(mtd->name, dev_name+4)) - return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd); + return jffs2_get_sb_mtd(fs_type, flags, dev_name, data, mtd, mnt); put_mtd_device(mtd); } } @@ -233,7 +238,7 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type, if (!*endptr) { /* It was a valid number */ D1(printk(KERN_DEBUG "jffs2_get_sb(): mtd%%d, mtdnr %d\n", mtdnr)); - return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr); + return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt); } } } @@ -247,7 +252,7 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type, err, nd.dentry->d_inode)); if (err) - return ERR_PTR(err); + return err; err = -EINVAL; @@ -269,11 +274,11 @@ static struct super_block *jffs2_get_sb(struct file_system_type *fs_type, mtdnr = iminor(nd.dentry->d_inode); path_release(&nd); - return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr); + return jffs2_get_sb_mtdnr(fs_type, flags, dev_name, data, mtdnr, mnt); out: path_release(&nd); - return ERR_PTR(err); + return err; } static void jffs2_put_super (struct super_block *sb) diff --git a/fs/jfs/super.c b/fs/jfs/super.c index db6f41d6dd60..18a28137b90e 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c @@ -565,10 +565,11 @@ static void jfs_unlockfs(struct super_block *sb) } } -static struct super_block *jfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int jfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super, + mnt); } static int jfs_sync_fs(struct super_block *sb, int wait) diff --git a/fs/libfs.c b/fs/libfs.c index 7145ba7a48d0..7d70efa46da9 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -196,9 +196,9 @@ struct inode_operations simple_dir_inode_operations = { * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that * will never be mountable) */ -struct super_block * -get_sb_pseudo(struct file_system_type *fs_type, char *name, - struct super_operations *ops, unsigned long magic) +int get_sb_pseudo(struct file_system_type *fs_type, char *name, + struct super_operations *ops, unsigned long magic, + struct vfsmount *mnt) { struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); static struct super_operations default_ops = {.statfs = simple_statfs}; @@ -207,7 +207,7 @@ get_sb_pseudo(struct file_system_type *fs_type, char *name, struct qstr d_name = {.name = name, .len = strlen(name)}; if (IS_ERR(s)) - return s; + return PTR_ERR(s); s->s_flags = MS_NOUSER; s->s_maxbytes = ~0ULL; @@ -232,12 +232,12 @@ get_sb_pseudo(struct file_system_type *fs_type, char *name, d_instantiate(dentry, root); s->s_root = dentry; s->s_flags |= MS_ACTIVE; - return s; + return simple_set_mnt(mnt, s); Enomem: up_write(&s->s_umount); deactivate_super(s); - return ERR_PTR(-ENOMEM); + return -ENOMEM; } int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 2dcccf1d1b7f..14f24dfbfe30 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -559,10 +559,11 @@ void minix_truncate(struct inode * inode) V2_minix_truncate(inode); } -static struct super_block *minix_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int minix_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super, + mnt); } static struct file_system_type minix_fs_type = { diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index 5b76ccd19e3f..9e44158a7540 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -661,11 +661,12 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static struct super_block *msdos_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int msdos_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super, + mnt); } static struct file_system_type msdos_fs_type = { diff --git a/fs/namespace.c b/fs/namespace.c index bf478addb852..c13072a5f1ee 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -86,6 +86,15 @@ struct vfsmount *alloc_vfsmnt(const char *name) return mnt; } +int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb) +{ + mnt->mnt_sb = sb; + mnt->mnt_root = dget(sb->s_root); + return 0; +} + +EXPORT_SYMBOL(simple_set_mnt); + void free_vfsmnt(struct vfsmount *mnt) { kfree(mnt->mnt_devname); diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index a1f3e972c6ef..8db033fab3fd 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -957,10 +957,10 @@ out: return result; } -static struct super_block *ncp_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int ncp_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, ncp_fill_super); + return get_sb_nodev(fs_type, flags, data, ncp_fill_super, mnt); } static struct file_system_type ncp_fs_type = { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index d0b991a92327..ff645a961bc8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1690,8 +1690,8 @@ static int nfs_compare_super(struct super_block *sb, void *data) return !nfs_compare_fh(&old->fh, &server->fh); } -static struct super_block *nfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) +static int nfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { int error; struct nfs_server *server = NULL; @@ -1699,14 +1699,14 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, struct nfs_fh *root; struct nfs_mount_data *data = raw_data; - s = ERR_PTR(-EINVAL); + error = -EINVAL; if (data == NULL) { dprintk("%s: missing data argument\n", __FUNCTION__); - goto out_err; + goto out_err_noserver; } if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { dprintk("%s: bad mount version\n", __FUNCTION__); - goto out_err; + goto out_err_noserver; } switch (data->version) { case 1: @@ -1718,7 +1718,7 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, dprintk("%s: mount structure version %d does not support NFSv3\n", __FUNCTION__, data->version); - goto out_err; + goto out_err_noserver; } data->root.size = NFS2_FHSIZE; memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); @@ -1727,24 +1727,24 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, dprintk("%s: mount structure version %d does not support strong security\n", __FUNCTION__, data->version); - goto out_err; + goto out_err_noserver; } case 5: memset(data->context, 0, sizeof(data->context)); } #ifndef CONFIG_NFS_V3 /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ - s = ERR_PTR(-EPROTONOSUPPORT); + error = -EPROTONOSUPPORT; if (data->flags & NFS_MOUNT_VER3) { dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); - goto out_err; + goto out_err_noserver; } #endif /* CONFIG_NFS_V3 */ - s = ERR_PTR(-ENOMEM); + error = -ENOMEM; server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) - goto out_err; + goto out_err_noserver; /* Zero out the NFS state stuff */ init_nfsv4_state(server); server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); @@ -1754,7 +1754,7 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, root->size = data->root.size; else root->size = NFS2_FHSIZE; - s = ERR_PTR(-EINVAL); + error = -EINVAL; if (root->size > sizeof(root->data)) { dprintk("%s: invalid root filehandle\n", __FUNCTION__); goto out_err; @@ -1770,15 +1770,20 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, } /* Fire up rpciod if not yet running */ - s = ERR_PTR(rpciod_up()); - if (IS_ERR(s)) { - dprintk("%s: couldn't start rpciod! Error = %ld\n", - __FUNCTION__, PTR_ERR(s)); + error = rpciod_up(); + if (error < 0) { + dprintk("%s: couldn't start rpciod! Error = %d\n", + __FUNCTION__, error); goto out_err; } s = sget(fs_type, nfs_compare_super, nfs_set_super, server); - if (IS_ERR(s) || s->s_root) + if (IS_ERR(s)) { + error = PTR_ERR(s); + goto out_err_rpciod; + } + + if (s->s_root) goto out_rpciod_down; s->s_flags = flags; @@ -1787,15 +1792,22 @@ static struct super_block *nfs_get_sb(struct file_system_type *fs_type, if (error) { up_write(&s->s_umount); deactivate_super(s); - return ERR_PTR(error); + return error; } s->s_flags |= MS_ACTIVE; - return s; + return simple_set_mnt(mnt, s); + out_rpciod_down: rpciod_down(); + kfree(server); + return simple_set_mnt(mnt, s); + +out_err_rpciod: + rpciod_down(); out_err: kfree(server); - return s; +out_err_noserver: + return error; } static void nfs_kill_super(struct super_block *s) @@ -2032,8 +2044,8 @@ nfs_copy_user_string(char *dst, struct nfs_string *src, int maxlen) return dst; } -static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *raw_data) +static int nfs4_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) { int error; struct nfs_server *server; @@ -2043,16 +2055,16 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, if (data == NULL) { dprintk("%s: missing data argument\n", __FUNCTION__); - return ERR_PTR(-EINVAL); + return -EINVAL; } if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { dprintk("%s: bad mount version\n", __FUNCTION__); - return ERR_PTR(-EINVAL); + return -EINVAL; } server = kzalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) - return ERR_PTR(-ENOMEM); + return -ENOMEM; /* Zero out the NFS state stuff */ init_nfsv4_state(server); server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); @@ -2074,33 +2086,41 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, /* We now require that the mount process passes the remote address */ if (data->host_addrlen != sizeof(server->addr)) { - s = ERR_PTR(-EINVAL); + error = -EINVAL; goto out_free; } if (copy_from_user(&server->addr, data->host_addr, sizeof(server->addr))) { - s = ERR_PTR(-EFAULT); + error = -EFAULT; goto out_free; } if (server->addr.sin_family != AF_INET || server->addr.sin_addr.s_addr == INADDR_ANY) { dprintk("%s: mount program didn't pass remote IP address!\n", __FUNCTION__); - s = ERR_PTR(-EINVAL); + error = -EINVAL; goto out_free; } /* Fire up rpciod if not yet running */ - s = ERR_PTR(rpciod_up()); - if (IS_ERR(s)) { - dprintk("%s: couldn't start rpciod! Error = %ld\n", - __FUNCTION__, PTR_ERR(s)); + error = rpciod_up(); + if (error < 0) { + dprintk("%s: couldn't start rpciod! Error = %d\n", + __FUNCTION__, error); goto out_free; } s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); - - if (IS_ERR(s) || s->s_root) + if (IS_ERR(s)) { + error = PTR_ERR(s); goto out_free; + } + + if (s->s_root) { + kfree(server->mnt_path); + kfree(server->hostname); + kfree(server); + return simple_set_mnt(mnt, s); + } s->s_flags = flags; @@ -2108,17 +2128,17 @@ static struct super_block *nfs4_get_sb(struct file_system_type *fs_type, if (error) { up_write(&s->s_umount); deactivate_super(s); - return ERR_PTR(error); + return error; } s->s_flags |= MS_ACTIVE; - return s; + return simple_set_mnt(mnt, s); out_err: - s = (struct super_block *)p; + error = PTR_ERR(p); out_free: kfree(server->mnt_path); kfree(server->hostname); kfree(server); - return s; + return error; } static void nfs4_kill_super(struct super_block *sb) diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3ef017b3b5bd..a1810e6a93e5 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -494,10 +494,10 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) return simple_fill_super(sb, 0x6e667364, nfsd_files); } -static struct super_block *nfsd_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int nfsd_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, nfsd_fill_super); + return get_sb_single(fs_type, flags, data, nfsd_fill_super, mnt); } static struct file_system_type nfsd_fs_type = { diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 27833f6df49f..d5d5e969294f 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -3093,10 +3093,11 @@ struct kmem_cache *ntfs_index_ctx_cache; /* Driver wide mutex. */ DEFINE_MUTEX(ntfs_lock); -static struct super_block *ntfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int ntfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super, + mnt); } static struct file_system_type ntfs_fs_type = { diff --git a/fs/ocfs2/dlm/dlmfs.c b/fs/ocfs2/dlm/dlmfs.c index 7e88e24b3471..7273d9fa6bab 100644 --- a/fs/ocfs2/dlm/dlmfs.c +++ b/fs/ocfs2/dlm/dlmfs.c @@ -574,10 +574,10 @@ static struct inode_operations dlmfs_file_inode_operations = { .getattr = simple_getattr, }; -static struct super_block *dlmfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int dlmfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super); + return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt); } static struct file_system_type dlmfs_fs_type = { diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 949b3dac30f1..788b8b50dc4c 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -672,12 +672,14 @@ read_super_error: return status; } -static struct super_block *ocfs2_get_sb(struct file_system_type *fs_type, - int flags, - const char *dev_name, - void *data) +static int ocfs2_get_sb(struct file_system_type *fs_type, + int flags, + const char *dev_name, + void *data, + struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super, + mnt); } static struct file_system_type ocfs2_fs_type = { diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 0f14276a2e51..464e2bce0203 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -1054,10 +1054,10 @@ out_no_root: return -ENOMEM; } -static struct super_block *openprom_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int openprom_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, openprom_fill_super); + return get_sb_single(fs_type, flags, data, openprom_fill_super, mnt); } static struct file_system_type openprom_fs_type = { diff --git a/fs/pipe.c b/fs/pipe.c index 5acd8954aaa0..20352573e025 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -979,12 +979,11 @@ no_files: * any operations on the root directory. However, we need a non-trivial * d_name - pipe: will go nicely and kill the special-casing in procfs. */ - -static struct super_block * -pipefs_get_sb(struct file_system_type *fs_type, int flags, - const char *dev_name, void *data) +static int pipefs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); + return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt); } static struct file_system_type pipe_fs_type = { diff --git a/fs/proc/root.c b/fs/proc/root.c index c3fd3611112f..9995356ce73e 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -26,10 +26,10 @@ struct proc_dir_entry *proc_net, *proc_net_stat, *proc_bus, *proc_root_fs, *proc struct proc_dir_entry *proc_sys_root; #endif -static struct super_block *proc_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int proc_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, proc_fill_super); + return get_sb_single(fs_type, flags, data, proc_fill_super, mnt); } static struct file_system_type proc_fs_type = { diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 2ecd46f85e9f..e6cca5cd4b44 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -561,10 +561,11 @@ static void destroy_inodecache(void) "qnx4_inode_cache: not all structures were freed\n"); } -static struct super_block *qnx4_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int qnx4_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super, + mnt); } static struct file_system_type qnx4_fs_type = { diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index 14bd2246fb6d..b9677335cc8d 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -185,16 +185,17 @@ static int ramfs_fill_super(struct super_block * sb, void * data, int silent) return 0; } -struct super_block *ramfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +int ramfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, ramfs_fill_super); + return get_sb_nodev(fs_type, flags, data, ramfs_fill_super, mnt); } -static struct super_block *rootfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int rootfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super); + return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super, + mnt); } static struct file_system_type ramfs_fs_type = { diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index cae2abbc0c71..f3ff41d33989 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -2249,11 +2249,12 @@ static ssize_t reiserfs_quota_write(struct super_block *sb, int type, #endif -static struct super_block *get_super_block(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int get_super_block(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super, + mnt); } static int __init init_reiserfs_fs(void) diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 9b9eda7b335c..4d6cd6621062 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -607,10 +607,11 @@ static struct super_operations romfs_ops = { .remount_fs = romfs_remount, }; -static struct super_block *romfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int romfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, romfs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, romfs_fill_super, + mnt); } static struct file_system_type romfs_fs_type = { diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index fdeabc0a34f7..4a37c2bbfa3f 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c @@ -782,10 +782,10 @@ out: return error; } -static struct super_block *smb_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int smb_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, smb_fill_super); + return get_sb_nodev(fs_type, flags, data, smb_fill_super, mnt); } static struct file_system_type smb_fs_type = { diff --git a/fs/super.c b/fs/super.c index 9d5c2add7228..324c2d232f54 100644 --- a/fs/super.c +++ b/fs/super.c @@ -231,7 +231,7 @@ void generic_shutdown_super(struct super_block *sb) if (root) { sb->s_root = NULL; shrink_dcache_parent(root); - shrink_dcache_anon(sb); + shrink_dcache_sb(sb); dput(root); fsync_super(sb); lock_super(sb); @@ -676,9 +676,10 @@ static void bdev_uevent(struct block_device *bdev, enum kobject_action action) } } -struct super_block *get_sb_bdev(struct file_system_type *fs_type, +int get_sb_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)) + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt) { struct block_device *bdev; struct super_block *s; @@ -686,7 +687,7 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, bdev = open_bdev_excl(dev_name, flags, fs_type); if (IS_ERR(bdev)) - return (struct super_block *)bdev; + return PTR_ERR(bdev); /* * once the super is inserted into the list by sget, s_umount @@ -697,15 +698,17 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, s = sget(fs_type, test_bdev_super, set_bdev_super, bdev); mutex_unlock(&bdev->bd_mount_mutex); if (IS_ERR(s)) - goto out; + goto error_s; if (s->s_root) { if ((flags ^ s->s_flags) & MS_RDONLY) { up_write(&s->s_umount); deactivate_super(s); - s = ERR_PTR(-EBUSY); + error = -EBUSY; + goto error_bdev; } - goto out; + + close_bdev_excl(bdev); } else { char b[BDEVNAME_SIZE]; @@ -716,18 +719,21 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type, if (error) { up_write(&s->s_umount); deactivate_super(s); - s = ERR_PTR(error); - } else { - s->s_flags |= MS_ACTIVE; - bdev_uevent(bdev, KOBJ_MOUNT); + goto error; } + + s->s_flags |= MS_ACTIVE; + bdev_uevent(bdev, KOBJ_MOUNT); } - return s; + return simple_set_mnt(mnt, s); -out: +error_s: + error = PTR_ERR(s); +error_bdev: close_bdev_excl(bdev); - return s; +error: + return error; } EXPORT_SYMBOL(get_sb_bdev); @@ -744,15 +750,16 @@ void kill_block_super(struct super_block *sb) EXPORT_SYMBOL(kill_block_super); -struct super_block *get_sb_nodev(struct file_system_type *fs_type, +int get_sb_nodev(struct file_system_type *fs_type, int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)) + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt) { int error; struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL); if (IS_ERR(s)) - return s; + return PTR_ERR(s); s->s_flags = flags; @@ -760,10 +767,10 @@ struct super_block *get_sb_nodev(struct file_system_type *fs_type, if (error) { up_write(&s->s_umount); deactivate_super(s); - return ERR_PTR(error); + return error; } s->s_flags |= MS_ACTIVE; - return s; + return simple_set_mnt(mnt, s); } EXPORT_SYMBOL(get_sb_nodev); @@ -773,94 +780,102 @@ static int compare_single(struct super_block *s, void *p) return 1; } -struct super_block *get_sb_single(struct file_system_type *fs_type, +int get_sb_single(struct file_system_type *fs_type, int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)) + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt) { struct super_block *s; int error; s = sget(fs_type, compare_single, set_anon_super, NULL); if (IS_ERR(s)) - return s; + return PTR_ERR(s); if (!s->s_root) { s->s_flags = flags; error = fill_super(s, data, flags & MS_SILENT ? 1 : 0); if (error) { up_write(&s->s_umount); deactivate_super(s); - return ERR_PTR(error); + return error; } s->s_flags |= MS_ACTIVE; } do_remount_sb(s, flags, data, 0); - return s; + return simple_set_mnt(mnt, s); } EXPORT_SYMBOL(get_sb_single); struct vfsmount * -do_kern_mount(const char *fstype, int flags, const char *name, void *data) +vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data) { - struct file_system_type *type = get_fs_type(fstype); - struct super_block *sb = ERR_PTR(-ENOMEM); struct vfsmount *mnt; - int error; char *secdata = NULL; + int error; if (!type) return ERR_PTR(-ENODEV); + error = -ENOMEM; mnt = alloc_vfsmnt(name); if (!mnt) goto out; if (data) { secdata = alloc_secdata(); - if (!secdata) { - sb = ERR_PTR(-ENOMEM); + if (!secdata) goto out_mnt; - } error = security_sb_copy_data(type, data, secdata); - if (error) { - sb = ERR_PTR(error); + if (error) goto out_free_secdata; - } } - sb = type->get_sb(type, flags, name, data); - if (IS_ERR(sb)) + error = type->get_sb(type, flags, name, data, mnt); + if (error < 0) goto out_free_secdata; - error = security_sb_kern_mount(sb, secdata); + + error = security_sb_kern_mount(mnt->mnt_sb, secdata); if (error) goto out_sb; - mnt->mnt_sb = sb; - mnt->mnt_root = dget(sb->s_root); - mnt->mnt_mountpoint = sb->s_root; + + mnt->mnt_mountpoint = mnt->mnt_root; mnt->mnt_parent = mnt; - up_write(&sb->s_umount); + up_write(&mnt->mnt_sb->s_umount); free_secdata(secdata); - put_filesystem(type); return mnt; out_sb: - up_write(&sb->s_umount); - deactivate_super(sb); - sb = ERR_PTR(error); + dput(mnt->mnt_root); + up_write(&mnt->mnt_sb->s_umount); + deactivate_super(mnt->mnt_sb); out_free_secdata: free_secdata(secdata); out_mnt: free_vfsmnt(mnt); out: + return ERR_PTR(error); +} + +EXPORT_SYMBOL_GPL(vfs_kern_mount); + +struct vfsmount * +do_kern_mount(const char *fstype, int flags, const char *name, void *data) +{ + struct file_system_type *type = get_fs_type(fstype); + struct vfsmount *mnt; + if (!type) + return ERR_PTR(-ENODEV); + mnt = vfs_kern_mount(type, flags, name, data); put_filesystem(type); - return (struct vfsmount *)sb; + return mnt; } EXPORT_SYMBOL_GPL(do_kern_mount); struct vfsmount *kern_mount(struct file_system_type *type) { - return do_kern_mount(type->name, 0, type->name, NULL); + return vfs_kern_mount(type, 0, type->name, NULL); } EXPORT_SYMBOL(kern_mount); diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index f1117e885bd6..40190c489271 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -66,10 +66,10 @@ static int sysfs_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static struct super_block *sysfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int sysfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, sysfs_fill_super); + return get_sb_single(fs_type, flags, data, sysfs_fill_super, mnt); } static struct file_system_type sysfs_fs_type = { diff --git a/fs/sysv/super.c b/fs/sysv/super.c index e92b991e6dda..876639b93321 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -506,16 +506,17 @@ failed: /* Every kernel module contains stuff like this. */ -static struct super_block *sysv_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int sysv_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super, + mnt); } -static struct super_block *v7_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int v7_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super, mnt); } static struct file_system_type sysv_fs_type = { diff --git a/fs/udf/super.c b/fs/udf/super.c index e45789fe38e8..2250774a831d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -94,10 +94,10 @@ static unsigned int udf_count_free(struct super_block *); static int udf_statfs(struct super_block *, struct kstatfs *); /* UDF filesystem type */ -static struct super_block *udf_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int udf_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super, mnt); } static struct file_system_type udf_fstype = { diff --git a/fs/ufs/super.c b/fs/ufs/super.c index db98a4c71e63..768fb8d9e67a 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c @@ -1311,10 +1311,10 @@ out: #endif -static struct super_block *ufs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int ufs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super, mnt); } static struct file_system_type ufs_fs_type = { diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index a56cec3be5f0..9a8f48bae956 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -1023,11 +1023,12 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static struct super_block *vfat_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int vfat_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super, + mnt); } static struct file_system_type vfat_fs_type = { diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index f2a0778536f4..d03c89a36655 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -853,14 +853,16 @@ fail_vfsop: return -error; } -STATIC struct super_block * +STATIC int xfs_fs_get_sb( struct file_system_type *fs_type, int flags, const char *dev_name, - void *data) + void *data, + struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super); + return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super, + mnt); } STATIC struct super_operations xfs_super_operations = { diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 46d0e079735d..0dd1610a94a9 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -217,7 +217,6 @@ extern struct dentry * d_alloc_anon(struct inode *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); -extern void shrink_dcache_anon(struct super_block *); extern int d_invalidate(struct dentry *); /* only used at mount-time */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 73c7d6f04b31..3e50dd24af87 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1269,23 +1269,26 @@ find_exported_dentry(struct super_block *sb, void *obj, void *parent, struct file_system_type { const char *name; int fs_flags; - struct super_block *(*get_sb) (struct file_system_type *, int, - const char *, void *); + int (*get_sb) (struct file_system_type *, int, + const char *, void *, struct vfsmount *); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; struct list_head fs_supers; }; -struct super_block *get_sb_bdev(struct file_system_type *fs_type, +extern int get_sb_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int)); -struct super_block *get_sb_single(struct file_system_type *fs_type, + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt); +extern int get_sb_single(struct file_system_type *fs_type, int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)); -struct super_block *get_sb_nodev(struct file_system_type *fs_type, + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt); +extern int get_sb_nodev(struct file_system_type *fs_type, int flags, void *data, - int (*fill_super)(struct super_block *, void *, int)); + int (*fill_super)(struct super_block *, void *, int), + struct vfsmount *mnt); void generic_shutdown_super(struct super_block *sb); void kill_block_super(struct super_block *sb); void kill_anon_super(struct super_block *sb); @@ -1296,8 +1299,10 @@ struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), void *data); -struct super_block *get_sb_pseudo(struct file_system_type *, char *, - struct super_operations *ops, unsigned long); +extern int get_sb_pseudo(struct file_system_type *, char *, + struct super_operations *ops, unsigned long, + struct vfsmount *mnt); +extern int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); int __put_super(struct super_block *sb); int __put_super_and_need_restart(struct super_block *sb); void unnamed_dev_init(void); diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h index 78ecfa28b1c2..00b340ba6612 100644 --- a/include/linux/ramfs.h +++ b/include/linux/ramfs.h @@ -2,8 +2,8 @@ #define _LINUX_RAMFS_H struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev); -struct super_block *ramfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data); +extern int ramfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt); #ifndef CONFIG_MMU extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file, diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 1511714a9585..0a2a24b6ebe4 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -205,11 +205,11 @@ static int mqueue_fill_super(struct super_block *sb, void *data, int silent) return 0; } -static struct super_block *mqueue_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int mqueue_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, mqueue_fill_super); + return get_sb_single(fs_type, flags, data, mqueue_fill_super, mnt); } static void init_once(void *foo, kmem_cache_t * cachep, unsigned long flags) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index ab81fdd4572b..77f45ffd5ea1 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -392,11 +392,11 @@ static int cpuset_fill_super(struct super_block *sb, void *unused_data, return 0; } -static struct super_block *cpuset_get_sb(struct file_system_type *fs_type, - int flags, const char *unused_dev_name, - void *data) +static int cpuset_get_sb(struct file_system_type *fs_type, + int flags, const char *unused_dev_name, + void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, cpuset_fill_super); + return get_sb_single(fs_type, flags, data, cpuset_fill_super, mnt); } static struct file_system_type cpuset_fs_type = { diff --git a/kernel/futex.c b/kernel/futex.c index 5699c512057b..e1a380c77a5a 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1056,11 +1056,11 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, int val, (unsigned long)uaddr2, val2, val3); } -static struct super_block * -futexfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int futexfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA); + return get_sb_pseudo(fs_type, "futex", NULL, 0xBAD1DEA, mnt); } static struct file_system_type futex_fs_type = { diff --git a/mm/shmem.c b/mm/shmem.c index 1e43c8a865ba..7617bb1c6bf7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2233,10 +2233,10 @@ static struct vm_operations_struct shmem_vm_ops = { }; -static struct super_block *shmem_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int shmem_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_nodev(fs_type, flags, data, shmem_fill_super); + return get_sb_nodev(fs_type, flags, data, shmem_fill_super, mnt); } static struct file_system_type tmpfs_fs_type = { diff --git a/net/socket.c b/net/socket.c index 02948b622bd2..565f5e8d1191 100644 --- a/net/socket.c +++ b/net/socket.c @@ -335,10 +335,11 @@ static struct super_operations sockfs_ops = { .statfs = simple_statfs, }; -static struct super_block *sockfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int sockfs_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC); + return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, + mnt); } static struct vfsmount *sock_mnt __read_mostly; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index cc673dd8433f..8241fa726803 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -815,11 +815,11 @@ out: return -ENOMEM; } -static struct super_block * +static int rpc_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, rpc_fill_super); + return get_sb_single(fs_type, flags, data, rpc_fill_super, mnt); } static struct file_system_type rpc_pipe_fs_type = { diff --git a/security/inode.c b/security/inode.c index 0f77b0223662..e6fc29ac8564 100644 --- a/security/inode.c +++ b/security/inode.c @@ -135,11 +135,11 @@ static int fill_super(struct super_block *sb, void *data, int silent) return simple_fill_super(sb, SECURITYFS_MAGIC, files); } -static struct super_block *get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, - void *data) +static int get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, + void *data, struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, fill_super); + return get_sb_single(fs_type, flags, data, fill_super, mnt); } static struct file_system_type fs_type = { diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 2e73d3279f2d..7029bbc9bef8 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -1345,10 +1345,11 @@ err: goto out; } -static struct super_block *sel_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data) +static int sel_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + struct vfsmount *mnt) { - return get_sb_single(fs_type, flags, data, sel_fill_super); + return get_sb_single(fs_type, flags, data, sel_fill_super, mnt); } static struct file_system_type sel_fs_type = { -- cgit v1.2.3 From 538c5902b81cc384e93ad3834b6d4a0b3fcb2285 Mon Sep 17 00:00:00 2001 From: Jean-Luc Leger Date: Fri, 23 Jun 2006 02:05:22 -0700 Subject: [PATCH] clean up default value of IP_DCCP_ACKVEC Default values for boolean and tristate options can only be 'y', 'm' or 'n'. This patch removes wrong default for IP_DCCP_ACKVEC. Signed-off-by: Jean-Luc Leger Cc: Arnaldo Carvalho de Melo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- net/dccp/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig index 7e096ba8454f..859e3359fcda 100644 --- a/net/dccp/Kconfig +++ b/net/dccp/Kconfig @@ -26,7 +26,7 @@ config INET_DCCP_DIAG config IP_DCCP_ACKVEC depends on IP_DCCP - def_bool N + bool source "net/dccp/ccids/Kconfig" -- cgit v1.2.3 From 626ab0e69d376fa07599af669af8ba92d58e87c1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 23 Jun 2006 02:05:55 -0700 Subject: [PATCH] list: use list_replace_init() instead of list_splice_init() list_splice_init(list, head) does unneeded job if it is known that list_empty(head) == 1. We can use list_replace_init() instead. Signed-off-by: Oleg Nesterov Acked-by: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/i386/mm/pageattr.c | 8 ++++---- block/ll_rw_blk.c | 5 ++--- fs/aio.c | 4 ++-- kernel/timer.c | 8 ++++---- kernel/workqueue.c | 4 ++-- net/core/dev.c | 6 +++--- net/core/link_watch.c | 5 ++--- 7 files changed, 19 insertions(+), 21 deletions(-) (limited to 'net') diff --git a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c index 92c3d9f0e731..0887b34bc59b 100644 --- a/arch/i386/mm/pageattr.c +++ b/arch/i386/mm/pageattr.c @@ -209,19 +209,19 @@ int change_page_attr(struct page *page, int numpages, pgprot_t prot) } void global_flush_tlb(void) -{ - LIST_HEAD(l); +{ + struct list_head l; struct page *pg, *next; BUG_ON(irqs_disabled()); spin_lock_irq(&cpa_lock); - list_splice_init(&df_list, &l); + list_replace_init(&df_list, &l); spin_unlock_irq(&cpa_lock); flush_map(); list_for_each_entry_safe(pg, next, &l, lru) __free_page(pg); -} +} #ifdef CONFIG_DEBUG_PAGEALLOC void kernel_map_pages(struct page *page, int numpages, int enable) diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 7eb36c53f4b7..465b54312c59 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -3359,12 +3359,11 @@ EXPORT_SYMBOL(end_that_request_chunk); */ static void blk_done_softirq(struct softirq_action *h) { - struct list_head *cpu_list; - LIST_HEAD(local_list); + struct list_head *cpu_list, local_list; local_irq_disable(); cpu_list = &__get_cpu_var(blk_cpu_done); - list_splice_init(cpu_list, &local_list); + list_replace_init(cpu_list, &local_list); local_irq_enable(); while (!list_empty(&local_list)) { diff --git a/fs/aio.c b/fs/aio.c index e41e932ba489..8c34a62df7d7 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -777,11 +777,11 @@ out: static int __aio_run_iocbs(struct kioctx *ctx) { struct kiocb *iocb; - LIST_HEAD(run_list); + struct list_head run_list; assert_spin_locked(&ctx->ctx_lock); - list_splice_init(&ctx->run_list, &run_list); + list_replace_init(&ctx->run_list, &run_list); while (!list_empty(&run_list)) { iocb = list_entry(run_list.next, struct kiocb, ki_run_list); diff --git a/kernel/timer.c b/kernel/timer.c index 9e49deed468c..3bf0e9ed2dbe 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -419,10 +419,10 @@ static inline void __run_timers(tvec_base_t *base) spin_lock_irq(&base->lock); while (time_after_eq(jiffies, base->timer_jiffies)) { - struct list_head work_list = LIST_HEAD_INIT(work_list); + struct list_head work_list; struct list_head *head = &work_list; int index = base->timer_jiffies & TVR_MASK; - + /* * Cascade timers: */ @@ -431,8 +431,8 @@ static inline void __run_timers(tvec_base_t *base) (!cascade(base, &base->tv3, INDEX(1))) && !cascade(base, &base->tv4, INDEX(2))) cascade(base, &base->tv5, INDEX(3)); - ++base->timer_jiffies; - list_splice_init(base->tv1.vec + index, &work_list); + ++base->timer_jiffies; + list_replace_init(base->tv1.vec + index, &work_list); while (!list_empty(head)) { void (*fn)(unsigned long); unsigned long data; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 880fb415a8f6..740c5abceb07 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -531,11 +531,11 @@ int current_is_keventd(void) static void take_over_work(struct workqueue_struct *wq, unsigned int cpu) { struct cpu_workqueue_struct *cwq = per_cpu_ptr(wq->cpu_wq, cpu); - LIST_HEAD(list); + struct list_head list; struct work_struct *work; spin_lock_irq(&cwq->lock); - list_splice_init(&cwq->worklist, &list); + list_replace_init(&cwq->worklist, &list); while (!list_empty(&list)) { printk("Taking work for %s\n", wq->name); diff --git a/net/core/dev.c b/net/core/dev.c index ab39fe17cb58..195a5e96b2d1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2980,7 +2980,7 @@ static void netdev_wait_allrefs(struct net_device *dev) static DEFINE_MUTEX(net_todo_run_mutex); void netdev_run_todo(void) { - struct list_head list = LIST_HEAD_INIT(list); + struct list_head list; /* Need to guard against multiple cpu's getting out of order. */ mutex_lock(&net_todo_run_mutex); @@ -2995,9 +2995,9 @@ void netdev_run_todo(void) /* Snapshot list, allow later requests */ spin_lock(&net_todo_list_lock); - list_splice_init(&net_todo_list, &list); + list_replace_init(&net_todo_list, &list); spin_unlock(&net_todo_list_lock); - + while (!list_empty(&list)) { struct net_device *dev = list_entry(list.next, struct net_device, todo_list); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 646937cc2d84..0f37266411b5 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -91,11 +91,10 @@ static void rfc2863_policy(struct net_device *dev) /* Must be called with the rtnl semaphore held */ void linkwatch_run_queue(void) { - LIST_HEAD(head); - struct list_head *n, *next; + struct list_head head, *n, *next; spin_lock_irq(&lweventlist_lock); - list_splice_init(&lweventlist, &head); + list_replace_init(&lweventlist, &head); spin_unlock_irq(&lweventlist_lock); list_for_each_safe(n, next, &head) { -- cgit v1.2.3