summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/compat.c18
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c31
-rw-r--r--net/dccp/minisocks.c19
-rw-r--r--net/ipv4/fou.c4
-rw-r--r--net/ipv4/geneve.c8
-rw-r--r--net/ipv4/inet_diag.c4
-rw-r--r--net/ipv4/inet_hashtables.c4
-rw-r--r--net/ipv4/inet_timewait_sock.c270
-rw-r--r--net/ipv4/proc.c2
-rw-r--r--net/ipv4/raw.c1
-rw-r--r--net/ipv4/tcp.c8
-rw-r--r--net/ipv4/tcp_input.c10
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/tcp_minisocks.c35
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv6/inet6_hashtables.c2
-rw-r--r--net/ipv6/ip6_vti.c3
-rw-r--r--net/ipv6/tcp_ipv6.c4
-rw-r--r--net/netfilter/nfnetlink_log.c6
-rw-r--r--net/netfilter/nfnetlink_queue_core.c9
-rw-r--r--net/netfilter/xt_TPROXY.c4
-rw-r--r--net/nfc/netlink.c2
-rw-r--r--net/rds/connection.c3
-rw-r--r--net/rds/rds.h1
-rw-r--r--net/rds/send.c33
-rw-r--r--net/rxrpc/ar-input.c23
-rw-r--r--net/rxrpc/ar-internal.h2
-rw-r--r--net/rxrpc/ar-local.c98
-rw-r--r--net/rxrpc/ar-output.c53
-rw-r--r--net/sched/sch_ingress.c9
-rw-r--r--net/sched/sch_netem.c3
-rw-r--r--net/socket.c100
-rw-r--r--net/sunrpc/svcsock.c2
-rw-r--r--net/xfrm/xfrm_input.c10
35 files changed, 351 insertions, 438 deletions
diff --git a/net/compat.c b/net/compat.c
index c4b6b0f43d5d..5cfd26a0006f 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -31,10 +31,10 @@
#include <asm/uaccess.h>
#include <net/compat.h>
-ssize_t get_compat_msghdr(struct msghdr *kmsg,
- struct compat_msghdr __user *umsg,
- struct sockaddr __user **save_addr,
- struct iovec **iov)
+int get_compat_msghdr(struct msghdr *kmsg,
+ struct compat_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ struct iovec **iov)
{
compat_uptr_t uaddr, uiov, tmp3;
compat_size_t nr_segs;
@@ -81,13 +81,9 @@ ssize_t get_compat_msghdr(struct msghdr *kmsg,
kmsg->msg_iocb = NULL;
- err = compat_rw_copy_check_uvector(save_addr ? READ : WRITE,
- compat_ptr(uiov), nr_segs,
- UIO_FASTIOV, *iov, iov);
- if (err >= 0)
- iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE,
- *iov, nr_segs, err);
- return err;
+ return compat_import_iovec(save_addr ? READ : WRITE,
+ compat_ptr(uiov), nr_segs,
+ UIO_FASTIOV, iov, &kmsg->msg_iter);
}
/* Bleech... */
diff --git a/net/core/datagram.c b/net/core/datagram.c
index df493d68330c..b80fb91bb3f7 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -673,7 +673,7 @@ int skb_copy_and_csum_datagram_msg(struct sk_buff *skb,
if (!chunk)
return 0;
- if (iov_iter_count(&msg->msg_iter) < chunk) {
+ if (msg_data_left(msg) < chunk) {
if (__skb_checksum_complete(skb))
goto csum_error;
if (skb_copy_datagram_msg(skb, hlen, msg, chunk))
diff --git a/net/core/dev.c b/net/core/dev.c
index b2775f06c710..af4a1b0adc10 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1630,6 +1630,22 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
}
EXPORT_SYMBOL(call_netdevice_notifiers);
+#ifdef CONFIG_NET_CLS_ACT
+static struct static_key ingress_needed __read_mostly;
+
+void net_inc_ingress_queue(void)
+{
+ static_key_slow_inc(&ingress_needed);
+}
+EXPORT_SYMBOL_GPL(net_inc_ingress_queue);
+
+void net_dec_ingress_queue(void)
+{
+ static_key_slow_dec(&ingress_needed);
+}
+EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
+#endif
+
static struct static_key netstamp_needed __read_mostly;
#ifdef HAVE_JUMP_LABEL
/* We are not allowed to call static_key_slow_dec() from irq context
@@ -3547,7 +3563,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc)
- goto out;
+ return skb;
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
@@ -3561,8 +3577,6 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
return NULL;
}
-out:
- skb->tc_verd = 0;
return skb;
}
#endif
@@ -3698,12 +3712,15 @@ another_round:
skip_taps:
#ifdef CONFIG_NET_CLS_ACT
- skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
- if (!skb)
- goto unlock;
+ if (static_key_false(&ingress_needed)) {
+ skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
+ if (!skb)
+ goto unlock;
+ }
+
+ skb->tc_verd = 0;
ncls:
#endif
-
if (pfmemalloc && !skb_pfmemalloc_protocol(skb))
goto drop;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 332f7d6d9942..5f566663e47f 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -27,28 +27,16 @@
struct inet_timewait_death_row dccp_death_row = {
.sysctl_max_tw_buckets = NR_FILE * 2,
- .period = DCCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
- .death_lock = __SPIN_LOCK_UNLOCKED(dccp_death_row.death_lock),
.hashinfo = &dccp_hashinfo,
- .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
- (unsigned long)&dccp_death_row),
- .twkill_work = __WORK_INITIALIZER(dccp_death_row.twkill_work,
- inet_twdr_twkill_work),
-/* Short-time timewait calendar */
-
- .twcal_hand = -1,
- .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
- (unsigned long)&dccp_death_row),
};
EXPORT_SYMBOL_GPL(dccp_death_row);
void dccp_time_wait(struct sock *sk, int state, int timeo)
{
- struct inet_timewait_sock *tw = NULL;
+ struct inet_timewait_sock *tw;
- if (dccp_death_row.tw_count < dccp_death_row.sysctl_max_tw_buckets)
- tw = inet_twsk_alloc(sk, state);
+ tw = inet_twsk_alloc(sk, &dccp_death_row, state);
if (tw != NULL) {
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -71,8 +59,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
if (state == DCCP_TIME_WAIT)
timeo = DCCP_TIMEWAIT_LEN;
- inet_twsk_schedule(tw, &dccp_death_row, timeo,
- DCCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, timeo);
inet_twsk_put(tw);
} else {
/* Sorry, if we're out of memory, just CLOSE this
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 263710259774..af150b43b214 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -886,12 +886,12 @@ EXPORT_SYMBOL(gue_build_header);
#ifdef CONFIG_NET_FOU_IP_TUNNELS
-static const struct ip_tunnel_encap_ops __read_mostly fou_iptun_ops = {
+static const struct ip_tunnel_encap_ops fou_iptun_ops = {
.encap_hlen = fou_encap_hlen,
.build_header = fou_build_header,
};
-static const struct ip_tunnel_encap_ops __read_mostly gue_iptun_ops = {
+static const struct ip_tunnel_encap_ops gue_iptun_ops = {
.encap_hlen = gue_encap_hlen,
.build_header = gue_build_header,
};
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index b77f5e84c623..8986e63f3bda 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -113,10 +113,6 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
int min_headroom;
int err;
- skb = udp_tunnel_handle_offloads(skb, csum);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
+ GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr)
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
@@ -131,6 +127,10 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt,
if (unlikely(!skb))
return -ENOMEM;
+ skb = udp_tunnel_handle_offloads(skb, csum);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
gnvh = (struct genevehdr *)__skb_push(skb, sizeof(*gnvh) + opt_len);
geneve_build_header(gnvh, tun_flags, vni, opt_len, opt);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 76322c9867d5..70e8b3c308ec 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -248,7 +248,7 @@ static int inet_twsk_diag_fill(struct sock *sk,
struct inet_timewait_sock *tw = inet_twsk(sk);
struct inet_diag_msg *r;
struct nlmsghdr *nlh;
- s32 tmo;
+ long tmo;
nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
nlmsg_flags);
@@ -258,7 +258,7 @@ static int inet_twsk_diag_fill(struct sock *sk,
r = nlmsg_data(nlh);
BUG_ON(tw->tw_state != TCP_TIME_WAIT);
- tmo = tw->tw_ttd - inet_tw_time_stamp();
+ tmo = tw->tw_timer.expires - jiffies;
if (tmo < 0)
tmo = 0;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index d4630bf2d9aa..c6fb80bd5826 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -388,7 +388,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
*twp = tw;
} else if (tw) {
/* Silly. Should hash-dance instead... */
- inet_twsk_deschedule(tw, death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
}
@@ -565,7 +565,7 @@ ok:
spin_unlock(&head->lock);
if (tw) {
- inet_twsk_deschedule(tw, death_row);
+ inet_twsk_deschedule(tw);
while (twrefcnt) {
twrefcnt--;
inet_twsk_put(tw);
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 118f0f195820..00ec8d5d7e7e 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -67,9 +67,9 @@ int inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
}
/* Must be called with locally disabled BHs. */
-static void __inet_twsk_kill(struct inet_timewait_sock *tw,
- struct inet_hashinfo *hashinfo)
+static void inet_twsk_kill(struct inet_timewait_sock *tw)
{
+ struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
struct inet_bind_hashbucket *bhead;
int refcnt;
/* Unlink from established hashes. */
@@ -89,6 +89,8 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
BUG_ON(refcnt >= atomic_read(&tw->tw_refcnt));
atomic_sub(refcnt, &tw->tw_refcnt);
+ atomic_dec(&tw->tw_dr->tw_count);
+ inet_twsk_put(tw);
}
void inet_twsk_free(struct inet_timewait_sock *tw)
@@ -168,16 +170,34 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
}
EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
-struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
+void tw_timer_handler(unsigned long data)
{
- struct inet_timewait_sock *tw =
- kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
- GFP_ATOMIC);
+ struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data;
+
+ if (tw->tw_kill)
+ NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
+ else
+ NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED);
+ inet_twsk_kill(tw);
+}
+
+struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk,
+ struct inet_timewait_death_row *dr,
+ const int state)
+{
+ struct inet_timewait_sock *tw;
+
+ if (atomic_read(&dr->tw_count) >= dr->sysctl_max_tw_buckets)
+ return NULL;
+
+ tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
+ GFP_ATOMIC);
if (tw) {
const struct inet_sock *inet = inet_sk(sk);
kmemcheck_annotate_bitfield(tw, flags);
+ tw->tw_dr = dr;
/* Give us an identity. */
tw->tw_daddr = inet->inet_daddr;
tw->tw_rcv_saddr = inet->inet_rcv_saddr;
@@ -196,13 +216,14 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
tw->tw_prot = sk->sk_prot_creator;
atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie));
twsk_net_set(tw, sock_net(sk));
+ setup_timer(&tw->tw_timer, tw_timer_handler, (unsigned long)tw);
/*
* Because we use RCU lookups, we should not set tw_refcnt
* to a non null value before everything is setup for this
* timewait socket.
*/
atomic_set(&tw->tw_refcnt, 0);
- inet_twsk_dead_node_init(tw);
+
__module_get(tw->tw_prot->owner);
}
@@ -210,139 +231,20 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat
}
EXPORT_SYMBOL_GPL(inet_twsk_alloc);
-/* Returns non-zero if quota exceeded. */
-static int inet_twdr_do_twkill_work(struct inet_timewait_death_row *twdr,
- const int slot)
-{
- struct inet_timewait_sock *tw;
- unsigned int killed;
- int ret;
-
- /* NOTE: compare this to previous version where lock
- * was released after detaching chain. It was racy,
- * because tw buckets are scheduled in not serialized context
- * in 2.3 (with netfilter), and with softnet it is common, because
- * soft irqs are not sequenced.
- */
- killed = 0;
- ret = 0;
-rescan:
- inet_twsk_for_each_inmate(tw, &twdr->cells[slot]) {
- __inet_twsk_del_dead_node(tw);
- spin_unlock(&twdr->death_lock);
- __inet_twsk_kill(tw, twdr->hashinfo);
-#ifdef CONFIG_NET_NS
- NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITED);
-#endif
- inet_twsk_put(tw);
- killed++;
- spin_lock(&twdr->death_lock);
- if (killed > INET_TWDR_TWKILL_QUOTA) {
- ret = 1;
- break;
- }
-
- /* While we dropped twdr->death_lock, another cpu may have
- * killed off the next TW bucket in the list, therefore
- * do a fresh re-read of the hlist head node with the
- * lock reacquired. We still use the hlist traversal
- * macro in order to get the prefetches.
- */
- goto rescan;
- }
-
- twdr->tw_count -= killed;
-#ifndef CONFIG_NET_NS
- NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITED, killed);
-#endif
- return ret;
-}
-
-void inet_twdr_hangman(unsigned long data)
-{
- struct inet_timewait_death_row *twdr;
- unsigned int need_timer;
-
- twdr = (struct inet_timewait_death_row *)data;
- spin_lock(&twdr->death_lock);
-
- if (twdr->tw_count == 0)
- goto out;
-
- need_timer = 0;
- if (inet_twdr_do_twkill_work(twdr, twdr->slot)) {
- twdr->thread_slots |= (1 << twdr->slot);
- schedule_work(&twdr->twkill_work);
- need_timer = 1;
- } else {
- /* We purged the entire slot, anything left? */
- if (twdr->tw_count)
- need_timer = 1;
- twdr->slot = ((twdr->slot + 1) & (INET_TWDR_TWKILL_SLOTS - 1));
- }
- if (need_timer)
- mod_timer(&twdr->tw_timer, jiffies + twdr->period);
-out:
- spin_unlock(&twdr->death_lock);
-}
-EXPORT_SYMBOL_GPL(inet_twdr_hangman);
-
-void inet_twdr_twkill_work(struct work_struct *work)
-{
- struct inet_timewait_death_row *twdr =
- container_of(work, struct inet_timewait_death_row, twkill_work);
- int i;
-
- BUILD_BUG_ON((INET_TWDR_TWKILL_SLOTS - 1) >
- (sizeof(twdr->thread_slots) * 8));
-
- while (twdr->thread_slots) {
- spin_lock_bh(&twdr->death_lock);
- for (i = 0; i < INET_TWDR_TWKILL_SLOTS; i++) {
- if (!(twdr->thread_slots & (1 << i)))
- continue;
-
- while (inet_twdr_do_twkill_work(twdr, i) != 0) {
- if (need_resched()) {
- spin_unlock_bh(&twdr->death_lock);
- schedule();
- spin_lock_bh(&twdr->death_lock);
- }
- }
-
- twdr->thread_slots &= ~(1 << i);
- }
- spin_unlock_bh(&twdr->death_lock);
- }
-}
-EXPORT_SYMBOL_GPL(inet_twdr_twkill_work);
-
/* These are always called from BH context. See callers in
* tcp_input.c to verify this.
*/
/* This is for handling early-kills of TIME_WAIT sockets. */
-void inet_twsk_deschedule(struct inet_timewait_sock *tw,
- struct inet_timewait_death_row *twdr)
+void inet_twsk_deschedule(struct inet_timewait_sock *tw)
{
- spin_lock(&twdr->death_lock);
- if (inet_twsk_del_dead_node(tw)) {
- inet_twsk_put(tw);
- if (--twdr->tw_count == 0)
- del_timer(&twdr->tw_timer);
- }
- spin_unlock(&twdr->death_lock);
- __inet_twsk_kill(tw, twdr->hashinfo);
+ if (del_timer_sync(&tw->tw_timer))
+ inet_twsk_kill(tw);
}
EXPORT_SYMBOL(inet_twsk_deschedule);
-void inet_twsk_schedule(struct inet_timewait_sock *tw,
- struct inet_timewait_death_row *twdr,
- const int timeo, const int timewait_len)
+void inet_twsk_schedule(struct inet_timewait_sock *tw, const int timeo)
{
- struct hlist_head *list;
- int slot;
-
/* timeout := RTO * 3.5
*
* 3.5 = 1+2+0.5 to wait for two retransmits.
@@ -367,115 +269,15 @@ void inet_twsk_schedule(struct inet_timewait_sock *tw,
* is greater than TS tick!) and detect old duplicates with help
* of PAWS.
*/
- slot = (timeo + (1 << INET_TWDR_RECYCLE_TICK) - 1) >> INET_TWDR_RECYCLE_TICK;
- spin_lock(&twdr->death_lock);
-
- /* Unlink it, if it was scheduled */
- if (inet_twsk_del_dead_node(tw))
- twdr->tw_count--;
- else
+ tw->tw_kill = timeo <= 4*HZ;
+ if (!mod_timer_pinned(&tw->tw_timer, jiffies + timeo)) {
atomic_inc(&tw->tw_refcnt);
-
- if (slot >= INET_TWDR_RECYCLE_SLOTS) {
- /* Schedule to slow timer */
- if (timeo >= timewait_len) {
- slot = INET_TWDR_TWKILL_SLOTS - 1;
- } else {
- slot = DIV_ROUND_UP(timeo, twdr->period);
- if (slot >= INET_TWDR_TWKILL_SLOTS)
- slot = INET_TWDR_TWKILL_SLOTS - 1;
- }
- tw->tw_ttd = inet_tw_time_stamp() + timeo;
- slot = (twdr->slot + slot) & (INET_TWDR_TWKILL_SLOTS - 1);
- list = &twdr->cells[slot];
- } else {
- tw->tw_ttd = inet_tw_time_stamp() + (slot << INET_TWDR_RECYCLE_TICK);
-
- if (twdr->twcal_hand < 0) {
- twdr->twcal_hand = 0;
- twdr->twcal_jiffie = jiffies;
- twdr->twcal_timer.expires = twdr->twcal_jiffie +
- (slot << INET_TWDR_RECYCLE_TICK);
- add_timer(&twdr->twcal_timer);
- } else {
- if (time_after(twdr->twcal_timer.expires,
- jiffies + (slot << INET_TWDR_RECYCLE_TICK)))
- mod_timer(&twdr->twcal_timer,
- jiffies + (slot << INET_TWDR_RECYCLE_TICK));
- slot = (twdr->twcal_hand + slot) & (INET_TWDR_RECYCLE_SLOTS - 1);
- }
- list = &twdr->twcal_row[slot];
+ atomic_inc(&tw->tw_dr->tw_count);
}
-
- hlist_add_head(&tw->tw_death_node, list);
-
- if (twdr->tw_count++ == 0)
- mod_timer(&twdr->tw_timer, jiffies + twdr->period);
- spin_unlock(&twdr->death_lock);
}
EXPORT_SYMBOL_GPL(inet_twsk_schedule);
-void inet_twdr_twcal_tick(unsigned long data)
-{
- struct inet_timewait_death_row *twdr;
- int n, slot;
- unsigned long j;
- unsigned long now = jiffies;
- int killed = 0;
- int adv = 0;
-
- twdr = (struct inet_timewait_death_row *)data;
-
- spin_lock(&twdr->death_lock);
- if (twdr->twcal_hand < 0)
- goto out;
-
- slot = twdr->twcal_hand;
- j = twdr->twcal_jiffie;
-
- for (n = 0; n < INET_TWDR_RECYCLE_SLOTS; n++) {
- if (time_before_eq(j, now)) {
- struct hlist_node *safe;
- struct inet_timewait_sock *tw;
-
- inet_twsk_for_each_inmate_safe(tw, safe,
- &twdr->twcal_row[slot]) {
- __inet_twsk_del_dead_node(tw);
- __inet_twsk_kill(tw, twdr->hashinfo);
-#ifdef CONFIG_NET_NS
- NET_INC_STATS_BH(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED);
-#endif
- inet_twsk_put(tw);
- killed++;
- }
- } else {
- if (!adv) {
- adv = 1;
- twdr->twcal_jiffie = j;
- twdr->twcal_hand = slot;
- }
-
- if (!hlist_empty(&twdr->twcal_row[slot])) {
- mod_timer(&twdr->twcal_timer, j);
- goto out;
- }
- }
- j += 1 << INET_TWDR_RECYCLE_TICK;
- slot = (slot + 1) & (INET_TWDR_RECYCLE_SLOTS - 1);
- }
- twdr->twcal_hand = -1;
-
-out:
- if ((twdr->tw_count -= killed) == 0)
- del_timer(&twdr->tw_timer);
-#ifndef CONFIG_NET_NS
- NET_ADD_STATS_BH(&init_net, LINUX_MIB_TIMEWAITKILLED, killed);
-#endif
- spin_unlock(&twdr->death_lock);
-}
-EXPORT_SYMBOL_GPL(inet_twdr_twcal_tick);
-
void inet_twsk_purge(struct inet_hashinfo *hashinfo,
struct inet_timewait_death_row *twdr, int family)
{
@@ -509,7 +311,7 @@ restart:
rcu_read_unlock();
local_bh_disable();
- inet_twsk_deschedule(tw, twdr);
+ inet_twsk_deschedule(tw);
local_bh_enable();
inet_twsk_put(tw);
goto restart_rcu;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index d8953ef0770c..e1f3b911dd1e 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -63,7 +63,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
socket_seq_show(seq);
seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n",
sock_prot_inuse_get(net, &tcp_prot), orphans,
- tcp_death_row.tw_count, sockets,
+ atomic_read(&tcp_death_row.tw_count), sockets,
proto_memory_allocated(&tcp_prot));
seq_printf(seq, "UDP: inuse %d mem %ld\n",
sock_prot_inuse_get(net, &udp_prot),
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index c0bb648fb2f9..561cd4b8fc6e 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -46,7 +46,6 @@
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/errno.h>
-#include <linux/aio.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 094a6822c71d..18e3a12eb1b2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1119,7 +1119,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
sg = !!(sk->sk_route_caps & NETIF_F_SG);
- while (iov_iter_count(&msg->msg_iter)) {
+ while (msg_data_left(msg)) {
int copy = 0;
int max = size_goal;
@@ -1163,8 +1163,8 @@ new_segment:
}
/* Try to append data to the end of skb. */
- if (copy > iov_iter_count(&msg->msg_iter))
- copy = iov_iter_count(&msg->msg_iter);
+ if (copy > msg_data_left(msg))
+ copy = msg_data_left(msg);
/* Where to copy to? */
if (skb_availroom(skb) > 0) {
@@ -1221,7 +1221,7 @@ new_segment:
tcp_skb_pcount_set(skb, 0);
copied += copy;
- if (!iov_iter_count(&msg->msg_iter)) {
+ if (!msg_data_left(msg)) {
tcp_tx_timestamp(sk, skb);
goto out;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 031cf72cd05c..a7ef679dd3ea 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3099,17 +3099,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (sacked & TCPCB_SACKED_RETRANS)
tp->retrans_out -= acked_pcount;
flag |= FLAG_RETRANS_DATA_ACKED;
- } else {
+ } else if (!(sacked & TCPCB_SACKED_ACKED)) {
last_ackt = skb->skb_mstamp;
WARN_ON_ONCE(last_ackt.v64 == 0);
if (!first_ackt.v64)
first_ackt = last_ackt;
- if (!(sacked & TCPCB_SACKED_ACKED)) {
- reord = min(pkts_acked, reord);
- if (!after(scb->end_seq, tp->high_seq))
- flag |= FLAG_ORIG_SACK_ACKED;
- }
+ reord = min(pkts_acked, reord);
+ if (!after(scb->end_seq, tp->high_seq))
+ flag |= FLAG_ORIG_SACK_ACKED;
}
if (sacked & TCPCB_SACKED_ACKED)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 37578d52897e..3571f2be4470 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1685,7 +1685,7 @@ do_time_wait:
iph->daddr, th->dest,
inet_iif(skb));
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_deschedule(inet_twsk(sk));
inet_twsk_put(inet_twsk(sk));
sk = sk2;
goto process;
@@ -2242,9 +2242,9 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i)
static void get_timewait4_sock(const struct inet_timewait_sock *tw,
struct seq_file *f, int i)
{
+ long delta = tw->tw_timer.expires - jiffies;
__be32 dest, src;
__u16 destp, srcp;
- s32 delta = tw->tw_ttd - inet_tw_time_stamp();
dest = tw->tw_daddr;
src = tw->tw_rcv_saddr;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 2088fdcca141..63d6311b5365 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -34,18 +34,7 @@ int sysctl_tcp_abort_on_overflow __read_mostly;
struct inet_timewait_death_row tcp_death_row = {
.sysctl_max_tw_buckets = NR_FILE * 2,
- .period = TCP_TIMEWAIT_LEN / INET_TWDR_TWKILL_SLOTS,
- .death_lock = __SPIN_LOCK_UNLOCKED(tcp_death_row.death_lock),
.hashinfo = &tcp_hashinfo,
- .tw_timer = TIMER_INITIALIZER(inet_twdr_hangman, 0,
- (unsigned long)&tcp_death_row),
- .twkill_work = __WORK_INITIALIZER(tcp_death_row.twkill_work,
- inet_twdr_twkill_work),
-/* Short-time timewait calendar */
-
- .twcal_hand = -1,
- .twcal_timer = TIMER_INITIALIZER(inet_twdr_twcal_tick, 0,
- (unsigned long)&tcp_death_row),
};
EXPORT_SYMBOL_GPL(tcp_death_row);
@@ -158,7 +147,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb,
if (!th->fin ||
TCP_SKB_CB(skb)->end_seq != tcptw->tw_rcv_nxt + 1) {
kill_with_rst:
- inet_twsk_deschedule(tw, &tcp_death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
return TCP_TW_RST;
}
@@ -174,11 +163,9 @@ kill_with_rst:
if (tcp_death_row.sysctl_tw_recycle &&
tcptw->tw_ts_recent_stamp &&
tcp_tw_remember_stamp(tw))
- inet_twsk_schedule(tw, &tcp_death_row, tw->tw_timeout,
- TCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, tw->tw_timeout);
else
- inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
- TCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN);
return TCP_TW_ACK;
}
@@ -211,13 +198,12 @@ kill_with_rst:
*/
if (sysctl_tcp_rfc1337 == 0) {
kill:
- inet_twsk_deschedule(tw, &tcp_death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
return TCP_TW_SUCCESS;
}
}
- inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
- TCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN);
if (tmp_opt.saw_tstamp) {
tcptw->tw_ts_recent = tmp_opt.rcv_tsval;
@@ -267,8 +253,7 @@ kill:
* Do not reschedule in the last case.
*/
if (paws_reject || th->ack)
- inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN,
- TCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, TCP_TIMEWAIT_LEN);
return tcp_timewait_check_oow_rate_limit(
tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT);
@@ -283,16 +268,15 @@ EXPORT_SYMBOL(tcp_timewait_state_process);
*/
void tcp_time_wait(struct sock *sk, int state, int timeo)
{
- struct inet_timewait_sock *tw = NULL;
const struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_timewait_sock *tw;
bool recycle_ok = false;
if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
recycle_ok = tcp_remember_stamp(sk);
- if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
- tw = inet_twsk_alloc(sk, state);
+ tw = inet_twsk_alloc(sk, &tcp_death_row, state);
if (tw) {
struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
@@ -355,8 +339,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
timeo = TCP_TIMEWAIT_LEN;
}
- inet_twsk_schedule(tw, &tcp_death_row, timeo,
- TCP_TIMEWAIT_LEN);
+ inet_twsk_schedule(tw, timeo);
inet_twsk_put(tw);
} else {
/* Sorry, if we're out of memory, just CLOSE this
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e662d85d1635..8c8d7e06b72f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2994,6 +2994,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
rcu_read_unlock();
#endif
+ /* Do not fool tcpdump (if any), clean our debris */
+ skb->tstamp.tv64 = 0;
return skb;
}
EXPORT_SYMBOL(tcp_make_synack);
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 033f17816ef4..871641bc1ed4 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -246,7 +246,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
*twp = tw;
} else if (tw) {
/* Silly. Should hash-dance instead... */
- inet_twsk_deschedule(tw, death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
}
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index b53148444e15..ed9d681207fa 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -288,8 +288,7 @@ static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p,
static void vti6_dev_uninit(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct net *net = dev_net(dev);
- struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ struct vti6_net *ip6n = net_generic(t->net, vti6_net_id);
if (dev == ip6n->fb_tnl_dev)
RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f73a97f6e68e..ad51df85aa00 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1486,7 +1486,7 @@ do_time_wait:
ntohs(th->dest), tcp_v6_iif(skb));
if (sk2) {
struct inet_timewait_sock *tw = inet_twsk(sk);
- inet_twsk_deschedule(tw, &tcp_death_row);
+ inet_twsk_deschedule(tw);
inet_twsk_put(tw);
sk = sk2;
tcp_v6_restore_cb(skb);
@@ -1728,9 +1728,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
static void get_timewait6_sock(struct seq_file *seq,
struct inet_timewait_sock *tw, int i)
{
+ long delta = tw->tw_timer.expires - jiffies;
const struct in6_addr *dest, *src;
__u16 destp, srcp;
- s32 delta = tw->tw_ttd - inet_tw_time_stamp();
dest = &tw->tw_v6_daddr;
src = &tw->tw_v6_rcv_saddr;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 51afea4b0af7..3ad91266c821 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -63,7 +63,7 @@ struct nfulnl_instance {
struct timer_list timer;
struct net *net;
struct user_namespace *peer_user_ns; /* User namespace of the peer process */
- int peer_portid; /* PORTID of the peer process */
+ u32 peer_portid; /* PORTID of the peer process */
/* configurable parameters */
unsigned int flushtimeout; /* timeout until queue flush */
@@ -152,7 +152,7 @@ static void nfulnl_timer(unsigned long data);
static struct nfulnl_instance *
instance_create(struct net *net, u_int16_t group_num,
- int portid, struct user_namespace *user_ns)
+ u32 portid, struct user_namespace *user_ns)
{
struct nfulnl_instance *inst;
struct nfnl_log_net *log = nfnl_log_pernet(net);
@@ -1007,7 +1007,7 @@ static int seq_show(struct seq_file *s, void *v)
{
const struct nfulnl_instance *inst = v;
- seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n",
+ seq_printf(s, "%5u %6u %5u %1u %5u %6u %2u\n",
inst->group_num,
inst->peer_portid, inst->qlen,
inst->copy_mode, inst->copy_range,
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 628afc350c02..0b98c7420239 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -55,7 +55,7 @@ struct nfqnl_instance {
struct hlist_node hlist; /* global list of queues */
struct rcu_head rcu;
- int peer_portid;
+ u32 peer_portid;
unsigned int queue_maxlen;
unsigned int copy_range;
unsigned int queue_dropped;
@@ -110,8 +110,7 @@ instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
}
static struct nfqnl_instance *
-instance_create(struct nfnl_queue_net *q, u_int16_t queue_num,
- int portid)
+instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
{
struct nfqnl_instance *inst;
unsigned int h;
@@ -870,7 +869,7 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
};
static struct nfqnl_instance *
-verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, int nlportid)
+verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, u32 nlportid)
{
struct nfqnl_instance *queue;
@@ -1252,7 +1251,7 @@ static int seq_show(struct seq_file *s, void *v)
{
const struct nfqnl_instance *inst = v;
- seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n",
+ seq_printf(s, "%5u %6u %5u %1u %5u %5u %5u %8u %2d\n",
inst->queue_num,
inst->peer_portid, inst->queue_total,
inst->copy_mode, inst->copy_range,
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index c205b26a2bee..cca96cec1b68 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -272,7 +272,7 @@ tproxy_handle_time_wait4(struct sk_buff *skb, __be32 laddr, __be16 lport,
hp->source, lport ? lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_deschedule(inet_twsk(sk));
inet_twsk_put(inet_twsk(sk));
sk = sk2;
}
@@ -437,7 +437,7 @@ tproxy_handle_time_wait6(struct sk_buff *skb, int tproto, int thoff,
tgi->lport ? tgi->lport : hp->dest,
skb->dev, NFT_LOOKUP_LISTENER);
if (sk2) {
- inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row);
+ inet_twsk_deschedule(inet_twsk(sk));
inet_twsk_put(inet_twsk(sk));
sk = sk2;
}
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 14a2d11581da..3763036710ae 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1584,7 +1584,7 @@ static const struct genl_ops nfc_genl_ops[] = {
struct urelease_work {
struct work_struct w;
- int portid;
+ u32 portid;
};
static void nfc_urelease_event_work(struct work_struct *work)
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 378c3a6acf84..14f041398ca1 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -130,7 +130,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
rcu_read_lock();
conn = rds_conn_lookup(head, laddr, faddr, trans);
if (conn && conn->c_loopback && conn->c_trans != &rds_loop_transport &&
- !is_outgoing) {
+ laddr == faddr && !is_outgoing) {
/* This is a looped back IB connection, and we're
* called by the code handling the incoming connect.
* We need a second connection object into which we
@@ -193,6 +193,7 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
}
atomic_set(&conn->c_state, RDS_CONN_DOWN);
+ conn->c_send_gen = 0;
conn->c_reconnect_jiffies = 0;
INIT_DELAYED_WORK(&conn->c_send_w, rds_send_worker);
INIT_DELAYED_WORK(&conn->c_recv_w, rds_recv_worker);
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c3f2855c3d84..0d41155a2258 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -110,6 +110,7 @@ struct rds_connection {
void *c_transport_data;
atomic_t c_state;
+ unsigned long c_send_gen;
unsigned long c_flags;
unsigned long c_reconnect_jiffies;
struct delayed_work c_send_w;
diff --git a/net/rds/send.c b/net/rds/send.c
index 44672befc0ee..e9430f537f9c 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -140,8 +140,11 @@ int rds_send_xmit(struct rds_connection *conn)
struct scatterlist *sg;
int ret = 0;
LIST_HEAD(to_be_dropped);
+ int batch_count;
+ unsigned long send_gen = 0;
restart:
+ batch_count = 0;
/*
* sendmsg calls here after having queued its message on the send
@@ -157,6 +160,17 @@ restart:
}
/*
+ * we record the send generation after doing the xmit acquire.
+ * if someone else manages to jump in and do some work, we'll use
+ * this to avoid a goto restart farther down.
+ *
+ * The acquire_in_xmit() check above ensures that only one
+ * caller can increment c_send_gen at any time.
+ */
+ conn->c_send_gen++;
+ send_gen = conn->c_send_gen;
+
+ /*
* rds_conn_shutdown() sets the conn state and then tests RDS_IN_XMIT,
* we do the opposite to avoid races.
*/
@@ -202,6 +216,16 @@ restart:
if (!rm) {
unsigned int len;
+ batch_count++;
+
+ /* we want to process as big a batch as we can, but
+ * we also want to avoid softlockups. If we've been
+ * through a lot of messages, lets back off and see
+ * if anyone else jumps in
+ */
+ if (batch_count >= 1024)
+ goto over_batch;
+
spin_lock_irqsave(&conn->c_lock, flags);
if (!list_empty(&conn->c_send_queue)) {
@@ -357,9 +381,9 @@ restart:
}
}
+over_batch:
if (conn->c_trans->xmit_complete)
conn->c_trans->xmit_complete(conn);
-
release_in_xmit(conn);
/* Nuke any messages we decided not to retransmit. */
@@ -380,10 +404,15 @@ restart:
* If the transport cannot continue (i.e ret != 0), then it must
* call us when more room is available, such as from the tx
* completion handler.
+ *
+ * We have an extra generation check here so that if someone manages
+ * to jump in after our release_in_xmit, we'll see that they have done
+ * some work and we will skip our goto
*/
if (ret == 0) {
smp_mb();
- if (!list_empty(&conn->c_send_queue)) {
+ if (!list_empty(&conn->c_send_queue) &&
+ send_gen == conn->c_send_gen) {
rds_stats_inc(s_send_lock_queue_raced);
goto restart;
}
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 481f89f93789..4505a691d88c 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -28,7 +28,7 @@
const char *rxrpc_pkts[] = {
"?00",
"DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG",
- "?09", "?10", "?11", "?12", "?13", "?14", "?15"
+ "?09", "?10", "?11", "?12", "VERSION", "?14", "?15"
};
/*
@@ -593,6 +593,20 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
rxrpc_queue_conn(conn);
}
+/*
+ * post endpoint-level events to the local endpoint
+ * - this includes debug and version messages
+ */
+static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
+ struct sk_buff *skb)
+{
+ _enter("%p,%p", local, skb);
+
+ atomic_inc(&local->usage);
+ skb_queue_tail(&local->event_queue, skb);
+ rxrpc_queue_work(&local->event_processor);
+}
+
static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local,
struct sk_buff *skb,
struct rxrpc_skb_priv *sp)
@@ -699,6 +713,11 @@ void rxrpc_data_ready(struct sock *sk)
goto bad_message;
}
+ if (sp->hdr.type == RXRPC_PACKET_TYPE_VERSION) {
+ rxrpc_post_packet_to_local(local, skb);
+ goto out;
+ }
+
if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
(sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
goto bad_message;
@@ -731,6 +750,8 @@ void rxrpc_data_ready(struct sock *sk)
else
goto cant_route_call;
}
+
+out:
rxrpc_put_local(local);
return;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 2fc1e659e5c9..aef1bd294e17 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -152,11 +152,13 @@ struct rxrpc_local {
struct work_struct destroyer; /* endpoint destroyer */
struct work_struct acceptor; /* incoming call processor */
struct work_struct rejecter; /* packet reject writer */
+ struct work_struct event_processor; /* endpoint event processor */
struct list_head services; /* services listening on this endpoint */
struct list_head link; /* link in endpoint list */
struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
struct sk_buff_head accept_queue; /* incoming calls awaiting acceptance */
struct sk_buff_head reject_queue; /* packets awaiting rejection */
+ struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */
spinlock_t lock; /* access lock */
rwlock_t services_lock; /* lock for services list */
atomic_t usage;
diff --git a/net/rxrpc/ar-local.c b/net/rxrpc/ar-local.c
index 87f7135d238b..ca904ed5400a 100644
--- a/net/rxrpc/ar-local.c
+++ b/net/rxrpc/ar-local.c
@@ -13,16 +13,22 @@
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
+#include <linux/udp.h>
+#include <linux/ip.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
+#include <generated/utsrelease.h>
#include "ar-internal.h"
+static const char rxrpc_version_string[65] = "linux-" UTS_RELEASE " AF_RXRPC";
+
static LIST_HEAD(rxrpc_locals);
DEFINE_RWLOCK(rxrpc_local_lock);
static DECLARE_RWSEM(rxrpc_local_sem);
static DECLARE_WAIT_QUEUE_HEAD(rxrpc_local_wq);
static void rxrpc_destroy_local(struct work_struct *work);
+static void rxrpc_process_local_events(struct work_struct *work);
/*
* allocate a new local
@@ -37,11 +43,13 @@ struct rxrpc_local *rxrpc_alloc_local(struct sockaddr_rxrpc *srx)
INIT_WORK(&local->destroyer, &rxrpc_destroy_local);
INIT_WORK(&local->acceptor, &rxrpc_accept_incoming_calls);
INIT_WORK(&local->rejecter, &rxrpc_reject_packets);
+ INIT_WORK(&local->event_processor, &rxrpc_process_local_events);
INIT_LIST_HEAD(&local->services);
INIT_LIST_HEAD(&local->link);
init_rwsem(&local->defrag_sem);
skb_queue_head_init(&local->accept_queue);
skb_queue_head_init(&local->reject_queue);
+ skb_queue_head_init(&local->event_queue);
spin_lock_init(&local->lock);
rwlock_init(&local->services_lock);
atomic_set(&local->usage, 1);
@@ -264,10 +272,12 @@ static void rxrpc_destroy_local(struct work_struct *work)
ASSERT(list_empty(&local->services));
ASSERT(!work_pending(&local->acceptor));
ASSERT(!work_pending(&local->rejecter));
+ ASSERT(!work_pending(&local->event_processor));
/* finish cleaning up the local descriptor */
rxrpc_purge_queue(&local->accept_queue);
rxrpc_purge_queue(&local->reject_queue);
+ rxrpc_purge_queue(&local->event_queue);
kernel_sock_shutdown(local->socket, SHUT_RDWR);
sock_release(local->socket);
@@ -308,3 +318,91 @@ void __exit rxrpc_destroy_all_locals(void)
_leave("");
}
+
+/*
+ * Reply to a version request
+ */
+static void rxrpc_send_version_request(struct rxrpc_local *local,
+ struct rxrpc_header *hdr,
+ struct sk_buff *skb)
+{
+ struct sockaddr_in sin;
+ struct msghdr msg;
+ struct kvec iov[2];
+ size_t len;
+ int ret;
+
+ _enter("");
+
+ sin.sin_family = AF_INET;
+ sin.sin_port = udp_hdr(skb)->source;
+ sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+
+ msg.msg_name = &sin;
+ msg.msg_namelen = sizeof(sin);
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ hdr->seq = 0;
+ hdr->serial = 0;
+ hdr->type = RXRPC_PACKET_TYPE_VERSION;
+ hdr->flags = RXRPC_LAST_PACKET | (~hdr->flags & RXRPC_CLIENT_INITIATED);
+ hdr->userStatus = 0;
+ hdr->_rsvd = 0;
+
+ iov[0].iov_base = hdr;
+ iov[0].iov_len = sizeof(*hdr);
+ iov[1].iov_base = (char *)rxrpc_version_string;
+ iov[1].iov_len = sizeof(rxrpc_version_string);
+
+ len = iov[0].iov_len + iov[1].iov_len;
+
+ _proto("Tx VERSION (reply)");
+
+ ret = kernel_sendmsg(local->socket, &msg, iov, 2, len);
+ if (ret < 0)
+ _debug("sendmsg failed: %d", ret);
+
+ _leave("");
+}
+
+/*
+ * Process event packets targetted at a local endpoint.
+ */
+static void rxrpc_process_local_events(struct work_struct *work)
+{
+ struct rxrpc_local *local = container_of(work, struct rxrpc_local, event_processor);
+ struct sk_buff *skb;
+ char v;
+
+ _enter("");
+
+ atomic_inc(&local->usage);
+
+ while ((skb = skb_dequeue(&local->event_queue))) {
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ kdebug("{%d},{%u}", local->debug_id, sp->hdr.type);
+
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_VERSION:
+ if (skb_copy_bits(skb, 0, &v, 1) < 0)
+ return;
+ _proto("Rx VERSION { %02x }", v);
+ if (v == 0)
+ rxrpc_send_version_request(local, &sp->hdr, skb);
+ break;
+
+ default:
+ /* Just ignore anything we don't understand */
+ break;
+ }
+
+ rxrpc_put_local(local);
+ rxrpc_free_skb(skb);
+ }
+
+ rxrpc_put_local(local);
+ _leave("");
+}
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index 09f584566e23..c0042807bfc6 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -542,11 +542,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
call->tx_pending = NULL;
copied = 0;
- if (len > iov_iter_count(&msg->msg_iter))
- len = iov_iter_count(&msg->msg_iter);
- while (len) {
- int copy;
-
+ do {
if (!skb) {
size_t size, chunk, max, space;
@@ -568,8 +564,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
max &= ~(call->conn->size_align - 1UL);
chunk = max;
- if (chunk > len && !more)
- chunk = len;
+ if (chunk > msg_data_left(msg) && !more)
+ chunk = msg_data_left(msg);
space = chunk + call->conn->size_align;
space &= ~(call->conn->size_align - 1UL);
@@ -612,23 +608,23 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
sp = rxrpc_skb(skb);
/* append next segment of data to the current buffer */
- copy = skb_tailroom(skb);
- ASSERTCMP(copy, >, 0);
- if (copy > len)
- copy = len;
- if (copy > sp->remain)
- copy = sp->remain;
-
- _debug("add");
- ret = skb_add_data(skb, &msg->msg_iter, copy);
- _debug("added");
- if (ret < 0)
- goto efault;
- sp->remain -= copy;
- skb->mark += copy;
- copied += copy;
-
- len -= copy;
+ if (msg_data_left(msg) > 0) {
+ int copy = skb_tailroom(skb);
+ ASSERTCMP(copy, >, 0);
+ if (copy > msg_data_left(msg))
+ copy = msg_data_left(msg);
+ if (copy > sp->remain)
+ copy = sp->remain;
+
+ _debug("add");
+ ret = skb_add_data(skb, &msg->msg_iter, copy);
+ _debug("added");
+ if (ret < 0)
+ goto efault;
+ sp->remain -= copy;
+ skb->mark += copy;
+ copied += copy;
+ }
/* check for the far side aborting the call or a network error
* occurring */
@@ -636,7 +632,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
goto call_aborted;
/* add the packet to the send queue if it's now full */
- if (sp->remain <= 0 || (!len && !more)) {
+ if (sp->remain <= 0 ||
+ (msg_data_left(msg) == 0 && !more)) {
struct rxrpc_connection *conn = call->conn;
uint32_t seq;
size_t pad;
@@ -666,7 +663,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
sp->hdr.serviceId = conn->service_id;
sp->hdr.flags = conn->out_clientflag;
- if (len == 0 && !more)
+ if (msg_data_left(msg) == 0 && !more)
sp->hdr.flags |= RXRPC_LAST_PACKET;
else if (CIRC_SPACE(call->acks_head, call->acks_tail,
call->acks_winsz) > 1)
@@ -682,10 +679,10 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
memcpy(skb->head, &sp->hdr,
sizeof(struct rxrpc_header));
- rxrpc_queue_packet(call, skb, !iov_iter_count(&msg->msg_iter) && !more);
+ rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
skb = NULL;
}
- }
+ } while (msg_data_left(msg) > 0);
success:
ret = copied;
diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c
index eb5b8445fef9..4cdbfb85686a 100644
--- a/net/sched/sch_ingress.c
+++ b/net/sched/sch_ingress.c
@@ -88,11 +88,19 @@ static int ingress_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* ------------------------------------------------------------- */
+static int ingress_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ net_inc_ingress_queue();
+
+ return 0;
+}
+
static void ingress_destroy(struct Qdisc *sch)
{
struct ingress_qdisc_data *p = qdisc_priv(sch);
tcf_destroy_chain(&p->filter_list);
+ net_dec_ingress_queue();
}
static int ingress_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -124,6 +132,7 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = {
.id = "ingress",
.priv_size = sizeof(struct ingress_qdisc_data),
.enqueue = ingress_enqueue,
+ .init = ingress_init,
.destroy = ingress_destroy,
.dump = ingress_dump,
.owner = THIS_MODULE,
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 179f1c8c0d8b..956ead2cab9a 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -560,8 +560,8 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
tfifo_dequeue:
skb = __skb_dequeue(&sch->q);
if (skb) {
-deliver:
qdisc_qstats_backlog_dec(sch, skb);
+deliver:
qdisc_unthrottled(sch);
qdisc_bstats_update(sch, skb);
return skb;
@@ -578,6 +578,7 @@ deliver:
rb_erase(p, &q->t_root);
sch->q.qlen--;
+ qdisc_qstats_backlog_dec(sch, skb);
skb->next = NULL;
skb->prev = NULL;
skb->tstamp = netem_skb_cb(skb)->tstamp_save;
diff --git a/net/socket.c b/net/socket.c
index 073809f4125f..5b0126234606 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -610,35 +610,27 @@ void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
}
EXPORT_SYMBOL(__sock_tx_timestamp);
-static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg,
- size_t size)
+static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
- return sock->ops->sendmsg(sock, msg, size);
+ int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
+ BUG_ON(ret == -EIOCBQUEUED);
+ return ret;
}
-int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
+int sock_sendmsg(struct socket *sock, struct msghdr *msg)
{
- int err = security_socket_sendmsg(sock, msg, size);
+ int err = security_socket_sendmsg(sock, msg,
+ msg_data_left(msg));
- return err ?: sock_sendmsg_nosec(sock, msg, size);
+ return err ?: sock_sendmsg_nosec(sock, msg);
}
EXPORT_SYMBOL(sock_sendmsg);
int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t size)
{
- mm_segment_t oldfs = get_fs();
- int result;
-
- set_fs(KERNEL_DS);
- /*
- * the following is safe, since for compiler definitions of kvec and
- * iovec are identical, yielding the same in-core layout and alignment
- */
- iov_iter_init(&msg->msg_iter, WRITE, (struct iovec *)vec, num, size);
- result = sock_sendmsg(sock, msg, size);
- set_fs(oldfs);
- return result;
+ iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size);
+ return sock_sendmsg(sock, msg);
}
EXPORT_SYMBOL(kernel_sendmsg);
@@ -755,12 +747,8 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
mm_segment_t oldfs = get_fs();
int result;
+ iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, vec, num, size);
set_fs(KERNEL_DS);
- /*
- * the following is safe, since for compiler definitions of kvec and
- * iovec are identical, yielding the same in-core layout and alignment
- */
- iov_iter_init(&msg->msg_iter, READ, (struct iovec *)vec, num, size);
result = sock_recvmsg(sock, msg, size, flags);
set_fs(oldfs);
return result;
@@ -808,10 +796,10 @@ static ssize_t sock_read_iter(struct kiocb *iocb, struct iov_iter *to)
if (iocb->ki_pos != 0)
return -ESPIPE;
- if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */
+ if (!iov_iter_count(to)) /* Match SYS5 behaviour */
return 0;
- res = sock_recvmsg(sock, &msg, iocb->ki_nbytes, msg.msg_flags);
+ res = sock_recvmsg(sock, &msg, iov_iter_count(to), msg.msg_flags);
*to = msg.msg_iter;
return res;
}
@@ -833,7 +821,7 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (sock->type == SOCK_SEQPACKET)
msg.msg_flags |= MSG_EOR;
- res = sock_sendmsg(sock, &msg, iocb->ki_nbytes);
+ res = sock_sendmsg(sock, &msg);
*from = msg.msg_iter;
return res;
}
@@ -1650,18 +1638,14 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
struct iovec iov;
int fput_needed;
- if (len > INT_MAX)
- len = INT_MAX;
- if (unlikely(!access_ok(VERIFY_READ, buff, len)))
- return -EFAULT;
+ err = import_single_range(WRITE, buff, len, &iov, &msg.msg_iter);
+ if (unlikely(err))
+ return err;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
- iov.iov_base = buff;
- iov.iov_len = len;
msg.msg_name = NULL;
- iov_iter_init(&msg.msg_iter, WRITE, &iov, 1, len);
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_namelen = 0;
@@ -1675,7 +1659,7 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len,
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
msg.msg_flags = flags;
- err = sock_sendmsg(sock, &msg, len);
+ err = sock_sendmsg(sock, &msg);
out_put:
fput_light(sock->file, fput_needed);
@@ -1710,26 +1694,22 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
int err, err2;
int fput_needed;
- if (size > INT_MAX)
- size = INT_MAX;
- if (unlikely(!access_ok(VERIFY_WRITE, ubuf, size)))
- return -EFAULT;
+ err = import_single_range(READ, ubuf, size, &iov, &msg.msg_iter);
+ if (unlikely(err))
+ return err;
sock = sockfd_lookup_light(fd, &err, &fput_needed);
if (!sock)
goto out;
msg.msg_control = NULL;
msg.msg_controllen = 0;
- iov.iov_len = size;
- iov.iov_base = ubuf;
- iov_iter_init(&msg.msg_iter, READ, &iov, 1, size);
/* Save some cycles and don't copy the address if not needed */
msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
/* We assume all kernel code knows the size of sockaddr_storage */
msg.msg_namelen = 0;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
- err = sock_recvmsg(sock, &msg, size, flags);
+ err = sock_recvmsg(sock, &msg, iov_iter_count(&msg.msg_iter), flags);
if (err >= 0 && addr != NULL) {
err2 = move_addr_to_user(&address,
@@ -1849,10 +1829,10 @@ struct used_address {
unsigned int name_len;
};
-static ssize_t copy_msghdr_from_user(struct msghdr *kmsg,
- struct user_msghdr __user *umsg,
- struct sockaddr __user **save_addr,
- struct iovec **iov)
+static int copy_msghdr_from_user(struct msghdr *kmsg,
+ struct user_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ struct iovec **iov)
{
struct sockaddr __user *uaddr;
struct iovec __user *uiov;
@@ -1898,13 +1878,8 @@ static ssize_t copy_msghdr_from_user(struct msghdr *kmsg,
kmsg->msg_iocb = NULL;
- err = rw_copy_check_uvector(save_addr ? READ : WRITE,
- uiov, nr_segs,
- UIO_FASTIOV, *iov, iov);
- if (err >= 0)
- iov_iter_init(&kmsg->msg_iter, save_addr ? READ : WRITE,
- *iov, nr_segs, err);
- return err;
+ return import_iovec(save_addr ? READ : WRITE, uiov, nr_segs,
+ UIO_FASTIOV, iov, &kmsg->msg_iter);
}
static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
@@ -1919,7 +1894,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
__attribute__ ((aligned(sizeof(__kernel_size_t))));
/* 20 is size of ipv6_pktinfo */
unsigned char *ctl_buf = ctl;
- int ctl_len, total_len;
+ int ctl_len;
ssize_t err;
msg_sys->msg_name = &address;
@@ -1929,8 +1904,7 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
else
err = copy_msghdr_from_user(msg_sys, msg, NULL, &iov);
if (err < 0)
- goto out_freeiov;
- total_len = err;
+ return err;
err = -ENOBUFS;
@@ -1977,10 +1951,10 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
used_address->name_len == msg_sys->msg_namelen &&
!memcmp(&used_address->name, msg_sys->msg_name,
used_address->name_len)) {
- err = sock_sendmsg_nosec(sock, msg_sys, total_len);
+ err = sock_sendmsg_nosec(sock, msg_sys);
goto out_freectl;
}
- err = sock_sendmsg(sock, msg_sys, total_len);
+ err = sock_sendmsg(sock, msg_sys);
/*
* If this is sendmmsg() and sending to current destination address was
* successful, remember it.
@@ -1996,8 +1970,7 @@ out_freectl:
if (ctl_buf != ctl)
sock_kfree_s(sock->sk, ctl_buf, ctl_len);
out_freeiov:
- if (iov != iovstack)
- kfree(iov);
+ kfree(iov);
return err;
}
@@ -2122,8 +2095,8 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
else
err = copy_msghdr_from_user(msg_sys, msg, &uaddr, &iov);
if (err < 0)
- goto out_freeiov;
- total_len = err;
+ return err;
+ total_len = iov_iter_count(&msg_sys->msg_iter);
cmsg_ptr = (unsigned long)msg_sys->msg_control;
msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
@@ -2161,8 +2134,7 @@ static int ___sys_recvmsg(struct socket *sock, struct user_msghdr __user *msg,
err = len;
out_freeiov:
- if (iov != iovstack)
- kfree(iov);
+ kfree(iov);
return err;
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index cc331b6cf573..0c8120229a03 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -257,7 +257,7 @@ static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
svc_set_cmsg_data(rqstp, cmh);
- if (sock_sendmsg(sock, &msg, 0) < 0)
+ if (sock_sendmsg(sock, &msg) < 0)
goto out;
}
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 85d1d4764612..526c4feb3b50 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -238,11 +238,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
skb->sp->xvec[skb->sp->len++] = x;
- if (xfrm_tunnel_check(skb, x, family)) {
- XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
- goto drop;
- }
-
spin_lock(&x->lock);
if (unlikely(x->km.state == XFRM_STATE_ACQ)) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMACQUIREERROR);
@@ -271,6 +266,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
spin_unlock(&x->lock);
+ if (xfrm_tunnel_check(skb, x, family)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMODEERROR);
+ goto drop;
+ }
+
seq_hi = htonl(xfrm_replay_seqhi(x, seq));
XFRM_SKB_CB(skb)->seq.input.low = seq;