From d6c416148545bd99d3cc05e672460168245cc156 Mon Sep 17 00:00:00 2001 From: Chang Xiangzhong Date: Thu, 21 Nov 2013 22:56:28 +0100 Subject: net: sctp: find the correct highest_new_tsn in sack Function sctp_check_transmitted(transport t, ...) would iterate all of transport->transmitted queue and looking for the highest __newly__ acked tsn. The original algorithm would depend on the order of the assoc->transport_list (in function sctp_outq_sack line 1215 - 1226). The result might not be the expected due to the order of the tranport_list. Solution: checking if the exising is smaller than the new one before assigning Signed-off-by: Chang Xiangzhong Acked-by: Vlad Yasevich Acked-by: Neil Horman Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 94df75877869..abb6db008df1 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -1391,7 +1391,8 @@ static void sctp_check_transmitted(struct sctp_outq *q, */ if (!tchunk->tsn_gap_acked) { tchunk->tsn_gap_acked = 1; - *highest_new_tsn_in_sack = tsn; + if (TSN_lt(*highest_new_tsn_in_sack, tsn)) + *highest_new_tsn_in_sack = tsn; bytes_acked += sctp_data_size(tchunk); if (!tchunk->transport) migrate_bytes += sctp_data_size(tchunk); -- cgit v1.2.3 From cc5c00bbb44c5d68b883aa5cb9d01514a2525d94 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 22 Nov 2013 10:31:29 +0800 Subject: gro: Only verify TCP checksums for candidates In some cases we may receive IP packets that are longer than their stated lengths. Such packets are never merged in GRO. However, we may end up computing their checksums incorrectly and end up allowing packets with a bogus checksum enter our stack with the checksum status set as verified. Since such packets are rare and not performance-critical, this patch simply skips the checksum verification for them. Reported-by: Alexander Duyck Signed-off-by: Herbert Xu Acked-by: Alexander Duyck Thanks, Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 5 +++++ net/ipv6/tcpv6_offload.c | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'net') diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index a2b68a108eae..55aeec930140 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -276,6 +276,10 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff * __wsum wsum; __sum16 sum; + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (NAPI_GRO_CB(skb)->flush) + goto skip_csum; + switch (skb->ip_summed) { case CHECKSUM_COMPLETE: if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, @@ -301,6 +305,7 @@ flush: break; } +skip_csum: return tcp_gro_receive(head, skb); } diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index c1097c798900..71923d14127a 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -39,6 +39,10 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, __wsum wsum; __sum16 sum; + /* Don't bother verifying checksum if we're going to flush anyway. */ + if (NAPI_GRO_CB(skb)->flush) + goto skip_csum; + switch (skb->ip_summed) { case CHECKSUM_COMPLETE: if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, @@ -65,6 +69,7 @@ flush: break; } +skip_csum: return tcp_gro_receive(head, skb); } -- cgit v1.2.3 From b8ee93ba80b5a0b6c3c06b65c34dd1276f16c047 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 22 Nov 2013 10:32:11 +0800 Subject: gro: Clean up tcpX_gro_receive checksum verification This patch simplifies the checksum verification in tcpX_gro_receive by reusing the CHECKSUM_COMPLETE code for CHECKSUM_NONE. All it does for CHECKSUM_NONE is compute the partial checksum and then treat it as if it came from the hardware (CHECKSUM_COMPLETE). Signed-off-by: Herbert Xu Cheers, Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_offload.c | 26 ++++++++++---------------- net/ipv6/tcpv6_offload.c | 27 ++++++++++----------------- 2 files changed, 20 insertions(+), 33 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 55aeec930140..05606353c7e7 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -274,35 +274,29 @@ static struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff * { const struct iphdr *iph = skb_gro_network_header(skb); __wsum wsum; - __sum16 sum; /* Don't bother verifying checksum if we're going to flush anyway. */ if (NAPI_GRO_CB(skb)->flush) goto skip_csum; + wsum = skb->csum; + switch (skb->ip_summed) { + case CHECKSUM_NONE: + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), + 0); + + /* fall through */ + case CHECKSUM_COMPLETE: if (!tcp_v4_check(skb_gro_len(skb), iph->saddr, iph->daddr, - skb->csum)) { + wsum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; } -flush: + NAPI_GRO_CB(skb)->flush = 1; return NULL; - - case CHECKSUM_NONE: - wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb_gro_len(skb), IPPROTO_TCP, 0); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; } skip_csum: diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 71923d14127a..6d18157dc32c 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -37,36 +37,29 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, { const struct ipv6hdr *iph = skb_gro_network_header(skb); __wsum wsum; - __sum16 sum; /* Don't bother verifying checksum if we're going to flush anyway. */ if (NAPI_GRO_CB(skb)->flush) goto skip_csum; + wsum = skb->csum; + switch (skb->ip_summed) { + case CHECKSUM_NONE: + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), + wsum); + + /* fall through */ + case CHECKSUM_COMPLETE: if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, - skb->csum)) { + wsum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; break; } -flush: + NAPI_GRO_CB(skb)->flush = 1; return NULL; - - case CHECKSUM_NONE: - wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, - skb_gro_len(skb), - IPPROTO_TCP, 0)); - sum = csum_fold(skb_checksum(skb, - skb_gro_offset(skb), - skb_gro_len(skb), - wsum)); - if (sum) - goto flush; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; } skip_csum: -- cgit v1.2.3 From fb10f802b0fb76079612cb78505cbc9ad81e683b Mon Sep 17 00:00:00 2001 From: Gao feng Date: Fri, 22 Nov 2013 14:48:40 +0800 Subject: tcp_memcg: remove useless var old_lim nobody needs it. remove. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/ipv4/tcp_memcontrol.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 03e9154f7e68..269a89ecd2f4 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -60,7 +60,6 @@ EXPORT_SYMBOL(tcp_destroy_cgroup); static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) { struct cg_proto *cg_proto; - u64 old_lim; int i; int ret; @@ -71,7 +70,6 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val) if (val > RES_COUNTER_MAX) val = RES_COUNTER_MAX; - old_lim = res_counter_read_u64(&cg_proto->memory_allocated, RES_LIMIT); ret = res_counter_set_limit(&cg_proto->memory_allocated, val); if (ret) return ret; -- cgit v1.2.3 From ca15a078bd907df5fc1c009477869c5cbde3b753 Mon Sep 17 00:00:00 2001 From: Oussama Ghorbel Date: Fri, 22 Nov 2013 16:23:20 +0100 Subject: sit: generate icmpv6 error when receiving icmpv4 error Send icmpv6 error with type "destination unreachable" and code "address unreachable" when receiving icmpv4 error and sufficient data bytes are available This patch enhances the compliance of sit tunnel with section 3.4 of rfc 4213 Signed-off-by: Oussama Ghorbel Signed-off-by: David S. Miller --- net/ipv6/sit.c | 44 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 1b4a4a953675..8435267836a7 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -478,14 +478,44 @@ static void ipip6_tunnel_uninit(struct net_device *dev) dev_put(dev); } +/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH + * if sufficient data bytes are available + */ +static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb) +{ + const struct iphdr *iph = (const struct iphdr *) skb->data; + struct rt6_info *rt; + struct sk_buff *skb2; + + if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(struct ipv6hdr) + 8)) + return 1; + + skb2 = skb_clone(skb, GFP_ATOMIC); + + if (!skb2) + return 1; + + skb_dst_drop(skb2); + skb_pull(skb2, iph->ihl * 4); + skb_reset_network_header(skb2); + + rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); + + if (rt && rt->dst.dev) + skb2->dev = rt->dst.dev; + + icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); + + if (rt) + ip6_rt_put(rt); + + kfree_skb(skb2); + + return 0; +} static int ipip6_err(struct sk_buff *skb, u32 info) { - -/* All the routers (except for Linux) return only - 8 bytes of packet payload. It means, that precise relaying of - ICMP in the real Internet is absolutely infeasible. - */ const struct iphdr *iph = (const struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; @@ -500,7 +530,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) case ICMP_DEST_UNREACH: switch (code) { case ICMP_SR_FAILED: - case ICMP_PORT_UNREACH: /* Impossible event. */ return 0; default: @@ -545,6 +574,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info) goto out; err = 0; + if (!ipip6_err_gen_icmpv6_unreach(skb)) + goto out; + if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; -- cgit v1.2.3 From 85fbaa75037d0b6b786ff18658ddf0b4014ce2a4 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 23 Nov 2013 00:46:12 +0100 Subject: inet: fix addr_len/msg->msg_namelen assignment in recv_error and rxpmtu functions Commit bceaa90240b6019ed73b49965eac7d167610be69 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") conditionally updated addr_len if the msg_name is written to. The recv_error and rxpmtu functions relied on the recvmsg functions to set up addr_len before. As this does not happen any more we have to pass addr_len to those functions as well and set it to the size of the corresponding sockaddr length. This broke traceroute and such. Fixes: bceaa90240b6 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") Reported-by: Brad Spengler Reported-by: Tom Labanowski Cc: mpb Cc: David S. Miller Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- include/net/ipv6.h | 6 ++++-- include/net/ping.h | 3 ++- net/ipv4/ip_sockglue.c | 3 ++- net/ipv4/ping.c | 5 +++-- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv6/datagram.c | 7 +++++-- net/ipv6/ping.c | 3 ++- net/ipv6/raw.c | 4 ++-- net/ipv6/udp.c | 4 ++-- net/l2tp/l2tp_ip6.c | 2 +- 12 files changed, 26 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/ip.h b/include/net/ip.h index 217bc5bfc6c6..5a25f36fe3a7 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -473,7 +473,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct sock *)); -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len); +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 2a5f668cd683..eb198acaac1d 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -776,8 +776,10 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len); -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len); +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload); void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); diff --git a/include/net/ping.h b/include/net/ping.h index 3f67704f3747..90f48417b03d 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -31,7 +31,8 @@ /* Compatibility glue so we can support IPv6 when it's compiled as a module */ struct pingv6_ops { - int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len); + int (*ipv6_recv_error)(struct sock *sk, struct msghdr *msg, int len, + int *addr_len); int (*ip6_datagram_recv_ctl)(struct sock *sk, struct msghdr *msg, struct sk_buff *skb); int (*icmpv6_err_convert)(u8 type, u8 code, int *err); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 3f858266fa7e..ddf32a6bc415 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -386,7 +386,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf /* * Handle MSG_ERRQUEUE */ -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct sock_exterr_skb *serr; struct sk_buff *skb, *skb2; @@ -423,6 +423,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) serr->addr_offset); sin->sin_port = serr->port; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 876c6ca2d8f9..840cf1b9e6ee 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -841,10 +841,11 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_ERRQUEUE) { if (family == AF_INET) { - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); #if IS_ENABLED(CONFIG_IPV6) } else if (family == AF_INET6) { - return pingv6_ops.ipv6_recv_error(sk, msg, len); + return pingv6_ops.ipv6_recv_error(sk, msg, len, + addr_len); #endif } } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 5cb8ddb505ee..23c3e5b5bb53 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -697,7 +697,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; if (flags & MSG_ERRQUEUE) { - err = ip_recv_error(sk, msg, len); + err = ip_recv_error(sk, msg, len, addr_len); goto out; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5944d7d668dd..44dfaa09b584 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1236,7 +1236,7 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, bool slow; if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index a454b0ff57c7..d690204a0275 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -318,7 +318,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) /* * Handle MSG_ERRQUEUE */ -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; @@ -369,6 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) &sin->sin6_addr); sin->sin6_scope_id = 0; } + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); @@ -423,7 +424,8 @@ EXPORT_SYMBOL_GPL(ipv6_recv_error); /* * Handle IPV6_RECVPATHMTU */ -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; @@ -457,6 +459,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) sin->sin6_port = 0; sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr; + *addr_len = sizeof(*sin); } put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 8815e31a87fe..a83243c3d656 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -57,7 +57,8 @@ static struct inet_protosw pingv6_protosw = { /* Compatibility glue so we can support IPv6 when it's compiled as a module */ -static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { return -EAFNOSUPPORT; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e24ff1df0401..7fb4e14c467f 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -466,10 +466,10 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, return -EOPNOTSUPP; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 81eb8cf8389b..bcd5699313c3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -393,10 +393,10 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, bool slow; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index cfd65304be60..d9b437e55007 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -665,7 +665,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, *addr_len = sizeof(*lsa); if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) -- cgit v1.2.3 From 1fa4c710b6fe7b0aac9907240291b6fe6aafc3b8 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 23 Nov 2013 07:22:33 +0100 Subject: ipv6: fix leaking uninitialized port number of offender sockaddr Offenders don't have port numbers, so set it to 0. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/datagram.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index d690204a0275..8dfe1f4d3c1a 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -378,6 +378,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; + sin->sin6_port = 0; if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) -- cgit v1.2.3 From 4d0820cf6a55d72350cb2d24a4504f62fbde95d9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 23 Nov 2013 12:59:20 -0800 Subject: sch_tbf: handle too small burst If a too small burst is inadvertently set on TBF, we might trigger a bug in tbf_segment(), as 'skb' instead of 'segs' was used in a qdisc_reshape_fail() call. tc qdisc add dev eth0 root handle 1: tbf latency 50ms burst 1KB rate 50mbit Fix the bug, and add a warning, as such configuration is not going to work anyway for non GSO packets. (For some reason, one has to use a burst >= 1520 to get a working configuration, even with old kernels. This is a probable iproute2/tc bug) Based on a report and initial patch from Yang Yingliang Fixes: e43ac79a4bc6 ("sch_tbf: segment too big GSO packets") Signed-off-by: Eric Dumazet Reported-by: Yang Yingliang Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 68f98595819c..a6090051c5db 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -21,6 +21,7 @@ #include #include #include +#include /* Simple Token Bucket Filter. @@ -117,6 +118,22 @@ struct tbf_sched_data { }; +/* + * Return length of individual segments of a gso packet, + * including all headers (MAC, IP, TCP/UDP) + */ +static unsigned int skb_gso_seglen(const struct sk_buff *skb) +{ + unsigned int hdr_len = skb_transport_header(skb) - skb_mac_header(skb); + const struct skb_shared_info *shinfo = skb_shinfo(skb); + + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) + hdr_len += tcp_hdrlen(skb); + else + hdr_len += sizeof(struct udphdr); + return hdr_len + shinfo->gso_size; +} + /* GSO packet is too big, segment it so that tbf can transmit * each segment in time */ @@ -136,12 +153,8 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch) while (segs) { nskb = segs->next; segs->next = NULL; - if (likely(segs->len <= q->max_size)) { - qdisc_skb_cb(segs)->pkt_len = segs->len; - ret = qdisc_enqueue(segs, q->qdisc); - } else { - ret = qdisc_reshape_fail(skb, sch); - } + qdisc_skb_cb(segs)->pkt_len = segs->len; + ret = qdisc_enqueue(segs, q->qdisc); if (ret != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(ret)) sch->qstats.drops++; @@ -163,7 +176,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) int ret; if (qdisc_pkt_len(skb) > q->max_size) { - if (skb_is_gso(skb)) + if (skb_is_gso(skb) && skb_gso_seglen(skb) <= q->max_size) return tbf_segment(skb, sch); return qdisc_reshape_fail(skb, sch); } @@ -319,6 +332,11 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt) if (max_size < 0) goto done; + if (max_size < psched_mtu(qdisc_dev(sch))) + pr_warn_ratelimited("sch_tbf: burst %u is lower than device %s mtu (%u) !\n", + max_size, qdisc_dev(sch)->name, + psched_mtu(qdisc_dev(sch))); + if (q->qdisc != &noop_qdisc) { err = fifo_set_limit(q->qdisc, qopt->limit); if (err) -- cgit v1.2.3 From 0f0e2159c0c101426b705d70f43b9f423d51c869 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Sat, 23 Nov 2013 13:01:50 +0100 Subject: genetlink: Fix uninitialized variable in genl_validate_assign_mc_groups() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/netlink/genetlink.c: In function ‘genl_validate_assign_mc_groups’: net/netlink/genetlink.c:217: warning: ‘err’ may be used uninitialized in this function Commit 2a94fe48f32ccf7321450a2cc07f2b724a444e5b ("genetlink: make multicast groups const, prevent abuse") split genl_register_mc_group() in multiple functions, but dropped the initialization of err. Initialize err to zero to fix this. Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- net/netlink/genetlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 4518a57aa5fe..803206e82450 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -214,7 +214,7 @@ static int genl_validate_assign_mc_groups(struct genl_family *family) { int first_id; int n_groups = family->n_mcgrps; - int err, i; + int err = 0, i; bool groups_allocated = false; if (!n_groups) -- cgit v1.2.3 From 5e53e689b737526308db2b5c9f56e9d0371a1676 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 24 Nov 2013 21:09:26 +0100 Subject: genetlink/pmcraid: use proper genetlink multicast API The pmcraid driver is abusing the genetlink API and is using its family ID as the multicast group ID, which is invalid and may belong to somebody else (and likely will.) Make it use the correct API, but since this may already be used as-is by userspace, reserve a family ID for this code and also reserve that group ID to not break userspace assumptions. My previous patch broke event delivery in the driver as I missed that it wasn't using the right API and forgot to update it later in my series. While changing this, I noticed that the genetlink code could use the static group ID instead of a strcmp(), so also do that for the VFS_DQUOT family. Cc: Anil Ravindranath Cc: "James E.J. Bottomley" Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- drivers/scsi/pmcraid.c | 20 +++++++++++++++----- include/uapi/linux/genetlink.h | 1 + net/netlink/genetlink.c | 11 +++++++++-- 3 files changed, 25 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index bd6f743d87a7..892ea6161376 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -1404,11 +1404,22 @@ enum { }; #define PMCRAID_AEN_CMD_MAX (__PMCRAID_AEN_CMD_MAX - 1) +static struct genl_multicast_group pmcraid_mcgrps[] = { + { .name = "events", /* not really used - see ID discussion below */ }, +}; + static struct genl_family pmcraid_event_family = { - .id = GENL_ID_GENERATE, + /* + * Due to prior multicast group abuse (the code having assumed that + * the family ID can be used as a multicast group ID) we need to + * statically allocate a family (and thus group) ID. + */ + .id = GENL_ID_PMCRAID, .name = "pmcraid", .version = 1, - .maxattr = PMCRAID_AEN_ATTR_MAX + .maxattr = PMCRAID_AEN_ATTR_MAX, + .mcgrps = pmcraid_mcgrps, + .n_mcgrps = ARRAY_SIZE(pmcraid_mcgrps), }; /** @@ -1511,9 +1522,8 @@ static int pmcraid_notify_aen( return result; } - result = - genlmsg_multicast(&pmcraid_event_family, skb, 0, - pmcraid_event_family.id, GFP_ATOMIC); + result = genlmsg_multicast(&pmcraid_event_family, skb, + 0, 0, GFP_ATOMIC); /* If there are no listeners, genlmsg_multicast may return non-zero * value. diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h index 1af72d8228e0..c3363ba1ae05 100644 --- a/include/uapi/linux/genetlink.h +++ b/include/uapi/linux/genetlink.h @@ -28,6 +28,7 @@ struct genlmsghdr { #define GENL_ID_GENERATE 0 #define GENL_ID_CTRL NLMSG_MIN_TYPE #define GENL_ID_VFS_DQUOT (NLMSG_MIN_TYPE + 1) +#define GENL_ID_PMCRAID (NLMSG_MIN_TYPE + 2) /************************************************************************** * Controller diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 803206e82450..713671ae45af 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -74,9 +74,12 @@ static struct list_head family_ht[GENL_FAM_TAB_SIZE]; * Bit 17 is marked as already used since the VFS quota code * also abused this API and relied on family == group ID, we * cater to that by giving it a static family and group ID. + * Bit 18 is marked as already used since the PMCRAID driver + * did the same thing as the VFS quota code (maybe copied?) */ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) | - BIT(GENL_ID_VFS_DQUOT); + BIT(GENL_ID_VFS_DQUOT) | + BIT(GENL_ID_PMCRAID); static unsigned long *mc_groups = &mc_group_start; static unsigned long mc_groups_longs = 1; @@ -139,6 +142,7 @@ static u16 genl_generate_id(void) for (i = 0; i <= GENL_MAX_ID - GENL_MIN_ID; i++) { if (id_gen_idx != GENL_ID_VFS_DQUOT && + id_gen_idx != GENL_ID_PMCRAID && !genl_family_find_byid(id_gen_idx)) return id_gen_idx; if (++id_gen_idx > GENL_MAX_ID) @@ -236,9 +240,12 @@ static int genl_validate_assign_mc_groups(struct genl_family *family) } else if (strcmp(family->name, "NET_DM") == 0) { first_id = 1; BUG_ON(n_groups != 1); - } else if (strcmp(family->name, "VFS_DQUOT") == 0) { + } else if (family->id == GENL_ID_VFS_DQUOT) { first_id = GENL_ID_VFS_DQUOT; BUG_ON(n_groups != 1); + } else if (family->id == GENL_ID_PMCRAID) { + first_id = GENL_ID_PMCRAID; + BUG_ON(n_groups != 1); } else { groups_allocated = true; err = genl_allocate_reserve_groups(n_groups, &first_id); -- cgit v1.2.3 From 6eabca54d6781f61c7318517c1463a098acb7a87 Mon Sep 17 00:00:00 2001 From: Xufeng Zhang Date: Mon, 25 Nov 2013 11:26:57 +0800 Subject: sctp: Restore 'resent' bit to avoid retransmitted chunks for RTT measurements Currently retransmitted DATA chunks could also be used for RTT measurements since there are no flag to identify whether the transmitted DATA chunk is a new one or a retransmitted one. This problem is introduced by commit ae19c5486 ("sctp: remove 'resent' bit from the chunk") which inappropriately removed the 'resent' bit completely, instead of doing this, we should set the resent bit only for the retransmitted DATA chunks. Signed-off-by: Xufeng Zhang Acked-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/output.c | 3 ++- net/sctp/outqueue.c | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 2174d8da0770..ea0ca5f6e629 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -629,6 +629,7 @@ struct sctp_chunk { #define SCTP_NEED_FRTX 0x1 #define SCTP_DONT_FRTX 0x2 __u16 rtt_in_progress:1, /* This chunk used for RTT calc? */ + resent:1, /* Has this chunk ever been resent. */ has_tsn:1, /* Does this chunk have a TSN yet? */ has_ssn:1, /* Does this chunk have a SSN yet? */ singleton:1, /* Only chunk in the packet? */ diff --git a/net/sctp/output.c b/net/sctp/output.c index e650978daf27..0e2644d0a773 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -474,10 +474,11 @@ int sctp_packet_transmit(struct sctp_packet *packet) * for a given destination transport address. */ - if (!tp->rto_pending) { + if (!chunk->resent && !tp->rto_pending) { chunk->rtt_in_progress = 1; tp->rto_pending = 1; } + has_data = 1; } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index abb6db008df1..f51ba985a36e 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -446,6 +446,8 @@ void sctp_retransmit_mark(struct sctp_outq *q, transport->rto_pending = 0; } + chunk->resent = 1; + /* Move the chunk to the retransmit queue. The chunks * on the retransmit queue are always kept in order. */ @@ -1375,6 +1377,7 @@ static void sctp_check_transmitted(struct sctp_outq *q, * instance). */ if (!tchunk->tsn_gap_acked && + !tchunk->resent && tchunk->rtt_in_progress) { tchunk->rtt_in_progress = 0; rtt = jiffies - tchunk->sent_at; -- cgit v1.2.3 From 66028310aedc782e3a9a221f1a9ec12b7e587e50 Mon Sep 17 00:00:00 2001 From: Gao feng Date: Mon, 25 Nov 2013 17:21:24 +0800 Subject: sit: use kfree_skb to replace dev_kfree_skb In failure case, we should use kfree_skb not dev_kfree_skb to free skbuff, dev_kfree_skb is defined as consume_skb. Trace takes advantage of this point. Signed-off-by: Gao feng Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/sit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 8435267836a7..366fbba3359a 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -951,7 +951,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (!new_skb) { ip_rt_put(rt); dev->stats.tx_dropped++; - dev_kfree_skb(skb); + kfree_skb(skb); return NETDEV_TX_OK; } if (skb->sk) @@ -977,7 +977,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tx_error_icmp: dst_link_failure(skb); tx_error: - dev_kfree_skb(skb); + kfree_skb(skb); out: dev->stats.tx_errors++; return NETDEV_TX_OK; @@ -1017,7 +1017,7 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, tx_err: dev->stats.tx_errors++; - dev_kfree_skb(skb); + kfree_skb(skb); return NETDEV_TX_OK; } -- cgit v1.2.3 From 39af0c409e8c06b51caf7dc43e49ef96ae790a55 Mon Sep 17 00:00:00 2001 From: Baker Zhang Date: Tue, 26 Nov 2013 09:29:15 +0800 Subject: net: remove outdated comment for ipv4 and ipv6 protocol handler since f9242b6b28d61295f2bf7e8adfb1060b382e5381 inet: Sanitize inet{,6} protocol demux. there are not pretended hash tables for ipv4 or ipv6 protocol handler. Signed-off-by: Baker Zhang Signed-off-by: David S. Miller --- net/ipv4/protocol.c | 8 -------- net/ipv6/protocol.c | 4 ---- 2 files changed, 12 deletions(-) (limited to 'net') diff --git a/net/ipv4/protocol.c b/net/ipv4/protocol.c index ce848461acbb..46d6a1c923a8 100644 --- a/net/ipv4/protocol.c +++ b/net/ipv4/protocol.c @@ -31,10 +31,6 @@ const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS] __read_mostly; const struct net_offload __rcu *inet_offloads[MAX_INET_PROTOS] __read_mostly; -/* - * Add a protocol handler to the hash tables - */ - int inet_add_protocol(const struct net_protocol *prot, unsigned char protocol) { if (!prot->netns_ok) { @@ -55,10 +51,6 @@ int inet_add_offload(const struct net_offload *prot, unsigned char protocol) } EXPORT_SYMBOL(inet_add_offload); -/* - * Remove a protocol from the hash tables. - */ - int inet_del_protocol(const struct net_protocol *prot, unsigned char protocol) { int ret; diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 22d1bd4670da..e048cf1bb6a2 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -36,10 +36,6 @@ int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol } EXPORT_SYMBOL(inet6_add_protocol); -/* - * Remove a protocol from the hash tables. - */ - int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol) { int ret; -- cgit v1.2.3 From ec6f809ff6f19fafba3212f6aff0dda71dfac8e8 Mon Sep 17 00:00:00 2001 From: Veaceslav Falico Date: Fri, 29 Nov 2013 09:53:23 +0100 Subject: af_packet: block BH in prb_shutdown_retire_blk_timer() Currently we're using plain spin_lock() in prb_shutdown_retire_blk_timer(), however the timer might fire right in the middle and thus try to re-aquire the same spinlock, leaving us in a endless loop. To fix that, use the spin_lock_bh() to block it. Fixes: f6fb8f100b80 ("af-packet: TPACKET_V3 flexible buffer implementation.") CC: "David S. Miller" CC: Daniel Borkmann CC: Willem de Bruijn CC: Phil Sutter CC: Eric Dumazet Reported-by: Jan Stancek Tested-by: Jan Stancek Signed-off-by: Veaceslav Falico Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/packet/af_packet.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ac27c86ef6d1..ba2548bd85bf 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -439,9 +439,9 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po, pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; - spin_lock(&rb_queue->lock); + spin_lock_bh(&rb_queue->lock); pkc->delete_blk_timer = 1; - spin_unlock(&rb_queue->lock); + spin_unlock_bh(&rb_queue->lock); prb_del_retire_blk_timer(pkc); } -- cgit v1.2.3 From db31c55a6fb245fdbb752a2ca4aefec89afabb06 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 27 Nov 2013 15:40:21 +0300 Subject: net: clamp ->msg_namelen instead of returning an error If kmsg->msg_namelen > sizeof(struct sockaddr_storage) then in the original code that would lead to memory corruption in the kernel if you had audit configured. If you didn't have audit configured it was harmless. There are some programs such as beta versions of Ruby which use too large of a buffer and returning an error code breaks them. We should clamp the ->msg_namelen value instead. Fixes: 1661bf364ae9 ("net: heap overflow in __audit_sockaddr()") Reported-by: Eric Wong Signed-off-by: Dan Carpenter Tested-by: Eric Wong Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/compat.c | 2 +- net/socket.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/compat.c b/net/compat.c index 618c6a8a911b..dd32e34c1e2c 100644 --- a/net/compat.c +++ b/net/compat.c @@ -72,7 +72,7 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) - return -EINVAL; + kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_name = compat_ptr(tmp1); kmsg->msg_iov = compat_ptr(tmp2); kmsg->msg_control = compat_ptr(tmp3); diff --git a/net/socket.c b/net/socket.c index 0b18693f2be6..e83c416708af 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1973,7 +1973,7 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) return -EFAULT; if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) - return -EINVAL; + kmsg->msg_namelen = sizeof(struct sockaddr_storage); return 0; } -- cgit v1.2.3 From d3f7d56a7a4671d395e8af87071068a195257bf6 Mon Sep 17 00:00:00 2001 From: Shawn Landden Date: Sun, 24 Nov 2013 22:36:28 -0800 Subject: net: update consumers of MSG_MORE to recognize MSG_SENDPAGE_NOTLAST Commit 35f9c09fe (tcp: tcp_sendpages() should call tcp_push() once) added an internal flag MSG_SENDPAGE_NOTLAST, similar to MSG_MORE. algif_hash, algif_skcipher, and udp used MSG_MORE from tcp_sendpages() and need to see the new flag as identical to MSG_MORE. This fixes sendfile() on AF_ALG. v3: also fix udp Cc: Tom Herbert Cc: Eric Dumazet Cc: David S. Miller Cc: # 3.4.x + 3.2.x Reported-and-tested-by: Shawn Landden Original-patch: Richard Weinberger Signed-off-by: Shawn Landden Signed-off-by: David S. Miller --- crypto/algif_hash.c | 3 +++ crypto/algif_skcipher.c | 3 +++ net/ipv4/udp.c | 3 +++ 3 files changed, 9 insertions(+) (limited to 'net') diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index ef5356cd280a..850246206b12 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -114,6 +114,9 @@ static ssize_t hash_sendpage(struct socket *sock, struct page *page, struct hash_ctx *ctx = ask->private; int err; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + lock_sock(sk); sg_init_table(ctx->sgl.sg, 1); sg_set_page(ctx->sgl.sg, page, size, offset); diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index 6a6dfc062d2a..a19c027b29bd 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -378,6 +378,9 @@ static ssize_t skcipher_sendpage(struct socket *sock, struct page *page, struct skcipher_sg_list *sgl; int err = -EINVAL; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + lock_sock(sk); if (!ctx->more && ctx->used) goto unlock; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 44dfaa09b584..bf2b85b25491 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1098,6 +1098,9 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, struct udp_sock *up = udp_sk(sk); int ret; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + if (!up->pending) { struct msghdr msg = { .msg_flags = flags|MSG_MORE }; -- cgit v1.2.3 From f1d8cba61c3c4b1eb88e507249c4cb8d635d9a76 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 28 Nov 2013 09:51:22 -0800 Subject: inet: fix possible seqlock deadlocks In commit c9e9042994d3 ("ipv4: fix possible seqlock deadlock") I left another places where IP_INC_STATS_BH() were improperly used. udp_sendmsg(), ping_v4_sendmsg() and tcp_v4_connect() are called from process context, not from softirq context. This was detected by lockdep seqlock support. Reported-by: jongman heo Fixes: 584bdf8cbdf6 ("[IPV4]: Fix "ipOutNoRoutes" counter error for TCP and UDP") Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Signed-off-by: Eric Dumazet Cc: Hannes Frederic Sowa Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv4/ping.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv4/udp.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 840cf1b9e6ee..242e7f4ed6f4 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -772,7 +772,7 @@ int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 59a6f8b90cd9..067213924751 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -177,7 +177,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) - IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return err; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index bf2b85b25491..44f6a20fa29d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -999,7 +999,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } -- cgit v1.2.3 From 7f88c6b23afbd31545c676dea77ba9593a1a14bf Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 29 Nov 2013 06:39:44 +0100 Subject: ipv6: fix possible seqlock deadlock in ip6_finish_output2 IPv6 stats are 64 bits and thus are protected with a seqlock. By not disabling bottom-half we could deadlock here if we don't disable bh and a softirq reentrantly updates the same mib. Cc: Eric Dumazet Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 59df872e2f4d..4acdb63495db 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -116,8 +116,8 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_unlock_bh(); - IP6_INC_STATS_BH(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } -- cgit v1.2.3 From 213e3bc723e53af0976421d2808ea3f6cc821c56 Mon Sep 17 00:00:00 2001 From: Arvid Brodin Date: Fri, 29 Nov 2013 23:37:07 +0100 Subject: net/hsr: Very small fix of comment style. Signed-off-by: Arvid Brodin Signed-off-by: David S. Miller --- net/hsr/hsr_framereg.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 003f5bb3acd2..4bdab1521878 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -288,7 +288,8 @@ void hsr_addr_subst_dest(struct hsr_priv *hsr_priv, struct ethhdr *ethhdr, static bool seq_nr_after(u16 a, u16 b) { /* Remove inconsistency where - * seq_nr_after(a, b) == seq_nr_before(a, b) */ + * seq_nr_after(a, b) == seq_nr_before(a, b) + */ if ((int) b - a == 32768) return false; -- cgit v1.2.3 From 98bf8362220af717862b8262b21348774890b7b4 Mon Sep 17 00:00:00 2001 From: Arvid Brodin Date: Fri, 29 Nov 2013 23:38:16 +0100 Subject: net/hsr: Support iproute print_opt ('ip -details ...') This implements the rtnl_link_ops fill_info routine for HSR. Signed-off-by: Arvid Brodin Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 4 +++- net/hsr/hsr_netlink.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b78566f59aba..6db460121f84 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -488,7 +488,9 @@ enum { IFLA_HSR_UNSPEC, IFLA_HSR_SLAVE1, IFLA_HSR_SLAVE2, - IFLA_HSR_MULTICAST_SPEC, + IFLA_HSR_MULTICAST_SPEC, /* Last byte of supervision addr */ + IFLA_HSR_SUPERVISION_ADDR, /* Supervision frame multicast addr */ + IFLA_HSR_SEQ_NR, __IFLA_HSR_MAX, }; diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c index 5325af85eea6..01a5261ac7a5 100644 --- a/net/hsr/hsr_netlink.c +++ b/net/hsr/hsr_netlink.c @@ -23,6 +23,8 @@ static const struct nla_policy hsr_policy[IFLA_HSR_MAX + 1] = { [IFLA_HSR_SLAVE1] = { .type = NLA_U32 }, [IFLA_HSR_SLAVE2] = { .type = NLA_U32 }, [IFLA_HSR_MULTICAST_SPEC] = { .type = NLA_U8 }, + [IFLA_HSR_SUPERVISION_ADDR] = { .type = NLA_BINARY, .len = ETH_ALEN }, + [IFLA_HSR_SEQ_NR] = { .type = NLA_U16 }, }; @@ -59,6 +61,31 @@ static int hsr_newlink(struct net *src_net, struct net_device *dev, return hsr_dev_finalize(dev, link, multicast_spec); } +static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct hsr_priv *hsr_priv; + + hsr_priv = netdev_priv(dev); + + if (hsr_priv->slave[0]) + if (nla_put_u32(skb, IFLA_HSR_SLAVE1, hsr_priv->slave[0]->ifindex)) + goto nla_put_failure; + + if (hsr_priv->slave[1]) + if (nla_put_u32(skb, IFLA_HSR_SLAVE2, hsr_priv->slave[1]->ifindex)) + goto nla_put_failure; + + if (nla_put(skb, IFLA_HSR_SUPERVISION_ADDR, ETH_ALEN, + hsr_priv->sup_multicast_addr) || + nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr_priv->sequence_nr)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static struct rtnl_link_ops hsr_link_ops __read_mostly = { .kind = "hsr", .maxtype = IFLA_HSR_MAX, @@ -66,6 +93,7 @@ static struct rtnl_link_ops hsr_link_ops __read_mostly = { .priv_size = sizeof(struct hsr_priv), .setup = hsr_dev_setup, .newlink = hsr_newlink, + .fill_info = hsr_fill_info, }; -- cgit v1.2.3 From 7c2781fa92f5b9ca3188817a56a2ced0400355f3 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 29 Nov 2013 11:02:43 -0800 Subject: netem: missing break in ge loss generator There is a missing break statement in the Gilbert Elliot loss model generator which makes state machine behave incorrectly. Reported-by: Martin Burri Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 75c94e59a3bd..6e91323f3dac 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -268,6 +268,7 @@ static bool loss_gilb_ell(struct netem_sched_data *q) clg->state = 2; if (net_random() < clg->a4) return true; + break; case 2: if (net_random() < clg->a2) clg->state = 1; -- cgit v1.2.3 From ab6c27be8178a4682446faa5aa017b948997937f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 29 Nov 2013 11:03:35 -0800 Subject: netem: fix loss 4 state model Patch from developers of the alternative loss models, downloaded from: http://netgroup.uniroma2.it/twiki/bin/view.cgi/Main/NetemCLG "In the case 1 of the switch statement in the if conditions we need to add clg->a4 to clg->a1, according to the model." Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 6e91323f3dac..9685624f7bc7 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -215,10 +215,10 @@ static bool loss_4state(struct netem_sched_data *q) if (rnd < clg->a4) { clg->state = 4; return true; - } else if (clg->a4 < rnd && rnd < clg->a1) { + } else if (clg->a4 < rnd && rnd < clg->a1 + clg->a4) { clg->state = 3; return true; - } else if (clg->a1 < rnd) + } else if (clg->a1 + clg->a4 < rnd) clg->state = 1; break; -- cgit v1.2.3 From eff7979f00b2c546f36f6829f4072c8db54763a9 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 29 Nov 2013 11:04:26 -0800 Subject: netem: fix gemodel loss generator Patch from developers of the alternative loss models, downloaded from: http://netgroup.uniroma2.it/twiki/bin/view.cgi/Main/NetemCLG "in case 2, of the switch we change the direction of the inequality to net_random()>clg->a3, because clg->a3 is h in the GE model and when h is 0 all packets will be lost." Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 9685624f7bc7..bccd52b36e97 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -272,7 +272,7 @@ static bool loss_gilb_ell(struct netem_sched_data *q) case 2: if (net_random() < clg->a2) clg->state = 1; - if (clg->a3 > net_random()) + if (net_random() > clg->a3) return true; } -- cgit v1.2.3 From 3868204d6b89ea373a273e760609cb08020beb1a Mon Sep 17 00:00:00 2001 From: "fan.du" Date: Sun, 1 Dec 2013 16:28:48 +0800 Subject: {pktgen, xfrm} Update IPv4 header total len and checksum after tranformation commit a553e4a6317b2cfc7659542c10fe43184ffe53da ("[PKTGEN]: IPSEC support") tried to support IPsec ESP transport transformation for pktgen, but acctually this doesn't work at all for two reasons(The orignal transformed packet has bad IPv4 checksum value, as well as wrong auth value, reported by wireshark) - After transpormation, IPv4 header total length needs update, because encrypted payload's length is NOT same as that of plain text. - After transformation, IPv4 checksum needs re-caculate because of payload has been changed. With this patch, armmed pktgen with below cofiguration, Wireshark is able to decrypted ESP packet generated by pktgen without any IPv4 checksum error or auth value error. pgset "flag IPSEC" pgset "flows 1" Signed-off-by: Fan Du Signed-off-by: David S. Miller --- net/core/pktgen.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 261357a66300..a797fff7f222 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2527,6 +2527,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, if (x) { int ret; __u8 *eth; + struct iphdr *iph; + nhead = x->props.header_len - skb_headroom(skb); if (nhead > 0) { ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); @@ -2548,6 +2550,11 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, eth = (__u8 *) skb_push(skb, ETH_HLEN); memcpy(eth, pkt_dev->hh, 12); *(u16 *) ð[12] = protocol; + + /* Update IPv4 header len as well as checksum value */ + iph = ip_hdr(skb); + iph->tot_len = htons(skb->len - ETH_HLEN); + ip_send_check(iph); } } return 1; -- cgit v1.2.3