summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig9
-rw-r--r--net/ipv4/af_inet.c2
-rw-r--r--net/ipv4/cipso_ipv4.c8
-rw-r--r--net/ipv4/esp4.c14
-rw-r--r--net/ipv4/fou.c388
-rw-r--r--net/ipv4/geneve.c2
-rw-r--r--net/ipv4/igmp.c45
-rw-r--r--net/ipv4/ip_fragment.c16
-rw-r--r--net/ipv4/ip_gre.c2
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/ip_sockglue.c2
-rw-r--r--net/ipv4/ip_tunnel.c61
-rw-r--r--net/ipv4/ipconfig.c19
-rw-r--r--net/ipv4/ipip.c2
-rw-r--r--net/ipv4/ping.c2
-rw-r--r--net/ipv4/proc.c6
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/syncookies.c86
-rw-r--r--net/ipv4/sysctl_net_ipv4.c7
-rw-r--r--net/ipv4/tcp.c5
-rw-r--r--net/ipv4/tcp_cong.c2
-rw-r--r--net/ipv4/tcp_input.c32
-rw-r--r--net/ipv4/tcp_offload.c2
-rw-r--r--net/ipv4/tcp_output.c13
-rw-r--r--net/ipv4/udp.c15
-rw-r--r--net/ipv4/udp_offload.c69
26 files changed, 587 insertions, 232 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e682b48e0709..bd2901604842 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -322,6 +322,15 @@ config NET_FOU
network mechanisms and optimizations for UDP (such as ECMP
and RSS) can be leveraged to provide better service.
+config NET_FOU_IP_TUNNELS
+ bool "IP: FOU encapsulation of IP tunnels"
+ depends on NET_IPIP || NET_IPGRE || IPV6_SIT
+ select NET_FOU
+ ---help---
+ Allow configuration of FOU or GUE encapsulation for IP tunnels.
+ When this option is enabled IP tunnels can be configured to use
+ FOU or GUE encapsulation.
+
config GENEVE
tristate "Generic Network Virtualization Encapsulation (Geneve)"
depends on INET
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8b7fe5b03906..3a096bb2d596 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1222,7 +1222,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
SKB_GSO_TCPV6 |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
- SKB_GSO_MPLS |
+ SKB_GSO_TUNNEL_REMCSUM |
0)))
goto out;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 4715f25dfe03..5160c710f2eb 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -50,7 +50,7 @@
#include <net/netlabel.h>
#include <net/cipso_ipv4.h>
#include <linux/atomic.h>
-#include <asm/bug.h>
+#include <linux/bug.h>
#include <asm/unaligned.h>
/* List of available DOI definitions */
@@ -72,6 +72,7 @@ struct cipso_v4_map_cache_bkt {
u32 size;
struct list_head list;
};
+
struct cipso_v4_map_cache_entry {
u32 hash;
unsigned char *key;
@@ -82,7 +83,8 @@ struct cipso_v4_map_cache_entry {
u32 activity;
struct list_head list;
};
-static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL;
+
+static struct cipso_v4_map_cache_bkt *cipso_v4_cache;
/* Restricted bitmap (tag #1) flags */
int cipso_v4_rbm_optfmt = 0;
@@ -539,7 +541,7 @@ doi_add_return:
/**
* cipso_v4_doi_free - Frees a DOI definition
- * @entry: the entry's RCU field
+ * @doi_def: the DOI definition
*
* Description:
* This function frees all of the memory associated with a DOI definition.
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 360b565918c4..60173d4d3a0e 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -392,8 +392,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
if (elen <= 0)
goto out;
- if ((err = skb_cow_data(skb, 0, &trailer)) < 0)
+ err = skb_cow_data(skb, 0, &trailer);
+ if (err < 0)
goto out;
+
nfrags = err;
assoclen = sizeof(*esph);
@@ -601,12 +603,12 @@ static int esp_init_authenc(struct xfrm_state *x)
BUG_ON(!aalg_desc);
err = -EINVAL;
- if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
+ if (aalg_desc->uinfo.auth.icv_fullbits / 8 !=
crypto_aead_authsize(aead)) {
- NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n",
- x->aalg->alg_name,
- crypto_aead_authsize(aead),
- aalg_desc->uinfo.auth.icv_fullbits/8);
+ pr_info("ESP: %s digestsize %u != %hu\n",
+ x->aalg->alg_name,
+ crypto_aead_authsize(aead),
+ aalg_desc->uinfo.auth.icv_fullbits / 8);
goto free_key;
}
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 32e78924e246..740ae099a0d9 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -38,21 +38,17 @@ static inline struct fou *fou_from_sock(struct sock *sk)
return sk->sk_user_data;
}
-static int fou_udp_encap_recv_deliver(struct sk_buff *skb,
- u8 protocol, size_t len)
+static void fou_recv_pull(struct sk_buff *skb, size_t len)
{
struct iphdr *iph = ip_hdr(skb);
/* Remove 'len' bytes from the packet (UDP header and
- * FOU header if present), modify the protocol to the one
- * we found, and then call rcv_encap.
+ * FOU header if present).
*/
iph->tot_len = htons(ntohs(iph->tot_len) - len);
__skb_pull(skb, len);
skb_postpull_rcsum(skb, udp_hdr(skb), len);
skb_reset_transport_header(skb);
-
- return -protocol;
}
static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
@@ -62,16 +58,78 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
if (!fou)
return 1;
- return fou_udp_encap_recv_deliver(skb, fou->protocol,
- sizeof(struct udphdr));
+ fou_recv_pull(skb, sizeof(struct udphdr));
+
+ return -fou->protocol;
+}
+
+static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
+ void *data, int hdrlen, u8 ipproto)
+{
+ __be16 *pd = data;
+ u16 start = ntohs(pd[0]);
+ u16 offset = ntohs(pd[1]);
+ u16 poffset = 0;
+ u16 plen;
+ __wsum csum, delta;
+ __sum16 *psum;
+
+ if (skb->remcsum_offload) {
+ /* Already processed in GRO path */
+ skb->remcsum_offload = 0;
+ return guehdr;
+ }
+
+ if (start > skb->len - hdrlen ||
+ offset > skb->len - hdrlen - sizeof(u16))
+ return NULL;
+
+ if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE))
+ __skb_checksum_complete(skb);
+
+ plen = hdrlen + offset + sizeof(u16);
+ if (!pskb_may_pull(skb, plen))
+ return NULL;
+ guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+
+ if (ipproto == IPPROTO_IP && sizeof(struct iphdr) < plen) {
+ struct iphdr *ip = (struct iphdr *)(skb->data + hdrlen);
+
+ /* If next header happens to be IP we can skip that for the
+ * checksum calculation since the IP header checksum is zero
+ * if correct.
+ */
+ poffset = ip->ihl * 4;
+ }
+
+ csum = csum_sub(skb->csum, skb_checksum(skb, poffset + hdrlen,
+ start - poffset - hdrlen, 0));
+
+ /* Set derived checksum in packet */
+ psum = (__sum16 *)(skb->data + hdrlen + offset);
+ delta = csum_sub(csum_fold(csum), *psum);
+ *psum = csum_fold(csum);
+
+ /* Adjust skb->csum since we changed the packet */
+ skb->csum = csum_add(skb->csum, delta);
+
+ return guehdr;
+}
+
+static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr)
+{
+ /* No support yet */
+ kfree_skb(skb);
+ return 0;
}
static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
{
struct fou *fou = fou_from_sock(sk);
- size_t len;
+ size_t len, optlen, hdrlen;
struct guehdr *guehdr;
- struct udphdr *uh;
+ void *data;
+ u16 doffset = 0;
if (!fou)
return 1;
@@ -80,25 +138,61 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
if (!pskb_may_pull(skb, len))
goto drop;
- uh = udp_hdr(skb);
- guehdr = (struct guehdr *)&uh[1];
+ guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+
+ optlen = guehdr->hlen << 2;
+ len += optlen;
- len += guehdr->hlen << 2;
if (!pskb_may_pull(skb, len))
goto drop;
- uh = udp_hdr(skb);
- guehdr = (struct guehdr *)&uh[1];
+ /* guehdr may change after pull */
+ guehdr = (struct guehdr *)&udp_hdr(skb)[1];
- if (guehdr->version != 0)
- goto drop;
+ hdrlen = sizeof(struct guehdr) + optlen;
- if (guehdr->flags) {
- /* No support yet */
+ if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen))
goto drop;
+
+ hdrlen = sizeof(struct guehdr) + optlen;
+
+ ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);
+
+ /* Pull UDP header now, skb->data points to guehdr */
+ __skb_pull(skb, sizeof(struct udphdr));
+
+ /* Pull csum through the guehdr now . This can be used if
+ * there is a remote checksum offload.
+ */
+ skb_postpull_rcsum(skb, udp_hdr(skb), len);
+
+ data = &guehdr[1];
+
+ if (guehdr->flags & GUE_FLAG_PRIV) {
+ __be32 flags = *(__be32 *)(data + doffset);
+
+ doffset += GUE_LEN_PRIV;
+
+ if (flags & GUE_PFLAG_REMCSUM) {
+ guehdr = gue_remcsum(skb, guehdr, data + doffset,
+ hdrlen, guehdr->proto_ctype);
+ if (!guehdr)
+ goto drop;
+
+ data = &guehdr[1];
+
+ doffset += GUE_PLEN_REMCSUM;
+ }
}
- return fou_udp_encap_recv_deliver(skb, guehdr->next_hdr, len);
+ if (unlikely(guehdr->control))
+ return gue_control_message(skb, guehdr);
+
+ __skb_pull(skb, hdrlen);
+ skb_reset_transport_header(skb);
+
+ return -guehdr->proto_ctype;
+
drop:
kfree_skb(skb);
return 0;
@@ -147,6 +241,66 @@ out_unlock:
return err;
}
+static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
+ struct guehdr *guehdr, void *data,
+ size_t hdrlen, u8 ipproto)
+{
+ __be16 *pd = data;
+ u16 start = ntohs(pd[0]);
+ u16 offset = ntohs(pd[1]);
+ u16 poffset = 0;
+ u16 plen;
+ void *ptr;
+ __wsum csum, delta;
+ __sum16 *psum;
+
+ if (skb->remcsum_offload)
+ return guehdr;
+
+ if (start > skb_gro_len(skb) - hdrlen ||
+ offset > skb_gro_len(skb) - hdrlen - sizeof(u16) ||
+ !NAPI_GRO_CB(skb)->csum_valid || skb->remcsum_offload)
+ return NULL;
+
+ plen = hdrlen + offset + sizeof(u16);
+
+ /* Pull checksum that will be written */
+ if (skb_gro_header_hard(skb, off + plen)) {
+ guehdr = skb_gro_header_slow(skb, off + plen, off);
+ if (!guehdr)
+ return NULL;
+ }
+
+ ptr = (void *)guehdr + hdrlen;
+
+ if (ipproto == IPPROTO_IP &&
+ (hdrlen + sizeof(struct iphdr) < plen)) {
+ struct iphdr *ip = (struct iphdr *)(ptr + hdrlen);
+
+ /* If next header happens to be IP we can skip
+ * that for the checksum calculation since the
+ * IP header checksum is zero if correct.
+ */
+ poffset = ip->ihl * 4;
+ }
+
+ csum = csum_sub(NAPI_GRO_CB(skb)->csum,
+ csum_partial(ptr + poffset, start - poffset, 0));
+
+ /* Set derived checksum in packet */
+ psum = (__sum16 *)(ptr + offset);
+ delta = csum_sub(csum_fold(csum), *psum);
+ *psum = csum_fold(csum);
+
+ /* Adjust skb->csum since we changed the packet */
+ skb->csum = csum_add(skb->csum, delta);
+ NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
+
+ skb->remcsum_offload = 1;
+
+ return guehdr;
+}
+
static struct sk_buff **gue_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
@@ -154,38 +308,64 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
const struct net_offload *ops;
struct sk_buff **pp = NULL;
struct sk_buff *p;
- u8 proto;
struct guehdr *guehdr;
- unsigned int hlen, guehlen;
- unsigned int off;
+ size_t len, optlen, hdrlen, off;
+ void *data;
+ u16 doffset = 0;
int flush = 1;
off = skb_gro_offset(skb);
- hlen = off + sizeof(*guehdr);
+ len = off + sizeof(*guehdr);
+
guehdr = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- guehdr = skb_gro_header_slow(skb, hlen, off);
+ if (skb_gro_header_hard(skb, len)) {
+ guehdr = skb_gro_header_slow(skb, len, off);
if (unlikely(!guehdr))
goto out;
}
- proto = guehdr->next_hdr;
+ optlen = guehdr->hlen << 2;
+ len += optlen;
- rcu_read_lock();
- offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
- ops = rcu_dereference(offloads[proto]);
- if (WARN_ON(!ops || !ops->callbacks.gro_receive))
- goto out_unlock;
+ if (skb_gro_header_hard(skb, len)) {
+ guehdr = skb_gro_header_slow(skb, len, off);
+ if (unlikely(!guehdr))
+ goto out;
+ }
- guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+ if (unlikely(guehdr->control) || guehdr->version != 0 ||
+ validate_gue_flags(guehdr, optlen))
+ goto out;
- hlen = off + guehlen;
- if (skb_gro_header_hard(skb, hlen)) {
- guehdr = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!guehdr))
- goto out_unlock;
+ hdrlen = sizeof(*guehdr) + optlen;
+
+ /* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr,
+ * this is needed if there is a remote checkcsum offload.
+ */
+ skb_gro_postpull_rcsum(skb, guehdr, hdrlen);
+
+ data = &guehdr[1];
+
+ if (guehdr->flags & GUE_FLAG_PRIV) {
+ __be32 flags = *(__be32 *)(data + doffset);
+
+ doffset += GUE_LEN_PRIV;
+
+ if (flags & GUE_PFLAG_REMCSUM) {
+ guehdr = gue_gro_remcsum(skb, off, guehdr,
+ data + doffset, hdrlen,
+ guehdr->proto_ctype);
+ if (!guehdr)
+ goto out;
+
+ data = &guehdr[1];
+
+ doffset += GUE_PLEN_REMCSUM;
+ }
}
+ skb_gro_pull(skb, hdrlen);
+
flush = 0;
for (p = *head; p; p = p->next) {
@@ -197,7 +377,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
guehdr2 = (struct guehdr *)(p->data + off);
/* Compare base GUE header to be equal (covers
- * hlen, version, next_hdr, and flags.
+ * hlen, version, proto_ctype, and flags.
*/
if (guehdr->word != guehdr2->word) {
NAPI_GRO_CB(p)->same_flow = 0;
@@ -212,10 +392,11 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
}
}
- skb_gro_pull(skb, guehlen);
-
- /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
- skb_gro_postpull_rcsum(skb, guehdr, guehlen);
+ rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[guehdr->proto_ctype]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_receive))
+ goto out_unlock;
pp = ops->callbacks.gro_receive(head, skb);
@@ -236,7 +417,7 @@ static int gue_gro_complete(struct sk_buff *skb, int nhoff)
u8 proto;
int err = -ENOENT;
- proto = guehdr->next_hdr;
+ proto = guehdr->proto_ctype;
guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
@@ -487,6 +668,125 @@ static const struct genl_ops fou_nl_ops[] = {
},
};
+static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ struct flowi4 *fl4, u8 *protocol, __be16 sport)
+{
+ struct udphdr *uh;
+
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+
+ uh = udp_hdr(skb);
+
+ uh->dest = e->dport;
+ uh->source = sport;
+ uh->len = htons(skb->len);
+ uh->check = 0;
+ udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
+ fl4->saddr, fl4->daddr, skb->len);
+
+ *protocol = IPPROTO_UDP;
+}
+
+int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4)
+{
+ bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
+ int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+ __be16 sport;
+
+ skb = iptunnel_handle_offloads(skb, csum, type);
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
+ skb, 0, 0, false);
+ fou_build_udp(skb, e, fl4, protocol, sport);
+
+ return 0;
+}
+EXPORT_SYMBOL(fou_build_header);
+
+int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4)
+{
+ bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
+ int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+ struct guehdr *guehdr;
+ size_t hdrlen, optlen = 0;
+ __be16 sport;
+ void *data;
+ bool need_priv = false;
+
+ if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ csum = false;
+ optlen += GUE_PLEN_REMCSUM;
+ type |= SKB_GSO_TUNNEL_REMCSUM;
+ need_priv = true;
+ }
+
+ optlen += need_priv ? GUE_LEN_PRIV : 0;
+
+ skb = iptunnel_handle_offloads(skb, csum, type);
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ /* Get source port (based on flow hash) before skb_push */
+ sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
+ skb, 0, 0, false);
+
+ hdrlen = sizeof(struct guehdr) + optlen;
+
+ skb_push(skb, hdrlen);
+
+ guehdr = (struct guehdr *)skb->data;
+
+ guehdr->control = 0;
+ guehdr->version = 0;
+ guehdr->hlen = optlen >> 2;
+ guehdr->flags = 0;
+ guehdr->proto_ctype = *protocol;
+
+ data = &guehdr[1];
+
+ if (need_priv) {
+ __be32 *flags = data;
+
+ guehdr->flags |= GUE_FLAG_PRIV;
+ *flags = 0;
+ data += GUE_LEN_PRIV;
+
+ if (type & SKB_GSO_TUNNEL_REMCSUM) {
+ u16 csum_start = skb_checksum_start_offset(skb);
+ __be16 *pd = data;
+
+ if (csum_start < hdrlen)
+ return -EINVAL;
+
+ csum_start -= hdrlen;
+ pd[0] = htons(csum_start);
+ pd[1] = htons(csum_start + skb->csum_offset);
+
+ if (!skb_is_gso(skb)) {
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->encapsulation = 0;
+ }
+
+ *flags |= GUE_PFLAG_REMCSUM;
+ data += GUE_PLEN_REMCSUM;
+ }
+
+ }
+
+ fou_build_udp(skb, e, fl4, protocol, sport);
+
+ return 0;
+}
+EXPORT_SYMBOL(gue_build_header);
+
static int __init fou_init(void)
{
int ret;
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index dedb21e99914..31802afce34f 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -104,7 +104,7 @@ static void geneve_build_header(struct genevehdr *geneveh,
memcpy(geneveh->options, options, options_len);
}
-/* Transmit a fully formated Geneve frame.
+/* Transmit a fully formatted Geneve frame.
*
* When calling this function. The skb->data should point
* to the geneve header which is fully formed.
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index fb70e3ecc3e4..666cf364df86 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -112,17 +112,17 @@
#ifdef CONFIG_IP_MULTICAST
/* Parameter names and values are taken from igmp-v2-06 draft */
-#define IGMP_V1_Router_Present_Timeout (400*HZ)
-#define IGMP_V2_Router_Present_Timeout (400*HZ)
-#define IGMP_V2_Unsolicited_Report_Interval (10*HZ)
-#define IGMP_V3_Unsolicited_Report_Interval (1*HZ)
-#define IGMP_Query_Response_Interval (10*HZ)
-#define IGMP_Query_Robustness_Variable 2
+#define IGMP_V1_ROUTER_PRESENT_TIMEOUT (400*HZ)
+#define IGMP_V2_ROUTER_PRESENT_TIMEOUT (400*HZ)
+#define IGMP_V2_UNSOLICITED_REPORT_INTERVAL (10*HZ)
+#define IGMP_V3_UNSOLICITED_REPORT_INTERVAL (1*HZ)
+#define IGMP_QUERY_RESPONSE_INTERVAL (10*HZ)
+#define IGMP_QUERY_ROBUSTNESS_VARIABLE 2
-#define IGMP_Initial_Report_Delay (1)
+#define IGMP_INITIAL_REPORT_DELAY (1)
-/* IGMP_Initial_Report_Delay is not from IGMP specs!
+/* IGMP_INITIAL_REPORT_DELAY is not from IGMP specs!
* IGMP specs require to report membership immediately after
* joining a group, but we delay the first report by a
* small interval. It seems more natural and still does not
@@ -318,9 +318,7 @@ igmp_scount(struct ip_mc_list *pmc, int type, int gdeleted, int sdeleted)
return scount;
}
-#define igmp_skb_size(skb) (*(unsigned int *)((skb)->cb))
-
-static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
+static struct sk_buff *igmpv3_newpack(struct net_device *dev, unsigned int mtu)
{
struct sk_buff *skb;
struct rtable *rt;
@@ -330,6 +328,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
struct flowi4 fl4;
int hlen = LL_RESERVED_SPACE(dev);
int tlen = dev->needed_tailroom;
+ unsigned int size = mtu;
while (1) {
skb = alloc_skb(size + hlen + tlen,
@@ -341,7 +340,6 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
return NULL;
}
skb->priority = TC_PRIO_CONTROL;
- igmp_skb_size(skb) = size;
rt = ip_route_output_ports(net, &fl4, NULL, IGMPV3_ALL_MCR, 0,
0, 0,
@@ -354,6 +352,8 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
skb_dst_set(skb, &rt->dst);
skb->dev = dev;
+ skb->reserved_tailroom = skb_end_offset(skb) -
+ min(mtu, skb_end_offset(skb));
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
@@ -423,8 +423,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ip_mc_list *pmc,
return skb;
}
-#define AVAILABLE(skb) ((skb) ? ((skb)->dev ? igmp_skb_size(skb) - (skb)->len : \
- skb_tailroom(skb)) : 0)
+#define AVAILABLE(skb) ((skb) ? skb_availroom(skb) : 0)
static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc,
int type, int gdeleted, int sdeleted)
@@ -879,15 +878,15 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
if (ih->code == 0) {
/* Alas, old v1 router presents here. */
- max_delay = IGMP_Query_Response_Interval;
+ max_delay = IGMP_QUERY_RESPONSE_INTERVAL;
in_dev->mr_v1_seen = jiffies +
- IGMP_V1_Router_Present_Timeout;
+ IGMP_V1_ROUTER_PRESENT_TIMEOUT;
group = 0;
} else {
/* v2 router present */
max_delay = ih->code*(HZ/IGMP_TIMER_SCALE);
in_dev->mr_v2_seen = jiffies +
- IGMP_V2_Router_Present_Timeout;
+ IGMP_V2_ROUTER_PRESENT_TIMEOUT;
}
/* cancel the interface change timer */
in_dev->mr_ifc_count = 0;
@@ -899,7 +898,7 @@ static bool igmp_heard_query(struct in_device *in_dev, struct sk_buff *skb,
return true; /* ignore bogus packet; freed by caller */
} else if (IGMP_V1_SEEN(in_dev)) {
/* This is a v3 query with v1 queriers present */
- max_delay = IGMP_Query_Response_Interval;
+ max_delay = IGMP_QUERY_RESPONSE_INTERVAL;
group = 0;
} else if (IGMP_V2_SEEN(in_dev)) {
/* this is a v3 query with v2 queriers present;
@@ -1218,7 +1217,7 @@ static void igmp_group_added(struct ip_mc_list *im)
return;
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
spin_lock_bh(&im->lock);
- igmp_start_timer(im, IGMP_Initial_Report_Delay);
+ igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
spin_unlock_bh(&im->lock);
return;
}
@@ -1541,7 +1540,7 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr)
int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS;
int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF;
#ifdef CONFIG_IP_MULTICAST
-int sysctl_igmp_qrv __read_mostly = IGMP_Query_Robustness_Variable;
+int sysctl_igmp_qrv __read_mostly = IGMP_QUERY_ROBUSTNESS_VARIABLE;
#endif
static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode,
@@ -2687,11 +2686,7 @@ static int igmp_mcf_seq_show(struct seq_file *seq, void *v)
struct igmp_mcf_iter_state *state = igmp_mcf_seq_private(seq);
if (v == SEQ_START_TOKEN) {
- seq_printf(seq,
- "%3s %6s "
- "%10s %10s %6s %6s\n", "Idx",
- "Device", "MCA",
- "SRC", "INC", "EXC");
+ seq_puts(seq, "Idx Device MCA SRC INC EXC\n");
} else {
seq_printf(seq,
"%3d %6.6s 0x%08x "
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 2811cc18701a..4d964dadd655 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -80,7 +80,7 @@ struct ipq {
struct inet_peer *peer;
};
-static inline u8 ip4_frag_ecn(u8 tos)
+static u8 ip4_frag_ecn(u8 tos)
{
return 1 << (tos & INET_ECN_MASK);
}
@@ -148,7 +148,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, 1) : NULL;
}
-static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
+static void ip4_frag_free(struct inet_frag_queue *q)
{
struct ipq *qp;
@@ -160,7 +160,7 @@ static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
/* Destruction primitives. */
-static __inline__ void ipq_put(struct ipq *ipq)
+static void ipq_put(struct ipq *ipq)
{
inet_frag_put(&ipq->q, &ip4_frags);
}
@@ -236,7 +236,7 @@ out:
/* Find the correct entry in the "incomplete datagrams" queue for
* this IP datagram, and create new one, if nothing is found.
*/
-static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
+static struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
{
struct inet_frag_queue *q;
struct ip4_create_arg arg;
@@ -256,7 +256,7 @@ static inline struct ipq *ip_find(struct net *net, struct iphdr *iph, u32 user)
}
/* Is the fragment too far ahead to be part of ipq? */
-static inline int ip_frag_too_far(struct ipq *qp)
+static int ip_frag_too_far(struct ipq *qp)
{
struct inet_peer *peer = qp->peer;
unsigned int max = sysctl_ipfrag_max_dist;
@@ -795,16 +795,16 @@ static void __init ip4_frags_ctl_register(void)
register_net_sysctl(&init_net, "net/ipv4", ip4_frags_ctl_table);
}
#else
-static inline int ip4_frags_ns_ctl_register(struct net *net)
+static int ip4_frags_ns_ctl_register(struct net *net)
{
return 0;
}
-static inline void ip4_frags_ns_ctl_unregister(struct net *net)
+static void ip4_frags_ns_ctl_unregister(struct net *net)
{
}
-static inline void __init ip4_frags_ctl_register(void)
+static void __init ip4_frags_ctl_register(void)
{
}
#endif
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 12055fdbe716..ac8491245e5b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -789,7 +789,7 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u16(skb, IFLA_GRE_ENCAP_DPORT,
t->encap.dport) ||
nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
- t->encap.dport))
+ t->encap.flags))
goto nla_put_failure;
return 0;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index bc6471d4abcd..4a929adf2ab7 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -662,12 +662,10 @@ slow_path:
if (len < left) {
len &= ~7;
}
- /*
- * Allocate buffer.
- */
- if ((skb2 = alloc_skb(len+hlen+ll_rs, GFP_ATOMIC)) == NULL) {
- NETDEBUG(KERN_INFO "IP: frag: no memory for new fragment!\n");
+ /* Allocate buffer */
+ skb2 = alloc_skb(len + hlen + ll_rs, GFP_ATOMIC);
+ if (!skb2) {
err = -ENOMEM;
goto fail;
}
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index c373a9ad4555..21894df66262 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -424,7 +424,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
msg->msg_flags |= MSG_TRUNC;
copied = len;
}
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err)
goto out_free_skb;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 0bb8e141eacc..c3587e1c8b82 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -56,7 +56,10 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
-#include <net/gue.h>
+
+#if IS_ENABLED(CONFIG_NET_FOU)
+#include <net/fou.h>
+#endif
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -494,10 +497,12 @@ static int ip_encap_hlen(struct ip_tunnel_encap *e)
switch (e->type) {
case TUNNEL_ENCAP_NONE:
return 0;
+#if IS_ENABLED(CONFIG_NET_FOU)
case TUNNEL_ENCAP_FOU:
- return sizeof(struct udphdr);
+ return fou_encap_hlen(e);
case TUNNEL_ENCAP_GUE:
- return sizeof(struct udphdr) + sizeof(struct guehdr);
+ return gue_encap_hlen(e);
+#endif
default:
return -EINVAL;
}
@@ -526,60 +531,18 @@ int ip_tunnel_encap_setup(struct ip_tunnel *t,
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
-static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
- size_t hdr_len, u8 *protocol, struct flowi4 *fl4)
-{
- struct udphdr *uh;
- __be16 sport;
- bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
- int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
-
- skb = iptunnel_handle_offloads(skb, csum, type);
-
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- /* Get length and hash before making space in skb */
-
- sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
- skb, 0, 0, false);
-
- skb_push(skb, hdr_len);
-
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
-
- if (e->type == TUNNEL_ENCAP_GUE) {
- struct guehdr *guehdr = (struct guehdr *)&uh[1];
-
- guehdr->version = 0;
- guehdr->hlen = 0;
- guehdr->flags = 0;
- guehdr->next_hdr = *protocol;
- }
-
- uh->dest = e->dport;
- uh->source = sport;
- uh->len = htons(skb->len);
- uh->check = 0;
- udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
- fl4->saddr, fl4->daddr, skb->len);
-
- *protocol = IPPROTO_UDP;
-
- return 0;
-}
-
int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
u8 *protocol, struct flowi4 *fl4)
{
switch (t->encap.type) {
case TUNNEL_ENCAP_NONE:
return 0;
+#if IS_ENABLED(CONFIG_NET_FOU)
case TUNNEL_ENCAP_FOU:
+ return fou_build_header(skb, &t->encap, protocol, fl4);
case TUNNEL_ENCAP_GUE:
- return fou_build_header(skb, &t->encap, t->encap_hlen,
- protocol, fl4);
+ return gue_build_header(skb, &t->encap, protocol, fl4);
+#endif
default:
return -EINVAL;
}
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 648fa1490ea7..7fa18bc7e47f 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -115,7 +115,7 @@
*/
int ic_set_manually __initdata = 0; /* IPconfig parameters set manually */
-static int ic_enable __initdata = 0; /* IP config enabled? */
+static int ic_enable __initdata; /* IP config enabled? */
/* Protocol choice */
int ic_proto_enabled __initdata = 0
@@ -130,7 +130,7 @@ int ic_proto_enabled __initdata = 0
#endif
;
-static int ic_host_name_set __initdata = 0; /* Host name set by us? */
+static int ic_host_name_set __initdata; /* Host name set by us? */
__be32 ic_myaddr = NONE; /* My IP address */
static __be32 ic_netmask = NONE; /* Netmask for local subnet */
@@ -160,17 +160,17 @@ static u8 ic_domain[64]; /* DNS (not NIS) domain name */
static char user_dev_name[IFNAMSIZ] __initdata = { 0, };
/* Protocols supported by available interfaces */
-static int ic_proto_have_if __initdata = 0;
+static int ic_proto_have_if __initdata;
/* MTU for boot device */
-static int ic_dev_mtu __initdata = 0;
+static int ic_dev_mtu __initdata;
#ifdef IPCONFIG_DYNAMIC
static DEFINE_SPINLOCK(ic_recv_lock);
-static volatile int ic_got_reply __initdata = 0; /* Proto(s) that replied */
+static volatile int ic_got_reply __initdata; /* Proto(s) that replied */
#endif
#ifdef IPCONFIG_DHCP
-static int ic_dhcp_msgtype __initdata = 0; /* DHCP msg type received */
+static int ic_dhcp_msgtype __initdata; /* DHCP msg type received */
#endif
@@ -186,8 +186,8 @@ struct ic_device {
__be32 xid;
};
-static struct ic_device *ic_first_dev __initdata = NULL;/* List of open device */
-static struct net_device *ic_dev __initdata = NULL; /* Selected device */
+static struct ic_device *ic_first_dev __initdata; /* List of open device */
+static struct net_device *ic_dev __initdata; /* Selected device */
static bool __init ic_is_init_dev(struct net_device *dev)
{
@@ -498,7 +498,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
struct arphdr *rarp;
unsigned char *rarp_ptr;
__be32 sip, tip;
- unsigned char *sha, *tha; /* s for "source", t for "target" */
+ unsigned char *tha; /* t for "target" */
struct ic_device *d;
if (!net_eq(dev_net(dev), &init_net))
@@ -549,7 +549,6 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt
goto drop_unlock; /* should never happen */
/* Extract variable-width fields */
- sha = rarp_ptr;
rarp_ptr += dev->addr_len;
memcpy(&sip, rarp_ptr, 4);
rarp_ptr += 4;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 37096d64730e..40403114f00a 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -465,7 +465,7 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u16(skb, IFLA_IPTUN_ENCAP_DPORT,
tunnel->encap.dport) ||
nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS,
- tunnel->encap.dport))
+ tunnel->encap.flags))
goto nla_put_failure;
return 0;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 57f7c9804139..736236c3e554 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -875,7 +875,7 @@ int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
}
/* Don't bother checking the checksum */
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err)
goto done;
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 8e3eb39f84e7..f0d4eb8b99b9 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -296,12 +296,12 @@ static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals,
int j;
if (count) {
- seq_printf(seq, "\nIcmpMsg:");
+ seq_puts(seq, "\nIcmpMsg:");
for (j = 0; j < count; ++j)
seq_printf(seq, " %sType%u",
type[j] & 0x100 ? "Out" : "In",
type[j] & 0xff);
- seq_printf(seq, "\nIcmpMsg:");
+ seq_puts(seq, "\nIcmpMsg:");
for (j = 0; j < count; ++j)
seq_printf(seq, " %lu", vals[j]);
}
@@ -342,7 +342,7 @@ static void icmp_put(struct seq_file *seq)
seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors");
for (i = 0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " In%s", icmpmibmap[i].name);
- seq_printf(seq, " OutMsgs OutErrors");
+ seq_puts(seq, " OutMsgs OutErrors");
for (i = 0; icmpmibmap[i].name != NULL; i++)
seq_printf(seq, " Out%s", icmpmibmap[i].name);
seq_printf(seq, "\nIcmp: %lu %lu %lu",
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 739db3100c23..ee8fa4bf3b73 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -718,7 +718,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
copied = len;
}
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, 0, msg, copied);
if (err)
goto done;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 32b98d0207b4..45fe60c5238e 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -19,10 +19,6 @@
#include <net/tcp.h>
#include <net/route.h>
-/* Timestamps: lowest bits store TCP options */
-#define TSBITS 6
-#define TSMASK (((__u32)1 << TSBITS) - 1)
-
extern int sysctl_tcp_syncookies;
static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
@@ -30,6 +26,30 @@ static u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS] __read_mostly;
#define COOKIEBITS 24 /* Upper bits store count */
#define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1)
+/* TCP Timestamp: 6 lowest bits of timestamp sent in the cookie SYN-ACK
+ * stores TCP options:
+ *
+ * MSB LSB
+ * | 31 ... 6 | 5 | 4 | 3 2 1 0 |
+ * | Timestamp | ECN | SACK | WScale |
+ *
+ * When we receive a valid cookie-ACK, we look at the echoed tsval (if
+ * any) to figure out which TCP options we should use for the rebuilt
+ * connection.
+ *
+ * A WScale setting of '0xf' (which is an invalid scaling value)
+ * means that original syn did not include the TCP window scaling option.
+ */
+#define TS_OPT_WSCALE_MASK 0xf
+#define TS_OPT_SACK BIT(4)
+#define TS_OPT_ECN BIT(5)
+/* There is no TS_OPT_TIMESTAMP:
+ * if ACK contains timestamp option, we already know it was
+ * requested/supported by the syn/synack exchange.
+ */
+#define TSBITS 6
+#define TSMASK (((__u32)1 << TSBITS) - 1)
+
static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],
ipv4_cookie_scratch);
@@ -67,9 +87,11 @@ __u32 cookie_init_timestamp(struct request_sock *req)
ireq = inet_rsk(req);
- options = ireq->wscale_ok ? ireq->snd_wscale : 0xf;
- options |= ireq->sack_ok << 4;
- options |= ireq->ecn_ok << 5;
+ options = ireq->wscale_ok ? ireq->snd_wscale : TS_OPT_WSCALE_MASK;
+ if (ireq->sack_ok)
+ options |= TS_OPT_SACK;
+ if (ireq->ecn_ok)
+ options |= TS_OPT_ECN;
ts = ts_now & ~TSMASK;
ts |= options;
@@ -219,16 +241,13 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
* additional tcp options in the timestamp.
* This extracts these options from the timestamp echo.
*
- * The lowest 4 bits store snd_wscale.
- * next 2 bits indicate SACK and ECN support.
- *
- * return false if we decode an option that should not be.
+ * return false if we decode a tcp option that is disabled
+ * on the host.
*/
-bool cookie_check_timestamp(struct tcp_options_received *tcp_opt,
- struct net *net, bool *ecn_ok)
+bool cookie_timestamp_decode(struct tcp_options_received *tcp_opt)
{
/* echoed timestamp, lowest bits contain options */
- u32 options = tcp_opt->rcv_tsecr & TSMASK;
+ u32 options = tcp_opt->rcv_tsecr;
if (!tcp_opt->saw_tstamp) {
tcp_clear_options(tcp_opt);
@@ -238,22 +257,35 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt,
if (!sysctl_tcp_timestamps)
return false;
- tcp_opt->sack_ok = (options & (1 << 4)) ? TCP_SACK_SEEN : 0;
- *ecn_ok = (options >> 5) & 1;
- if (*ecn_ok && !net->ipv4.sysctl_tcp_ecn)
- return false;
+ tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0;
if (tcp_opt->sack_ok && !sysctl_tcp_sack)
return false;
- if ((options & 0xf) == 0xf)
+ if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK)
return true; /* no window scaling */
tcp_opt->wscale_ok = 1;
- tcp_opt->snd_wscale = options & 0xf;
+ tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK;
+
return sysctl_tcp_window_scaling != 0;
}
-EXPORT_SYMBOL(cookie_check_timestamp);
+EXPORT_SYMBOL(cookie_timestamp_decode);
+
+bool cookie_ecn_ok(const struct tcp_options_received *tcp_opt,
+ const struct net *net, const struct dst_entry *dst)
+{
+ bool ecn_ok = tcp_opt->rcv_tsecr & TS_OPT_ECN;
+
+ if (!ecn_ok)
+ return false;
+
+ if (net->ipv4.sysctl_tcp_ecn)
+ return true;
+
+ return dst_feature(dst, RTAX_FEATURE_ECN);
+}
+EXPORT_SYMBOL(cookie_ecn_ok);
struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
{
@@ -269,14 +301,16 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
int mss;
struct rtable *rt;
__u8 rcv_wscale;
- bool ecn_ok = false;
struct flowi4 fl4;
if (!sysctl_tcp_syncookies || !th->ack || th->rst)
goto out;
- if (tcp_synq_no_recent_overflow(sk) ||
- (mss = __cookie_v4_check(ip_hdr(skb), th, cookie)) == 0) {
+ if (tcp_synq_no_recent_overflow(sk))
+ goto out;
+
+ mss = __cookie_v4_check(ip_hdr(skb), th, cookie);
+ if (mss == 0) {
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_SYNCOOKIESFAILED);
goto out;
}
@@ -287,7 +321,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
memset(&tcp_opt, 0, sizeof(tcp_opt));
tcp_parse_options(skb, &tcp_opt, 0, NULL);
- if (!cookie_check_timestamp(&tcp_opt, sock_net(sk), &ecn_ok))
+ if (!cookie_timestamp_decode(&tcp_opt))
goto out;
ret = NULL;
@@ -305,7 +339,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->ir_loc_addr = ip_hdr(skb)->daddr;
ireq->ir_rmt_addr = ip_hdr(skb)->saddr;
ireq->ir_mark = inet_request_mark(sk, skb);
- ireq->ecn_ok = ecn_ok;
ireq->snd_wscale = tcp_opt.snd_wscale;
ireq->sack_ok = tcp_opt.sack_ok;
ireq->wscale_ok = tcp_opt.wscale_ok;
@@ -354,6 +387,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
dst_metric(&rt->dst, RTAX_INITRWND));
ireq->rcv_wscale = rcv_wscale;
+ ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), &rt->dst);
ret = get_cookie_sock(sk, skb, req, &rt->dst);
/* ip_queue_xmit() depends on our flow being setup
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index b3c53c8b331e..e0ee384a448f 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -496,6 +496,13 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "tcp_max_reordering",
+ .data = &sysctl_tcp_max_reordering,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "tcp_dsack",
.data = &sysctl_tcp_dsack,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 39ec0c379545..c239f4740d10 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1377,7 +1377,7 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len)
/* XXX -- need to support SO_PEEK_OFF */
skb_queue_walk(&sk->sk_write_queue, skb) {
- err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, skb->len);
+ err = skb_copy_datagram_msg(skb, 0, msg, skb->len);
if (err)
break;
@@ -1833,8 +1833,7 @@ do_prequeue:
}
if (!(flags & MSG_TRUNC)) {
- err = skb_copy_datagram_iovec(skb, offset,
- msg->msg_iov, used);
+ err = skb_copy_datagram_msg(skb, offset, msg, used);
if (err) {
/* Exception. Bailout! */
if (!copied)
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index b1c5970d47a1..27ead0dd16bc 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -1,5 +1,5 @@
/*
- * Plugable TCP congestion control support and newReno
+ * Pluggable TCP congestion control support and newReno
* congestion control.
* Based on ideas from I/O scheduler support and Web100.
*
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 88fa2d160685..5f979c7f5135 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -81,6 +81,7 @@ int sysctl_tcp_window_scaling __read_mostly = 1;
int sysctl_tcp_sack __read_mostly = 1;
int sysctl_tcp_fack __read_mostly = 1;
int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH;
+int sysctl_tcp_max_reordering __read_mostly = 300;
EXPORT_SYMBOL(sysctl_tcp_reordering);
int sysctl_tcp_dsack __read_mostly = 1;
int sysctl_tcp_app_win __read_mostly = 31;
@@ -833,7 +834,7 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
if (metric > tp->reordering) {
int mib_idx;
- tp->reordering = min(TCP_MAX_REORDERING, metric);
+ tp->reordering = min(sysctl_tcp_max_reordering, metric);
/* This exciting event is worth to be remembered. 8) */
if (ts)
@@ -5030,7 +5031,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
/* step 3: check security and precedence [ignored] */
/* step 4: Check for a SYN
- * RFC 5691 4.2 : Send a challenge ack
+ * RFC 5961 4.2 : Send a challenge ack
*/
if (th->syn) {
syn_challenge:
@@ -5867,7 +5868,7 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
* If we receive a SYN packet with these bits set, it means a
* network is playing bad games with TOS bits. In order to
* avoid possible false congestion notifications, we disable
- * TCP ECN negociation.
+ * TCP ECN negotiation.
*
* Exception: tcp_ca wants ECN. This is required for DCTCP
* congestion control; it requires setting ECT on all packets,
@@ -5877,20 +5878,22 @@ static inline void pr_drop_req(struct request_sock *req, __u16 port, int family)
*/
static void tcp_ecn_create_request(struct request_sock *req,
const struct sk_buff *skb,
- const struct sock *listen_sk)
+ const struct sock *listen_sk,
+ const struct dst_entry *dst)
{
const struct tcphdr *th = tcp_hdr(skb);
const struct net *net = sock_net(listen_sk);
bool th_ecn = th->ece && th->cwr;
- bool ect, need_ecn;
+ bool ect, need_ecn, ecn_ok;
if (!th_ecn)
return;
ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
need_ecn = tcp_ca_needs_ecn(listen_sk);
+ ecn_ok = net->ipv4.sysctl_tcp_ecn || dst_feature(dst, RTAX_FEATURE_ECN);
- if (!ect && !need_ecn && net->ipv4.sysctl_tcp_ecn)
+ if (!ect && !need_ecn && ecn_ok)
inet_rsk(req)->ecn_ok = 1;
else if (ect && need_ecn)
inet_rsk(req)->ecn_ok = 1;
@@ -5955,13 +5958,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
if (security_inet_conn_request(sk, skb, req))
goto drop_and_free;
- if (!want_cookie || tmp_opt.tstamp_ok)
- tcp_ecn_create_request(req, skb, sk);
-
- if (want_cookie) {
- isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
- req->cookie_ts = tmp_opt.tstamp_ok;
- } else if (!isn) {
+ if (!want_cookie && !isn) {
/* VJ's idea. We save last timestamp seen
* from the destination in peer table, when entering
* state TIME-WAIT, and check against it before
@@ -6009,6 +6006,15 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
goto drop_and_free;
}
+ tcp_ecn_create_request(req, skb, sk, dst);
+
+ if (want_cookie) {
+ isn = cookie_init_sequence(af_ops, sk, skb, &req->mss);
+ req->cookie_ts = tmp_opt.tstamp_ok;
+ if (!tmp_opt.tstamp_ok)
+ inet_rsk(req)->ecn_ok = 0;
+ }
+
tcp_rsk(req)->snt_isn = isn;
tcp_openreq_init_rwin(req, sk, dst);
fastopen = !want_cookie &&
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 5b90f2f447a5..9d7930ba8e0f 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -94,9 +94,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |
SKB_GSO_SIT |
- SKB_GSO_MPLS |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM |
0) ||
!(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
goto out;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a3d453b94747..0b88158dd4a7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -333,10 +333,19 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
+ bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
+ tcp_ca_needs_ecn(sk);
+
+ if (!use_ecn) {
+ const struct dst_entry *dst = __sk_dst_get(sk);
+
+ if (dst && dst_feature(dst, RTAX_FEATURE_ECN))
+ use_ecn = true;
+ }
tp->ecn_flags = 0;
- if (sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
- tcp_ca_needs_ecn(sk)) {
+
+ if (use_ecn) {
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
tp->ecn_flags = TCP_ECN_OK;
if (tcp_ca_needs_ecn(sk))
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cd0db5471bb5..df19027f44f3 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1281,8 +1281,8 @@ try_again:
}
if (skb_csum_unnecessary(skb))
- err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
- msg->msg_iov, copied);
+ err = skb_copy_datagram_msg(skb, sizeof(struct udphdr),
+ msg, copied);
else {
err = skb_copy_and_csum_datagram_iovec(skb,
sizeof(struct udphdr),
@@ -1777,14 +1777,13 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (ret > 0)
return -ret;
return 0;
- } else {
- if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
- return __udp4_lib_mcast_deliver(net, skb, uh,
- saddr, daddr, udptable);
-
- sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
}
+ if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
+ return __udp4_lib_mcast_deliver(net, skb, uh,
+ saddr, daddr, udptable);
+
+ sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
if (sk != NULL) {
int ret;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 6480cea7aa53..d3e537ef6b7f 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -29,7 +29,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
netdev_features_t features),
- __be16 new_protocol)
+ __be16 new_protocol, bool is_ipv6)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header;
@@ -39,7 +39,10 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t enc_features;
int udp_offset, outer_hlen;
unsigned int oldlen;
- bool need_csum;
+ bool need_csum = !!(skb_shinfo(skb)->gso_type &
+ SKB_GSO_UDP_TUNNEL_CSUM);
+ bool remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
+ bool offload_csum = false, dont_encap = (need_csum || remcsum);
oldlen = (u16)~skb->len;
@@ -52,10 +55,13 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
skb_set_network_header(skb, skb_inner_network_offset(skb));
skb->mac_len = skb_inner_network_offset(skb);
skb->protocol = new_protocol;
+ skb->encap_hdr_csum = need_csum;
+ skb->remcsum_offload = remcsum;
- need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
- if (need_csum)
- skb->encap_hdr_csum = 1;
+ /* Try to offload checksum if possible */
+ offload_csum = !!(need_csum &&
+ (skb->dev->features &
+ (is_ipv6 ? NETIF_F_V6_CSUM : NETIF_F_V4_CSUM)));
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & features;
@@ -72,11 +78,21 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
do {
struct udphdr *uh;
int len;
-
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
+ __be32 delta;
+
+ if (dont_encap) {
+ skb->encapsulation = 0;
+ skb->ip_summed = CHECKSUM_NONE;
+ } else {
+ /* Only set up inner headers if we might be offloading
+ * inner checksum.
+ */
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ }
skb->mac_len = mac_len;
+ skb->protocol = protocol;
skb_push(skb, outer_hlen);
skb_reset_mac_header(skb);
@@ -86,19 +102,36 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
uh = udp_hdr(skb);
uh->len = htons(len);
- if (need_csum) {
- __be32 delta = htonl(oldlen + len);
+ if (!need_csum)
+ continue;
- uh->check = ~csum_fold((__force __wsum)
- ((__force u32)uh->check +
- (__force u32)delta));
+ delta = htonl(oldlen + len);
+
+ uh->check = ~csum_fold((__force __wsum)
+ ((__force u32)uh->check +
+ (__force u32)delta));
+ if (offload_csum) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ } else if (remcsum) {
+ /* Need to calculate checksum from scratch,
+ * inner checksums are never when doing
+ * remote_checksum_offload.
+ */
+
+ skb->csum = skb_checksum(skb, udp_offset,
+ skb->len - udp_offset,
+ 0);
+ uh->check = csum_fold(skb->csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ } else {
uh->check = gso_make_checksum(skb, ~uh->check);
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
}
-
- skb->protocol = protocol;
} while ((skb = skb->next));
out:
return segs;
@@ -134,7 +167,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
}
segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
- protocol);
+ protocol, is_ipv6);
out_unlock:
rcu_read_unlock();
@@ -172,9 +205,9 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM |
SKB_GSO_IPIP |
- SKB_GSO_GRE | SKB_GSO_GRE_CSUM |
- SKB_GSO_MPLS) ||
+ SKB_GSO_GRE | SKB_GSO_GRE_CSUM) ||
!(type & (SKB_GSO_UDP))))
goto out;