summaryrefslogtreecommitdiffstats
path: root/net/ipv6/ip6_offload.c
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2022-01-25 05:44:44 +0100
committerDavid S. Miller <davem@davemloft.net>2022-01-25 14:05:11 +0100
commit6fc2f3832d36d74289680a1bf4484def8321f6e0 (patch)
treee077ef05f168076713ff4fd8ecc1b111fc8605db /net/ipv6/ip6_offload.c
parentnet: mana: Use struct_size() helper in mana_gd_create_dma_region() (diff)
downloadlinux-6fc2f3832d36d74289680a1bf4484def8321f6e0.tar.xz
linux-6fc2f3832d36d74289680a1bf4484def8321f6e0.zip
ipv6: gro: flush instead of assuming different flows on hop_limit mismatch
IPv6 GRO considers packets to belong to different flows when their hop_limit is different. This seems counter-intuitive, the flow is the same. hop_limit may vary because of various bugs or hacks but that doesn't mean it's okay for GRO to reorder packets. Practical impact of this problem on overall TCP performance is unclear, but TCP itself detects this reordering and bumps TCPSACKReorder resulting in user complaints. Eric warns that there may be performance regressions in setups which do packet spraying across links with similar RTT but different hop count. To be safe let's target -next and not treat this as a fix. If the packet spraying is using flow label there should be no difference in behavior as flow label is checked first. Note that the code plays an easy to miss trick by upcasting next_hdr to a u16 pointer and compares next_hdr and hop_limit in one go. Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6/ip6_offload.c')
-rw-r--r--net/ipv6/ip6_offload.c5
1 files changed, 3 insertions, 2 deletions
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index b29e9ba5e113..d37a79a8554e 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -249,7 +249,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head,
if ((first_word & htonl(0xF00FFFFF)) ||
!ipv6_addr_equal(&iph->saddr, &iph2->saddr) ||
!ipv6_addr_equal(&iph->daddr, &iph2->daddr) ||
- *(u16 *)&iph->nexthdr != *(u16 *)&iph2->nexthdr) {
+ iph->nexthdr != iph2->nexthdr) {
not_same_flow:
NAPI_GRO_CB(p)->same_flow = 0;
continue;
@@ -260,7 +260,8 @@ not_same_flow:
goto not_same_flow;
}
/* flush if Traffic Class fields are different */
- NAPI_GRO_CB(p)->flush |= !!(first_word & htonl(0x0FF00000));
+ NAPI_GRO_CB(p)->flush |= !!((first_word & htonl(0x0FF00000)) |
+ (__force __be32)(iph->hop_limit ^ iph2->hop_limit));
NAPI_GRO_CB(p)->flush |= flush;
/* If the previous IP ID value was based on an atomic