summaryrefslogtreecommitdiffstats
path: root/net/ipv4/udp.c
diff options
context:
space:
mode:
authorWillem de Bruijn <willemb@google.com>2018-04-26 19:42:17 +0200
committerDavid S. Miller <davem@davemloft.net>2018-04-26 21:08:04 +0200
commitbec1f6f697362c5bc635dacd7ac8499d0a10a4e7 (patch)
tree90f8dcb39e9c7b62034c8010e054cc237940339a /net/ipv4/udp.c
parentudp: add udp gso (diff)
downloadlinux-bec1f6f697362c5bc635dacd7ac8499d0a10a4e7.tar.xz
linux-bec1f6f697362c5bc635dacd7ac8499d0a10a4e7.zip
udp: generate gso with UDP_SEGMENT
Support generic segmentation offload for udp datagrams. Callers can concatenate and send at once the payload of multiple datagrams with the same destination. To set segment size, the caller sets socket option UDP_SEGMENT to the length of each discrete payload. This value must be smaller than or equal to the relevant MTU. A follow-up patch adds cmsg UDP_SEGMENT to specify segment size on a per send call basis. Total byte length may then exceed MTU. If not an exact multiple of segment size, the last segment will be shorter. The implementation adds a gso_size field to the udp socket, ip(v6) cmsg cookie and inet_cork structure to be able to set the value at setsockopt or cmsg time and to work with both lockless and corked paths. Initial benchmark numbers show UDP GSO about as expensive as TCP GSO. tcp tso 3197 MB/s 54232 msg/s 54232 calls/s 6,457,754,262 cycles tcp gso 1765 MB/s 29939 msg/s 29939 calls/s 11,203,021,806 cycles tcp without tso/gso * 739 MB/s 12548 msg/s 12548 calls/s 11,205,483,630 cycles udp 876 MB/s 14873 msg/s 624666 calls/s 11,205,777,429 cycles udp gso 2139 MB/s 36282 msg/s 36282 calls/s 11,204,374,561 cycles [*] after reverting commit 0a6b2a1dc2a2 ("tcp: switch to GSO being always on") Measured total system cycles ('-a') for one core while pinning both the network receive path and benchmark process to that core: perf stat -a -C 12 -e cycles \ ./udpgso_bench_tx -C 12 -4 -D "$DST" -l 4 Note the reduction in calls/s with GSO. Bytes per syscall drops increases from 1470 to 61818. Signed-off-by: Willem de Bruijn <willemb@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/udp.c')
-rw-r--r--net/ipv4/udp.c33
1 files changed, 30 insertions, 3 deletions
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6b9d8017b319..bda022c5480b 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -757,7 +757,8 @@ void udp_set_csum(bool nocheck, struct sk_buff *skb,
}
EXPORT_SYMBOL(udp_set_csum);
-static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
+static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4,
+ struct inet_cork *cork)
{
struct sock *sk = skb->sk;
struct inet_sock *inet = inet_sk(sk);
@@ -777,6 +778,21 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
uh->len = htons(len);
uh->check = 0;
+ if (cork->gso_size) {
+ const int hlen = skb_network_header_len(skb) +
+ sizeof(struct udphdr);
+
+ if (hlen + cork->gso_size > cork->fragsize)
+ return -EINVAL;
+ if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS)
+ return -EINVAL;
+ if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite)
+ return -EIO;
+
+ skb_shinfo(skb)->gso_size = cork->gso_size;
+ skb_shinfo(skb)->gso_type = SKB_GSO_UDP_L4;
+ }
+
if (is_udplite) /* UDP-Lite */
csum = udplite_csum(skb);
@@ -828,7 +844,7 @@ int udp_push_pending_frames(struct sock *sk)
if (!skb)
goto out;
- err = udp_send_skb(skb, fl4);
+ err = udp_send_skb(skb, fl4, &inet->cork.base);
out:
up->len = 0;
@@ -922,6 +938,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
ipc.sockc.tsflags = sk->sk_tsflags;
ipc.addr = inet->inet_saddr;
ipc.oif = sk->sk_bound_dev_if;
+ ipc.gso_size = up->gso_size;
if (msg->msg_controllen) {
err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6);
@@ -1037,7 +1054,7 @@ back_from_confirm:
&cork, msg->msg_flags);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
- err = udp_send_skb(skb, fl4);
+ err = udp_send_skb(skb, fl4, &cork);
goto out;
}
@@ -2367,6 +2384,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
up->no_check6_rx = valbool;
break;
+ case UDP_SEGMENT:
+ if (val < 0 || val > USHRT_MAX)
+ return -EINVAL;
+ up->gso_size = val;
+ break;
+
/*
* UDP-Lite's partial checksum coverage (RFC 3828).
*/
@@ -2457,6 +2480,10 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
val = up->no_check6_rx;
break;
+ case UDP_SEGMENT:
+ val = up->gso_size;
+ break;
+
/* The following two cannot be changed on UDP sockets, the return is
* always 0 (which corresponds to the full checksum coverage of UDP). */
case UDPLITE_SEND_CSCOV: