diff options
author | Daniel Borkmann <daniel@iogearbox.net> | 2019-07-03 16:52:03 +0200 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2019-07-03 16:52:03 +0200 |
commit | e5a3e259ef239f443951d401db10db7d426c9497 (patch) | |
tree | 6ef3c235c14a2ed5352d166637965af44cd8e103 /samples | |
parent | selftests: bpf: standardize to static __always_inline (diff) | |
parent | samples/bpf: fix tcp_bpf.readme detach command (diff) | |
download | linux-e5a3e259ef239f443951d401db10db7d426c9497.tar.xz linux-e5a3e259ef239f443951d401db10db7d426c9497.zip |
Merge branch 'bpf-tcp-rtt-hook'
Stanislav Fomichev says:
====================
The congestion control team would like to have a periodic callback to
track some TCP statistics. Let's add a sock_ops callback that can be
selectively enabled on a socket-by-socket basis and is executed for
every RTT. The frequency of the BPF program can be further controlled by
calling bpf_ktime_get_ns() and bailing out early.
I ran the neper tcp_stream and tcp_rr tests with the sample program
from the last patch and didn't observe any noticeable performance
difference.
v2:
* add a comment about second accept() in selftest (Yonghong Song)
* refer to tcp_bpf.readme in sample program (Yonghong Song)
====================
Suggested-by: Eric Dumazet <edumazet@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Priyaranjan Jha <priyarjha@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Lawrence Brakmo <brakmo@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'samples')
-rw-r--r-- | samples/bpf/Makefile | 1 | ||||
-rw-r--r-- | samples/bpf/tcp_bpf.readme | 2 | ||||
-rw-r--r-- | samples/bpf/tcp_dumpstats_kern.c | 68 |
3 files changed, 70 insertions, 1 deletions
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 35640414ebb3..f90daadfbc89 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -154,6 +154,7 @@ always += tcp_iw_kern.o
 always += tcp_clamp_kern.o
 always += tcp_basertt_kern.o
 always += tcp_tos_reflect_kern.o
+always += tcp_dumpstats_kern.o
 always += xdp_redirect_kern.o
 always += xdp_redirect_map_kern.o
 always += xdp_redirect_cpu_kern.o
diff --git a/samples/bpf/tcp_bpf.readme b/samples/bpf/tcp_bpf.readme
index fee746621aec..78e247f62108 100644
--- a/samples/bpf/tcp_bpf.readme
+++ b/samples/bpf/tcp_bpf.readme
@@ -25,4 +25,4 @@ attached to the cgroupv2).
 
 To remove (unattach) a socket_ops BPF program from a cgroupv2:
 
- bpftool cgroup attach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog
+ bpftool cgroup detach /tmp/cgroupv2/foo sock_ops pinned /sys/fs/bpf/tcp_prog
diff --git a/samples/bpf/tcp_dumpstats_kern.c b/samples/bpf/tcp_dumpstats_kern.c
new file mode 100644
index 000000000000..8557913106a0
--- /dev/null
+++ b/samples/bpf/tcp_dumpstats_kern.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Refer to samples/bpf/tcp_bpf.readme for the instructions on
+ * how to run this sample program.
+ */
+#include <linux/bpf.h>
+
+#include "bpf_helpers.h"
+#include "bpf_endian.h"
+
+#define INTERVAL 1000000000ULL
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__u32 type;
+	__u32 map_flags;
+	int *key;
+	__u64 *value;
+} bpf_next_dump SEC(".maps") = {
+	.type = BPF_MAP_TYPE_SK_STORAGE,
+	.map_flags = BPF_F_NO_PREALLOC,
+};
+
+SEC("sockops")
+int _sockops(struct bpf_sock_ops *ctx)
+{
+	struct bpf_tcp_sock *tcp_sk;
+	struct bpf_sock *sk;
+	__u64 *next_dump;
+	__u64 now;
+
+	switch (ctx->op) {
+	case BPF_SOCK_OPS_TCP_CONNECT_CB:
+		bpf_sock_ops_cb_flags_set(ctx, BPF_SOCK_OPS_RTT_CB_FLAG);
+		return 1;
+	case BPF_SOCK_OPS_RTT_CB:
+		break;
+	default:
+		return 1;
+	}
+
+	sk = ctx->sk;
+	if (!sk)
+		return 1;
+
+	next_dump = bpf_sk_storage_get(&bpf_next_dump, sk, 0,
+				       BPF_SK_STORAGE_GET_F_CREATE);
+	if (!next_dump)
+		return 1;
+
+	now = bpf_ktime_get_ns();
+	if (now < *next_dump)
+		return 1;
+
+	tcp_sk = bpf_tcp_sock(sk);
+	if (!tcp_sk)
+		return 1;
+
+	*next_dump = now + INTERVAL;
+
+	bpf_printk("dsack_dups=%u delivered=%u\n",
+		   tcp_sk->dsack_dups, tcp_sk->delivered);
+	bpf_printk("delivered_ce=%u icsk_retransmits=%u\n",
+		   tcp_sk->delivered_ce, tcp_sk->icsk_retransmits);
+
+	return 1;
+}