summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/arm64/include/asm/cputype.h10
-rw-r--r--tools/arch/powerpc/include/uapi/asm/kvm.h3
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h803
-rw-r--r--tools/arch/x86/include/asm/msr-index.h11
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h49
-rw-r--r--tools/arch/x86/include/uapi/asm/svm.h1
-rw-r--r--tools/build/feature/Makefile53
-rw-r--r--tools/crypto/ccp/dbc.c1
-rw-r--r--tools/hv/Makefile2
-rwxr-xr-x[-rw-r--r--]tools/hv/lsvmbus2
-rw-r--r--tools/include/uapi/README73
-rw-r--r--tools/include/uapi/asm-generic/unistd.h2
-rw-r--r--tools/include/uapi/drm/i915_drm.h27
-rw-r--r--tools/include/uapi/linux/bpf.h3
-rw-r--r--tools/include/uapi/linux/in.h2
-rw-r--r--tools/include/uapi/linux/kvm.h17
-rw-r--r--tools/include/uapi/linux/netdev.h13
-rw-r--r--tools/include/uapi/linux/perf_event.h6
-rw-r--r--tools/include/uapi/linux/stat.h12
-rw-r--r--tools/net/ynl/lib/.gitignore1
-rw-r--r--tools/net/ynl/lib/ynl.c4
-rw-r--r--tools/net/ynl/lib/ynl.py7
-rw-r--r--tools/net/ynl/samples/netdev.c6
-rwxr-xr-xtools/net/ynl/ynl-gen-c.py6
-rw-r--r--tools/perf/Documentation/Build.txt28
-rw-r--r--tools/perf/Makefile.config20
-rw-r--r--tools/perf/Makefile.perf27
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl6
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl2
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl8
-rw-r--r--tools/perf/builtin-daemon.c17
-rw-r--r--tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json2
-rw-r--r--tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json2
-rw-r--r--tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json2
-rw-r--r--tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json2
-rw-r--r--tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json2
-rw-r--r--tools/perf/tests/pmu.c4
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h5
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/fs.h163
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/mount.h10
-rw-r--r--tools/perf/trace/beauty/include/uapi/linux/stat.h12
-rw-r--r--tools/perf/trace/beauty/include/uapi/sound/asound.h9
-rw-r--r--tools/perf/util/bpf_lock_contention.c3
-rw-r--r--tools/perf/util/callchain.c2
-rw-r--r--tools/perf/util/python.c1
-rw-r--r--tools/power/cpupower/bindings/python/.gitignore8
-rw-r--r--tools/power/cpupower/bindings/python/Makefile33
-rw-r--r--tools/power/cpupower/bindings/python/README59
-rw-r--r--tools/power/cpupower/bindings/python/raw_pylibcpupower.i247
-rwxr-xr-xtools/power/cpupower/bindings/python/test_raw_pylibcpupower.py42
-rw-r--r--tools/power/cpupower/lib/cpuidle.c8
-rw-r--r--tools/power/cpupower/lib/cpuidle.h2
-rw-r--r--tools/power/cpupower/lib/powercap.c8
-rw-r--r--tools/power/cpupower/utils/cpuidle-info.c4
-rw-r--r--tools/power/pm-graph/.gitignore3
-rw-r--r--tools/power/pm-graph/Makefile111
-rw-r--r--tools/spi/spidev_fdx.c2
-rw-r--r--tools/testing/cxl/Kbuild1
-rw-r--r--tools/testing/cxl/test/mock.c12
-rw-r--r--tools/testing/selftests/Makefile4
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c14
-rw-r--r--tools/testing/selftests/arm64/abi/ptrace.c4
-rw-r--r--tools/testing/selftests/arm64/signal/.gitignore1
-rw-r--r--tools/testing/selftests/arm64/signal/Makefile2
-rw-r--r--tools/testing/selftests/arm64/signal/sve_helpers.c56
-rw-r--r--tools/testing/selftests/arm64/signal/sve_helpers.h21
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c46
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c30
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c86
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/ssve_regs.c36
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c36
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sve_regs.c32
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.c27
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.h28
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/za_no_regs.c32
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/za_regs.c36
-rw-r--r--tools/testing/selftests/bpf/Makefile2
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h6
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/setget_sockopt.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h149
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c117
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c4
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c25
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c23
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c54
-rw-r--r--tools/testing/selftests/bpf/progs/setget_sockopt.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c11
-rw-r--r--tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c24
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c43
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h1
-rw-r--r--tools/testing/selftests/core/close_range_test.c74
-rw-r--r--tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c4
-rw-r--r--tools/testing/selftests/drivers/net/Makefile5
-rw-r--r--tools/testing/selftests/drivers/net/config4
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/pp_alloc_fail.py3
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_ctx.py117
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py5
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh3
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_basic.sh234
-rwxr-xr-xtools/testing/selftests/drivers/net/stats.py33
-rw-r--r--tools/testing/selftests/hid/hid_bpf.c26
-rw-r--r--tools/testing/selftests/hid/progs/hid.c2
-rw-r--r--tools/testing/selftests/hid/progs/hid_bpf_helpers.h2
-rw-r--r--tools/testing/selftests/iommu/iommufd.c6
-rw-r--r--tools/testing/selftests/kselftest/ksft.py2
-rw-r--r--tools/testing/selftests/kselftest/runner.sh7
-rw-r--r--tools/testing/selftests/kvm/Makefile2
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c1062
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c18
-rw-r--r--tools/testing/selftests/kvm/aarch64/no-vgic-v3.c175
-rw-r--r--tools/testing/selftests/kvm/aarch64/set_id_regs.c1
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_irq.c11
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/arch_timer.h18
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h3
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c6
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c8
-rw-r--r--tools/testing/selftests/kvm/x86_64/xapic_state_test.c28
-rwxr-xr-xtools/testing/selftests/livepatch/test-livepatch.sh7
-rw-r--r--tools/testing/selftests/lsm/lsm_list_modules_test.c3
-rw-r--r--tools/testing/selftests/mm/Makefile6
-rw-r--r--tools/testing/selftests/mm/compaction_test.c5
-rw-r--r--tools/testing/selftests/mm/mremap_test.c2
-rw-r--r--tools/testing/selftests/mm/mseal_test.c37
-rw-r--r--tools/testing/selftests/mm/pkey-arm64.h139
-rw-r--r--tools/testing/selftests/mm/pkey-helpers.h8
-rw-r--r--tools/testing/selftests/mm/pkey-powerpc.h3
-rw-r--r--tools/testing/selftests/mm/pkey-x86.h4
-rw-r--r--tools/testing/selftests/mm/protection_keys.c109
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh3
-rw-r--r--tools/testing/selftests/mm/seal_elf.c13
-rw-r--r--tools/testing/selftests/net/.gitignore2
-rw-r--r--tools/testing/selftests/net/Makefile15
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c25
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh9
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh55
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh304
-rw-r--r--tools/testing/selftests/net/forwarding/README2
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh54
-rwxr-xr-xtools/testing/selftests/net/forwarding/custom_multipath_hash.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh8
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh64
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh435
-rwxr-xr-xtools/testing/selftests/net/forwarding/no_forwarding.sh3
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh40
-rw-r--r--tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh13
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh58
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh46
-rw-r--r--tools/testing/selftests/net/lib.sh16
-rw-r--r--tools/testing/selftests/net/lib/csum.c16
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py60
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile2
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh2
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c8
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh17
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh697
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh21
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh1
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh2
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c10
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh1
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh1
-rw-r--r--tools/testing/selftests/net/ncdevmem.c570
-rwxr-xr-xtools/testing/selftests/net/netdevice.sh60
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile1
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter_queue.sh78
-rw-r--r--tools/testing/selftests/net/netfilter/config2
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh129
-rw-r--r--tools/testing/selftests/net/packetdrill/Makefile10
-rw-r--r--tools/testing/selftests/net/packetdrill/config11
-rwxr-xr-xtools/testing/selftests/net/packetdrill/defaults.sh63
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh41
-rwxr-xr-xtools/testing/selftests/net/packetdrill/set_sysctls.py38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt51
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt51
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt28
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt56
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt33
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt34
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt42
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt35
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt39
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt36
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt63
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt55
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt41
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt44
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt61
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt63
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt56
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt44
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt118
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt57
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh10
-rw-r--r--tools/testing/selftests/net/psock_fanout.c6
-rw-r--r--tools/testing/selftests/net/rds/Makefile12
-rw-r--r--tools/testing/selftests/net/rds/README.txt41
-rwxr-xr-xtools/testing/selftests/net/rds/config.sh53
-rwxr-xr-xtools/testing/selftests/net/rds/run.sh224
-rw-r--r--tools/testing/selftests/net/rds/test.py262
-rw-r--r--tools/testing/selftests/net/rxtimestamp.c18
-rw-r--r--tools/testing/selftests/net/sk_so_peek_off.c202
-rw-r--r--tools/testing/selftests/net/tcp_ao/Makefile3
-rw-r--r--tools/testing/selftests/net/tcp_ao/bench-lookups.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/config1
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect-deny.c25
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect.c6
-rw-r--r--tools/testing/selftests/net/tcp_ao/icmps-discard.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/key-management.c18
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/aolib.h180
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c559
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/ftrace.c543
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/kconfig.c31
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/setup.c17
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/sock.c1
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/utils.c26
-rw-r--r--tools/testing/selftests/net/tcp_ao/restore.c30
-rw-r--r--tools/testing/selftests/net/tcp_ao/rst.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/self-connect.c19
-rw-r--r--tools/testing/selftests/net/tcp_ao/seq-ext.c28
-rw-r--r--tools/testing/selftests/net/tcp_ao/setsockopt-closed.c6
-rw-r--r--tools/testing/selftests/net/tcp_ao/unsigned-md5.c35
-rw-r--r--tools/testing/selftests/net/txtimestamp.c6
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh53
-rw-r--r--tools/testing/selftests/net/udpgso.c25
-rwxr-xr-xtools/testing/selftests/net/unicast_extensions.sh9
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh3
-rwxr-xr-xtools/testing/selftests/net/xfrm_policy_add_speed.sh83
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_bottomup.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_default.c2
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_test.h67
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c2
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py1
-rw-r--r--tools/testing/selftests/timers/posix_timers.c550
-rw-r--r--tools/testing/selftests/x86/Makefile2
-rw-r--r--tools/testing/selftests/x86/srso.c70
-rw-r--r--tools/testing/vsock/util.c6
-rw-r--r--tools/testing/vsock/util.h3
-rw-r--r--tools/testing/vsock/vsock_test.c85
-rw-r--r--tools/tracing/rtla/src/osnoise_top.c11
251 files changed, 11145 insertions, 1725 deletions
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h
index 7b32b99023a2..5fd7caea4419 100644
--- a/tools/arch/arm64/include/asm/cputype.h
+++ b/tools/arch/arm64/include/asm/cputype.h
@@ -86,9 +86,14 @@
#define ARM_CPU_PART_CORTEX_X2 0xD48
#define ARM_CPU_PART_NEOVERSE_N2 0xD49
#define ARM_CPU_PART_CORTEX_A78C 0xD4B
+#define ARM_CPU_PART_CORTEX_X1C 0xD4C
+#define ARM_CPU_PART_CORTEX_X3 0xD4E
#define ARM_CPU_PART_NEOVERSE_V2 0xD4F
+#define ARM_CPU_PART_CORTEX_A720 0xD81
#define ARM_CPU_PART_CORTEX_X4 0xD82
#define ARM_CPU_PART_NEOVERSE_V3 0xD84
+#define ARM_CPU_PART_CORTEX_X925 0xD85
+#define ARM_CPU_PART_CORTEX_A725 0xD87
#define APM_CPU_PART_XGENE 0x000
#define APM_CPU_VAR_POTENZA 0x00
@@ -162,9 +167,14 @@
#define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2)
#define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2)
#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C)
+#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C)
+#define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3)
#define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2)
+#define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720)
#define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4)
#define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3)
+#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925)
+#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725)
#define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX)
#define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX)
#define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX)
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 1691297a766a..eaeda001784e 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -645,6 +645,9 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)
+#define KVM_REG_PPC_DEXCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc6)
+#define KVM_REG_PPC_HASHKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc7)
+#define KVM_REG_PPC_HASHPKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc8)
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 3c7434329661..dd4682857c12 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -18,170 +18,170 @@
/*
* Note: If the comment begins with a quoted string, that string is used
- * in /proc/cpuinfo instead of the macro name. If the string is "",
- * this feature bit is not displayed in /proc/cpuinfo at all.
+ * in /proc/cpuinfo instead of the macro name. Otherwise, this feature
+ * bit is not displayed in /proc/cpuinfo at all.
*
* When adding new features here that depend on other features,
* please update the table in kernel/cpu/cpuid-deps.c as well.
*/
/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
-#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */
-#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */
-#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */
-#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */
-#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */
-#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */
-#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */
-#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */
-#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */
-#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */
-#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */
-#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */
-#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */
-#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
-#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */
-#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */
-#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */
-#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */
+#define X86_FEATURE_FPU ( 0*32+ 0) /* "fpu" Onboard FPU */
+#define X86_FEATURE_VME ( 0*32+ 1) /* "vme" Virtual Mode Extensions */
+#define X86_FEATURE_DE ( 0*32+ 2) /* "de" Debugging Extensions */
+#define X86_FEATURE_PSE ( 0*32+ 3) /* "pse" Page Size Extensions */
+#define X86_FEATURE_TSC ( 0*32+ 4) /* "tsc" Time Stamp Counter */
+#define X86_FEATURE_MSR ( 0*32+ 5) /* "msr" Model-Specific Registers */
+#define X86_FEATURE_PAE ( 0*32+ 6) /* "pae" Physical Address Extensions */
+#define X86_FEATURE_MCE ( 0*32+ 7) /* "mce" Machine Check Exception */
+#define X86_FEATURE_CX8 ( 0*32+ 8) /* "cx8" CMPXCHG8 instruction */
+#define X86_FEATURE_APIC ( 0*32+ 9) /* "apic" Onboard APIC */
+#define X86_FEATURE_SEP ( 0*32+11) /* "sep" SYSENTER/SYSEXIT */
+#define X86_FEATURE_MTRR ( 0*32+12) /* "mtrr" Memory Type Range Registers */
+#define X86_FEATURE_PGE ( 0*32+13) /* "pge" Page Global Enable */
+#define X86_FEATURE_MCA ( 0*32+14) /* "mca" Machine Check Architecture */
+#define X86_FEATURE_CMOV ( 0*32+15) /* "cmov" CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_PAT ( 0*32+16) /* "pat" Page Attribute Table */
+#define X86_FEATURE_PSE36 ( 0*32+17) /* "pse36" 36-bit PSEs */
+#define X86_FEATURE_PN ( 0*32+18) /* "pn" Processor serial number */
+#define X86_FEATURE_CLFLUSH ( 0*32+19) /* "clflush" CLFLUSH instruction */
#define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */
-#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */
-#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */
-#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
+#define X86_FEATURE_ACPI ( 0*32+22) /* "acpi" ACPI via MSR */
+#define X86_FEATURE_MMX ( 0*32+23) /* "mmx" Multimedia Extensions */
+#define X86_FEATURE_FXSR ( 0*32+24) /* "fxsr" FXSAVE/FXRSTOR, CR4.OSFXSR */
#define X86_FEATURE_XMM ( 0*32+25) /* "sse" */
#define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */
#define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */
-#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */
+#define X86_FEATURE_HT ( 0*32+28) /* "ht" Hyper-Threading */
#define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */
-#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */
-#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */
+#define X86_FEATURE_IA64 ( 0*32+30) /* "ia64" IA-64 processor */
+#define X86_FEATURE_PBE ( 0*32+31) /* "pbe" Pending Break Enable */
/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
/* Don't duplicate feature flags which are redundant with Intel! */
-#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */
-#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */
-#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */
-#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
+#define X86_FEATURE_SYSCALL ( 1*32+11) /* "syscall" SYSCALL/SYSRET */
+#define X86_FEATURE_MP ( 1*32+19) /* "mp" MP Capable */
+#define X86_FEATURE_NX ( 1*32+20) /* "nx" Execute Disable */
+#define X86_FEATURE_MMXEXT ( 1*32+22) /* "mmxext" AMD MMX extensions */
+#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* "fxsr_opt" FXSAVE/FXRSTOR optimizations */
#define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */
-#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
-#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */
-#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */
+#define X86_FEATURE_RDTSCP ( 1*32+27) /* "rdtscp" RDTSCP */
+#define X86_FEATURE_LM ( 1*32+29) /* "lm" Long Mode (x86-64, 64-bit support) */
+#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* "3dnowext" AMD 3DNow extensions */
+#define X86_FEATURE_3DNOW ( 1*32+31) /* "3dnow" 3DNow */
/* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
-#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */
-#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */
-#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */
+#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* "recovery" CPU in recovery mode */
+#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* "longrun" Longrun power control */
+#define X86_FEATURE_LRTI ( 2*32+ 3) /* "lrti" LongRun table interface */
/* Other features, Linux-defined mapping, word 3 */
/* This range is used for feature bits which conflict or are synthesized */
-#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */
-#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
-#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
-#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */
-#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */
-#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */
-#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */
-#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */
-#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */
-#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */
-#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
-#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
-#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
-#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
-#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */
-#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
-#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
-#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */
-#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */
-#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */
-#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */
-#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
-#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
-#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */
-#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
-#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
+#define X86_FEATURE_CXMMX ( 3*32+ 0) /* "cxmmx" Cyrix MMX extensions */
+#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* "k6_mtrr" AMD K6 nonstandard MTRRs */
+#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* "cyrix_arr" Cyrix ARRs (= MTRRs) */
+#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */
+#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */
+#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */
+#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */
+#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */
+#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */
+#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */
+#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */
+#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* "arch_perfmon" Intel Architectural PerfMon */
+#define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */
+#define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */
+#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */
+#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */
+#define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */
+#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */
+#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */
+#define X86_FEATURE_ACC_POWER ( 3*32+19) /* "acc_power" AMD Accumulated Power Mechanism */
+#define X86_FEATURE_NOPL ( 3*32+20) /* "nopl" The NOPL (0F 1F) instructions */
+#define X86_FEATURE_ALWAYS ( 3*32+21) /* Always-present feature */
+#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* "xtopology" CPU topology enum extensions */
+#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* "tsc_reliable" TSC is known to be reliable */
+#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* "nonstop_tsc" TSC does not stop in C states */
+#define X86_FEATURE_CPUID ( 3*32+25) /* "cpuid" CPU has CPUID instruction itself */
+#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* "extd_apicid" Extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM ( 3*32+27) /* "amd_dcm" AMD multi-node processor */
+#define X86_FEATURE_APERFMPERF ( 3*32+28) /* "aperfmperf" P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+#define X86_FEATURE_RAPL ( 3*32+29) /* "rapl" AMD/Hygon RAPL interface */
+#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* "nonstop_tsc_s3" TSC doesn't stop in S3 state */
+#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* "tsc_known_freq" TSC has known frequency */
/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
#define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */
-#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */
-#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */
+#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* "pclmulqdq" PCLMULQDQ instruction */
+#define X86_FEATURE_DTES64 ( 4*32+ 2) /* "dtes64" 64-bit Debug Store */
#define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
#define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
-#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */
-#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */
-#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */
-#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */
-#define X86_FEATURE_CID ( 4*32+10) /* Context ID */
-#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */
-#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */
-#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */
-#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */
-#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */
+#define X86_FEATURE_VMX ( 4*32+ 5) /* "vmx" Hardware virtualization */
+#define X86_FEATURE_SMX ( 4*32+ 6) /* "smx" Safer Mode eXtensions */
+#define X86_FEATURE_EST ( 4*32+ 7) /* "est" Enhanced SpeedStep */
+#define X86_FEATURE_TM2 ( 4*32+ 8) /* "tm2" Thermal Monitor 2 */
+#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* "ssse3" Supplemental SSE-3 */
+#define X86_FEATURE_CID ( 4*32+10) /* "cid" Context ID */
+#define X86_FEATURE_SDBG ( 4*32+11) /* "sdbg" Silicon Debug */
+#define X86_FEATURE_FMA ( 4*32+12) /* "fma" Fused multiply-add */
+#define X86_FEATURE_CX16 ( 4*32+13) /* "cx16" CMPXCHG16B instruction */
+#define X86_FEATURE_XTPR ( 4*32+14) /* "xtpr" Send Task Priority Messages */
+#define X86_FEATURE_PDCM ( 4*32+15) /* "pdcm" Perf/Debug Capabilities MSR */
+#define X86_FEATURE_PCID ( 4*32+17) /* "pcid" Process Context Identifiers */
+#define X86_FEATURE_DCA ( 4*32+18) /* "dca" Direct Cache Access */
#define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */
#define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */
-#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */
-#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */
-#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
-#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */
-#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */
-#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */
-#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */
+#define X86_FEATURE_X2APIC ( 4*32+21) /* "x2apic" X2APIC */
+#define X86_FEATURE_MOVBE ( 4*32+22) /* "movbe" MOVBE instruction */
+#define X86_FEATURE_POPCNT ( 4*32+23) /* "popcnt" POPCNT instruction */
+#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* "tsc_deadline_timer" TSC deadline timer */
+#define X86_FEATURE_AES ( 4*32+25) /* "aes" AES instructions */
+#define X86_FEATURE_XSAVE ( 4*32+26) /* "xsave" XSAVE/XRSTOR/XSETBV/XGETBV instructions */
+#define X86_FEATURE_OSXSAVE ( 4*32+27) /* XSAVE instruction enabled in the OS */
+#define X86_FEATURE_AVX ( 4*32+28) /* "avx" Advanced Vector Extensions */
+#define X86_FEATURE_F16C ( 4*32+29) /* "f16c" 16-bit FP conversions */
+#define X86_FEATURE_RDRAND ( 4*32+30) /* "rdrand" RDRAND instruction */
+#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* "hypervisor" Running on a hypervisor */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
#define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */
#define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */
#define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
#define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */
-#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */
-#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */
-#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */
-#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */
-#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */
-#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */
+#define X86_FEATURE_ACE2 ( 5*32+ 8) /* "ace2" Advanced Cryptography Engine v2 */
+#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* "ace2_en" ACE v2 enabled */
+#define X86_FEATURE_PHE ( 5*32+10) /* "phe" PadLock Hash Engine */
+#define X86_FEATURE_PHE_EN ( 5*32+11) /* "phe_en" PHE enabled */
+#define X86_FEATURE_PMM ( 5*32+12) /* "pmm" PadLock Montgomery Multiplier */
+#define X86_FEATURE_PMM_EN ( 5*32+13) /* "pmm_en" PMM enabled */
/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
-#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */
-#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */
-#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */
-#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */
-#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */
-#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */
-#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */
-#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */
-#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */
-#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */
-#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */
-#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */
-#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */
-#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */
-#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */
-#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */
-#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */
-#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */
-#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */
-#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */
-#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */
+#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* "lahf_lm" LAHF/SAHF in long mode */
+#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* "cmp_legacy" If yes HyperThreading not valid */
+#define X86_FEATURE_SVM ( 6*32+ 2) /* "svm" Secure Virtual Machine */
+#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* "extapic" Extended APIC space */
+#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* "cr8_legacy" CR8 in 32-bit mode */
+#define X86_FEATURE_ABM ( 6*32+ 5) /* "abm" Advanced bit manipulation */
+#define X86_FEATURE_SSE4A ( 6*32+ 6) /* "sse4a" SSE-4A */
+#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* "misalignsse" Misaligned SSE mode */
+#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* "3dnowprefetch" 3DNow prefetch instructions */
+#define X86_FEATURE_OSVW ( 6*32+ 9) /* "osvw" OS Visible Workaround */
+#define X86_FEATURE_IBS ( 6*32+10) /* "ibs" Instruction Based Sampling */
+#define X86_FEATURE_XOP ( 6*32+11) /* "xop" Extended AVX instructions */
+#define X86_FEATURE_SKINIT ( 6*32+12) /* "skinit" SKINIT/STGI instructions */
+#define X86_FEATURE_WDT ( 6*32+13) /* "wdt" Watchdog timer */
+#define X86_FEATURE_LWP ( 6*32+15) /* "lwp" Light Weight Profiling */
+#define X86_FEATURE_FMA4 ( 6*32+16) /* "fma4" 4 operands MAC instructions */
+#define X86_FEATURE_TCE ( 6*32+17) /* "tce" Translation Cache Extension */
+#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* "nodeid_msr" NodeId MSR */
+#define X86_FEATURE_TBM ( 6*32+21) /* "tbm" Trailing Bit Manipulations */
+#define X86_FEATURE_TOPOEXT ( 6*32+22) /* "topoext" Topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* "perfctr_core" Core performance counter extensions */
+#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* "perfctr_nb" NB performance counter extensions */
+#define X86_FEATURE_BPEXT ( 6*32+26) /* "bpext" Data breakpoint extension */
+#define X86_FEATURE_PTSC ( 6*32+27) /* "ptsc" Performance time-stamp counter */
+#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* "perfctr_llc" Last Level Cache performance counter extensions */
+#define X86_FEATURE_MWAITX ( 6*32+29) /* "mwaitx" MWAIT extension (MONITORX/MWAITX instructions) */
/*
* Auxiliary flags: Linux defined - For features scattered in various
@@ -189,93 +189,93 @@
*
* Reuse free bits when adding new feature flags!
*/
-#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
-#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
-#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
-#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
-#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
-#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */
-#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */
-#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */
-#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
-#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
-#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
-#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
-#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
-#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
-#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
-#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
-#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
-#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
-#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
-#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */
-#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
-#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
-#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
-#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */
-#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */
-#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
-#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
-#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
+#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* "ring3mwait" Ring 3 MONITOR/MWAIT instructions */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* "cpuid_fault" Intel CPUID faulting */
+#define X86_FEATURE_CPB ( 7*32+ 2) /* "cpb" AMD Core Performance Boost */
+#define X86_FEATURE_EPB ( 7*32+ 3) /* "epb" IA32_ENERGY_PERF_BIAS support */
+#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* "cat_l3" Cache Allocation Technology L3 */
+#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* "cat_l2" Cache Allocation Technology L2 */
+#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* "cdp_l3" Code and Data Prioritization L3 */
+#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* "tdx_host_platform" Platform supports being a TDX host */
+#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* "hw_pstate" AMD HW-PState */
+#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* "proc_feedback" AMD ProcFeedbackInterface */
+#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* Use compacted XSTATE (XSAVES or XSAVEC) */
+#define X86_FEATURE_PTI ( 7*32+11) /* "pti" Kernel Page Table Isolation enabled */
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* Set/clear IBRS on kernel entry/exit */
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* Fill RSB on VM-Exit */
+#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* "intel_ppin" Intel Processor Inventory Number */
+#define X86_FEATURE_CDP_L2 ( 7*32+15) /* "cdp_l2" Code and Data Prioritization L2 */
+#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD ( 7*32+17) /* "ssbd" Speculative Store Bypass Disable */
+#define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */
+#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */
+#define X86_FEATURE_USE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled */
+#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */
+#define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier */
+#define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */
+#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */
+#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* "ibrs_enhanced" Enhanced IBRS */
+#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* MSR IA32_FEAT_CTL configured */
/* Virtualization flags: Linux defined, word 8 */
-#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
-#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */
-#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */
-#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */
+#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* "tpr_shadow" Intel TPR Shadow */
+#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */
+#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */
+#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */
-#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */
-#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */
-#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
-#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
-#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
-#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */
-#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */
-#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */
+#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */
+#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */
+#define X86_FEATURE_EPT_AD ( 8*32+17) /* "ept_ad" Intel Extended Page Table access-dirty bit */
+#define X86_FEATURE_VMCALL ( 8*32+18) /* Hypervisor supports the VMCALL instruction */
+#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* VMware prefers VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* PV vcpu_is_preempted function */
+#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* "tdx_guest" Intel Trust Domain Extensions Guest */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
-#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
-#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */
-#define X86_FEATURE_SGX ( 9*32+ 2) /* Software Guard Extensions */
-#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */
-#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */
-#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */
-#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */
-#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */
-#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
-#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
-#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
-#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
-#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */
-#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
-#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */
-#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
-#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
-#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */
-#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */
-#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */
-#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
-#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */
-#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */
-#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */
-#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */
-#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */
-#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */
-#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */
-#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
-#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */
+#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* "fsgsbase" RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
+#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* "tsc_adjust" TSC adjustment MSR 0x3B */
+#define X86_FEATURE_SGX ( 9*32+ 2) /* "sgx" Software Guard Extensions */
+#define X86_FEATURE_BMI1 ( 9*32+ 3) /* "bmi1" 1st group bit manipulation extensions */
+#define X86_FEATURE_HLE ( 9*32+ 4) /* "hle" Hardware Lock Elision */
+#define X86_FEATURE_AVX2 ( 9*32+ 5) /* "avx2" AVX2 instructions */
+#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* FPU data pointer updated only on x87 exceptions */
+#define X86_FEATURE_SMEP ( 9*32+ 7) /* "smep" Supervisor Mode Execution Protection */
+#define X86_FEATURE_BMI2 ( 9*32+ 8) /* "bmi2" 2nd group bit manipulation extensions */
+#define X86_FEATURE_ERMS ( 9*32+ 9) /* "erms" Enhanced REP MOVSB/STOSB instructions */
+#define X86_FEATURE_INVPCID ( 9*32+10) /* "invpcid" Invalidate Processor Context ID */
+#define X86_FEATURE_RTM ( 9*32+11) /* "rtm" Restricted Transactional Memory */
+#define X86_FEATURE_CQM ( 9*32+12) /* "cqm" Cache QoS Monitoring */
+#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* Zero out FPU CS and FPU DS */
+#define X86_FEATURE_MPX ( 9*32+14) /* "mpx" Memory Protection Extension */
+#define X86_FEATURE_RDT_A ( 9*32+15) /* "rdt_a" Resource Director Technology Allocation */
+#define X86_FEATURE_AVX512F ( 9*32+16) /* "avx512f" AVX-512 Foundation */
+#define X86_FEATURE_AVX512DQ ( 9*32+17) /* "avx512dq" AVX-512 DQ (Double/Quad granular) Instructions */
+#define X86_FEATURE_RDSEED ( 9*32+18) /* "rdseed" RDSEED instruction */
+#define X86_FEATURE_ADX ( 9*32+19) /* "adx" ADCX and ADOX instructions */
+#define X86_FEATURE_SMAP ( 9*32+20) /* "smap" Supervisor Mode Access Prevention */
+#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* "avx512ifma" AVX-512 Integer Fused Multiply-Add instructions */
+#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* "clflushopt" CLFLUSHOPT instruction */
+#define X86_FEATURE_CLWB ( 9*32+24) /* "clwb" CLWB instruction */
+#define X86_FEATURE_INTEL_PT ( 9*32+25) /* "intel_pt" Intel Processor Trace */
+#define X86_FEATURE_AVX512PF ( 9*32+26) /* "avx512pf" AVX-512 Prefetch */
+#define X86_FEATURE_AVX512ER ( 9*32+27) /* "avx512er" AVX-512 Exponential and Reciprocal */
+#define X86_FEATURE_AVX512CD ( 9*32+28) /* "avx512cd" AVX-512 Conflict Detection */
+#define X86_FEATURE_SHA_NI ( 9*32+29) /* "sha_ni" SHA1/SHA256 Instruction Extensions */
+#define X86_FEATURE_AVX512BW ( 9*32+30) /* "avx512bw" AVX-512 BW (Byte/Word granular) Instructions */
+#define X86_FEATURE_AVX512VL ( 9*32+31) /* "avx512vl" AVX-512 VL (128/256 Vector Length) Extensions */
/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
-#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */
-#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */
-#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */
-#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */
-#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */
+#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* "xsaveopt" XSAVEOPT instruction */
+#define X86_FEATURE_XSAVEC (10*32+ 1) /* "xsavec" XSAVEC instruction */
+#define X86_FEATURE_XGETBV1 (10*32+ 2) /* "xgetbv1" XGETBV with ECX = 1 instruction */
+#define X86_FEATURE_XSAVES (10*32+ 3) /* "xsaves" XSAVES/XRSTORS instructions */
+#define X86_FEATURE_XFD (10*32+ 4) /* eXtended Feature Disabling */
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
@@ -283,181 +283,183 @@
*
* Reuse free bits when adding new feature flags!
*/
-#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */
-#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */
-#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */
-#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
-#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
-#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
-#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
-#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
-#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
-#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
-#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
-#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
-#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
-#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
-#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
-#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
-#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
-#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */
-#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */
-#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */
-#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */
-#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */
-#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */
-#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */
-#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */
-#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */
-#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
-#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */
-#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */
-#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */
-#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */
+#define X86_FEATURE_CQM_LLC (11*32+ 0) /* "cqm_llc" LLC QoS if 1 */
+#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* "cqm_occup_llc" LLC occupancy monitoring */
+#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* "cqm_mbm_total" LLC Total MBM monitoring */
+#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* "cqm_mbm_local" LLC Local MBM monitoring */
+#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* LFENCE in user entry SWAPGS path */
+#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* LFENCE in kernel entry SWAPGS path */
+#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* "split_lock_detect" #AC for split lock */
+#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* Per-thread Memory Bandwidth Allocation */
+#define X86_FEATURE_SGX1 (11*32+ 8) /* Basic SGX */
+#define X86_FEATURE_SGX2 (11*32+ 9) /* SGX Enclave Dynamic Memory Management (EDMM) */
+#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* Issue an IBPB on kernel entry */
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* RET prediction control */
+#define X86_FEATURE_RETPOLINE (11*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_RETHUNK (11*32+14) /* Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* Use IBPB during runtime firmware calls */
+#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* Fill RSB on VM exit when EIBRS is enabled */
+#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* SGX EDECCSSA user leaf function */
+#define X86_FEATURE_CALL_DEPTH (11*32+19) /* Call depth tracking for RSB stuffing */
+#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* MSR IA32_TSX_CTRL (Intel) implemented */
+#define X86_FEATURE_SMBA (11*32+21) /* Slow Memory Bandwidth Allocation */
+#define X86_FEATURE_BMEC (11*32+22) /* Bandwidth Monitoring Event Configuration */
+#define X86_FEATURE_USER_SHSTK (11*32+23) /* "user_shstk" Shadow stack support for user mode applications */
+#define X86_FEATURE_SRSO (11*32+24) /* AMD BTB untrain RETs */
+#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* AMD BTB untrain RETs through aliasing */
+#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* Issue an IBPB only on VMEXIT */
+#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* IA32_TSC_DEADLINE and X2APIC MSRs need fencing */
+#define X86_FEATURE_ZEN2 (11*32+28) /* CPU based on Zen2 microarchitecture */
+#define X86_FEATURE_ZEN3 (11*32+29) /* CPU based on Zen3 microarchitecture */
+#define X86_FEATURE_ZEN4 (11*32+30) /* CPU based on Zen4 microarchitecture */
+#define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
-#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
-#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
-#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */
-#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */
-#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */
-#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */
-#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */
-#define X86_FEATURE_FRED (12*32+17) /* Flexible Return and Event Delivery */
-#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */
-#define X86_FEATURE_WRMSRNS (12*32+19) /* "" Non-serializing WRMSR */
-#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
-#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
-#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */
+#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */
+#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */
+#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */
+#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */
+#define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */
+#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */
+#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */
+#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */
+#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */
+#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */
+#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */
+#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */
+#define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
-#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
-#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */
-#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */
-#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */
-#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */
-#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */
-#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
-#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */
-#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */
-#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
-#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
-#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
-#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
-#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */
-#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
-#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
+#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */
+#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */
+#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */
+#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */
+#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */
+#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */
+#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */
+#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */
+#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */
+#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* Speculative Store Bypass is fixed in hardware. */
+#define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */
+#define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */
+#define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */
+#define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
-#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
-#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */
-#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */
-#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */
-#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */
-#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */
-#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */
-#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */
-#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */
-#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */
-#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */
+#define X86_FEATURE_DTHERM (14*32+ 0) /* "dtherm" Digital Thermal Sensor */
+#define X86_FEATURE_IDA (14*32+ 1) /* "ida" Intel Dynamic Acceleration */
+#define X86_FEATURE_ARAT (14*32+ 2) /* "arat" Always Running APIC Timer */
+#define X86_FEATURE_PLN (14*32+ 4) /* "pln" Intel Power Limit Notification */
+#define X86_FEATURE_PTS (14*32+ 6) /* "pts" Intel Package Thermal Status */
+#define X86_FEATURE_HWP (14*32+ 7) /* "hwp" Intel Hardware P-states */
+#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* "hwp_notify" HWP Notification */
+#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* "hwp_act_window" HWP Activity Window */
+#define X86_FEATURE_HWP_EPP (14*32+10) /* "hwp_epp" HWP Energy Perf. Preference */
+#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* "hwp_pkg_req" HWP Package Level Request */
+#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* HWP Highest perf change */
+#define X86_FEATURE_HFI (14*32+19) /* "hfi" Hardware Feedback Interface */
/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
-#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */
-#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */
+#define X86_FEATURE_NPT (15*32+ 0) /* "npt" Nested Page Table support */
+#define X86_FEATURE_LBRV (15*32+ 1) /* "lbrv" LBR Virtualization support */
#define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */
#define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */
#define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */
#define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */
-#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */
-#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */
-#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
-#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
-#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */
-#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */
-#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */
-#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */
-#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */
-#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */
-#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */
+#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* "flushbyasid" Flush-by-ASID support */
+#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* "decodeassists" Decode Assists support */
+#define X86_FEATURE_PAUSEFILTER (15*32+10) /* "pausefilter" Filtered pause intercept */
+#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* "pfthreshold" Pause filter threshold */
+#define X86_FEATURE_AVIC (15*32+13) /* "avic" Virtual Interrupt Controller */
+#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* "v_vmsave_vmload" Virtual VMSAVE VMLOAD */
+#define X86_FEATURE_VGIF (15*32+16) /* "vgif" Virtual GIF */
+#define X86_FEATURE_X2AVIC (15*32+18) /* "x2avic" Virtual x2apic */
+#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */
+#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */
+#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
-#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
-#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */
-#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */
-#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */
-#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
-#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */
-#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */
-#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */
-#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */
-#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */
-#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
-#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */
-#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */
-#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */
-#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */
-#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */
-#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */
-#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */
-#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */
-#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */
-#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */
+#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/
+#define X86_FEATURE_UMIP (16*32+ 2) /* "umip" User Mode Instruction Protection */
+#define X86_FEATURE_PKU (16*32+ 3) /* "pku" Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (16*32+ 4) /* "ospke" OS Protection Keys Enable */
+#define X86_FEATURE_WAITPKG (16*32+ 5) /* "waitpkg" UMONITOR/UMWAIT/TPAUSE Instructions */
+#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* "avx512_vbmi2" Additional AVX512 Vector Bit Manipulation Instructions */
+#define X86_FEATURE_SHSTK (16*32+ 7) /* Shadow stack */
+#define X86_FEATURE_GFNI (16*32+ 8) /* "gfni" Galois Field New Instructions */
+#define X86_FEATURE_VAES (16*32+ 9) /* "vaes" Vector AES */
+#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* "vpclmulqdq" Carry-Less Multiplication Double Quadword */
+#define X86_FEATURE_AVX512_VNNI (16*32+11) /* "avx512_vnni" Vector Neural Network Instructions */
+#define X86_FEATURE_AVX512_BITALG (16*32+12) /* "avx512_bitalg" Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
+#define X86_FEATURE_TME (16*32+13) /* "tme" Intel Total Memory Encryption */
+#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* "avx512_vpopcntdq" POPCNT for vectors of DW/QW */
+#define X86_FEATURE_LA57 (16*32+16) /* "la57" 5-level page tables */
+#define X86_FEATURE_RDPID (16*32+22) /* "rdpid" RDPID instruction */
+#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* "bus_lock_detect" Bus Lock detect */
+#define X86_FEATURE_CLDEMOTE (16*32+25) /* "cldemote" CLDEMOTE instruction */
+#define X86_FEATURE_MOVDIRI (16*32+27) /* "movdiri" MOVDIRI instruction */
+#define X86_FEATURE_MOVDIR64B (16*32+28) /* "movdir64b" MOVDIR64B instruction */
+#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */
+#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */
/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
-#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
-#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */
-#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */
+#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
-#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
-#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */
-#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */
-#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */
-#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
-#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */
-#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
-#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
-#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */
-#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
-#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
-#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
-#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */
-#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */
-#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */
-#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */
-#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */
-#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
-#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
-#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
-#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */
-#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
+#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* "avx512_4vnniw" AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* "avx512_4fmaps" AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_FSRM (18*32+ 4) /* "fsrm" Fast Short Rep Mov */
+#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* "avx512_vp2intersect" AVX-512 Intersect for D/Q */
+#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* SRBDS mitigation MSR available */
+#define X86_FEATURE_MD_CLEAR (18*32+10) /* "md_clear" VERW clears CPU buffers */
+#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* RTM transaction always aborts */
+#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* TSX_FORCE_ABORT */
+#define X86_FEATURE_SERIALIZE (18*32+14) /* "serialize" SERIALIZE instruction */
+#define X86_FEATURE_HYBRID_CPU (18*32+15) /* This part has CPUs of more than one type */
+#define X86_FEATURE_TSXLDTRK (18*32+16) /* "tsxldtrk" TSX Suspend Load Address Tracking */
+#define X86_FEATURE_PCONFIG (18*32+18) /* "pconfig" Intel PCONFIG */
+#define X86_FEATURE_ARCH_LBR (18*32+19) /* "arch_lbr" Intel ARCH LBR */
+#define X86_FEATURE_IBT (18*32+20) /* "ibt" Indirect Branch Tracking */
+#define X86_FEATURE_AMX_BF16 (18*32+22) /* "amx_bf16" AMX bf16 Support */
+#define X86_FEATURE_AVX512_FP16 (18*32+23) /* "avx512_fp16" AVX512 FP16 */
+#define X86_FEATURE_AMX_TILE (18*32+24) /* "amx_tile" AMX tile Support */
+#define X86_FEATURE_AMX_INT8 (18*32+25) /* "amx_int8" AMX int8 Support */
+#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */
+#define X86_FEATURE_INTEL_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_FLUSH_L1D (18*32+28) /* "flush_l1d" Flush L1D cache */
+#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* "arch_capabilities" IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* IA32_CORE_CAPABILITIES MSR */
+#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */
/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */
-#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */
-#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */
-#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */
-#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
-#define X86_FEATURE_SEV_SNP (19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */
-#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
-#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
-#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */
+#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */
+#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */
+#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */
+#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */
+#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */
+#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */
+#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */
+#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */
/* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */
-#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */
-#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
-#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */
-#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */
-#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */
-#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */
+#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */
+#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */
+#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */
+#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */
+#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */
+#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */
-#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */
-#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
-#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */
+#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
+#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */
/*
* Extended auxiliary flags: Linux defined - for features scattered in various
@@ -465,59 +467,60 @@
*
* Reuse free bits when adding new feature flags!
*/
-#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
-#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */
-#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */
-#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */
-#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* "amd_lbr_pmc_freeze" AMD LBR and PMC Freeze */
+#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */
+#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */
+#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */
+#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
+#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */
/*
* BUG word(s)
*/
#define X86_BUG(x) (NCAPINTS*32 + (x))
-#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */
-#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */
-#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */
+#define X86_BUG_F00F X86_BUG(0) /* "f00f" Intel F00F */
+#define X86_BUG_FDIV X86_BUG(1) /* "fdiv" FPU FDIV */
+#define X86_BUG_COMA X86_BUG(2) /* "coma" Cyrix 6x86 coma */
#define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */
#define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */
-#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */
-#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */
-#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */
-#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */
+#define X86_BUG_11AP X86_BUG(5) /* "11ap" Bad local APIC aka 11AP */
+#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* "fxsave_leak" FXSAVE leaks FOP/FIP/FOP */
+#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* "clflush_monitor" AAI65, CLFLUSH required before MONITOR */
+#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* "sysret_ss_attrs" SYSRET doesn't fix up SS attrs */
#ifdef CONFIG_X86_32
/*
* 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional
* to avoid confusion.
*/
-#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */
+#define X86_BUG_ESPFIX X86_BUG(9) /* IRET to 16-bit SS corrupts ESP/RSP high bits */
#endif
-#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */
-#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */
-#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */
-#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */
-#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
-#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
-#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
-#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
-#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */
-#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */
-#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */
-#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */
-#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
-#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
-#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
-#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
-#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */
-#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */
-#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */
-#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */
-#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */
-#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */
+#define X86_BUG_NULL_SEG X86_BUG(10) /* "null_seg" Nulling a selector preserves the base */
+#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* "swapgs_fence" SWAPGS without input dep on GS */
+#define X86_BUG_MONITOR X86_BUG(12) /* "monitor" IPI required to wake up remote CPU */
+#define X86_BUG_AMD_E400 X86_BUG(13) /* "amd_e400" CPU is among the affected by Erratum 400 */
+#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* "cpu_meltdown" CPU is affected by meltdown attack and needs kernel page table isolation */
+#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* "spectre_v1" CPU is affected by Spectre variant 1 attack with conditional branches */
+#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* "spectre_v2" CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* "spec_store_bypass" CPU is affected by speculative store bypass attack */
+#define X86_BUG_L1TF X86_BUG(18) /* "l1tf" CPU is affected by L1 Terminal Fault */
+#define X86_BUG_MDS X86_BUG(19) /* "mds" CPU is affected by Microarchitectural data sampling */
+#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* "msbds_only" CPU is only affected by the MSDBS variant of BUG_MDS */
+#define X86_BUG_SWAPGS X86_BUG(21) /* "swapgs" CPU is affected by speculation through SWAPGS */
+#define X86_BUG_TAA X86_BUG(22) /* "taa" CPU is affected by TSX Async Abort(TAA) */
+#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */
+#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */
+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */
+#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
+#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */
+#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */
+#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */
+#define X86_BUG_GDS X86_BUG(30) /* "gds" CPU is affected by Gather Data Sampling */
+#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */
/* BUG word 2 */
-#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */
-#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */
-#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */
-#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */
+#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */
+#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */
+#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */
+#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index e022e6eb766c..82c6a4d350e0 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -566,6 +566,12 @@
#define MSR_RELOAD_PMC0 0x000014c1
#define MSR_RELOAD_FIXED_CTR0 0x00001309
+/* V6 PMON MSR range */
+#define MSR_IA32_PMC_V6_GP0_CTR 0x1900
+#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901
+#define MSR_IA32_PMC_V6_FX0_CTR 0x1980
+#define MSR_IA32_PMC_V6_STEP 4
+
/* KeyID partitioning between MKTME and TDX */
#define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087
@@ -660,6 +666,8 @@
#define MSR_AMD64_RMP_BASE 0xc0010132
#define MSR_AMD64_RMP_END 0xc0010133
+#define MSR_SVSM_CAA 0xc001f000
+
/* AMD Collaborative Processor Performance Control MSRs */
#define MSR_AMD_CPPC_CAP1 0xc00102b0
#define MSR_AMD_CPPC_ENABLE 0xc00102b1
@@ -781,6 +789,8 @@
#define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT)
#define MSR_K7_FID_VID_CTL 0xc0010041
#define MSR_K7_FID_VID_STATUS 0xc0010042
+#define MSR_K7_HWCR_CPB_DIS_BIT 25
+#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT)
/* K6 MSRs */
#define MSR_K6_WHCR 0xc0000082
@@ -1164,6 +1174,7 @@
#define MSR_IA32_QM_CTR 0xc8e
#define MSR_IA32_PQR_ASSOC 0xc8f
#define MSR_IA32_L3_CBM_BASE 0xc90
+#define MSR_RMID_SNC_CONFIG 0xca0
#define MSR_IA32_L2_CBM_BASE 0xd10
#define MSR_IA32_MBA_THRTL_BASE 0xd50
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 9fae1b73b529..bf57a824f722 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -106,6 +106,7 @@ struct kvm_ioapic_state {
#define KVM_RUN_X86_SMM (1 << 0)
#define KVM_RUN_X86_BUS_LOCK (1 << 1)
+#define KVM_RUN_X86_GUEST_MODE (1 << 2)
/* for KVM_GET_REGS and KVM_SET_REGS */
struct kvm_regs {
@@ -697,6 +698,11 @@ enum sev_cmd_id {
/* Second time is the charm; improved versions of the above ioctls. */
KVM_SEV_INIT2,
+ /* SNP-specific commands */
+ KVM_SEV_SNP_LAUNCH_START = 100,
+ KVM_SEV_SNP_LAUNCH_UPDATE,
+ KVM_SEV_SNP_LAUNCH_FINISH,
+
KVM_SEV_NR_MAX,
};
@@ -824,6 +830,48 @@ struct kvm_sev_receive_update_data {
__u32 pad2;
};
+struct kvm_sev_snp_launch_start {
+ __u64 policy;
+ __u8 gosvw[16];
+ __u16 flags;
+ __u8 pad0[6];
+ __u64 pad1[4];
+};
+
+/* Kept in sync with firmware values for simplicity. */
+#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1
+#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3
+#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4
+#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5
+#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6
+
+struct kvm_sev_snp_launch_update {
+ __u64 gfn_start;
+ __u64 uaddr;
+ __u64 len;
+ __u8 type;
+ __u8 pad0;
+ __u16 flags;
+ __u32 pad1;
+ __u64 pad2[4];
+};
+
+#define KVM_SEV_SNP_ID_BLOCK_SIZE 96
+#define KVM_SEV_SNP_ID_AUTH_SIZE 4096
+#define KVM_SEV_SNP_FINISH_DATA_SIZE 32
+
+struct kvm_sev_snp_launch_finish {
+ __u64 id_block_uaddr;
+ __u64 id_auth_uaddr;
+ __u8 id_block_en;
+ __u8 auth_key_en;
+ __u8 vcek_disabled;
+ __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE];
+ __u8 pad0[3];
+ __u16 flags;
+ __u64 pad1[4];
+};
+
#define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0)
#define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1)
@@ -874,5 +922,6 @@ struct kvm_hyperv_eventfd {
#define KVM_X86_SW_PROTECTED_VM 1
#define KVM_X86_SEV_VM 2
#define KVM_X86_SEV_ES_VM 3
+#define KVM_X86_SNP_VM 4
#endif /* _ASM_X86_KVM_H */
diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h
index 80e1df482337..1814b413fd57 100644
--- a/tools/arch/x86/include/uapi/asm/svm.h
+++ b/tools/arch/x86/include/uapi/asm/svm.h
@@ -115,6 +115,7 @@
#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0
#define SVM_VMGEXIT_AP_CREATE 1
#define SVM_VMGEXIT_AP_DESTROY 2
+#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018
#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
#define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe
#define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 489cbed7e82a..12796808f07a 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -82,7 +82,30 @@ FILES= \
FILES := $(addprefix $(OUTPUT),$(FILES))
-PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
+# Some distros provide the command $(CROSS_COMPILE)pkg-config for
+# searching packges installed with Multiarch. Use it for cross
+# compilation if it is existed.
+ifneq (, $(shell which $(CROSS_COMPILE)pkg-config))
+ PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
+else
+ PKG_CONFIG ?= pkg-config
+
+ # PKG_CONFIG_PATH or PKG_CONFIG_LIBDIR, alongside PKG_CONFIG_SYSROOT_DIR
+ # for modified system root, are required for the cross compilation.
+ # If these PKG_CONFIG environment variables are not set, Multiarch library
+ # paths are used instead.
+ ifdef CROSS_COMPILE
+ ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),)
+ CROSS_ARCH = $(shell $(CC) -dumpmachine)
+ PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/
+ PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/
+ PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/
+ PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/share/pkgconfig/
+ PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/share/pkgconfig/
+ export PKG_CONFIG_LIBDIR
+ endif
+ endif
+endif
all: $(FILES)
@@ -147,7 +170,17 @@ $(OUTPUT)test-libopencsd.bin:
DWARFLIBS := -ldw
ifeq ($(findstring -static,${LDFLAGS}),-static)
-DWARFLIBS += -lelf -lebl -lz -llzma -lbz2
+ DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd
+
+ LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw)
+ LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION)))
+ LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION)))
+
+ # Elfutils merged libebl.a into libdw.a starting from version 0.177,
+ # Link libebl.a only if libdw is older than this version.
+ ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0)
+ DWARFLIBS += -lebl
+ endif
endif
$(OUTPUT)test-dwarf.bin:
@@ -178,27 +211,27 @@ $(OUTPUT)test-numa_num_possible_cpus.bin:
$(BUILD) -lnuma
$(OUTPUT)test-libunwind.bin:
- $(BUILD) -lelf
+ $(BUILD) -lelf -llzma
$(OUTPUT)test-libunwind-debug-frame.bin:
- $(BUILD) -lelf
+ $(BUILD) -lelf -llzma
$(OUTPUT)test-libunwind-x86.bin:
- $(BUILD) -lelf -lunwind-x86
+ $(BUILD) -lelf -llzma -lunwind-x86
$(OUTPUT)test-libunwind-x86_64.bin:
- $(BUILD) -lelf -lunwind-x86_64
+ $(BUILD) -lelf -llzma -lunwind-x86_64
$(OUTPUT)test-libunwind-arm.bin:
- $(BUILD) -lelf -lunwind-arm
+ $(BUILD) -lelf -llzma -lunwind-arm
$(OUTPUT)test-libunwind-aarch64.bin:
- $(BUILD) -lelf -lunwind-aarch64
+ $(BUILD) -lelf -llzma -lunwind-aarch64
$(OUTPUT)test-libunwind-debug-frame-arm.bin:
- $(BUILD) -lelf -lunwind-arm
+ $(BUILD) -lelf -llzma -lunwind-arm
$(OUTPUT)test-libunwind-debug-frame-aarch64.bin:
- $(BUILD) -lelf -lunwind-aarch64
+ $(BUILD) -lelf -llzma -lunwind-aarch64
$(OUTPUT)test-libaudit.bin:
$(BUILD) -laudit
diff --git a/tools/crypto/ccp/dbc.c b/tools/crypto/ccp/dbc.c
index a807df0f0597..80248d3d3a5a 100644
--- a/tools/crypto/ccp/dbc.c
+++ b/tools/crypto/ccp/dbc.c
@@ -57,7 +57,6 @@ int process_param(int fd, int msg_index, __u8 *signature, int *data)
.msg_index = msg_index,
.param = *data,
};
- int ret;
assert(signature);
assert(data);
diff --git a/tools/hv/Makefile b/tools/hv/Makefile
index 2e60e2c212cd..34ffcec264ab 100644
--- a/tools/hv/Makefile
+++ b/tools/hv/Makefile
@@ -52,7 +52,7 @@ $(OUTPUT)hv_fcopy_uio_daemon: $(HV_FCOPY_UIO_DAEMON_IN)
clean:
rm -f $(ALL_PROGRAMS)
- find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
+ find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete
install: $(ALL_PROGRAMS)
install -d -m 755 $(DESTDIR)$(sbindir); \
diff --git a/tools/hv/lsvmbus b/tools/hv/lsvmbus
index 099f2c44dbed..f83698f14da2 100644..100755
--- a/tools/hv/lsvmbus
+++ b/tools/hv/lsvmbus
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
import os
diff --git a/tools/include/uapi/README b/tools/include/uapi/README
new file mode 100644
index 000000000000..7147b1b2cb28
--- /dev/null
+++ b/tools/include/uapi/README
@@ -0,0 +1,73 @@
+Why we want a copy of kernel headers in tools?
+==============================================
+
+There used to be no copies, with tools/ code using kernel headers
+directly. From time to time tools/perf/ broke due to legitimate kernel
+hacking. At some point Linus complained about such direct usage. Then we
+adopted the current model.
+
+The way these headers are used in perf are not restricted to just
+including them to compile something.
+
+There are sometimes used in scripts that convert defines into string
+tables, etc, so some change may break one of these scripts, or new MSRs
+may use some different #define pattern, etc.
+
+E.g.:
+
+ $ ls -1 tools/perf/trace/beauty/*.sh | head -5
+ tools/perf/trace/beauty/arch_errno_names.sh
+ tools/perf/trace/beauty/drm_ioctl.sh
+ tools/perf/trace/beauty/fadvise.sh
+ tools/perf/trace/beauty/fsconfig.sh
+ tools/perf/trace/beauty/fsmount.sh
+ $
+ $ tools/perf/trace/beauty/fadvise.sh
+ static const char *fadvise_advices[] = {
+ [0] = "NORMAL",
+ [1] = "RANDOM",
+ [2] = "SEQUENTIAL",
+ [3] = "WILLNEED",
+ [4] = "DONTNEED",
+ [5] = "NOREUSE",
+ };
+ $
+
+The tools/perf/check-headers.sh script, part of the tools/ build
+process, points out changes in the original files.
+
+So its important not to touch the copies in tools/ when doing changes in
+the original kernel headers, that will be done later, when
+check-headers.sh inform about the change to the perf tools hackers.
+
+Another explanation from Ingo Molnar:
+It's better than all the alternatives we tried so far:
+
+ - Symbolic links and direct #includes: this was the original approach but
+ was pushed back on from the kernel side, when tooling modified the
+ headers and broke them accidentally for kernel builds.
+
+ - Duplicate self-defined ABI headers like glibc: double the maintenance
+ burden, double the chance for mistakes, plus there's no tech-driven
+ notification mechanism to look at new kernel side changes.
+
+What we are doing now is a third option:
+
+ - A software-enforced copy-on-write mechanism of kernel headers to
+ tooling, driven by non-fatal warnings on the tooling side build when
+ kernel headers get modified:
+
+ Warning: Kernel ABI header differences:
+ diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h
+ diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h
+ diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h
+ ...
+
+ The tooling policy is to always pick up the kernel side headers as-is,
+ and integate them into the tooling build. The warnings above serve as a
+ notification to tooling maintainers that there's changes on the kernel
+ side.
+
+We've been using this for many years now, and it might seem hacky, but
+works surprisingly well.
+
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index a00d53d02723..5bf6148cac2b 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -737,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64)
#define __NR_ppoll_time64 414
__SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64)
#define __NR_io_pgetevents_time64 416
-__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents)
+__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64)
#define __NR_recvmmsg_time64 417
__SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64)
#define __NR_mq_timedsend_time64 418
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index d4d86e566e07..535cb68fdb5c 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -2163,6 +2163,15 @@ struct drm_i915_gem_context_param {
* supports this per context flag.
*/
#define I915_CONTEXT_PARAM_LOW_LATENCY 0xe
+
+/*
+ * I915_CONTEXT_PARAM_CONTEXT_IMAGE:
+ *
+ * Allows userspace to provide own context images.
+ *
+ * Note that this is a debug API not available on production kernel builds.
+ */
+#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf
/* Must be kept compact -- no holes and well documented */
/** @value: Context parameter value to be set or queried */
@@ -2564,6 +2573,24 @@ struct i915_context_param_engines {
struct i915_engine_class_instance engines[N__]; \
} __attribute__((packed)) name__
+struct i915_gem_context_param_context_image {
+ /** @engine: Engine class & instance to be configured. */
+ struct i915_engine_class_instance engine;
+
+ /** @flags: One of the supported flags or zero. */
+ __u32 flags;
+#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0)
+
+ /** @size: Size of the image blob pointed to by @image. */
+ __u32 size;
+
+ /** @mbz: Must be zero. */
+ __u32 mbz;
+
+ /** @image: Userspace memory containing the context image. */
+ __u64 image;
+} __attribute__((packed));
+
/**
* struct drm_i915_gem_context_create_ext_setparam - Context parameter
* to set or query during context creation.
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 35bcf52dbc65..e05b39e39c3f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2851,7 +2851,7 @@ union bpf_attr {
* **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**,
* **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**,
* **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**,
- * **TCP_BPF_RTO_MIN**.
+ * **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports the following *optname*\ s:
* **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**.
@@ -7080,6 +7080,7 @@ enum {
TCP_BPF_SYN = 1005, /* Copy the TCP header */
TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */
TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */
+ TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */
};
enum {
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index e682ab628dfa..d358add1611c 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -81,6 +81,8 @@ enum {
#define IPPROTO_ETHERNET IPPROTO_ETHERNET
IPPROTO_RAW = 255, /* Raw IP packets */
#define IPPROTO_RAW IPPROTO_RAW
+ IPPROTO_SMC = 256, /* Shared Memory Communications */
+#define IPPROTO_SMC IPPROTO_SMC
IPPROTO_MPTCP = 262, /* Multipath TCP connection */
#define IPPROTO_MPTCP IPPROTO_MPTCP
IPPROTO_MAX
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index e5af8c692dc0..637efc055145 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -192,11 +192,24 @@ struct kvm_xen_exit {
/* Flags that describe what fields in emulation_failure hold valid data. */
#define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0)
+/*
+ * struct kvm_run can be modified by userspace at any time, so KVM must be
+ * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM()
+ * renames fields in struct kvm_run from <symbol> to <symbol>__unsafe when
+ * compiled into the kernel, ensuring that any use within KVM is obvious and
+ * gets extra scrutiny.
+ */
+#ifdef __KERNEL__
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe
+#else
+#define HINT_UNSAFE_IN_KVM(_symbol) _symbol
+#endif
+
/* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */
struct kvm_run {
/* in */
__u8 request_interrupt_window;
- __u8 immediate_exit;
+ __u8 HINT_UNSAFE_IN_KVM(immediate_exit);
__u8 padding1[6];
/* out */
@@ -918,6 +931,8 @@ struct kvm_enable_cap {
#define KVM_CAP_GUEST_MEMFD 234
#define KVM_CAP_VM_TYPES 235
#define KVM_CAP_PRE_FAULT_MEMORY 236
+#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237
+#define KVM_CAP_X86_GUEST_MODE 238
struct kvm_irq_routing_irqchip {
__u32 irqchip;
diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h
index 43742ac5b00d..7c308f04e7a0 100644
--- a/tools/include/uapi/linux/netdev.h
+++ b/tools/include/uapi/linux/netdev.h
@@ -93,6 +93,7 @@ enum {
NETDEV_A_PAGE_POOL_INFLIGHT,
NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
NETDEV_A_PAGE_POOL_DETACH_TIME,
+ NETDEV_A_PAGE_POOL_DMABUF,
__NETDEV_A_PAGE_POOL_MAX,
NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1)
@@ -131,6 +132,7 @@ enum {
NETDEV_A_QUEUE_IFINDEX,
NETDEV_A_QUEUE_TYPE,
NETDEV_A_QUEUE_NAPI_ID,
+ NETDEV_A_QUEUE_DMABUF,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
@@ -174,6 +176,16 @@ enum {
};
enum {
+ NETDEV_A_DMABUF_IFINDEX = 1,
+ NETDEV_A_DMABUF_QUEUES,
+ NETDEV_A_DMABUF_FD,
+ NETDEV_A_DMABUF_ID,
+
+ __NETDEV_A_DMABUF_MAX,
+ NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1)
+};
+
+enum {
NETDEV_CMD_DEV_GET = 1,
NETDEV_CMD_DEV_ADD_NTF,
NETDEV_CMD_DEV_DEL_NTF,
@@ -186,6 +198,7 @@ enum {
NETDEV_CMD_QUEUE_GET,
NETDEV_CMD_NAPI_GET,
NETDEV_CMD_QSTATS_GET,
+ NETDEV_CMD_BIND_RX,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 3a64499b0f5d..4842c36fdf80 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -1349,12 +1349,14 @@ union perf_mem_data_src {
#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */
#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */
#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */
-/* 5-0x7 available */
+#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */
+#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */
+/* 0x7 available */
#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */
#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */
#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */
#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */
-#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */
+#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */
#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */
#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */
#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index 67626d535316..887a25286441 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -126,9 +126,15 @@ struct statx {
__u64 stx_mnt_id;
__u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
__u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
- __u64 stx_subvol; /* Subvolume identifier */
/* 0xa0 */
- __u64 __spare3[11]; /* Spare space for future expansion */
+ __u64 stx_subvol; /* Subvolume identifier */
+ __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */
+ /* 0xb0 */
+ __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */
+ __u32 __spare1[1];
+ /* 0xb8 */
+ __u64 __spare3[9]; /* Spare space for future expansion */
/* 0x100 */
};
@@ -157,6 +163,7 @@ struct statx {
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
+#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
@@ -192,6 +199,7 @@ struct statx {
#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */
+#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */
#endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/net/ynl/lib/.gitignore b/tools/net/ynl/lib/.gitignore
index c18dd8d83cee..296c4035dbf2 100644
--- a/tools/net/ynl/lib/.gitignore
+++ b/tools/net/ynl/lib/.gitignore
@@ -1 +1,2 @@
__pycache__/
+*.d
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index fcb18a5a6d70..e16cef160bc2 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -696,14 +696,14 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
addr.nl_family = AF_NETLINK;
if (bind(ys->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
__perr(yse, "unable to bind to a socket address");
- goto err_close_sock;;
+ goto err_close_sock;
}
memset(&addr, 0, sizeof(addr));
addrlen = sizeof(addr);
if (getsockname(ys->socket, (struct sockaddr *)&addr, &addrlen) < 0) {
__perr(yse, "unable to read socket address");
- goto err_close_sock;;
+ goto err_close_sock;
}
ys->portid = addr.nl_pid;
ys->seq = random();
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py
index d42c1d605969..c22c22bf2cb7 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/lib/ynl.py
@@ -388,6 +388,8 @@ class NetlinkProtocol:
def decode(self, ynl, nl_msg, op):
msg = self._decode(nl_msg)
+ if op is None:
+ op = ynl.rsp_by_value[msg.cmd()]
fixed_header_size = ynl._struct_size(op.fixed_header)
msg.raw_attrs = NlAttrs(msg.raw, fixed_header_size)
return msg
@@ -921,8 +923,7 @@ class YnlFamily(SpecFamily):
print("Netlink done while checking for ntf!?")
continue
- op = self.rsp_by_value[nl_msg.cmd()]
- decoded = self.nlproto.decode(self, nl_msg, op)
+ decoded = self.nlproto.decode(self, nl_msg, None)
if decoded.cmd() not in self.async_msg_ids:
print("Unexpected msg id done while checking for ntf", decoded)
continue
@@ -980,7 +981,7 @@ class YnlFamily(SpecFamily):
if nl_msg.extack:
self._decode_extack(req_msg, op, nl_msg.extack)
else:
- op = self.rsp_by_value[nl_msg.cmd()]
+ op = None
req_flags = []
if nl_msg.error:
diff --git a/tools/net/ynl/samples/netdev.c b/tools/net/ynl/samples/netdev.c
index 3e7b29bd55d5..22609d44c89a 100644
--- a/tools/net/ynl/samples/netdev.c
+++ b/tools/net/ynl/samples/netdev.c
@@ -79,7 +79,10 @@ int main(int argc, char **argv)
goto err_close;
printf("Select ifc ($ifindex; or 0 = dump; or -2 ntf check): ");
- scanf("%d", &ifindex);
+ if (scanf("%d", &ifindex) != 1) {
+ fprintf(stderr, "Error: unable to parse input\n");
+ goto err_destroy;
+ }
if (ifindex > 0) {
struct netdev_dev_get_req *req;
@@ -119,6 +122,7 @@ int main(int argc, char **argv)
err_close:
fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
ynl_sock_destroy(ys);
return 2;
}
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py
index 51529fabd517..717530bc9c52 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/ynl-gen-c.py
@@ -2668,13 +2668,15 @@ def main():
cw.p('#define ' + hdr_prot)
cw.nl()
+ hdr_file=os.path.basename(args.out_file[:-2]) + ".h"
+
if args.mode == 'kernel':
cw.p('#include <net/netlink.h>')
cw.p('#include <net/genetlink.h>')
cw.nl()
if not args.header:
if args.out_file:
- cw.p(f'#include "{os.path.basename(args.out_file[:-2])}.h"')
+ cw.p(f'#include "{hdr_file}"')
cw.nl()
headers = ['uapi/' + parsed.uapi_header]
headers += parsed.kernel_family.get('headers', [])
@@ -2686,7 +2688,7 @@ def main():
if family_contains_bitfield32(parsed):
cw.p('#include <linux/netlink.h>')
else:
- cw.p(f'#include "{parsed.name}-user.h"')
+ cw.p(f'#include "{hdr_file}"')
cw.p('#include "ynl.h"')
headers = [parsed.uapi_header]
for definition in parsed['definitions']:
diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt
index 3766886c4bca..83dc87c662b6 100644
--- a/tools/perf/Documentation/Build.txt
+++ b/tools/perf/Documentation/Build.txt
@@ -71,3 +71,31 @@ supported by GCC. UBSan detects undefined behaviors of programs at runtime.
$ UBSAN_OPTIONS=print_stacktrace=1 ./perf record -a
If UBSan detects any problem at runtime, it outputs a “runtime error:” message.
+
+4) Cross compilation
+====================
+As Multiarch is commonly supported in Linux distributions, we can install
+libraries for multiple architectures on the same system and then cross-compile
+Linux perf. For example, Aarch64 libraries and toolchains can be installed on
+an x86_64 machine, allowing us to compile perf for an Aarch64 target.
+
+Below is the command for building the perf with dynamic linking.
+
+ $ cd /path/to/Linux
+ $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- -C tools/perf
+
+For static linking, the option `LDFLAGS="-static"` is required.
+
+ $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- \
+ LDFLAGS="-static" -C tools/perf
+
+In the embedded system world, a use case is to explicitly specify the package
+configuration paths for cross building:
+
+ $ PKG_CONFIG_SYSROOT_DIR="/path/to/cross/build/sysroot" \
+ PKG_CONFIG_LIBDIR="/usr/lib/:/usr/local/lib" \
+ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- -C tools/perf
+
+In this case, the variable PKG_CONFIG_SYSROOT_DIR can be used alongside the
+variable PKG_CONFIG_LIBDIR or PKG_CONFIG_PATH to prepend the sysroot path to
+the library paths for cross compilation.
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index c896babf7a74..fa679db61f62 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -152,7 +152,17 @@ ifdef LIBDW_DIR
endif
DWARFLIBS := -ldw
ifeq ($(findstring -static,${LDFLAGS}),-static)
- DWARFLIBS += -lelf -lebl -ldl -lz -llzma -lbz2
+ DWARFLIBS += -lelf -ldl -lz -llzma -lbz2 -lzstd
+
+ LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw)
+ LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION)))
+ LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION)))
+
+ # Elfutils merged libebl.a into libdw.a starting from version 0.177,
+ # Link libebl.a only if libdw is older than this version.
+ ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0)
+ DWARFLIBS += -lebl
+ endif
endif
FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS)
FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) $(DWARFLIBS)
@@ -296,6 +306,11 @@ endif
ifdef PYTHON_CONFIG
PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null)
+ # Update the python flags for cross compilation
+ ifdef CROSS_COMPILE
+ PYTHON_NATIVE := $(shell echo $(PYTHON_EMBED_LDOPTS) | sed 's/\(-L.*\/\)\(.*-linux-gnu\).*/\2/')
+ PYTHON_EMBED_LDOPTS := $(subst $(PYTHON_NATIVE),$(shell $(CC) -dumpmachine),$(PYTHON_EMBED_LDOPTS))
+ endif
PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
@@ -897,6 +912,9 @@ else
PYTHON_SETUPTOOLS_INSTALLED := $(shell $(PYTHON) -c 'import setuptools;' 2> /dev/null && echo "yes" || echo "no")
ifeq ($(PYTHON_SETUPTOOLS_INSTALLED), yes)
PYTHON_EXTENSION_SUFFIX := $(shell $(PYTHON) -c 'from importlib import machinery; print(machinery.EXTENSION_SUFFIXES[0])')
+ ifdef CROSS_COMPILE
+ PYTHON_EXTENSION_SUFFIX := $(subst $(PYTHON_NATIVE),$(shell $(CC) -dumpmachine),$(PYTHON_EXTENSION_SUFFIX))
+ endif
LANG_BINDINGS += $(obj-perf)python/perf$(PYTHON_EXTENSION_SUFFIX)
else
$(warning Missing python setuptools, the python binding won't be built, please install python3-setuptools or equivalent)
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 175e4c7898f0..f8148db5fc38 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -193,7 +193,32 @@ HOSTLD ?= ld
HOSTAR ?= ar
CLANG ?= clang
-PKG_CONFIG = $(CROSS_COMPILE)pkg-config
+# Some distros provide the command $(CROSS_COMPILE)pkg-config for
+# searching packges installed with Multiarch. Use it for cross
+# compilation if it is existed.
+ifneq (, $(shell which $(CROSS_COMPILE)pkg-config))
+ PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
+else
+ PKG_CONFIG ?= pkg-config
+
+ # PKG_CONFIG_PATH or PKG_CONFIG_LIBDIR, alongside PKG_CONFIG_SYSROOT_DIR
+ # for modified system root, is required for the cross compilation.
+ # If these PKG_CONFIG environment variables are not set, Multiarch library
+ # paths are used instead.
+ ifdef CROSS_COMPILE
+ ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),)
+ CROSS_ARCH = $(shell $(CC) -dumpmachine)
+ PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/
+ PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/
+ PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/
+ PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/share/pkgconfig/
+ PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/share/pkgconfig/
+ export PKG_CONFIG_LIBDIR
+ $(warning Missing PKG_CONFIG_LIBDIR, PKG_CONFIG_PATH and PKG_CONFIG_SYSROOT_DIR for cross compilation,)
+ $(warning set PKG_CONFIG_LIBDIR for using Multiarch libs.)
+ endif
+ endif
+endif
RM = rm -f
LN = ln -f
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index 3656f1ca7a21..ebae8415dfbb 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -230,8 +230,10 @@
178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend
179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64
179 64 pread64 sys_pread64
+179 spu pread64 sys_pread64
180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64
180 64 pwrite64 sys_pwrite64
+180 spu pwrite64 sys_pwrite64
181 common chown sys_chown
182 common getcwd sys_getcwd
183 common capget sys_capget
@@ -246,6 +248,7 @@
190 common ugetrlimit sys_getrlimit compat_sys_getrlimit
191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead
191 64 readahead sys_readahead
+191 spu readahead sys_readahead
192 32 mmap2 sys_mmap2 compat_sys_mmap2
193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64
194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64
@@ -293,6 +296,7 @@
232 nospu set_tid_address sys_set_tid_address
233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64
233 64 fadvise64 sys_fadvise64
+233 spu fadvise64 sys_fadvise64
234 nospu exit_group sys_exit_group
235 nospu lookup_dcookie sys_ni_syscall
236 common epoll_create sys_epoll_create
@@ -502,7 +506,7 @@
412 32 utimensat_time64 sys_utimensat sys_utimensat
413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64
414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents
+416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend
419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index bd0fee24ad10..01071182763e 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -418,7 +418,7 @@
412 32 utimensat_time64 - sys_utimensat
413 32 pselect6_time64 - compat_sys_pselect6_time64
414 32 ppoll_time64 - compat_sys_ppoll_time64
-416 32 io_pgetevents_time64 - sys_io_pgetevents
+416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64
417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64
418 32 mq_timedsend_time64 - sys_mq_timedsend
419 32 mq_timedreceive_time64 - sys_mq_timedreceive
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index a396f6e6ab5b..7093ee21c0d1 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -1,8 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
#
# 64-bit system call numbers and entry vectors
#
# The format is:
-# <number> <abi> <name> <entry point>
+# <number> <abi> <name> <entry point> [<compat entry point> [noreturn]]
#
# The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls
#
@@ -68,7 +69,7 @@
57 common fork sys_fork
58 common vfork sys_vfork
59 64 execve sys_execve
-60 common exit sys_exit
+60 common exit sys_exit - noreturn
61 common wait4 sys_wait4
62 common kill sys_kill
63 common uname sys_newuname
@@ -239,7 +240,7 @@
228 common clock_gettime sys_clock_gettime
229 common clock_getres sys_clock_getres
230 common clock_nanosleep sys_clock_nanosleep
-231 common exit_group sys_exit_group
+231 common exit_group sys_exit_group - noreturn
232 common epoll_wait sys_epoll_wait
233 common epoll_ctl sys_epoll_ctl
234 common tgkill sys_tgkill
@@ -343,6 +344,7 @@
332 common statx sys_statx
333 common io_pgetevents sys_io_pgetevents
334 common rseq sys_rseq
+335 common uretprobe sys_uretprobe
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
424 common pidfd_send_signal sys_pidfd_send_signal
diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c
index de76bbc50bfb..9a95871afc95 100644
--- a/tools/perf/builtin-daemon.c
+++ b/tools/perf/builtin-daemon.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <internal/lib.h>
+#include <inttypes.h>
#include <subcmd/parse-options.h>
#include <api/fd/array.h>
#include <api/fs/fs.h>
@@ -688,9 +689,9 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
/* lock */
csv_sep, daemon->base, "lock");
- fprintf(out, "%c%lu",
+ fprintf(out, "%c%" PRIu64,
/* session up time */
- csv_sep, (curr - daemon->start) / 60);
+ csv_sep, (uint64_t)((curr - daemon->start) / 60));
fprintf(out, "\n");
} else {
@@ -700,8 +701,8 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
daemon->base, SESSION_OUTPUT);
fprintf(out, " lock: %s/lock\n",
daemon->base);
- fprintf(out, " up: %lu minutes\n",
- (curr - daemon->start) / 60);
+ fprintf(out, " up: %" PRIu64 " minutes\n",
+ (uint64_t)((curr - daemon->start) / 60));
}
}
@@ -727,9 +728,9 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
/* session ack */
csv_sep, session->base, SESSION_ACK);
- fprintf(out, "%c%lu",
+ fprintf(out, "%c%" PRIu64,
/* session up time */
- csv_sep, (curr - session->start) / 60);
+ csv_sep, (uint64_t)((curr - session->start) / 60));
fprintf(out, "\n");
} else {
@@ -745,8 +746,8 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out)
session->base, SESSION_CONTROL);
fprintf(out, " ack: %s/%s\n",
session->base, SESSION_ACK);
- fprintf(out, " up: %lu minutes\n",
- (curr - session->start) / 60);
+ fprintf(out, " up: %" PRIu64 " minutes\n",
+ (uint64_t)((curr - session->start) / 60));
}
}
diff --git a/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json b/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json
index 9b4a032186a7..7149caec4f80 100644
--- a/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json
+++ b/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json
@@ -36,7 +36,7 @@
"ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
},
{
- "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
+ "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT"
},
{
"ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED"
diff --git a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json
index a9939823b14b..0c9b9a2d2958 100644
--- a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json
+++ b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json
@@ -74,7 +74,7 @@
{
"PublicDescription": "Sent SFENCE.VMA with ASID request to other HART event",
"ConfigCode": "0x800000000000000c",
- "EventName": "FW_SFENCE_VMA_RECEIVED",
+ "EventName": "FW_SFENCE_VMA_ASID_SENT",
"BriefDescription": "Sent SFENCE.VMA with ASID request to other HART event"
},
{
diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json
index 9b4a032186a7..7149caec4f80 100644
--- a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json
+++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json
@@ -36,7 +36,7 @@
"ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
},
{
- "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
+ "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT"
},
{
"ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED"
diff --git a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json
index 9b4a032186a7..7149caec4f80 100644
--- a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json
+++ b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json
@@ -36,7 +36,7 @@
"ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
},
{
- "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
+ "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT"
},
{
"ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED"
diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json
index 9b4a032186a7..7149caec4f80 100644
--- a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json
+++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json
@@ -36,7 +36,7 @@
"ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
},
{
- "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
+ "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT"
},
{
"ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED"
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index 40132655ccd1..c76f53a90a7b 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -456,11 +456,13 @@ static int test__name_cmp(struct test_suite *test __maybe_unused, int subtest __
/**
* Test perf_pmu__match() that's used to search for a PMU given a name passed
* on the command line. The name that's passed may also be a filename type glob
- * match.
+ * match. If the name does not match, perf_pmu__match() attempts to match the
+ * alias of the PMU, if provided.
*/
static int test__pmu_match(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
struct perf_pmu test_pmu;
+ test_pmu.alias_name = NULL;
test_pmu.name = "pmuname";
TEST_ASSERT_EQUAL("Exact match", perf_pmu__match(&test_pmu, "pmuname"), true);
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
index 89d16b90370b..df9cdb8bbfb8 100644
--- a/tools/perf/trace/beauty/include/linux/socket.h
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -76,7 +76,7 @@ struct msghdr {
__kernel_size_t msg_controllen; /* ancillary data buffer length */
struct kiocb *msg_iocb; /* ptr to iocb for async requests */
struct ubuf_info *msg_ubuf;
- int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb,
+ int (*sg_from_iter)(struct sk_buff *skb,
struct iov_iter *from, size_t length);
};
@@ -442,11 +442,14 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
extern int __sys_socket(int family, int type, int protocol);
extern struct file *__sys_socket_file(int family, int type, int protocol);
extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
+extern int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address,
+ int addrlen);
extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
int addrlen, int file_flags);
extern int __sys_connect(int fd, struct sockaddr __user *uservaddr,
int addrlen);
extern int __sys_listen(int fd, int backlog);
+extern int __sys_listen_socket(struct socket *sock, int backlog);
extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
int __user *usockaddr_len);
extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h
index 45e4e64fd664..753971770733 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/fs.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h
@@ -329,12 +329,17 @@ typedef int __bitwise __kernel_rwf_t;
/* per-IO negation of O_APPEND */
#define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020)
+/* Atomic Write */
+#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040)
+
/* mask of flags supported by the kernel */
#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\
- RWF_APPEND | RWF_NOAPPEND)
+ RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC)
+
+#define PROCFS_IOCTL_MAGIC 'f'
/* Pagemap ioctl */
-#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg)
+#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg)
/* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */
#define PAGE_IS_WPALLOWED (1 << 0)
@@ -393,4 +398,158 @@ struct pm_scan_arg {
__u64 return_mask;
};
+/* /proc/<pid>/maps ioctl */
+#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query)
+
+enum procmap_query_flags {
+ /*
+ * VMA permission flags.
+ *
+ * Can be used as part of procmap_query.query_flags field to look up
+ * only VMAs satisfying specified subset of permissions. E.g., specifying
+ * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs,
+ * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only
+ * return read/write VMAs, though both executable/non-executable and
+ * private/shared will be ignored.
+ *
+ * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags
+ * field to specify actual VMA permissions.
+ */
+ PROCMAP_QUERY_VMA_READABLE = 0x01,
+ PROCMAP_QUERY_VMA_WRITABLE = 0x02,
+ PROCMAP_QUERY_VMA_EXECUTABLE = 0x04,
+ PROCMAP_QUERY_VMA_SHARED = 0x08,
+ /*
+ * Query modifier flags.
+ *
+ * By default VMA that covers provided address is returned, or -ENOENT
+ * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest
+ * VMA with vma_start > addr will be returned if no covering VMA is
+ * found.
+ *
+ * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that
+ * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA
+ * to iterate all VMAs with file backing.
+ */
+ PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10,
+ PROCMAP_QUERY_FILE_BACKED_VMA = 0x20,
+};
+
+/*
+ * Input/output argument structured passed into ioctl() call. It can be used
+ * to query a set of VMAs (Virtual Memory Areas) of a process.
+ *
+ * Each field can be one of three kinds, marked in a short comment to the
+ * right of the field:
+ * - "in", input argument, user has to provide this value, kernel doesn't modify it;
+ * - "out", output argument, kernel sets this field with VMA data;
+ * - "in/out", input and output argument; user provides initial value (used
+ * to specify maximum allowable buffer size), and kernel sets it to actual
+ * amount of data written (or zero, if there is no data).
+ *
+ * If matching VMA is found (according to criterias specified by
+ * query_addr/query_flags, all the out fields are filled out, and ioctl()
+ * returns 0. If there is no matching VMA, -ENOENT will be returned.
+ * In case of any other error, negative error code other than -ENOENT is
+ * returned.
+ *
+ * Most of the data is similar to the one returned as text in /proc/<pid>/maps
+ * file, but procmap_query provides more querying flexibility. There are no
+ * consistency guarantees between subsequent ioctl() calls, but data returned
+ * for matched VMA is self-consistent.
+ */
+struct procmap_query {
+ /* Query struct size, for backwards/forward compatibility */
+ __u64 size;
+ /*
+ * Query flags, a combination of enum procmap_query_flags values.
+ * Defines query filtering and behavior, see enum procmap_query_flags.
+ *
+ * Input argument, provided by user. Kernel doesn't modify it.
+ */
+ __u64 query_flags; /* in */
+ /*
+ * Query address. By default, VMA that covers this address will
+ * be looked up. PROCMAP_QUERY_* flags above modify this default
+ * behavior further.
+ *
+ * Input argument, provided by user. Kernel doesn't modify it.
+ */
+ __u64 query_addr; /* in */
+ /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */
+ __u64 vma_start; /* out */
+ __u64 vma_end; /* out */
+ /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */
+ __u64 vma_flags; /* out */
+ /* VMA backing page size granularity. */
+ __u64 vma_page_size; /* out */
+ /*
+ * VMA file offset. If VMA has file backing, this specifies offset
+ * within the file that VMA's start address corresponds to.
+ * Is set to zero if VMA has no backing file.
+ */
+ __u64 vma_offset; /* out */
+ /* Backing file's inode number, or zero, if VMA has no backing file. */
+ __u64 inode; /* out */
+ /* Backing file's device major/minor number, or zero, if VMA has no backing file. */
+ __u32 dev_major; /* out */
+ __u32 dev_minor; /* out */
+ /*
+ * If set to non-zero value, signals the request to return VMA name
+ * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix
+ * appended, if file was unlinked from FS) for matched VMA. VMA name
+ * can also be some special name (e.g., "[heap]", "[stack]") or could
+ * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME).
+ *
+ * Kernel will set this field to zero, if VMA has no associated name.
+ * Otherwise kernel will return actual amount of bytes filled in
+ * user-supplied buffer (see vma_name_addr field below), including the
+ * terminating zero.
+ *
+ * If VMA name is longer that user-supplied maximum buffer size,
+ * -E2BIG error is returned.
+ *
+ * If this field is set to non-zero value, vma_name_addr should point
+ * to valid user space memory buffer of at least vma_name_size bytes.
+ * If set to zero, vma_name_addr should be set to zero as well
+ */
+ __u32 vma_name_size; /* in/out */
+ /*
+ * If set to non-zero value, signals the request to extract and return
+ * VMA's backing file's build ID, if the backing file is an ELF file
+ * and it contains embedded build ID.
+ *
+ * Kernel will set this field to zero, if VMA has no backing file,
+ * backing file is not an ELF file, or ELF file has no build ID
+ * embedded.
+ *
+ * Build ID is a binary value (not a string). Kernel will set
+ * build_id_size field to exact number of bytes used for build ID.
+ * If build ID is requested and present, but needs more bytes than
+ * user-supplied maximum buffer size (see build_id_addr field below),
+ * -E2BIG error will be returned.
+ *
+ * If this field is set to non-zero value, build_id_addr should point
+ * to valid user space memory buffer of at least build_id_size bytes.
+ * If set to zero, build_id_addr should be set to zero as well
+ */
+ __u32 build_id_size; /* in/out */
+ /*
+ * User-supplied address of a buffer of at least vma_name_size bytes
+ * for kernel to fill with matched VMA's name (see vma_name_size field
+ * description above for details).
+ *
+ * Should be set to zero if VMA name should not be returned.
+ */
+ __u64 vma_name_addr; /* in */
+ /*
+ * User-supplied address of a buffer of at least build_id_size bytes
+ * for kernel to fill with matched VMA's ELF build ID, if available
+ * (see build_id_size field description above for details).
+ *
+ * Should be set to zero if build ID should not be returned.
+ */
+ __u64 build_id_addr; /* in */
+};
+
#endif /* _UAPI_LINUX_FS_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h
index ad5478dbad00..225bc366ffcb 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/mount.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h
@@ -154,7 +154,7 @@ struct mount_attr {
*/
struct statmount {
__u32 size; /* Total size, including strings */
- __u32 __spare1;
+ __u32 mnt_opts; /* [str] Mount options of the mount */
__u64 mask; /* What results were written */
__u32 sb_dev_major; /* Device ID */
__u32 sb_dev_minor;
@@ -172,7 +172,8 @@ struct statmount {
__u64 propagate_from; /* Propagation from in current namespace */
__u32 mnt_root; /* [str] Root of mount relative to root of fs */
__u32 mnt_point; /* [str] Mountpoint relative to current root */
- __u64 __spare2[50];
+ __u64 mnt_ns_id; /* ID of the mount namespace */
+ __u64 __spare2[49];
char str[]; /* Variable size part containing strings */
};
@@ -188,10 +189,12 @@ struct mnt_id_req {
__u32 spare;
__u64 mnt_id;
__u64 param;
+ __u64 mnt_ns_id;
};
/* List of all mnt_id_req versions. */
#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */
+#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */
/*
* @mask bits for statmount(2)
@@ -202,10 +205,13 @@ struct mnt_id_req {
#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
+#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */
+#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */
/*
* Special @mnt_id values that can be passed to listmount
*/
#define LSMT_ROOT 0xffffffffffffffff /* root mount */
+#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h
index 67626d535316..887a25286441 100644
--- a/tools/perf/trace/beauty/include/uapi/linux/stat.h
+++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h
@@ -126,9 +126,15 @@ struct statx {
__u64 stx_mnt_id;
__u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */
__u32 stx_dio_offset_align; /* File offset alignment for direct I/O */
- __u64 stx_subvol; /* Subvolume identifier */
/* 0xa0 */
- __u64 __spare3[11]; /* Spare space for future expansion */
+ __u64 stx_subvol; /* Subvolume identifier */
+ __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */
+ __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */
+ /* 0xb0 */
+ __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */
+ __u32 __spare1[1];
+ /* 0xb8 */
+ __u64 __spare3[9]; /* Spare space for future expansion */
/* 0x100 */
};
@@ -157,6 +163,7 @@ struct statx {
#define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */
#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */
#define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */
+#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
@@ -192,6 +199,7 @@ struct statx {
#define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */
#define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */
#define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */
+#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */
#endif /* _UAPI_LINUX_STAT_H */
diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h
index 628d46a0da92..8bf7e8a0eb6f 100644
--- a/tools/perf/trace/beauty/include/uapi/sound/asound.h
+++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h
@@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image {
* *
*****************************************************************************/
-#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17)
+#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 18)
typedef unsigned long snd_pcm_uframes_t;
typedef signed long snd_pcm_sframes_t;
@@ -334,7 +334,7 @@ union snd_pcm_sync_id {
unsigned char id[16];
unsigned short id16[8];
unsigned int id32[4];
-};
+} __attribute__((deprecated));
struct snd_pcm_info {
unsigned int device; /* RO/WR (control): device number */
@@ -348,7 +348,7 @@ struct snd_pcm_info {
int dev_subclass; /* SNDRV_PCM_SUBCLASS_* */
unsigned int subdevices_count;
unsigned int subdevices_avail;
- union snd_pcm_sync_id sync; /* hardware synchronization ID */
+ unsigned char pad1[16]; /* was: hardware synchronization ID */
unsigned char reserved[64]; /* reserved for future... */
};
@@ -420,7 +420,8 @@ struct snd_pcm_hw_params {
unsigned int rate_num; /* R: rate numerator */
unsigned int rate_den; /* R: rate denominator */
snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */
- unsigned char reserved[64]; /* reserved for future */
+ unsigned char sync[16]; /* R: synchronization ID (perfect sync - one clock source) */
+ unsigned char reserved[48]; /* reserved for future */
};
enum {
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index b4cb3fe5cc25..bc4e92c0c08b 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -286,6 +286,9 @@ static void account_end_timestamp(struct lock_contention *con)
goto next;
for (int i = 0; i < total_cpus; i++) {
+ if (cpu_data[i].lock == 0)
+ continue;
+
update_lock_stat(stat_fd, -1, end_ts, aggr_mode,
&cpu_data[i]);
}
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 1730b852a947..6d075648d2cc 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1141,7 +1141,7 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp
int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node,
bool hide_unresolved)
{
- struct machine *machine = maps__machine(node->ms.maps);
+ struct machine *machine = node->ms.maps ? maps__machine(node->ms.maps) : NULL;
maps__put(al->maps);
al->maps = maps__get(node->ms.maps);
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 3be882b2e845..31a223eaf8e6 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -20,6 +20,7 @@
#include "util/env.h"
#include "util/kvm-stat.h"
#include "util/kwork.h"
+#include "util/sample.h"
#include "util/lock-contention.h"
#include <internal/lib.h>
#include "../builtin.h"
diff --git a/tools/power/cpupower/bindings/python/.gitignore b/tools/power/cpupower/bindings/python/.gitignore
new file mode 100644
index 000000000000..5c9a1f0212dd
--- /dev/null
+++ b/tools/power/cpupower/bindings/python/.gitignore
@@ -0,0 +1,8 @@
+__pycache__/
+raw_pylibcpupower_wrap.c
+*.o
+*.so
+*.py
+!test_raw_pylibcpupower.py
+# git keeps ignoring this file, use git add -f raw_libcpupower.i
+!raw_pylibcpupower.i
diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile
new file mode 100644
index 000000000000..dc09c5b66ead
--- /dev/null
+++ b/tools/power/cpupower/bindings/python/Makefile
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Makefile for libcpupower's Python bindings
+#
+# This Makefile expects you have already run the makefile for cpupower to build
+# the .o files in the lib directory for the bindings to be created.
+
+CC := gcc
+HAVE_SWIG := $(shell if which swig >/dev/null 2>&1; then echo 1; else echo 0; fi)
+HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; else echo 0; fi)
+
+LIB_DIR := ../../lib
+PY_INCLUDE = $(firstword $(shell python-config --includes))
+OBJECTS_LIB = $(wildcard $(LIB_DIR)/*.o)
+
+all: _raw_pylibcpupower.so
+
+_raw_pylibcpupower.so: raw_pylibcpupower_wrap.o
+ $(CC) -shared $(OBJECTS_LIB) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so
+
+raw_pylibcpupower_wrap.o: raw_pylibcpupower_wrap.c
+ $(CC) -fPIC -c raw_pylibcpupower_wrap.c $(PY_INCLUDE)
+
+raw_pylibcpupower_wrap.c: raw_pylibcpupower.i
+ifeq ($(HAVE_SWIG),0)
+ $(error "swig was not found. Make sure you have it installed and in the PATH to generate the bindings.")
+else ifeq ($(HAVE_PYCONFIG),0)
+ $(error "python-config was not found. Make sure you have it installed and in the PATH to generate the bindings.")
+endif
+ swig -python raw_pylibcpupower.i
+
+# Will only clean the bindings folder; will not clean the actual cpupower folder
+clean:
+ rm -f raw_pylibcpupower.py raw_pylibcpupower_wrap.c raw_pylibcpupower_wrap.o _raw_pylibcpupower.so
diff --git a/tools/power/cpupower/bindings/python/README b/tools/power/cpupower/bindings/python/README
new file mode 100644
index 000000000000..0a4bb2581e8a
--- /dev/null
+++ b/tools/power/cpupower/bindings/python/README
@@ -0,0 +1,59 @@
+This folder contains the necessary files to build the Python bindings for
+libcpupower (aside from the libcpupower object files).
+
+
+requirements
+------------
+
+* You need the object files in the libcpupower directory compiled by
+cpupower's makefile.
+* The SWIG program must be installed.
+* The Python's development libraries installed.
+
+Please check that your version of SWIG is compatible with the version of Python
+installed on your machine by checking the SWIG changelog on their website.
+https://swig.org/
+
+Note that while SWIG itself is GPL v3+ licensed; the resulting output,
+the bindings code: is permissively licensed + the license of libcpupower's .o
+files. For these bindings that means GPL v2.
+
+Please see https://swig.org/legal.html and the discussion [1] for more details.
+
+[1]
+https://lore.kernel.org/linux-pm/Zqv9BOjxLAgyNP5B@hatbackup/
+
+
+build
+-----
+
+Install SWIG and the Python development files provided by your distribution.
+
+Build the object files for libcpupower by running make in the cpupower
+directory.
+
+Return to the directory this README is in to run:
+
+$ make
+
+
+testing
+-------
+
+Please verify the _raw_pylibcpupower.so and raw_pylibcpupower.py files have
+been created.
+
+To run the test script:
+
+$ python test_raw_pylibcpupower.py
+
+
+credits
+-------
+
+Original Bindings Author:
+John B. Wyatt IV
+jwyatt@redhat.com
+sageofredondo@gmail.com
+
+Copyright (C) 2024 Red Hat
diff --git a/tools/power/cpupower/bindings/python/raw_pylibcpupower.i b/tools/power/cpupower/bindings/python/raw_pylibcpupower.i
new file mode 100644
index 000000000000..96556d87a745
--- /dev/null
+++ b/tools/power/cpupower/bindings/python/raw_pylibcpupower.i
@@ -0,0 +1,247 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+%module raw_pylibcpupower
+%{
+#include "../../lib/cpupower_intern.h"
+#include "../../lib/acpi_cppc.h"
+#include "../../lib/cpufreq.h"
+#include "../../lib/cpuidle.h"
+#include "../../lib/cpupower.h"
+#include "../../lib/powercap.h"
+%}
+
+/*
+ * cpupower_intern.h
+ */
+
+#define PATH_TO_CPU "/sys/devices/system/cpu/"
+#define MAX_LINE_LEN 4096
+#define SYSFS_PATH_MAX 255
+
+int is_valid_path(const char *path);
+
+unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen);
+
+unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen);
+
+/*
+ * acpi_cppc.h
+ */
+
+enum acpi_cppc_value {
+ HIGHEST_PERF,
+ LOWEST_PERF,
+ NOMINAL_PERF,
+ LOWEST_NONLINEAR_PERF,
+ LOWEST_FREQ,
+ NOMINAL_FREQ,
+ REFERENCE_PERF,
+ WRAPAROUND_TIME,
+ MAX_CPPC_VALUE_FILES
+};
+
+unsigned long acpi_cppc_get_data(unsigned int cpu,
+ enum acpi_cppc_value which);
+
+/*
+ * cpufreq.h
+ */
+
+struct cpufreq_policy {
+ unsigned long min;
+ unsigned long max;
+ char *governor;
+};
+
+struct cpufreq_available_governors {
+ char *governor;
+ struct cpufreq_available_governors *next;
+ struct cpufreq_available_governors *first;
+};
+
+struct cpufreq_available_frequencies {
+ unsigned long frequency;
+ struct cpufreq_available_frequencies *next;
+ struct cpufreq_available_frequencies *first;
+};
+
+
+struct cpufreq_affected_cpus {
+ unsigned int cpu;
+ struct cpufreq_affected_cpus *next;
+ struct cpufreq_affected_cpus *first;
+};
+
+struct cpufreq_stats {
+ unsigned long frequency;
+ unsigned long long time_in_state;
+ struct cpufreq_stats *next;
+ struct cpufreq_stats *first;
+};
+
+unsigned long cpufreq_get_freq_kernel(unsigned int cpu);
+
+unsigned long cpufreq_get_freq_hardware(unsigned int cpu);
+
+#define cpufreq_get(cpu) cpufreq_get_freq_kernel(cpu);
+
+unsigned long cpufreq_get_transition_latency(unsigned int cpu);
+
+int cpufreq_get_hardware_limits(unsigned int cpu,
+ unsigned long *min,
+ unsigned long *max);
+
+char *cpufreq_get_driver(unsigned int cpu);
+
+void cpufreq_put_driver(char *ptr);
+
+struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu);
+
+void cpufreq_put_policy(struct cpufreq_policy *policy);
+
+struct cpufreq_available_governors
+*cpufreq_get_available_governors(unsigned int cpu);
+
+void cpufreq_put_available_governors(
+ struct cpufreq_available_governors *first);
+
+struct cpufreq_available_frequencies
+*cpufreq_get_available_frequencies(unsigned int cpu);
+
+void cpufreq_put_available_frequencies(
+ struct cpufreq_available_frequencies *first);
+
+struct cpufreq_available_frequencies
+*cpufreq_get_boost_frequencies(unsigned int cpu);
+
+void cpufreq_put_boost_frequencies(
+ struct cpufreq_available_frequencies *first);
+
+struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned
+ int cpu);
+
+void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first);
+
+struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned
+ int cpu);
+
+void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first);
+
+struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu,
+ unsigned long long *total_time);
+
+void cpufreq_put_stats(struct cpufreq_stats *stats);
+
+unsigned long cpufreq_get_transitions(unsigned int cpu);
+
+int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy);
+
+int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq);
+
+int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq);
+
+int cpufreq_modify_policy_governor(unsigned int cpu, char *governor);
+
+int cpufreq_set_frequency(unsigned int cpu,
+ unsigned long target_frequency);
+
+unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu,
+ const char **table,
+ unsigned int index,
+ unsigned int size);
+
+/*
+ * cpuidle.h
+ */
+
+int cpuidle_is_state_disabled(unsigned int cpu,
+ unsigned int idlestate);
+int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate,
+ unsigned int disable);
+unsigned long cpuidle_state_latency(unsigned int cpu,
+ unsigned int idlestate);
+unsigned long cpuidle_state_usage(unsigned int cpu,
+ unsigned int idlestate);
+unsigned long long cpuidle_state_time(unsigned int cpu,
+ unsigned int idlestate);
+char *cpuidle_state_name(unsigned int cpu,
+ unsigned int idlestate);
+char *cpuidle_state_desc(unsigned int cpu,
+ unsigned int idlestate);
+unsigned int cpuidle_state_count(unsigned int cpu);
+
+char *cpuidle_get_governor(void);
+
+char *cpuidle_get_driver(void);
+
+/*
+ * cpupower.h
+ */
+
+struct cpupower_topology {
+ /* Amount of CPU cores, packages and threads per core in the system */
+ unsigned int cores;
+ unsigned int pkgs;
+ unsigned int threads; /* per core */
+
+ /* Array gets mallocated with cores entries, holding per core info */
+ struct cpuid_core_info *core_info;
+};
+
+struct cpuid_core_info {
+ int pkg;
+ int core;
+ int cpu;
+
+ /* flags */
+ unsigned int is_online:1;
+};
+
+int get_cpu_topology(struct cpupower_topology *cpu_top);
+
+void cpu_topology_release(struct cpupower_topology cpu_top);
+
+int cpupower_is_cpu_online(unsigned int cpu);
+
+/*
+ * powercap.h
+ */
+
+struct powercap_zone {
+ char name[MAX_LINE_LEN];
+ /*
+ * sys_name relative to PATH_TO_POWERCAP,
+ * do not forget the / in between
+ */
+ char sys_name[SYSFS_PATH_MAX];
+ int tree_depth;
+ struct powercap_zone *parent;
+ struct powercap_zone *children[POWERCAP_MAX_CHILD_ZONES];
+ /* More possible caps or attributes to be added? */
+ uint32_t has_power_uw:1,
+ has_energy_uj:1;
+
+};
+
+int powercap_walk_zones(struct powercap_zone *zone,
+ int (*f)(struct powercap_zone *zone));
+
+struct powercap_zone *powercap_init_zones(void);
+
+int powercap_get_enabled(int *mode);
+
+int powercap_set_enabled(int mode);
+
+int powercap_get_driver(char *driver, int buflen);
+
+int powercap_get_max_energy_range_uj(struct powercap_zone *zone, uint64_t *val);
+
+int powercap_get_energy_uj(struct powercap_zone *zone, uint64_t *val);
+
+int powercap_get_max_power_range_uw(struct powercap_zone *zone, uint64_t *val);
+
+int powercap_get_power_uw(struct powercap_zone *zone, uint64_t *val);
+
+int powercap_zone_get_enabled(struct powercap_zone *zone, int *mode);
+
+int powercap_zone_set_enabled(struct powercap_zone *zone, int mode);
diff --git a/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py
new file mode 100755
index 000000000000..3d6f62b9556a
--- /dev/null
+++ b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-only
+
+import raw_pylibcpupower as p
+
+# Simple function call
+
+"""
+Get cstate count
+"""
+cpu_cstates_count = p.cpuidle_state_count(0)
+if cpu_cstates_count > -1:
+ print(f"CPU 0 has {cpu_cstates_count} c-states")
+else:
+ print(f"cstate count error: return code: {cpu_cstates_count}")
+
+"""
+Disable cstate (will fail if the above is 0, ex: a virtual machine)
+"""
+cstate_disabled = p.cpuidle_state_disable(0, 0, 1)
+if cpu_cstates_count == 0:
+ print(f"CPU 0 has {cpu_cstates_count} c-states")
+else:
+ print(f"cstate count error: return code: {cpu_cstates_count}")
+
+match cstate_disabled:
+ case 0:
+ print(f"CPU state disabled")
+ case -1:
+ print(f"Idlestate not available")
+ case _:
+ print(f"Not documented")
+
+
+# Pointer example
+
+topo = p.cpupower_topology()
+total_cpus = p.get_cpu_topology(topo)
+if total_cpus > 0:
+ print(f"Number of total cpus: {total_cpus} and number of cores: {topo.cores}")
+else:
+ print(f"Error: could not get cpu topology")
diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c
index 479c5971aa6d..0ecac009273c 100644
--- a/tools/power/cpupower/lib/cpuidle.c
+++ b/tools/power/cpupower/lib/cpuidle.c
@@ -116,6 +116,7 @@ enum idlestate_value {
IDLESTATE_USAGE,
IDLESTATE_POWER,
IDLESTATE_LATENCY,
+ IDLESTATE_RESIDENCY,
IDLESTATE_TIME,
IDLESTATE_DISABLE,
MAX_IDLESTATE_VALUE_FILES
@@ -125,6 +126,7 @@ static const char *idlestate_value_files[MAX_IDLESTATE_VALUE_FILES] = {
[IDLESTATE_USAGE] = "usage",
[IDLESTATE_POWER] = "power",
[IDLESTATE_LATENCY] = "latency",
+ [IDLESTATE_RESIDENCY] = "residency",
[IDLESTATE_TIME] = "time",
[IDLESTATE_DISABLE] = "disable",
};
@@ -254,6 +256,12 @@ unsigned long cpuidle_state_latency(unsigned int cpu,
return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_LATENCY);
}
+unsigned long cpuidle_state_residency(unsigned int cpu,
+ unsigned int idlestate)
+{
+ return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_RESIDENCY);
+}
+
unsigned long cpuidle_state_usage(unsigned int cpu,
unsigned int idlestate)
{
diff --git a/tools/power/cpupower/lib/cpuidle.h b/tools/power/cpupower/lib/cpuidle.h
index 2e10fead2e1e..2ab404d40259 100644
--- a/tools/power/cpupower/lib/cpuidle.h
+++ b/tools/power/cpupower/lib/cpuidle.h
@@ -8,6 +8,8 @@ int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate,
unsigned int disable);
unsigned long cpuidle_state_latency(unsigned int cpu,
unsigned int idlestate);
+unsigned long cpuidle_state_residency(unsigned int cpu,
+ unsigned int idlestate);
unsigned long cpuidle_state_usage(unsigned int cpu,
unsigned int idlestate);
unsigned long long cpuidle_state_time(unsigned int cpu,
diff --git a/tools/power/cpupower/lib/powercap.c b/tools/power/cpupower/lib/powercap.c
index a7a59c6bacda..94a0c69e55ef 100644
--- a/tools/power/cpupower/lib/powercap.c
+++ b/tools/power/cpupower/lib/powercap.c
@@ -78,6 +78,14 @@ int powercap_get_enabled(int *mode)
}
/*
+ * TODO: implement function. Returns dummy 0 for now.
+ */
+int powercap_set_enabled(int mode)
+{
+ return 0;
+}
+
+/*
* Hardcoded, because rapl is the only powercap implementation
- * this needs to get more generic if more powercap implementations
* should show up
diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c
index 44126a87fa7a..e0d17f0de3fe 100644
--- a/tools/power/cpupower/utils/cpuidle-info.c
+++ b/tools/power/cpupower/utils/cpuidle-info.c
@@ -64,6 +64,8 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose)
printf(_("Latency: %lu\n"),
cpuidle_state_latency(cpu, idlestate));
+ printf(_("Residency: %lu\n"),
+ cpuidle_state_residency(cpu, idlestate));
printf(_("Usage: %lu\n"),
cpuidle_state_usage(cpu, idlestate));
printf(_("Duration: %llu\n"),
@@ -115,6 +117,8 @@ static void proc_cpuidle_cpu_output(unsigned int cpu)
printf(_("promotion[--] demotion[--] "));
printf(_("latency[%03lu] "),
cpuidle_state_latency(cpu, cstate));
+ printf(_("residency[%05lu] "),
+ cpuidle_state_residency(cpu, cstate));
printf(_("usage[%08lu] "),
cpuidle_state_usage(cpu, cstate));
printf(_("duration[%020Lu] \n"),
diff --git a/tools/power/pm-graph/.gitignore b/tools/power/pm-graph/.gitignore
new file mode 100644
index 000000000000..37762a8a06d6
--- /dev/null
+++ b/tools/power/pm-graph/.gitignore
@@ -0,0 +1,3 @@
+# sleepgraph.py artifacts
+suspend-[0-9]*-[0-9]*
+suspend-[0-9]*-[0-9]*-x[0-9]*
diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile
index b5310832c19c..aeddbaf2d4c4 100644
--- a/tools/power/pm-graph/Makefile
+++ b/tools/power/pm-graph/Makefile
@@ -1,51 +1,86 @@
# SPDX-License-Identifier: GPL-2.0
-PREFIX ?= /usr
-DESTDIR ?=
+#
+# Copyright (c) 2013, Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# Authors:
+# Todd Brandt <todd.e.brandt@linux.intel.com>
+
+# Prefix to the directories we're installing to
+DESTDIR ?=
+
+# Directory definitions. These are default and most probably
+# do not need to be changed. Please note that DESTDIR is
+# added in front of any of them
+
+BINDIR ?= /usr/bin
+MANDIR ?= /usr/share/man
+LIBDIR ?= /usr/lib
+
+# Toolchain: what tools do we use, and what options do they need:
+INSTALL = /usr/bin/install
+INSTALL_DATA = ${INSTALL} -m 644
all:
@echo "Nothing to build"
install : uninstall
- install -d $(DESTDIR)$(PREFIX)/lib/pm-graph
- install sleepgraph.py $(DESTDIR)$(PREFIX)/lib/pm-graph
- install bootgraph.py $(DESTDIR)$(PREFIX)/lib/pm-graph
- install -d $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/cgskip.txt $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/freeze-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/freeze.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/freeze-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/standby-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/standby.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/standby-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/suspend-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/suspend.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/suspend-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
- install -m 644 config/suspend-x2-proc.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
-
- install -d $(DESTDIR)$(PREFIX)/bin
- ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(PREFIX)/bin/bootgraph
- ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(PREFIX)/bin/sleepgraph
-
- install -d $(DESTDIR)$(PREFIX)/share/man/man8
- install bootgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
- install sleepgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
+ $(INSTALL) -d $(DESTDIR)$(LIBDIR)/pm-graph
+ $(INSTALL) sleepgraph.py $(DESTDIR)$(LIBDIR)/pm-graph
+ $(INSTALL) bootgraph.py $(DESTDIR)$(LIBDIR)/pm-graph
+ $(INSTALL) -d $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/cgskip.txt $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/freeze-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/freeze.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/freeze-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/standby-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/standby.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/standby-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/suspend-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/suspend.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/suspend-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+ $(INSTALL_DATA) config/suspend-x2-proc.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config
+
+ $(INSTALL) -d $(DESTDIR)$(BINDIR)
+ ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(BINDIR)/bootgraph
+ ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(BINDIR)/sleepgraph
+
+ $(INSTALL) -d $(DESTDIR)$(MANDIR)/man8
+ $(INSTALL) bootgraph.8 $(DESTDIR)$(MANDIR)/man8
+ $(INSTALL) sleepgraph.8 $(DESTDIR)$(MANDIR)/man8
uninstall :
- rm -f $(DESTDIR)$(PREFIX)/share/man/man8/bootgraph.8
- rm -f $(DESTDIR)$(PREFIX)/share/man/man8/sleepgraph.8
+ rm -f $(DESTDIR)$(MANDIR)/man8/bootgraph.8
+ rm -f $(DESTDIR)$(MANDIR)/man8/sleepgraph.8
- rm -f $(DESTDIR)$(PREFIX)/bin/bootgraph
- rm -f $(DESTDIR)$(PREFIX)/bin/sleepgraph
+ rm -f $(DESTDIR)$(BINDIR)/bootgraph
+ rm -f $(DESTDIR)$(BINDIR)/sleepgraph
- rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/config/*
- if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/config ] ; then \
- rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/config; \
+ rm -f $(DESTDIR)$(LIBDIR)/pm-graph/config/*
+ if [ -d $(DESTDIR)$(LIBDIR)/pm-graph/config ] ; then \
+ rmdir $(DESTDIR)$(LIBDIR)/pm-graph/config; \
fi;
- rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__/*
- if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__ ] ; then \
- rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__; \
+ rm -f $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__/*
+ if [ -d $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__ ] ; then \
+ rmdir $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__; \
fi;
- rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/*
- if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph ] ; then \
- rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph; \
+ rm -f $(DESTDIR)$(LIBDIR)/pm-graph/*
+ if [ -d $(DESTDIR)$(LIBDIR)/pm-graph ] ; then \
+ rmdir $(DESTDIR)$(LIBDIR)/pm-graph; \
fi;
+
+help:
+ @echo 'Building targets:'
+ @echo ' all - Nothing to build'
+ @echo ' install - Install the program and create necessary directories'
+ @echo ' uninstall - Remove installed files and directories'
+
+.PHONY: all install uninstall help
diff --git a/tools/spi/spidev_fdx.c b/tools/spi/spidev_fdx.c
index 7d2a867cd4ae..bc9c4f6c3ba8 100644
--- a/tools/spi/spidev_fdx.c
+++ b/tools/spi/spidev_fdx.c
@@ -99,7 +99,7 @@ static void dumpstat(const char *name, int fd)
return;
}
- printf("%s: spi mode 0x%x, %d bits %sper word, %d Hz max\n",
+ printf("%s: spi mode 0x%x, %d bits %sper word, %u Hz max\n",
name, mode, bits, lsb ? "(lsb first) " : "", speed);
}
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 030b388800f0..3d1ca9e38b1f 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -14,6 +14,7 @@ ldflags-y += --wrap=cxl_dvsec_rr_decode
ldflags-y += --wrap=devm_cxl_add_rch_dport
ldflags-y += --wrap=cxl_rcd_component_reg_phys
ldflags-y += --wrap=cxl_endpoint_parse_cdat
+ldflags-y += --wrap=cxl_setup_parent_dport
DRIVERS := ../../../drivers
CXL_SRC := $(DRIVERS)/cxl
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 6f737941dc0e..d619672faa49 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -299,6 +299,18 @@ void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL);
+void __wrap_cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport)
+{
+ int index;
+ struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
+
+ if (!ops || !ops->is_mock_port(dport->dport_dev))
+ cxl_setup_parent_dport(host, dport);
+
+ put_cxl_mock_ops(index);
+}
+EXPORT_SYMBOL_NS_GPL(__wrap_cxl_setup_parent_dport, CXL);
+
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS(ACPI);
MODULE_IMPORT_NS(CXL);
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index bc8fe9e8f7f2..3b7df5477317 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -65,9 +65,11 @@ TARGETS += net/af_unix
TARGETS += net/forwarding
TARGETS += net/hsr
TARGETS += net/mptcp
+TARGETS += net/netfilter
TARGETS += net/openvswitch
+TARGETS += net/packetdrill
+TARGETS += net/rds
TARGETS += net/tcp_ao
-TARGETS += net/netfilter
TARGETS += nsfs
TARGETS += perf_events
TARGETS += pidfd
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index d8909b2b535a..f2d6007a2b98 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -156,6 +156,12 @@ static void pmull_sigill(void)
asm volatile(".inst 0x0ee0e000" : : : );
}
+static void poe_sigill(void)
+{
+ /* mrs x0, POR_EL0 */
+ asm volatile("mrs x0, S3_3_C10_C2_4" : : : "x0");
+}
+
static void rng_sigill(void)
{
asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0");
@@ -602,6 +608,14 @@ static const struct hwcap_data {
.sigill_fn = pmull_sigill,
},
{
+ .name = "POE",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_POE,
+ .cpuinfo = "poe",
+ .sigill_fn = poe_sigill,
+ .sigill_reliable = true,
+ },
+ {
.name = "RNG",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_RNG,
diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c
index e4fa507cbdd0..b51d21f78cf9 100644
--- a/tools/testing/selftests/arm64/abi/ptrace.c
+++ b/tools/testing/selftests/arm64/abi/ptrace.c
@@ -163,10 +163,10 @@ static void test_hw_debug(pid_t child, int type, const char *type_name)
static int do_child(void)
{
if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
- ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+ ksft_exit_fail_perror("PTRACE_TRACEME");
if (raise(SIGSTOP))
- ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+ ksft_exit_fail_perror("raise(SIGSTOP)");
return EXIT_SUCCESS;
}
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
index 1ce5b5eac386..b2f2bfd5c6aa 100644
--- a/tools/testing/selftests/arm64/signal/.gitignore
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -2,6 +2,7 @@
mangle_*
fake_sigreturn_*
fpmr_*
+poe_*
sme_*
ssve_*
sve_*
diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile
index 8f5febaf1a9a..edb3613513b8 100644
--- a/tools/testing/selftests/arm64/signal/Makefile
+++ b/tools/testing/selftests/arm64/signal/Makefile
@@ -23,7 +23,7 @@ $(TEST_GEN_PROGS): $(PROGS)
# Common test-unit targets to build common-layout test-cases executables
# Needs secondary expansion to properly include the testcase c-file in pre-reqs
COMMON_SOURCES := test_signals.c test_signals_utils.c testcases/testcases.c \
- signals.S
+ signals.S sve_helpers.c
COMMON_HEADERS := test_signals.h test_signals_utils.h testcases/testcases.h
.SECONDEXPANSION:
diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.c b/tools/testing/selftests/arm64/signal/sve_helpers.c
new file mode 100644
index 000000000000..0acc121af306
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/sve_helpers.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 ARM Limited
+ *
+ * Common helper functions for SVE and SME functionality.
+ */
+
+#include <stdbool.h>
+#include <kselftest.h>
+#include <asm/sigcontext.h>
+#include <sys/prctl.h>
+
+unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls;
+
+int sve_fill_vls(bool use_sme, int min_vls)
+{
+ int vq, vl;
+ int pr_set_vl = use_sme ? PR_SME_SET_VL : PR_SVE_SET_VL;
+ int len_mask = use_sme ? PR_SME_VL_LEN_MASK : PR_SVE_VL_LEN_MASK;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(pr_set_vl, vq * 16);
+ if (vl == -1)
+ return KSFT_FAIL;
+
+ vl &= len_mask;
+
+ /*
+ * Unlike SVE, SME does not require the minimum vector length
+ * to be implemented, or the VLs to be consecutive, so any call
+ * to the prctl might return the single implemented VL, which
+ * might be larger than 16. So to avoid this loop never
+ * terminating, bail out here when we find a higher VL than
+ * we asked for.
+ * See the ARM ARM, DDI 0487K.a, B1.4.2: I_QQRNR and I_NWYBP.
+ */
+ if (vq < sve_vq_from_vl(vl))
+ break;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ if (nvls < min_vls) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ return KSFT_SKIP;
+ }
+
+ return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.h b/tools/testing/selftests/arm64/signal/sve_helpers.h
new file mode 100644
index 000000000000..50948ce471cc
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/sve_helpers.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2024 ARM Limited
+ *
+ * Common helper functions for SVE and SME functionality.
+ */
+
+#ifndef __SVE_HELPERS_H__
+#define __SVE_HELPERS_H__
+
+#include <stdbool.h>
+
+#define VLS_USE_SVE false
+#define VLS_USE_SME true
+
+extern unsigned int vls[];
+extern unsigned int nvls;
+
+int sve_fill_vls(bool use_sme, int min_vls);
+
+#endif
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
index ebd5815b54bb..dfd6a2badf9f 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
@@ -6,44 +6,28 @@
* handler, this is not supported and is expected to segfault.
*/
+#include <kselftest.h>
#include <signal.h>
#include <ucontext.h>
#include <sys/prctl.h>
#include "test_signals_utils.h"
+#include "sve_helpers.h"
#include "testcases.h"
struct fake_sigframe sf;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
static bool sme_get_vls(struct tdescr *td)
{
- int vq, vl;
+ int res = sve_fill_vls(VLS_USE_SME, 2);
- /*
- * Enumerate up to SVE_VQ_MAX vector lengths
- */
- for (vq = SVE_VQ_MAX; vq > 0; --vq) {
- vl = prctl(PR_SVE_SET_VL, vq * 16);
- if (vl == -1)
- return false;
+ if (!res)
+ return true;
- vl &= PR_SME_VL_LEN_MASK;
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
- /* Skip missing VLs */
- vq = sve_vq_from_vl(vl);
-
- vls[nvls++] = vl;
- }
-
- /* We need at least two VLs */
- if (nvls < 2) {
- fprintf(stderr, "Only %d VL supported\n", nvls);
- return false;
- }
-
- return true;
+ return false;
}
static int fake_sigreturn_ssve_change_vl(struct tdescr *td,
@@ -51,30 +35,30 @@ static int fake_sigreturn_ssve_change_vl(struct tdescr *td,
{
size_t resv_sz, offset;
struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
- struct sve_context *sve;
+ struct za_context *za;
/* Get a signal context with a SME ZA frame in it */
if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
return 1;
resv_sz = GET_SF_RESV_SIZE(sf);
- head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+ head = get_header(head, ZA_MAGIC, resv_sz, &offset);
if (!head) {
- fprintf(stderr, "No SVE context\n");
+ fprintf(stderr, "No ZA context\n");
return 1;
}
- if (head->size != sizeof(struct sve_context)) {
+ if (head->size != sizeof(struct za_context)) {
fprintf(stderr, "Register data present, aborting\n");
return 1;
}
- sve = (struct sve_context *)head;
+ za = (struct za_context *)head;
/* No changes are supported; init left us at minimum VL so go to max */
fprintf(stderr, "Attempting to change VL from %d to %d\n",
- sve->vl, vls[0]);
- sve->vl = vls[0];
+ za->vl, vls[0]);
+ za->vl = vls[0];
fake_sigreturn(&sf, sizeof(sf), 0);
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
index e2a452190511..e1ccf8f85a70 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
@@ -12,40 +12,22 @@
#include <sys/prctl.h>
#include "test_signals_utils.h"
+#include "sve_helpers.h"
#include "testcases.h"
struct fake_sigframe sf;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
static bool sve_get_vls(struct tdescr *td)
{
- int vq, vl;
+ int res = sve_fill_vls(VLS_USE_SVE, 2);
- /*
- * Enumerate up to SVE_VQ_MAX vector lengths
- */
- for (vq = SVE_VQ_MAX; vq > 0; --vq) {
- vl = prctl(PR_SVE_SET_VL, vq * 16);
- if (vl == -1)
- return false;
+ if (!res)
+ return true;
- vl &= PR_SVE_VL_LEN_MASK;
-
- /* Skip missing VLs */
- vq = sve_vq_from_vl(vl);
-
- vls[nvls++] = vl;
- }
-
- /* We need at least two VLs */
- if (nvls < 2) {
- fprintf(stderr, "Only %d VL supported\n", nvls);
+ if (res == KSFT_SKIP)
td->result = KSFT_SKIP;
- return false;
- }
- return true;
+ return false;
}
static int fake_sigreturn_sve_change_vl(struct tdescr *td,
diff --git a/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c b/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c
new file mode 100644
index 000000000000..36bd9940ee05
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 Arm Limited
+ *
+ * Verify that the POR_EL0 register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <asm/sigcontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+
+#define SYS_POR_EL0 "S3_3_C10_C2_4"
+
+static uint64_t get_por_el0(void)
+{
+ uint64_t val;
+
+ asm volatile(
+ "mrs %0, " SYS_POR_EL0 "\n"
+ : "=r"(val)
+ :
+ : );
+
+ return val;
+}
+
+int poe_present(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct poe_context *poe_ctx;
+ size_t offset;
+ bool in_sigframe;
+ bool have_poe;
+ __u64 orig_poe;
+
+ have_poe = getauxval(AT_HWCAP2) & HWCAP2_POE;
+ if (have_poe)
+ orig_poe = get_por_el0();
+
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ poe_ctx = (struct poe_context *)
+ get_header(head, POE_MAGIC, td->live_sz, &offset);
+
+ in_sigframe = poe_ctx != NULL;
+
+ fprintf(stderr, "POR_EL0 sigframe %s on system %s POE\n",
+ in_sigframe ? "present" : "absent",
+ have_poe ? "with" : "without");
+
+ td->pass = (in_sigframe == have_poe);
+
+ /*
+ * Check that the value we read back was the one present at
+ * the time that the signal was triggered.
+ */
+ if (have_poe && poe_ctx) {
+ if (poe_ctx->por_el0 != orig_poe) {
+ fprintf(stderr, "POR_EL0 in frame is %llx, was %llx\n",
+ poe_ctx->por_el0, orig_poe);
+ td->pass = false;
+ }
+ }
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "POR_EL0",
+ .descr = "Validate that POR_EL0 is present as expected",
+ .timeout = 3,
+ .run = poe_present,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
index 3d37daafcff5..6dbe48cf8b09 100644
--- a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
@@ -6,51 +6,31 @@
* set up as expected.
*/
+#include <kselftest.h>
#include <signal.h>
#include <ucontext.h>
#include <sys/prctl.h>
#include "test_signals_utils.h"
+#include "sve_helpers.h"
#include "testcases.h"
static union {
ucontext_t uc;
char buf[1024 * 64];
} context;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
static bool sme_get_vls(struct tdescr *td)
{
- int vq, vl;
+ int res = sve_fill_vls(VLS_USE_SME, 1);
- /*
- * Enumerate up to SVE_VQ_MAX vector lengths
- */
- for (vq = SVE_VQ_MAX; vq > 0; --vq) {
- vl = prctl(PR_SME_SET_VL, vq * 16);
- if (vl == -1)
- return false;
-
- vl &= PR_SME_VL_LEN_MASK;
-
- /* Did we find the lowest supported VL? */
- if (vq < sve_vq_from_vl(vl))
- break;
+ if (!res)
+ return true;
- /* Skip missing VLs */
- vq = sve_vq_from_vl(vl);
-
- vls[nvls++] = vl;
- }
-
- /* We need at least one VL */
- if (nvls < 1) {
- fprintf(stderr, "Only %d VL supported\n", nvls);
- return false;
- }
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
- return true;
+ return false;
}
static void setup_ssve_regs(void)
diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c
index 9dc5f128bbc0..5557e116e973 100644
--- a/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c
@@ -6,51 +6,31 @@
* signal frames is set up as expected when enabled simultaneously.
*/
+#include <kselftest.h>
#include <signal.h>
#include <ucontext.h>
#include <sys/prctl.h>
#include "test_signals_utils.h"
+#include "sve_helpers.h"
#include "testcases.h"
static union {
ucontext_t uc;
char buf[1024 * 128];
} context;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
static bool sme_get_vls(struct tdescr *td)
{
- int vq, vl;
+ int res = sve_fill_vls(VLS_USE_SME, 1);
- /*
- * Enumerate up to SVE_VQ_MAX vector lengths
- */
- for (vq = SVE_VQ_MAX; vq > 0; --vq) {
- vl = prctl(PR_SME_SET_VL, vq * 16);
- if (vl == -1)
- return false;
-
- vl &= PR_SME_VL_LEN_MASK;
-
- /* Did we find the lowest supported VL? */
- if (vq < sve_vq_from_vl(vl))
- break;
+ if (!res)
+ return true;
- /* Skip missing VLs */
- vq = sve_vq_from_vl(vl);
-
- vls[nvls++] = vl;
- }
-
- /* We need at least one VL */
- if (nvls < 1) {
- fprintf(stderr, "Only %d VL supported\n", nvls);
- return false;
- }
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
- return true;
+ return false;
}
static void setup_regs(void)
diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
index 8b16eabbb769..8143eb1c58c1 100644
--- a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
@@ -6,47 +6,31 @@
* expected.
*/
+#include <kselftest.h>
#include <signal.h>
#include <ucontext.h>
#include <sys/prctl.h>
#include "test_signals_utils.h"
+#include "sve_helpers.h"
#include "testcases.h"
static union {
ucontext_t uc;
char buf[1024 * 64];
} context;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
static bool sve_get_vls(struct tdescr *td)
{
- int vq, vl;
+ int res = sve_fill_vls(VLS_USE_SVE, 1);
- /*
- * Enumerate up to SVE_VQ_MAX vector lengths
- */
- for (vq = SVE_VQ_MAX; vq > 0; --vq) {
- vl = prctl(PR_SVE_SET_VL, vq * 16);
- if (vl == -1)
- return false;
-
- vl &= PR_SVE_VL_LEN_MASK;
-
- /* Skip missing VLs */
- vq = sve_vq_from_vl(vl);
+ if (!res)
+ return true;
- vls[nvls++] = vl;
- }
-
- /* We need at least one VL */
- if (nvls < 1) {
- fprintf(stderr, "Only %d VL supported\n", nvls);
- return false;
- }
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
- return true;
+ return false;
}
static void setup_sve_regs(void)
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
index 674b88cc8c39..e6daa94fcd2e 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.c
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
@@ -6,29 +6,6 @@
#include "testcases.h"
-struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
- size_t resv_sz, size_t *offset)
-{
- size_t offs = 0;
- struct _aarch64_ctx *found = NULL;
-
- if (!head || resv_sz < HDR_SZ)
- return found;
-
- while (offs <= resv_sz - HDR_SZ &&
- head->magic != magic && head->magic) {
- offs += head->size;
- head = GET_RESV_NEXT_HEAD(head);
- }
- if (head->magic == magic) {
- found = head;
- if (offset)
- *offset = offs;
- }
-
- return found;
-}
-
bool validate_extra_context(struct extra_context *extra, char **err,
void **extra_data, size_t *extra_size)
{
@@ -184,6 +161,10 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
if (head->size != sizeof(struct esr_context))
*err = "Bad size for esr_context";
break;
+ case POE_MAGIC:
+ if (head->size != sizeof(struct poe_context))
+ *err = "Bad size for poe_context";
+ break;
case TPIDR2_MAGIC:
if (head->size != sizeof(struct tpidr2_context))
*err = "Bad size for tpidr2_context";
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h
index 7727126347e0..9872b8912714 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.h
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h
@@ -26,6 +26,9 @@
#define HDR_SZ \
sizeof(struct _aarch64_ctx)
+#define GET_UC_RESV_HEAD(uc) \
+ (struct _aarch64_ctx *)(&(uc->uc_mcontext.__reserved))
+
#define GET_SF_RESV_HEAD(sf) \
(struct _aarch64_ctx *)(&(sf).uc.uc_mcontext.__reserved)
@@ -88,8 +91,29 @@ struct fake_sigframe {
bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err);
-struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
- size_t resv_sz, size_t *offset);
+static inline struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
+ size_t resv_sz, size_t *offset)
+{
+ size_t offs = 0;
+ struct _aarch64_ctx *found = NULL;
+
+ if (!head || resv_sz < HDR_SZ)
+ return found;
+
+ while (offs <= resv_sz - HDR_SZ &&
+ head->magic != magic && head->magic) {
+ offs += head->size;
+ head = GET_RESV_NEXT_HEAD(head);
+ }
+ if (head->magic == magic) {
+ found = head;
+ if (offset)
+ *offset = offs;
+ }
+
+ return found;
+}
+
static inline struct _aarch64_ctx *get_terminator(struct _aarch64_ctx *head,
size_t resv_sz,
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c
index 4d6f94b6178f..ce26e9c2fa5e 100644
--- a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c
@@ -6,47 +6,31 @@
* expected.
*/
+#include <kselftest.h>
#include <signal.h>
#include <ucontext.h>
#include <sys/prctl.h>
#include "test_signals_utils.h"
+#include "sve_helpers.h"
#include "testcases.h"
static union {
ucontext_t uc;
char buf[1024 * 128];
} context;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
static bool sme_get_vls(struct tdescr *td)
{
- int vq, vl;
+ int res = sve_fill_vls(VLS_USE_SME, 1);
- /*
- * Enumerate up to SME_VQ_MAX vector lengths
- */
- for (vq = SVE_VQ_MAX; vq > 0; --vq) {
- vl = prctl(PR_SME_SET_VL, vq * 16);
- if (vl == -1)
- return false;
-
- vl &= PR_SME_VL_LEN_MASK;
-
- /* Skip missing VLs */
- vq = sve_vq_from_vl(vl);
+ if (!res)
+ return true;
- vls[nvls++] = vl;
- }
-
- /* We need at least one VL */
- if (nvls < 1) {
- fprintf(stderr, "Only %d VL supported\n", nvls);
- return false;
- }
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
- return true;
+ return false;
}
static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
index 174ad6656696..b9e13f27f1f9 100644
--- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c
+++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
@@ -6,51 +6,31 @@
* expected.
*/
+#include <kselftest.h>
#include <signal.h>
#include <ucontext.h>
#include <sys/prctl.h>
#include "test_signals_utils.h"
+#include "sve_helpers.h"
#include "testcases.h"
static union {
ucontext_t uc;
char buf[1024 * 128];
} context;
-static unsigned int vls[SVE_VQ_MAX];
-unsigned int nvls = 0;
static bool sme_get_vls(struct tdescr *td)
{
- int vq, vl;
+ int res = sve_fill_vls(VLS_USE_SME, 1);
- /*
- * Enumerate up to SME_VQ_MAX vector lengths
- */
- for (vq = SVE_VQ_MAX; vq > 0; --vq) {
- vl = prctl(PR_SME_SET_VL, vq * 16);
- if (vl == -1)
- return false;
-
- vl &= PR_SME_VL_LEN_MASK;
-
- /* Did we find the lowest supported VL? */
- if (vq < sve_vq_from_vl(vl))
- break;
+ if (!res)
+ return true;
- /* Skip missing VLs */
- vq = sve_vq_from_vl(vl);
-
- vls[nvls++] = vl;
- }
-
- /* We need at least one VL */
- if (nvls < 1) {
- fprintf(stderr, "Only %d VL supported\n", nvls);
- return false;
- }
+ if (res == KSFT_SKIP)
+ td->result = KSFT_SKIP;
- return true;
+ return false;
}
static void setup_za_regs(void)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index dd49c1d23a60..81d4757ecd4c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -713,7 +713,7 @@ $(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp
# Make sure we are able to include and link libbpf against c++.
$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
$(call msg,CXX,,$@)
- $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
+ $(Q)$(CXX) $(subst -D_GNU_SOURCE=,,$(CFLAGS)) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
# Benchmark runner
$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ)
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
index 11ee801e75e7..6c3b4d4f173a 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
@@ -34,6 +34,12 @@ DECLARE_TRACE(bpf_testmod_test_write_bare,
TP_ARGS(task, ctx)
);
+/* Used in bpf_testmod_test_read() to test __nullable suffix */
+DECLARE_TRACE(bpf_testmod_test_nullable_bare,
+ TP_PROTO(struct bpf_testmod_test_read_ctx *ctx__nullable),
+ TP_ARGS(ctx__nullable)
+);
+
#undef BPF_TESTMOD_DECLARE_TRACE
#ifdef DECLARE_TRACE_WRITABLE
#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index fd28c1157bd3..a32771da4293 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -356,6 +356,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
if (bpf_testmod_loop_test(101) > 100)
trace_bpf_testmod_test_read(current, &ctx);
+ trace_bpf_testmod_test_nullable_bare(NULL);
+
/* Magic number to enable writable tp */
if (len == 64) {
struct bpf_testmod_test_writable_ctx writable = {
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 00965a6e83bb..61de88cf4ad0 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3551,6 +3551,40 @@ static struct btf_raw_test raw_tests[] = {
BTF_STR_SEC("\0x\0?.foo bar:buz"),
},
{
+ .descr = "datasec: name with non-printable first char not is ok",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC ?.data */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0x\0\7foo"),
+ .err_str = "Invalid name",
+ .btf_load_err = true,
+},
+{
+ .descr = "datasec: name '\\0' is not ok",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC \0 */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0x\0"),
+ .err_str = "Invalid name",
+ .btf_load_err = true,
+},
+{
.descr = "type name '?foo' is not ok",
.raw_types = {
/* union ?foo; */
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index 7cfac53c0d58..b614a5272dfd 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -9,6 +9,7 @@
enum test_setup_type {
SETUP_SYSCALL_SLEEP,
SETUP_SKB_PROG,
+ SETUP_SKB_PROG_TP,
};
static struct {
@@ -28,6 +29,7 @@ static struct {
{"test_dynptr_clone", SETUP_SKB_PROG},
{"test_dynptr_skb_no_buff", SETUP_SKB_PROG},
{"test_dynptr_skb_strcmp", SETUP_SKB_PROG},
+ {"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP},
};
static void verify_success(const char *prog_name, enum test_setup_type setup_type)
@@ -35,7 +37,7 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
struct dynptr_success *skel;
struct bpf_program *prog;
struct bpf_link *link;
- int err;
+ int err;
skel = dynptr_success__open();
if (!ASSERT_OK_PTR(skel, "dynptr_success__open"))
@@ -47,7 +49,7 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
goto cleanup;
- bpf_program__set_autoload(prog, true);
+ bpf_program__set_autoload(prog, true);
err = dynptr_success__load(skel);
if (!ASSERT_OK(err, "dynptr_success__load"))
@@ -87,6 +89,37 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
break;
}
+ case SETUP_SKB_PROG_TP:
+ {
+ struct __sk_buff skb = {};
+ struct bpf_object *obj;
+ int aux_prog_fd;
+
+ /* Just use its test_run to trigger kfree_skb tracepoint */
+ err = bpf_prog_test_load("./test_pkt_access.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &aux_prog_fd);
+ if (!ASSERT_OK(err, "prog_load sched cls"))
+ goto cleanup;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(aux_prog_fd, &topts);
+ bpf_link__destroy(link);
+
+ if (!ASSERT_OK(err, "test_run"))
+ goto cleanup;
+
+ break;
+ }
}
ASSERT_EQ(skel->bss->err, 0, "err");
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 9e5f38739104..6b3078dd5645 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -378,8 +378,8 @@ struct test tests[] = {
.iph_inner.ihl = 5,
.iph_inner.protocol = IPPROTO_TCP,
.iph_inner.tot_len =
- __bpf_constant_htons(MAGIC_BYTES) -
- sizeof(struct iphdr),
+ __bpf_constant_htons(MAGIC_BYTES -
+ sizeof(struct iphdr)),
.tcp.doff = 5,
.tcp.source = 80,
.tcp.dest = 8080,
@@ -407,8 +407,8 @@ struct test tests[] = {
.iph_inner.ihl = 5,
.iph_inner.protocol = IPPROTO_TCP,
.iph_inner.tot_len =
- __bpf_constant_htons(MAGIC_BYTES) -
- sizeof(struct iphdr),
+ __bpf_constant_htons(MAGIC_BYTES -
+ sizeof(struct iphdr)),
.tcp.doff = 5,
.tcp.source = 80,
.tcp.dest = 8080,
@@ -436,8 +436,8 @@ struct test tests[] = {
.iph_inner.ihl = 5,
.iph_inner.protocol = IPPROTO_TCP,
.iph_inner.tot_len =
- __bpf_constant_htons(MAGIC_BYTES) -
- sizeof(struct iphdr),
+ __bpf_constant_htons(MAGIC_BYTES -
+ sizeof(struct iphdr)),
.tcp.doff = 5,
.tcp.source = 99,
.tcp.dest = 9090,
diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
index 7d4a9b3d3722..e12255121c15 100644
--- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
@@ -154,6 +154,51 @@ err_out:
close(sfd);
}
+static void test_nonstandard_opt(int family)
+{
+ struct setget_sockopt__bss *bss = skel->bss;
+ struct bpf_link *getsockopt_link = NULL;
+ int sfd = -1, fd = -1, cfd = -1, flags;
+ socklen_t flagslen = sizeof(flags);
+
+ memset(bss, 0, sizeof(*bss));
+
+ sfd = start_server(family, SOCK_STREAM,
+ family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+ if (!ASSERT_GE(sfd, 0, "start_server"))
+ return;
+
+ fd = connect_to_fd(sfd, 0);
+ if (!ASSERT_GE(fd, 0, "connect_to_fd_server"))
+ goto err_out;
+
+ /* cgroup/getsockopt prog will intercept getsockopt() below and
+ * retrieve the tcp socket bpf_sock_ops_cb_flags value for the
+ * accept()ed socket; this was set earlier in the passive established
+ * callback for the accept()ed socket via bpf_setsockopt().
+ */
+ getsockopt_link = bpf_program__attach_cgroup(skel->progs._getsockopt, cg_fd);
+ if (!ASSERT_OK_PTR(getsockopt_link, "getsockopt prog"))
+ goto err_out;
+
+ cfd = accept(sfd, NULL, 0);
+ if (!ASSERT_GE(cfd, 0, "accept"))
+ goto err_out;
+
+ if (!ASSERT_OK(getsockopt(cfd, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, &flagslen),
+ "getsockopt_flags"))
+ goto err_out;
+ ASSERT_EQ(flags & BPF_SOCK_OPS_STATE_CB_FLAG, BPF_SOCK_OPS_STATE_CB_FLAG,
+ "cb_flags_set");
+err_out:
+ close(sfd);
+ if (fd != -1)
+ close(fd);
+ if (cfd != -1)
+ close(cfd);
+ bpf_link__destroy(getsockopt_link);
+}
+
void test_setget_sockopt(void)
{
cg_fd = test__join_cgroup(CG_NAME);
@@ -191,6 +236,8 @@ void test_setget_sockopt(void)
test_udp(AF_INET);
test_ktls(AF_INET6);
test_ktls(AF_INET);
+ test_nonstandard_opt(AF_INET);
+ test_nonstandard_opt(AF_INET6);
done:
setget_sockopt__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 1337153eb0ad..82bfb266741c 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -451,11 +451,11 @@ out:
#define MAX_EVENTS 10
static void test_sockmap_skb_verdict_shutdown(void)
{
+ int n, err, map, verdict, c1 = -1, p1 = -1;
struct epoll_event ev, events[MAX_EVENTS];
- int n, err, map, verdict, s, c1 = -1, p1 = -1;
struct test_sockmap_pass_prog *skel;
- int epollfd;
int zero = 0;
+ int epollfd;
char b;
skel = test_sockmap_pass_prog__open_and_load();
@@ -469,10 +469,7 @@ static void test_sockmap_skb_verdict_shutdown(void)
if (!ASSERT_OK(err, "bpf_prog_attach"))
goto out;
- s = socket_loopback(AF_INET, SOCK_STREAM);
- if (s < 0)
- goto out;
- err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
+ err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
if (err < 0)
goto out;
@@ -506,8 +503,8 @@ out:
static void test_sockmap_skb_verdict_fionread(bool pass_prog)
{
+ int err, map, verdict, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
int expected, zero = 0, sent, recvd, avail;
- int err, map, verdict, s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
struct test_sockmap_pass_prog *pass = NULL;
struct test_sockmap_drop_prog *drop = NULL;
char buf[256] = "0123456789";
@@ -534,11 +531,8 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
if (!ASSERT_OK(err, "bpf_prog_attach"))
goto out;
- s = socket_loopback(AF_INET, SOCK_STREAM);
- if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
- goto out;
- err = create_socket_pairs(s, AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
- if (!ASSERT_OK(err, "create_socket_pairs(s)"))
+ err = create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs()"))
goto out;
err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
@@ -570,16 +564,12 @@ out:
static void test_sockmap_skb_verdict_peek_helper(int map)
{
- int err, s, c1, p1, zero = 0, sent, recvd, avail;
+ int err, c1, p1, zero = 0, sent, recvd, avail;
char snd[256] = "0123456789";
char rcv[256] = "0";
- s = socket_loopback(AF_INET, SOCK_STREAM);
- if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
- return;
-
- err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
- if (!ASSERT_OK(err, "create_pairs(s)"))
+ err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
+ if (!ASSERT_OK(err, "create_pair()"))
return;
err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
index e880f97bc44d..38e35c72bdaa 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -3,6 +3,9 @@
#include <linux/vm_sockets.h>
+/* include/linux/net.h */
+#define SOCK_TYPE_MASK 0xf
+
#define IO_TIMEOUT_SEC 30
#define MAX_STRERR_LEN 256
#define MAX_TEST_NAME 80
@@ -14,6 +17,17 @@
#define __always_unused __attribute__((__unused__))
+/* include/linux/cleanup.h */
+#define __get_and_null(p, nullvalue) \
+ ({ \
+ __auto_type __ptr = &(p); \
+ __auto_type __val = *__ptr; \
+ *__ptr = nullvalue; \
+ __val; \
+ })
+
+#define take_fd(fd) __get_and_null(fd, -EBADF)
+
#define _FAIL(errnum, fmt...) \
({ \
error_at_line(0, (errnum), __func__, __LINE__, fmt); \
@@ -179,6 +193,14 @@
__ret; \
})
+static inline void close_fd(int *fd)
+{
+ if (*fd >= 0)
+ xclose(*fd);
+}
+
+#define __close_fd __attribute__((cleanup(close_fd)))
+
static inline int poll_connect(int fd, unsigned int timeout_sec)
{
struct timeval timeout = { .tv_sec = timeout_sec };
@@ -312,54 +334,6 @@ static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
}
-static inline int create_pair(int s, int family, int sotype, int *c, int *p)
-{
- struct sockaddr_storage addr;
- socklen_t len;
- int err = 0;
-
- len = sizeof(addr);
- err = xgetsockname(s, sockaddr(&addr), &len);
- if (err)
- return err;
-
- *c = xsocket(family, sotype, 0);
- if (*c < 0)
- return errno;
- err = xconnect(*c, sockaddr(&addr), len);
- if (err) {
- err = errno;
- goto close_cli0;
- }
-
- *p = xaccept_nonblock(s, NULL, NULL);
- if (*p < 0) {
- err = errno;
- goto close_cli0;
- }
- return err;
-close_cli0:
- close(*c);
- return err;
-}
-
-static inline int create_socket_pairs(int s, int family, int sotype,
- int *c0, int *c1, int *p0, int *p1)
-{
- int err;
-
- err = create_pair(s, family, sotype, c0, p0);
- if (err)
- return err;
-
- err = create_pair(s, family, sotype, c1, p1);
- if (err) {
- close(*c0);
- close(*p0);
- }
- return err;
-}
-
static inline int enable_reuseport(int s, int progfd)
{
int err, one = 1;
@@ -412,5 +386,84 @@ static inline int socket_loopback(int family, int sotype)
return socket_loopback_reuseport(family, sotype, -1);
}
+static inline int create_pair(int family, int sotype, int *p0, int *p1)
+{
+ __close_fd int s, c = -1, p = -1;
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int err;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return s;
+
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ return err;
+
+ c = xsocket(family, sotype, 0);
+ if (c < 0)
+ return c;
+
+ err = connect(c, sockaddr(&addr), len);
+ if (err) {
+ if (errno != EINPROGRESS) {
+ FAIL_ERRNO("connect");
+ return err;
+ }
+
+ err = poll_connect(c, IO_TIMEOUT_SEC);
+ if (err) {
+ FAIL_ERRNO("poll_connect");
+ return err;
+ }
+ }
+
+ switch (sotype & SOCK_TYPE_MASK) {
+ case SOCK_DGRAM:
+ err = xgetsockname(c, sockaddr(&addr), &len);
+ if (err)
+ return err;
+
+ err = xconnect(s, sockaddr(&addr), len);
+ if (err)
+ return err;
+
+ *p0 = take_fd(s);
+ break;
+ case SOCK_STREAM:
+ case SOCK_SEQPACKET:
+ p = xaccept_nonblock(s, NULL, NULL);
+ if (p < 0)
+ return p;
+
+ *p0 = take_fd(p);
+ break;
+ default:
+ FAIL("Unsupported socket type %#x", sotype);
+ return -EOPNOTSUPP;
+ }
+
+ *p1 = take_fd(c);
+ return 0;
+}
+
+static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
+ int *p0, int *p1)
+{
+ int err;
+
+ err = create_pair(family, sotype, c0, p0);
+ if (err)
+ return err;
+
+ err = create_pair(family, sotype, c1, p1);
+ if (err) {
+ close(*c0);
+ close(*p0);
+ }
+
+ return err;
+}
#endif // __SOCKMAP_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 9ce0e0e0b7da..da5a6fb03b69 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -677,7 +677,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
const char *log_prefix = redir_mode_str(mode);
- int s, c0, c1, p0, p1;
+ int c0, c1, p0, p1;
unsigned int pass;
int err, n;
u32 key;
@@ -685,13 +685,10 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
zero_verdict_count(verd_mapfd);
- s = socket_loopback(family, sotype | SOCK_NONBLOCK);
- if (s < 0)
- return;
-
- err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
+ err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1,
+ &p0, &p1);
if (err)
- goto close_srv;
+ return;
err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
@@ -722,8 +719,6 @@ close:
xclose(c1);
xclose(p0);
xclose(c0);
-close_srv:
- xclose(s);
}
static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
@@ -909,7 +904,7 @@ static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *sk
static void redir_partial(int family, int sotype, int sock_map, int parser_map)
{
- int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
+ int c0 = -1, c1 = -1, p0 = -1, p1 = -1;
int err, n, key, value;
char buf[] = "abc";
@@ -919,13 +914,10 @@ static void redir_partial(int family, int sotype, int sock_map, int parser_map)
if (err)
return;
- s = socket_loopback(family, sotype | SOCK_NONBLOCK);
- if (s < 0)
- goto clean_parser_map;
-
- err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
+ err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1,
+ &p0, &p1);
if (err)
- goto close_srv;
+ goto clean_parser_map;
err = add_to_sockmap(sock_map, p0, p1);
if (err)
@@ -944,8 +936,6 @@ close:
xclose(p0);
xclose(c1);
xclose(p1);
-close_srv:
- xclose(s);
clean_parser_map:
key = 0;
@@ -1500,49 +1490,7 @@ static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *ma
/* Returns two connected loopback vsock sockets */
static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int s, p, c;
-
- s = socket_loopback(AF_VSOCK, sotype);
- if (s < 0)
- return -1;
-
- c = xsocket(AF_VSOCK, sotype | SOCK_NONBLOCK, 0);
- if (c == -1)
- goto close_srv;
-
- if (getsockname(s, sockaddr(&addr), &len) < 0)
- goto close_cli;
-
- if (connect(c, sockaddr(&addr), len) < 0 && errno != EINPROGRESS) {
- FAIL_ERRNO("connect");
- goto close_cli;
- }
-
- len = sizeof(addr);
- p = accept_timeout(s, sockaddr(&addr), &len, IO_TIMEOUT_SEC);
- if (p < 0)
- goto close_cli;
-
- if (poll_connect(c, IO_TIMEOUT_SEC) < 0) {
- FAIL_ERRNO("poll_connect");
- goto close_acc;
- }
-
- *v0 = p;
- *v1 = c;
-
- return 0;
-
-close_acc:
- close(p);
-close_cli:
- close(c);
-close_srv:
- close(s);
-
- return -1;
+ return create_pair(AF_VSOCK, sotype | SOCK_NONBLOCK, v0, v1);
}
static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
@@ -1691,44 +1639,7 @@ static void test_reuseport(struct test_sockmap_listen *skel,
static int inet_socketpair(int family, int type, int *s, int *c)
{
- struct sockaddr_storage addr;
- socklen_t len;
- int p0, c0;
- int err;
-
- p0 = socket_loopback(family, type | SOCK_NONBLOCK);
- if (p0 < 0)
- return p0;
-
- len = sizeof(addr);
- err = xgetsockname(p0, sockaddr(&addr), &len);
- if (err)
- goto close_peer0;
-
- c0 = xsocket(family, type | SOCK_NONBLOCK, 0);
- if (c0 < 0) {
- err = c0;
- goto close_peer0;
- }
- err = xconnect(c0, sockaddr(&addr), len);
- if (err)
- goto close_cli0;
- err = xgetsockname(c0, sockaddr(&addr), &len);
- if (err)
- goto close_cli0;
- err = xconnect(p0, sockaddr(&addr), len);
- if (err)
- goto close_cli0;
-
- *s = p0;
- *c = c0;
- return 0;
-
-close_cli0:
- xclose(c0);
-close_peer0:
- xclose(p0);
- return err;
+ return create_pair(family, type | SOCK_NONBLOCK, s, c);
}
static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
@@ -1795,11 +1706,11 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
int sfd[2];
int err;
- if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+ if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
return;
c0 = sfd[0], p0 = sfd[1];
- err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
+ err = inet_socketpair(family, type, &p1, &c1);
if (err)
goto close;
@@ -1847,7 +1758,7 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
int sfd[2];
int err;
- err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
+ err = inet_socketpair(family, type, &p0, &c0);
if (err)
return;
@@ -1882,7 +1793,7 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
unix_inet_redir_to_connected(family, SOCK_DGRAM,
sock_map, -1, verdict_map,
REDIR_EGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
+ unix_inet_redir_to_connected(family, SOCK_STREAM,
sock_map, -1, verdict_map,
REDIR_EGRESS, NO_FLAGS);
diff --git a/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c b/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c
new file mode 100644
index 000000000000..accc42e01f8a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_tp_btf_nullable.skel.h"
+
+void test_tp_btf_nullable(void)
+{
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ RUN_TESTS(test_tp_btf_nullable);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
index bd8c75b620c2..c397336fe1ed 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
@@ -216,7 +216,7 @@ static void test_uretprobe_regs_change(void)
}
#ifndef __NR_uretprobe
-#define __NR_uretprobe 467
+#define __NR_uretprobe 335
#endif
__naked unsigned long uretprobe_syscall_call_1(void)
@@ -253,7 +253,7 @@ static void test_uretprobe_syscall_call(void)
struct uprobe_syscall_executed *skel;
int pid, status, err, go[2], c;
- if (ASSERT_OK(pipe(go), "pipe"))
+ if (!ASSERT_OK(pipe(go), "pipe"))
return;
skel = uprobe_syscall_executed__open_and_load();
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index e35bc1eac52a..c3bc186af21e 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -6,6 +6,7 @@
#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
#include <linux/if_ether.h>
#include "bpf_misc.h"
#include "bpf_kfuncs.h"
@@ -1254,6 +1255,30 @@ int skb_invalid_ctx(void *ctx)
return 0;
}
+SEC("fentry/skb_tx_error")
+__failure __msg("must be referenced or trusted")
+int BPF_PROG(skb_invalid_ctx_fentry, void *skb)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ return 0;
+}
+
+SEC("fexit/skb_tx_error")
+__failure __msg("must be referenced or trusted")
+int BPF_PROG(skb_invalid_ctx_fexit, void *skb)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ return 0;
+}
+
/* Reject writes to dynptr slot for uninit arg */
SEC("?raw_tp")
__failure __msg("potential write to dynptr at off=-16")
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index 5985920d162e..bfcc85686cf0 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -5,6 +5,7 @@
#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
#include "bpf_kfuncs.h"
#include "errno.h"
@@ -544,3 +545,25 @@ int test_dynptr_skb_strcmp(struct __sk_buff *skb)
return 1;
}
+
+SEC("tp_btf/kfree_skb")
+int BPF_PROG(test_dynptr_skb_tp_btf, void *skb, void *location)
+{
+ __u8 write_data[2] = {1, 2};
+ struct bpf_dynptr ptr;
+ int ret;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* since tp_btf skbs are read only, writes should fail */
+ ret = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+ if (ret != -EINVAL) {
+ err = 2;
+ return 1;
+ }
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 16bdc3e25591..ef70b88bccb2 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -1432,4 +1432,58 @@ int iter_arr_with_actual_elem_count(const void *ctx)
return sum;
}
+__u32 upper, select_n, result;
+__u64 global;
+
+static __noinline bool nest_2(char *str)
+{
+ /* some insns (including branch insns) to ensure stacksafe() is triggered
+ * in nest_2(). This way, stacksafe() can compare frame associated with nest_1().
+ */
+ if (str[0] == 't')
+ return true;
+ if (str[1] == 'e')
+ return true;
+ if (str[2] == 's')
+ return true;
+ if (str[3] == 't')
+ return true;
+ return false;
+}
+
+static __noinline bool nest_1(int n)
+{
+ /* case 0: allocate stack, case 1: no allocate stack */
+ switch (n) {
+ case 0: {
+ char comm[16];
+
+ if (bpf_get_current_comm(comm, 16))
+ return false;
+ return nest_2(comm);
+ }
+ case 1:
+ return nest_2((char *)&global);
+ default:
+ return false;
+ }
+}
+
+SEC("raw_tp")
+__success
+int iter_subprog_check_stacksafe(const void *ctx)
+{
+ long i;
+
+ bpf_for(i, 0, upper) {
+ if (!nest_1(select_n)) {
+ result = 1;
+ return 0;
+ }
+ }
+
+ result = 2;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
index 60518aed1ffc..6dd4318debbf 100644
--- a/tools/testing/selftests/bpf/progs/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -59,6 +59,8 @@ static const struct sockopt_test sol_tcp_tests[] = {
{ .opt = TCP_THIN_LINEAR_TIMEOUTS, .flip = 1, },
{ .opt = TCP_USER_TIMEOUT, .new = 123400, .expected = 123400, },
{ .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, },
+ { .opt = TCP_BPF_SOCK_OPS_CB_FLAGS, .new = BPF_SOCK_OPS_ALL_CB_FLAGS,
+ .expected = BPF_SOCK_OPS_ALL_CB_FLAGS, },
{ .opt = 0, },
};
@@ -353,11 +355,30 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family,
return 1;
}
+SEC("cgroup/getsockopt")
+int _getsockopt(struct bpf_sockopt *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ int *optval = ctx->optval;
+ struct tcp_sock *tp;
+
+ if (!sk || ctx->level != SOL_TCP || ctx->optname != TCP_BPF_SOCK_OPS_CB_FLAGS)
+ return 1;
+
+ tp = bpf_core_cast(sk, struct tcp_sock);
+ if (ctx->optval + sizeof(int) <= ctx->optval_end) {
+ *optval = tp->bpf_sock_ops_cb_flags;
+ ctx->retval = 0;
+ }
+ return 1;
+}
+
SEC("sockops")
int skops_sockopt(struct bpf_sock_ops *skops)
{
struct bpf_sock *bpf_sk = skops->sk;
struct sock *sk;
+ int flags;
if (!bpf_sk)
return 1;
@@ -384,9 +405,8 @@ int skops_sockopt(struct bpf_sock_ops *skops)
nr_passive += !(bpf_test_sockopt(skops, sk) ||
test_tcp_maxseg(skops, sk) ||
test_tcp_saved_syn(skops, sk));
- bpf_sock_ops_cb_flags_set(skops,
- skops->bpf_sock_ops_cb_flags |
- BPF_SOCK_OPS_STATE_CB_FLAG);
+ flags = skops->bpf_sock_ops_cb_flags | BPF_SOCK_OPS_STATE_CB_FLAG;
+ bpf_setsockopt(skops, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, sizeof(flags));
break;
case BPF_SOCK_OPS_STATE_CB:
if (skops->args[1] == BPF_TCP_CLOSE_WAIT)
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
index 44ee0d037f95..eb5cca1fce16 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
@@ -486,17 +486,10 @@ static int tcp_validate_cookie(struct tcp_syncookie *ctx)
goto err;
mssind = (cookie & (3 << 6)) >> 6;
- if (ctx->ipv4) {
- if (mssind > ARRAY_SIZE(msstab4))
- goto err;
-
+ if (ctx->ipv4)
ctx->attrs.mss = msstab4[mssind];
- } else {
- if (mssind > ARRAY_SIZE(msstab6))
- goto err;
-
+ else
ctx->attrs.mss = msstab6[mssind];
- }
ctx->attrs.snd_wscale = cookie & BPF_SYNCOOKIE_WSCALE_MASK;
ctx->attrs.rcv_wscale = ctx->attrs.snd_wscale;
diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
new file mode 100644
index 000000000000..bba3e37f749b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+#include "bpf_misc.h"
+
+SEC("tp_btf/bpf_testmod_test_nullable_bare")
+__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
+int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx)
+{
+ return nullable_ctx->len;
+}
+
+SEC("tp_btf/bpf_testmod_test_nullable_bare")
+int BPF_PROG(handle_tp_btf_nullable_bare2, struct bpf_testmod_test_read_ctx *nullable_ctx)
+{
+ if (nullable_ctx)
+ return nullable_ctx->len;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 8144fd145237..1ee0ef114f9d 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -324,6 +324,25 @@ out:
return zc_avail;
}
+#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags"
+static unsigned int get_max_skb_frags(void)
+{
+ unsigned int max_skb_frags = 0;
+ FILE *file;
+
+ file = fopen(MAX_SKB_FRAGS_PATH, "r");
+ if (!file) {
+ ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH);
+ return 0;
+ }
+
+ if (fscanf(file, "%u", &max_skb_frags) != 1)
+ ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH);
+
+ fclose(file);
+ return max_skb_frags;
+}
+
static struct option long_options[] = {
{"interface", required_argument, 0, 'i'},
{"busy-poll", no_argument, 0, 'b'},
@@ -2244,13 +2263,24 @@ static int testapp_poll_rxq_tmout(struct test_spec *test)
static int testapp_too_many_frags(struct test_spec *test)
{
- struct pkt pkts[2 * XSK_DESC__MAX_SKB_FRAGS + 2] = {};
+ struct pkt *pkts;
u32 max_frags, i;
+ int ret;
- if (test->mode == TEST_MODE_ZC)
+ if (test->mode == TEST_MODE_ZC) {
max_frags = test->ifobj_tx->xdp_zc_max_segs;
- else
- max_frags = XSK_DESC__MAX_SKB_FRAGS;
+ } else {
+ max_frags = get_max_skb_frags();
+ if (!max_frags) {
+ ksft_print_msg("Couldn't retrieve MAX_SKB_FRAGS from system, using default (17) value\n");
+ max_frags = 17;
+ }
+ max_frags += 1;
+ }
+
+ pkts = calloc(2 * max_frags + 2, sizeof(struct pkt));
+ if (!pkts)
+ return TEST_FAILURE;
test->mtu = MAX_ETH_JUMBO_SIZE;
@@ -2280,7 +2310,10 @@ static int testapp_too_many_frags(struct test_spec *test)
pkts[2 * max_frags + 1].valid = true;
pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2);
- return testapp_validate_traffic(test);
+ ret = testapp_validate_traffic(test);
+
+ free(pkts);
+ return ret;
}
static int xsk_load_xdp_programs(struct ifobject *ifobj)
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 885c948c5d83..e46e823f6a1a 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -55,7 +55,6 @@
#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
#define XSK_DESC__INVALID_OPTION (0xffff)
-#define XSK_DESC__MAX_SKB_FRAGS 18
#define HUGEPAGE_SIZE (2 * 1024 * 1024)
#define PKT_DUMP_NB_TO_PRINT 16
#define RUN_ALL_TESTS UINT_MAX
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index 991c473e3859..e0d9851fe1c9 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -26,6 +26,10 @@
#define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
#endif
+#ifndef F_CREATED_QUERY
+#define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4)
+#endif
+
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
unsigned int flags)
{
@@ -589,4 +593,74 @@ TEST(close_range_cloexec_unshare_syzbot)
EXPECT_EQ(close(fd3), 0);
}
+TEST(close_range_bitmap_corruption)
+{
+ pid_t pid;
+ int status;
+ struct __clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+
+ /* get the first 128 descriptors open */
+ for (int i = 2; i < 128; i++)
+ EXPECT_GE(dup2(0, i), 0);
+
+ /* get descriptor table shared */
+ pid = sys_clone3(&args, sizeof(args));
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* unshare and truncate descriptor table down to 64 */
+ if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE))
+ exit(EXIT_FAILURE);
+
+ ASSERT_EQ(fcntl(64, F_GETFD), -1);
+ /* ... and verify that the range 64..127 is not
+ stuck "fully used" according to secondary bitmap */
+ EXPECT_EQ(dup(0), 64)
+ exit(EXIT_FAILURE);
+ exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+TEST(fcntl_created)
+{
+ for (int i = 0; i < 101; i++) {
+ int fd;
+ char path[PATH_MAX];
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0) {
+ if (errno == ENOENT)
+ SKIP(return,
+ "Skipping test since /dev/null does not exist");
+ }
+
+ /* We didn't create "/dev/null". */
+ EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0);
+ close(fd);
+
+ sprintf(path, "aaaa_%d", i);
+ fd = open(path, O_CREAT | O_RDONLY | O_CLOEXEC, 0600);
+ ASSERT_GE(fd, 0);
+
+ /* We created "aaaa_%d". */
+ EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 1);
+ close(fd);
+
+ fd = open(path, O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+
+ /* We're opening it again, so no positive creation check. */
+ EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0);
+ close(fd);
+ unlink(path);
+ }
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
index 5f541522364f..5d0a809dc2df 100644
--- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
+++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
@@ -29,9 +29,11 @@ static int check_vgem(int fd)
version.name = name;
ret = ioctl(fd, DRM_IOCTL_VERSION, &version);
- if (ret)
+ if (ret || version.name_len != 4)
return 0;
+ name[4] = '\0';
+
return !strcmp(name, "vgem");
}
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index e54f382bcb02..39fb97a8c1df 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -1,8 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_INCLUDES := $(wildcard lib/py/*.py)
+TEST_INCLUDES := $(wildcard lib/py/*.py) \
+ ../../net/net_helper.sh \
+ ../../net/lib.sh \
TEST_PROGS := \
+ netcons_basic.sh \
ping.py \
queues.py \
stats.py \
diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config
index f6a58ce8a230..a2d8af60876d 100644
--- a/tools/testing/selftests/drivers/net/config
+++ b/tools/testing/selftests/drivers/net/config
@@ -1,2 +1,6 @@
CONFIG_IPV6=y
CONFIG_NETDEVSIM=m
+CONFIG_CONFIGFS_FS=y
+CONFIG_NETCONSOLE=m
+CONFIG_NETCONSOLE_DYNAMIC=y
+CONFIG_NETCONSOLE_EXTENDED_LOG=y
diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
index 026d98976c35..05b6fbb3fcdd 100755
--- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
+++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+import errno
import time
import os
from lib.py import ksft_run, ksft_exit, ksft_pr
@@ -61,7 +62,7 @@ def test_pp_alloc(cfg, netdevnl):
try:
stats = get_stats()
except NlError as e:
- if e.nl_msg.error == -95:
+ if e.nl_msg.error == -errno.EOPNOTSUPP:
stats = {}
else:
raise
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
index 931dbc36ca43..9d7adb3cf33b 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -3,7 +3,7 @@
import datetime
import random
-from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ge, ksft_lt
+from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt
from lib.py import NetDrvEpEnv
from lib.py import EthtoolFamily, NetdevFamily
from lib.py import KsftSkipEx
@@ -19,6 +19,15 @@ def _rss_key_rand(length):
return [random.randint(0, 255) for _ in range(length)]
+def _rss_key_check(cfg, data=None, context=0):
+ if data is None:
+ data = get_rss(cfg, context=context)
+ if 'rss-hash-key' not in data:
+ return
+ non_zero = [x for x in data['rss-hash-key'] if x != 0]
+ ksft_eq(bool(non_zero), True, comment=f"RSS key is all zero {data['rss-hash-key']}")
+
+
def get_rss(cfg, context=0):
return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0]
@@ -81,17 +90,18 @@ def _send_traffic_check(cfg, port, name, params):
ksft_ge(directed, 20000, f"traffic on {name}: " + str(cnts))
if params.get('noise'):
ksft_lt(sum(cnts[i] for i in params['noise']), directed / 2,
- "traffic on other queues:" + str(cnts))
+ f"traffic on other queues ({name})':" + str(cnts))
if params.get('empty'):
ksft_eq(sum(cnts[i] for i in params['empty']), 0,
- "traffic on inactive queues: " + str(cnts))
+ f"traffic on inactive queues ({name}): " + str(cnts))
def test_rss_key_indir(cfg):
"""Test basics like updating the main RSS key and indirection table."""
- if len(_get_rx_cnts(cfg)) < 2:
- KsftSkipEx("Device has only one queue (or doesn't support queue stats)")
+ qcnt = len(_get_rx_cnts(cfg))
+ if qcnt < 3:
+ KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)")
data = get_rss(cfg)
want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table']
@@ -101,6 +111,7 @@ def test_rss_key_indir(cfg):
if not data[k]:
raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}")
+ _rss_key_check(cfg, data=data)
key_len = len(data['rss-hash-key'])
# Set the key
@@ -110,9 +121,26 @@ def test_rss_key_indir(cfg):
data = get_rss(cfg)
ksft_eq(key, data['rss-hash-key'])
+ # Set the indirection table and the key together
+ key = _rss_key_rand(key_len)
+ ethtool(f"-X {cfg.ifname} equal 3 hkey " + _rss_key_str(key))
+ reset_indir = defer(ethtool, f"-X {cfg.ifname} default")
+
+ data = get_rss(cfg)
+ _rss_key_check(cfg, data=data)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(2, max(data['rss-indirection-table']))
+
+ # Reset indirection table and set the key
+ key = _rss_key_rand(key_len)
+ ethtool(f"-X {cfg.ifname} default hkey " + _rss_key_str(key))
+ data = get_rss(cfg)
+ _rss_key_check(cfg, data=data)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(qcnt - 1, max(data['rss-indirection-table']))
+
# Set the indirection table
ethtool(f"-X {cfg.ifname} equal 2")
- reset_indir = defer(ethtool, f"-X {cfg.ifname} default")
data = get_rss(cfg)
ksft_eq(0, min(data['rss-indirection-table']))
ksft_eq(1, max(data['rss-indirection-table']))
@@ -274,6 +302,78 @@ def test_hitless_key_update(cfg):
ksft_eq(carrier1 - carrier0, 0)
+def test_rss_context_dump(cfg):
+ """
+ Test dumping RSS contexts. This tests mostly exercises the kernel APIs.
+ """
+
+ # Get a random key of the right size
+ data = get_rss(cfg)
+ if 'rss-hash-key' in data:
+ key_data = _rss_key_rand(len(data['rss-hash-key']))
+ key = _rss_key_str(key_data)
+ else:
+ key_data = []
+ key = "ba:ad"
+
+ ids = []
+ try:
+ ids.append(ethtool_create(cfg, "-X", f"context new"))
+ defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+
+ ids.append(ethtool_create(cfg, "-X", f"context new weight 1 1"))
+ defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+
+ ids.append(ethtool_create(cfg, "-X", f"context new hkey {key}"))
+ defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete")
+ except CmdExitFailure:
+ if not ids:
+ raise KsftSkipEx("Unable to add any contexts")
+ ksft_pr(f"Added only {len(ids)} out of 3 contexts")
+
+ expect_tuples = set([(cfg.ifname, -1)] + [(cfg.ifname, ctx_id) for ctx_id in ids])
+
+ # Dump all
+ ctxs = cfg.ethnl.rss_get({}, dump=True)
+ tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+ ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump")
+ ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname])
+ ksft_eq(expect_tuples, ctx_tuples)
+
+ # Sanity-check the results
+ for data in ctxs:
+ ksft_ne(set(data['indir']), {0}, "indir table is all zero")
+ ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero")
+
+ # More specific checks
+ if len(ids) > 1 and data.get('context') == ids[1]:
+ ksft_eq(set(data['indir']), {0, 1},
+ "ctx1 - indir table mismatch")
+ if len(ids) > 2 and data.get('context') == ids[2]:
+ ksft_eq(data['hkey'], bytes(key_data), "ctx2 - key mismatch")
+
+ # Ifindex filter
+ ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}, dump=True)
+ tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+ ctx_tuples = set(tuples)
+ ksft_eq(len(tuples), len(ctx_tuples), "duplicates in context dump")
+ ksft_eq(expect_tuples, ctx_tuples)
+
+ # Skip ctx 0
+ expect_tuples.remove((cfg.ifname, -1))
+
+ ctxs = cfg.ethnl.rss_get({'start-context': 1}, dump=True)
+ tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]
+ ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump")
+ ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname])
+ ksft_eq(expect_tuples, ctx_tuples)
+
+ # And finally both with ifindex and skip main
+ ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}, 'start-context': 1}, dump=True)
+ ctx_tuples = set([(c['header']['dev-name'], c.get('context', -1)) for c in ctxs])
+ ksft_eq(expect_tuples, ctx_tuples)
+
+
def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None):
"""
Test separating traffic into RSS contexts.
@@ -317,8 +417,11 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None):
ctx_cnt = i
break
+ _rss_key_check(cfg, context=ctx_id)
+
if not create_with_cfg:
ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}")
+ _rss_key_check(cfg, context=ctx_id)
# Sanity check the context we just created
data = get_rss(cfg, ctx_id)
@@ -511,7 +614,7 @@ def main() -> None:
ksft_run([test_rss_key_indir, test_rss_queue_reconfigure,
test_rss_resize, test_hitless_key_update,
test_rss_context, test_rss_context4, test_rss_context32,
- test_rss_context_queue_reconfigure,
+ test_rss_context_dump, test_rss_context_queue_reconfigure,
test_rss_context_overlap, test_rss_context_overlap2,
test_rss_context_out_of_order, test_rss_context4_create_with_cfg],
args=(cfg, ))
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index a5e800b8f103..1ea9bb695e94 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -4,6 +4,7 @@ import os
import time
from pathlib import Path
from lib.py import KsftSkipEx, KsftXfailEx
+from lib.py import ksft_setup
from lib.py import cmd, ethtool, ip
from lib.py import NetNS, NetdevSimDev
from .remote import Remote
@@ -14,7 +15,7 @@ def _load_env_file(src_path):
src_dir = Path(src_path).parent.resolve()
if not (src_dir / "net.config").exists():
- return env
+ return ksft_setup(env)
with open((src_dir / "net.config").as_posix(), 'r') as fp:
for line in fp.readlines():
@@ -30,7 +31,7 @@ def _load_env_file(src_path):
if len(pair) != 2:
raise Exception("Can't parse configuration line:", full_file)
env[pair[0]] = pair[1]
- return env
+ return ksft_setup(env)
class NetDrvEnv:
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
index 877cd6df94a1..fe905a7f34b3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
@@ -2,6 +2,7 @@
# SPDX-License-Identifier: GPL-2.0
lib_dir=$(dirname $0)/../../../net/forwarding
+ethtool_lib_dir=$(dirname $0)/../hw
ALL_TESTS="
autoneg
@@ -11,7 +12,7 @@ ALL_TESTS="
NUM_NETIFS=2
: ${TIMEOUT:=30000} # ms
source $lib_dir/lib.sh
-source $lib_dir/ethtool_lib.sh
+source $ethtool_lib_dir/ethtool_lib.sh
setup_prepare()
{
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh
new file mode 100755
index 000000000000..06021b2059b7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_basic.sh
@@ -0,0 +1,234 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test creates two netdevsim virtual interfaces, assigns one of them (the
+# "destination interface") to a new namespace, and assigns IP addresses to both
+# interfaces.
+#
+# It listens on the destination interface using socat and configures a dynamic
+# target on netconsole, pointing to the destination IP address.
+#
+# Finally, it checks whether the message was received properly on the
+# destination interface. Note that this test may pollute the kernel log buffer
+# (dmesg) and relies on dynamic configuration and namespaces being configured.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+# Simple script to test dynamic targets in netconsole
+SRCIF="" # to be populated later
+SRCIP=192.168.1.1
+DSTIF="" # to be populated later
+DSTIP=192.168.1.2
+
+PORT="6666"
+MSG="netconsole selftest"
+TARGET=$(mktemp -u netcons_XXXXX)
+DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk)
+NETCONS_CONFIGFS="/sys/kernel/config/netconsole"
+NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
+# NAMESPACE will be populated by setup_ns with a random value
+NAMESPACE=""
+
+# IDs for netdevsim
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+
+# Used to create and delete namespaces
+source "${SCRIPTDIR}"/../../net/lib.sh
+source "${SCRIPTDIR}"/../../net/net_helper.sh
+
+# Create netdevsim interfaces
+create_ifaces() {
+ local NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+
+ echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW"
+ echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW"
+ udevadm settle 2> /dev/null || true
+
+ local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID"
+ local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID"
+
+ # These are global variables
+ SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \
+ -path "$NSIM1"/net -exec basename {} \;)
+ DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \
+ -path "$NSIM2"/net -exec basename {} \;)
+}
+
+link_ifaces() {
+ local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
+ local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex)
+ local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex)
+
+ exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}"
+ exec {INITNS_FD}</proc/self/ns/net
+
+ # Bind the dst interface to namespace
+ ip link set "${DSTIF}" netns "${NAMESPACE}"
+
+ # Linking one device to the other one (on the other namespace}
+ if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX" > $NSIM_DEV_SYS_LINK
+ then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup
+ exit "${ksft_skip}"
+ fi
+}
+
+function configure_ip() {
+ # Configure the IPs for both interfaces
+ ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}"
+ ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up
+
+ ip addr add "${SRCIP}"/24 dev "${SRCIF}"
+ ip link set "${SRCIF}" up
+}
+
+function set_network() {
+ # setup_ns function is coming from lib.sh
+ setup_ns NAMESPACE
+
+ # Create both interfaces, and assign the destination to a different
+ # namespace
+ create_ifaces
+
+ # Link both interfaces back to back
+ link_ifaces
+
+ configure_ip
+}
+
+function create_dynamic_target() {
+ DSTMAC=$(ip netns exec "${NAMESPACE}" \
+ ip link show "${DSTIF}" | awk '/ether/ {print $2}')
+
+ # Create a dynamic target
+ mkdir "${NETCONS_PATH}"
+
+ echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip
+ echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip
+ echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac
+ echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name
+
+ echo 1 > "${NETCONS_PATH}"/enabled
+}
+
+function cleanup() {
+ local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
+
+ # delete netconsole dynamic reconfiguration
+ echo 0 > "${NETCONS_PATH}"/enabled
+ # Remove the configfs entry
+ rmdir "${NETCONS_PATH}"
+
+ # Delete netdevsim devices
+ echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL"
+ echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL"
+
+ # this is coming from lib.sh
+ cleanup_all_ns
+
+ # Restoring printk configurations
+ echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
+}
+
+function listen_port_and_save_to() {
+ local OUTPUT=${1}
+ # Just wait for 2 seconds
+ timeout 2 ip netns exec "${NAMESPACE}" \
+ socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}"
+}
+
+function validate_result() {
+ local TMPFILENAME="$1"
+
+ # Check if the file exists
+ if [ ! -f "$TMPFILENAME" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "${MSG}" "${TMPFILENAME}"; then
+ echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2
+ cat "${TMPFILENAME}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # Delete the file once it is validated, otherwise keep it
+ # for debugging purposes
+ rm "${TMPFILENAME}"
+ exit "${ksft_pass}"
+}
+
+function check_for_dependencies() {
+ if [ "$(id -u)" -ne 0 ]; then
+ echo "This test must be run as root" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ! which socat > /dev/null ; then
+ echo "SKIP: socat(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ! which ip > /dev/null ; then
+ echo "SKIP: ip(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ! which udevadm > /dev/null ; then
+ echo "SKIP: udevadm(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if [ ! -d "${NETCONS_CONFIGFS}" ]; then
+ echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ip link show "${DSTIF}" 2> /dev/null; then
+ echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then
+ echo "SKIP: IPs already in use. Skipping it" >&2
+ exit "${ksft_skip}"
+ fi
+}
+
+# ========== #
+# Start here #
+# ========== #
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# The content of kmsg will be save to the following file
+OUTPUT_FILE="/tmp/${TARGET}"
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5)
+echo "6 5" > /proc/sys/kernel/printk
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+# Listed for netconsole port inside the namespace and destination interface
+listen_port_and_save_to "${OUTPUT_FILE}" &
+# Wait for socat to start and listen to the port.
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp
+# Send the message
+echo "${MSG}: ${TARGET}" > /dev/kmsg
+# Wait until socat saves the file to disk
+busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}"
+
+# Make sure the message was received in the dst part
+# and exit
+validate_result "${OUTPUT_FILE}"
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
index 820b8e0a22c6..63e3c045a3b2 100755
--- a/tools/testing/selftests/drivers/net/stats.py
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -1,10 +1,13 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
+import errno
from lib.py import ksft_run, ksft_exit, ksft_pr
from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx
+from lib.py import ksft_disruptive
from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
from lib.py import NetDrvEnv
+from lib.py import ip, defer
ethnl = EthtoolFamily()
netfam = NetdevFamily()
@@ -17,7 +20,7 @@ def check_pause(cfg) -> None:
try:
ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
except NlError as e:
- if e.error == 95:
+ if e.error == errno.EOPNOTSUPP:
raise KsftXfailEx("pause not supported by the device")
raise
@@ -32,7 +35,7 @@ def check_fec(cfg) -> None:
try:
ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
except NlError as e:
- if e.error == 95:
+ if e.error == errno.EOPNOTSUPP:
raise KsftXfailEx("FEC not supported by the device")
raise
@@ -117,7 +120,7 @@ def qstat_by_ifindex(cfg) -> None:
# loopback has no stats
with ksft_raises(NlError) as cm:
netfam.qstats_get({"ifindex": 1}, dump=True)
- ksft_eq(cm.exception.nl_msg.error, -95)
+ ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP)
ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
# Try to get stats for lowest unused ifindex but not 0
@@ -133,9 +136,31 @@ def qstat_by_ifindex(cfg) -> None:
ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+@ksft_disruptive
+def check_down(cfg) -> None:
+ try:
+ qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+ except NlError as e:
+ if e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("qstats not supported by the device")
+ raise
+
+ ip(f"link set dev {cfg.dev['ifname']} down")
+ defer(ip, f"link set dev {cfg.dev['ifname']} up")
+
+ qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+ for k, v in qstat.items():
+ ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down")
+
+ # exercise per-queue API to make sure that "device down" state
+ # is handled correctly and doesn't crash
+ netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True)
+
+
def main() -> None:
with NetDrvEnv(__file__) as cfg:
- ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex],
+ ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex,
+ check_down],
args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c
index dc0408a831d0..75b7b4ef6cfa 100644
--- a/tools/testing/selftests/hid/hid_bpf.c
+++ b/tools/testing/selftests/hid/hid_bpf.c
@@ -532,6 +532,7 @@ static void load_programs(const struct test_program programs[],
FIXTURE_DATA(hid_bpf) * self,
const FIXTURE_VARIANT(hid_bpf) * variant)
{
+ struct bpf_map *iter_map;
int err = -EINVAL;
ASSERT_LE(progs_count, ARRAY_SIZE(self->hid_links))
@@ -564,6 +565,13 @@ static void load_programs(const struct test_program programs[],
*ops_hid_id = self->hid_id;
}
+ /* we disable the auto-attach feature of all maps because we
+ * only want the tested one to be manually attached in the next
+ * call to bpf_map__attach_struct_ops()
+ */
+ bpf_object__for_each_map(iter_map, *self->skel->skeleton->obj)
+ bpf_map__set_autoattach(iter_map, false);
+
err = hid__load(self->skel);
ASSERT_OK(err) TH_LOG("hid_skel_load failed: %d", err);
@@ -687,6 +695,24 @@ TEST_F(hid_bpf, subprog_raw_event)
}
/*
+ * Attach hid_first_event to the given uhid device,
+ * attempt at re-attaching it, we should not lock and
+ * return an invalid struct bpf_link
+ */
+TEST_F(hid_bpf, multiple_attach)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_first_event" },
+ };
+ struct bpf_link *link;
+
+ LOAD_PROGRAMS(progs);
+
+ link = bpf_map__attach_struct_ops(self->skel->maps.first_event);
+ ASSERT_NULL(link) TH_LOG("unexpected return value when re-attaching the struct_ops");
+}
+
+/*
* Ensures that we can attach/detach programs
*/
TEST_F(hid_bpf, test_attach_detach)
diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c
index ee9bbbcf751b..5ecc845ef792 100644
--- a/tools/testing/selftests/hid/progs/hid.c
+++ b/tools/testing/selftests/hid/progs/hid.c
@@ -455,7 +455,7 @@ struct {
__type(value, struct elem);
} hmap SEC(".maps");
-static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work)
+static int wq_cb_sleepable(void *map, int *key, void *work)
{
__u8 buf[9] = {2, 3, 4, 5, 6, 7, 8, 9, 10};
struct hid_bpf_ctx *hid_ctx;
diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
index cfe37f491906..e5db897586bb 100644
--- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
+++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
@@ -114,7 +114,7 @@ extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx,
extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
extern int bpf_wq_set_callback_impl(struct bpf_wq *wq,
- int (callback_fn)(void *map, int *key, struct bpf_wq *wq),
+ int (callback_fn)(void *map, int *key, void *wq),
unsigned int flags__k, void *aux__ign) __ksym;
#define bpf_wq_set_callback(timer, cb, flags) \
bpf_wq_set_callback_impl(timer, cb, flags, NULL)
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index 6343f4053bd4..4927b9add5ad 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -825,7 +825,7 @@ TEST_F(iommufd_ioas, copy_area)
{
struct iommu_ioas_copy copy_cmd = {
.size = sizeof(copy_cmd),
- .flags = IOMMU_IOAS_MAP_FIXED_IOVA,
+ .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE,
.dst_ioas_id = self->ioas_id,
.src_ioas_id = self->ioas_id,
.length = PAGE_SIZE,
@@ -1318,7 +1318,7 @@ TEST_F(iommufd_ioas, copy_sweep)
{
struct iommu_ioas_copy copy_cmd = {
.size = sizeof(copy_cmd),
- .flags = IOMMU_IOAS_MAP_FIXED_IOVA,
+ .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE,
.src_ioas_id = self->ioas_id,
.dst_iova = MOCK_APERTURE_START,
.length = MOCK_PAGE_SIZE,
@@ -1608,7 +1608,7 @@ TEST_F(iommufd_mock_domain, user_copy)
};
struct iommu_ioas_copy copy_cmd = {
.size = sizeof(copy_cmd),
- .flags = IOMMU_IOAS_MAP_FIXED_IOVA,
+ .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE,
.dst_ioas_id = self->ioas_id,
.dst_iova = MOCK_APERTURE_START,
.length = BUFFER_SIZE,
diff --git a/tools/testing/selftests/kselftest/ksft.py b/tools/testing/selftests/kselftest/ksft.py
index cd89fb2bc10e..bf215790a89d 100644
--- a/tools/testing/selftests/kselftest/ksft.py
+++ b/tools/testing/selftests/kselftest/ksft.py
@@ -70,7 +70,7 @@ def test_result(condition, description=""):
def finished():
- if ksft_cnt["pass"] == ksft_num_tests:
+ if ksft_cnt["pass"] + ksft_cnt["skip"] == ksft_num_tests:
exit_code = KSFT_PASS
else:
exit_code = KSFT_FAIL
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index 74954f6a8f94..2c3c58e65a41 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -111,8 +111,11 @@ run_one()
stdbuf="/usr/bin/stdbuf --output=L "
fi
eval kselftest_cmd_args="\$${kselftest_cmd_args_ref:-}"
- cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args"
- if [ ! -x "$TEST" ]; then
+ if [ -x "$TEST" ]; then
+ cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args"
+ elif [ -x "./ksft_runner.sh" ]; then
+ cmd="$stdbuf ./ksft_runner.sh ./$BASENAME_TEST"
+ else
echo "# Warning: file $TEST is not executable"
if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ]
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 48d32c5aa3eb..0c4b254ab56b 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -152,6 +152,7 @@ TEST_GEN_PROGS_x86_64 += pre_fault_memory_test
TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
+TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases
TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
@@ -163,6 +164,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress
TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
+TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3
TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
TEST_GEN_PROGS_aarch64 += arch_timer
TEST_GEN_PROGS_aarch64 += demand_paging_test
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c
new file mode 100644
index 000000000000..a36a7e2db434
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c
@@ -0,0 +1,1062 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality.
+ *
+ * The test validates some edge cases related to the arch-timer:
+ * - timers above the max TVAL value.
+ * - timers in the past
+ * - moving counters ahead and behind pending timers.
+ * - reprograming timers.
+ * - timers fired multiple times.
+ * - masking/unmasking using the timer control mask.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <pthread.h>
+#include <sys/sysinfo.h>
+
+#include "arch_timer.h"
+#include "gic.h"
+#include "vgic.h"
+
+static const uint64_t CVAL_MAX = ~0ULL;
+/* tval is a signed 32-bit int. */
+static const int32_t TVAL_MAX = INT32_MAX;
+static const int32_t TVAL_MIN = INT32_MIN;
+
+/* After how much time we say there is no IRQ. */
+static const uint32_t TIMEOUT_NO_IRQ_US = 50000;
+
+/* A nice counter value to use as the starting one for most tests. */
+static const uint64_t DEF_CNT = (CVAL_MAX / 2);
+
+/* Number of runs. */
+static const uint32_t NR_TEST_ITERS_DEF = 5;
+
+/* Default wait test time in ms. */
+static const uint32_t WAIT_TEST_MS = 10;
+
+/* Default "long" wait test time in ms. */
+static const uint32_t LONG_WAIT_TEST_MS = 100;
+
+/* Shared with IRQ handler. */
+struct test_vcpu_shared_data {
+ atomic_t handled;
+ atomic_t spurious;
+} shared_data;
+
+struct test_args {
+ /* Virtual or physical timer and counter tests. */
+ enum arch_timer timer;
+ /* Delay used for most timer tests. */
+ uint64_t wait_ms;
+ /* Delay used in the test_long_timer_delays test. */
+ uint64_t long_wait_ms;
+ /* Number of iterations. */
+ int iterations;
+ /* Whether to test the physical timer. */
+ bool test_physical;
+ /* Whether to test the virtual timer. */
+ bool test_virtual;
+};
+
+struct test_args test_args = {
+ .wait_ms = WAIT_TEST_MS,
+ .long_wait_ms = LONG_WAIT_TEST_MS,
+ .iterations = NR_TEST_ITERS_DEF,
+ .test_physical = true,
+ .test_virtual = true,
+};
+
+static int vtimer_irq, ptimer_irq;
+
+enum sync_cmd {
+ SET_COUNTER_VALUE,
+ USERSPACE_USLEEP,
+ USERSPACE_SCHED_YIELD,
+ USERSPACE_MIGRATE_SELF,
+ NO_USERSPACE_CMD,
+};
+
+typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec);
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec);
+static void sleep_sched_poll(enum arch_timer timer, uint64_t usec);
+static void sleep_in_userspace(enum arch_timer timer, uint64_t usec);
+static void sleep_migrate(enum arch_timer timer, uint64_t usec);
+
+sleep_method_t sleep_method[] = {
+ sleep_poll,
+ sleep_sched_poll,
+ sleep_migrate,
+ sleep_in_userspace,
+};
+
+typedef void (*irq_wait_method_t)(void);
+
+static void wait_for_non_spurious_irq(void);
+static void wait_poll_for_irq(void);
+static void wait_sched_poll_for_irq(void);
+static void wait_migrate_poll_for_irq(void);
+
+irq_wait_method_t irq_wait_method[] = {
+ wait_for_non_spurious_irq,
+ wait_poll_for_irq,
+ wait_sched_poll_for_irq,
+ wait_migrate_poll_for_irq,
+};
+
+enum timer_view {
+ TIMER_CVAL,
+ TIMER_TVAL,
+};
+
+static void assert_irqs_handled(uint32_t n)
+{
+ int h = atomic_read(&shared_data.handled);
+
+ __GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n);
+}
+
+static void userspace_cmd(uint64_t cmd)
+{
+ GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0);
+}
+
+static void userspace_migrate_vcpu(void)
+{
+ userspace_cmd(USERSPACE_MIGRATE_SELF);
+}
+
+static void userspace_sleep(uint64_t usecs)
+{
+ GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0);
+}
+
+static void set_counter(enum arch_timer timer, uint64_t counter)
+{
+ GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid = gic_get_and_ack_irq();
+ enum arch_timer timer;
+ uint64_t cnt, cval;
+ uint32_t ctl;
+ bool timer_condition, istatus;
+
+ if (intid == IAR_SPURIOUS) {
+ atomic_inc(&shared_data.spurious);
+ goto out;
+ }
+
+ if (intid == ptimer_irq)
+ timer = PHYSICAL;
+ else if (intid == vtimer_irq)
+ timer = VIRTUAL;
+ else
+ goto out;
+
+ ctl = timer_get_ctl(timer);
+ cval = timer_get_cval(timer);
+ cnt = timer_get_cntct(timer);
+ timer_condition = cnt >= cval;
+ istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE);
+ GUEST_ASSERT_EQ(timer_condition, istatus);
+
+ /* Disable and mask the timer. */
+ timer_set_ctl(timer, CTL_IMASK);
+
+ atomic_inc(&shared_data.handled);
+
+out:
+ gic_set_eoi(intid);
+}
+
+static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles,
+ uint32_t ctl)
+{
+ atomic_set(&shared_data.handled, 0);
+ atomic_set(&shared_data.spurious, 0);
+ timer_set_cval(timer, cval_cycles);
+ timer_set_ctl(timer, ctl);
+}
+
+static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles,
+ uint32_t ctl)
+{
+ atomic_set(&shared_data.handled, 0);
+ atomic_set(&shared_data.spurious, 0);
+ timer_set_ctl(timer, ctl);
+ timer_set_tval(timer, tval_cycles);
+}
+
+static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl,
+ enum timer_view tv)
+{
+ switch (tv) {
+ case TIMER_CVAL:
+ set_cval_irq(timer, xval, ctl);
+ break;
+ case TIMER_TVAL:
+ set_tval_irq(timer, xval, ctl);
+ break;
+ default:
+ GUEST_FAIL("Could not get timer %d", timer);
+ }
+}
+
+/*
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void wait_for_non_spurious_irq(void)
+{
+ int h;
+
+ local_irq_disable();
+
+ for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) {
+ wfi();
+ local_irq_enable();
+ isb(); /* handle IRQ */
+ local_irq_disable();
+ }
+}
+
+/*
+ * Wait for an non-spurious IRQ by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD).
+ *
+ * Note that this can theoretically hang forever, so we rely on having
+ * a timeout mechanism in the "runner", like:
+ * tools/testing/selftests/kselftest/runner.sh.
+ */
+static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd)
+{
+ int h;
+
+ local_irq_disable();
+
+ h = atomic_read(&shared_data.handled);
+
+ local_irq_enable();
+ while (h == atomic_read(&shared_data.handled)) {
+ if (usp_cmd == NO_USERSPACE_CMD)
+ cpu_relax();
+ else
+ userspace_cmd(usp_cmd);
+ }
+ local_irq_disable();
+}
+
+static void wait_poll_for_irq(void)
+{
+ poll_for_non_spurious_irq(NO_USERSPACE_CMD);
+}
+
+static void wait_sched_poll_for_irq(void)
+{
+ poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD);
+}
+
+static void wait_migrate_poll_for_irq(void)
+{
+ poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF);
+}
+
+/*
+ * Sleep for usec microseconds by polling in the guest or in
+ * userspace (e.g. userspace_cmd=USERSPACE_SCHEDULE).
+ */
+static void guest_poll(enum arch_timer test_timer, uint64_t usec,
+ enum sync_cmd usp_cmd)
+{
+ uint64_t cycles = usec_to_cycles(usec);
+ /* Whichever timer we are testing with, sleep with the other. */
+ enum arch_timer sleep_timer = 1 - test_timer;
+ uint64_t start = timer_get_cntct(sleep_timer);
+
+ while ((timer_get_cntct(sleep_timer) - start) < cycles) {
+ if (usp_cmd == NO_USERSPACE_CMD)
+ cpu_relax();
+ else
+ userspace_cmd(usp_cmd);
+ }
+}
+
+static void sleep_poll(enum arch_timer timer, uint64_t usec)
+{
+ guest_poll(timer, usec, NO_USERSPACE_CMD);
+}
+
+static void sleep_sched_poll(enum arch_timer timer, uint64_t usec)
+{
+ guest_poll(timer, usec, USERSPACE_SCHED_YIELD);
+}
+
+static void sleep_migrate(enum arch_timer timer, uint64_t usec)
+{
+ guest_poll(timer, usec, USERSPACE_MIGRATE_SELF);
+}
+
+static void sleep_in_userspace(enum arch_timer timer, uint64_t usec)
+{
+ userspace_sleep(usec);
+}
+
+/*
+ * Reset the timer state to some nice values like the counter not being close
+ * to the edge, and the control register masked and disabled.
+ */
+static void reset_timer_state(enum arch_timer timer, uint64_t cnt)
+{
+ set_counter(timer, cnt);
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_timer_xval(enum arch_timer timer, uint64_t xval,
+ enum timer_view tv, irq_wait_method_t wm, bool reset_state,
+ uint64_t reset_cnt)
+{
+ local_irq_disable();
+
+ if (reset_state)
+ reset_timer_state(timer, reset_cnt);
+
+ set_xval_irq(timer, xval, CTL_ENABLE, tv);
+
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/*
+ * The test_timer_* functions will program the timer, wait for it, and assert
+ * the firing of the correct IRQ.
+ *
+ * These functions don't have a timeout and return as soon as they receive an
+ * IRQ. They can hang (forever), so we rely on having a timeout mechanism in
+ * the "runner", like: tools/testing/selftests/kselftest/runner.sh.
+ */
+
+static void test_timer_cval(enum arch_timer timer, uint64_t cval,
+ irq_wait_method_t wm, bool reset_state,
+ uint64_t reset_cnt)
+{
+ test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt);
+}
+
+static void test_timer_tval(enum arch_timer timer, int32_t tval,
+ irq_wait_method_t wm, bool reset_state,
+ uint64_t reset_cnt)
+{
+ test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state,
+ reset_cnt);
+}
+
+static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval,
+ uint64_t usec, enum timer_view timer_view,
+ sleep_method_t guest_sleep)
+{
+ local_irq_disable();
+
+ set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view);
+ guest_sleep(timer, usec);
+
+ local_irq_enable();
+ isb();
+
+ /* Assume success (no IRQ) after waiting usec microseconds */
+ assert_irqs_handled(0);
+}
+
+static void test_cval_no_irq(enum arch_timer timer, uint64_t cval,
+ uint64_t usec, sleep_method_t wm)
+{
+ test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm);
+}
+
+static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec,
+ sleep_method_t wm)
+{
+ /* tval will be cast to an int32_t in test_xval_check_no_irq */
+ test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm);
+}
+
+/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */
+static void test_timer_control_mask_then_unmask(enum arch_timer timer)
+{
+ reset_timer_state(timer, DEF_CNT);
+ set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
+
+ /* Unmask the timer, and then get an IRQ. */
+ local_irq_disable();
+ timer_set_ctl(timer, CTL_ENABLE);
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wait_for_non_spurious_irq();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/* Check that timer control masks actually mask a timer being fired. */
+static void test_timer_control_masks(enum arch_timer timer)
+{
+ reset_timer_state(timer, DEF_CNT);
+
+ /* Local IRQs are not masked at this point. */
+
+ set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK);
+
+ /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
+ sleep_poll(timer, TIMEOUT_NO_IRQ_US);
+
+ assert_irqs_handled(0);
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_fire_a_timer_multiple_times(enum arch_timer timer,
+ irq_wait_method_t wm, int num)
+{
+ int i;
+
+ local_irq_disable();
+ reset_timer_state(timer, DEF_CNT);
+
+ set_tval_irq(timer, 0, CTL_ENABLE);
+
+ for (i = 1; i <= num; i++) {
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ /* The IRQ handler masked and disabled the timer.
+ * Enable and unmmask it again.
+ */
+ timer_set_ctl(timer, CTL_ENABLE);
+
+ assert_irqs_handled(i);
+ }
+
+ local_irq_enable();
+}
+
+static void test_timers_fired_multiple_times(enum arch_timer timer)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++)
+ test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10);
+}
+
+/*
+ * Set a timer for tval=delta_1_ms then reprogram it to
+ * tval=delta_2_ms. Check that we get the timer fired. There is no
+ * timeout for the wait: we use the wfi instruction.
+ */
+static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm,
+ int32_t delta_1_ms, int32_t delta_2_ms)
+{
+ local_irq_disable();
+ reset_timer_state(timer, DEF_CNT);
+
+ /* Program the timer to DEF_CNT + delta_1_ms. */
+ set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE);
+
+ /* Reprogram the timer to DEF_CNT + delta_2_ms. */
+ timer_set_tval(timer, msec_to_cycles(delta_2_ms));
+
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ /* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */
+ GUEST_ASSERT(timer_get_cntct(timer) >=
+ DEF_CNT + msec_to_cycles(delta_2_ms));
+
+ local_irq_enable();
+ assert_irqs_handled(1);
+};
+
+static void test_reprogram_timers(enum arch_timer timer)
+{
+ int i;
+ uint64_t base_wait = test_args.wait_ms;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ /*
+ * Ensure reprogramming works whether going from a
+ * longer time to a shorter or vice versa.
+ */
+ test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait,
+ base_wait);
+ test_reprogramming_timer(timer, irq_wait_method[i], base_wait,
+ 2 * base_wait);
+ }
+}
+
+static void test_basic_functionality(enum arch_timer timer)
+{
+ int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms);
+ uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ test_timer_cval(timer, cval, wm, true, DEF_CNT);
+ test_timer_tval(timer, tval, wm, true, DEF_CNT);
+ }
+}
+
+/*
+ * This test checks basic timer behavior without actually firing timers, things
+ * like: the relationship between cval and tval, tval down-counting.
+ */
+static void timers_sanity_checks(enum arch_timer timer, bool use_sched)
+{
+ reset_timer_state(timer, DEF_CNT);
+
+ local_irq_disable();
+
+ /* cval in the past */
+ timer_set_cval(timer,
+ timer_get_cntct(timer) -
+ msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_tval(timer) < 0);
+
+ /* tval in the past */
+ timer_set_tval(timer, -1);
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer));
+
+ /* tval larger than TVAL_MAX. This requires programming with
+ * timer_set_cval instead so the value is expressible
+ */
+ timer_set_cval(timer,
+ timer_get_cntct(timer) + TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_tval(timer) <= 0);
+
+ /*
+ * tval larger than 2 * TVAL_MAX.
+ * Twice the TVAL_MAX completely loops around the TVAL.
+ */
+ timer_set_cval(timer,
+ timer_get_cntct(timer) + 2ULL * TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_tval(timer) <=
+ msec_to_cycles(test_args.wait_ms));
+
+ /* negative tval that rollovers from 0. */
+ set_counter(timer, msec_to_cycles(1));
+ timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms));
+ if (use_sched)
+ userspace_migrate_vcpu();
+ GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms)));
+
+ /* tval should keep down-counting from 0 to -1. */
+ timer_set_tval(timer, 0);
+ sleep_poll(timer, 1);
+ GUEST_ASSERT(timer_get_tval(timer) < 0);
+
+ local_irq_enable();
+
+ /* Mask and disable any pending timer. */
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_timers_sanity_checks(enum arch_timer timer)
+{
+ timers_sanity_checks(timer, false);
+ /* Check how KVM saves/restores these edge-case values. */
+ timers_sanity_checks(timer, true);
+}
+
+static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm)
+{
+ local_irq_disable();
+ reset_timer_state(timer, DEF_CNT);
+
+ set_cval_irq(timer,
+ (uint64_t) TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE);
+
+ set_counter(timer, TVAL_MAX);
+
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */
+static void test_timers_above_tval_max(enum arch_timer timer)
+{
+ uint64_t cval;
+ int i;
+
+ /*
+ * Test that the system is not implementing cval in terms of
+ * tval. If that was the case, setting a cval to "cval = now
+ * + TVAL_MAX + wait_ms" would wrap to "cval = now +
+ * wait_ms", and the timer would fire immediately. Test that it
+ * doesn't.
+ */
+ for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+ reset_timer_state(timer, DEF_CNT);
+ cval = timer_get_cntct(timer) + TVAL_MAX +
+ msec_to_cycles(test_args.wait_ms);
+ test_cval_no_irq(timer, cval,
+ msecs_to_usecs(test_args.wait_ms) +
+ TIMEOUT_NO_IRQ_US, sleep_method[i]);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ /* Get the IRQ by moving the counter forward. */
+ test_set_cnt_after_tval_max(timer, irq_wait_method[i]);
+ }
+}
+
+/*
+ * Template function to be used by the test_move_counter_ahead_* tests. It
+ * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and
+ * then waits for an IRQ.
+ */
+static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1,
+ uint64_t xval, uint64_t cnt_2,
+ irq_wait_method_t wm, enum timer_view tv)
+{
+ local_irq_disable();
+
+ set_counter(timer, cnt_1);
+ timer_set_ctl(timer, CTL_IMASK);
+
+ set_xval_irq(timer, xval, CTL_ENABLE, tv);
+ set_counter(timer, cnt_2);
+ /* This method re-enables IRQs to handle the one we're looking for. */
+ wm();
+
+ assert_irqs_handled(1);
+ local_irq_enable();
+}
+
+/*
+ * Template function to be used by the test_move_counter_ahead_* tests. It
+ * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and
+ * then waits for an IRQ.
+ */
+static void test_set_cnt_after_xval_no_irq(enum arch_timer timer,
+ uint64_t cnt_1, uint64_t xval,
+ uint64_t cnt_2,
+ sleep_method_t guest_sleep,
+ enum timer_view tv)
+{
+ local_irq_disable();
+
+ set_counter(timer, cnt_1);
+ timer_set_ctl(timer, CTL_IMASK);
+
+ set_xval_irq(timer, xval, CTL_ENABLE, tv);
+ set_counter(timer, cnt_2);
+ guest_sleep(timer, TIMEOUT_NO_IRQ_US);
+
+ local_irq_enable();
+ isb();
+
+ /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */
+ assert_irqs_handled(0);
+ timer_set_ctl(timer, CTL_IMASK);
+}
+
+static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1,
+ int32_t tval, uint64_t cnt_2,
+ irq_wait_method_t wm)
+{
+ test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL);
+}
+
+static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1,
+ uint64_t cval, uint64_t cnt_2,
+ irq_wait_method_t wm)
+{
+ test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL);
+}
+
+static void test_set_cnt_after_tval_no_irq(enum arch_timer timer,
+ uint64_t cnt_1, int32_t tval,
+ uint64_t cnt_2, sleep_method_t wm)
+{
+ test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm,
+ TIMER_TVAL);
+}
+
+static void test_set_cnt_after_cval_no_irq(enum arch_timer timer,
+ uint64_t cnt_1, uint64_t cval,
+ uint64_t cnt_2, sleep_method_t wm)
+{
+ test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm,
+ TIMER_CVAL);
+}
+
+/* Set a timer and then move the counter ahead of it. */
+static void test_move_counters_ahead_of_timers(enum arch_timer timer)
+{
+ int i;
+ int32_t tval;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm);
+ test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm);
+
+ /* Move counter ahead of negative tval. */
+ test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm);
+ test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm);
+ tval = TVAL_MAX;
+ test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1,
+ wm);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+ sleep_method_t sm = sleep_method[i];
+
+ test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm);
+ }
+}
+
+/*
+ * Program a timer, mask it, and then change the tval or counter to cancel it.
+ * Unmask it and check that nothing fires.
+ */
+static void test_move_counters_behind_timers(enum arch_timer timer)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+ sleep_method_t sm = sleep_method[i];
+
+ test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0,
+ sm);
+ test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm);
+ }
+}
+
+static void test_timers_in_the_past(enum arch_timer timer)
+{
+ int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms);
+ uint64_t cval;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ /* set a timer wait_ms the past. */
+ cval = DEF_CNT - msec_to_cycles(test_args.wait_ms);
+ test_timer_cval(timer, cval, wm, true, DEF_CNT);
+ test_timer_tval(timer, tval, wm, true, DEF_CNT);
+
+ /* Set a timer to counter=0 (in the past) */
+ test_timer_cval(timer, 0, wm, true, DEF_CNT);
+
+ /* Set a time for tval=0 (now) */
+ test_timer_tval(timer, 0, wm, true, DEF_CNT);
+
+ /* Set a timer to as far in the past as possible */
+ test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT);
+ }
+
+ /*
+ * Set the counter to wait_ms, and a tval to -wait_ms. There should be no
+ * IRQ as that tval means cval=CVAL_MAX-wait_ms.
+ */
+ for (i = 0; i < ARRAY_SIZE(sleep_method); i++) {
+ sleep_method_t sm = sleep_method[i];
+
+ set_counter(timer, msec_to_cycles(test_args.wait_ms));
+ test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm);
+ }
+}
+
+static void test_long_timer_delays(enum arch_timer timer)
+{
+ int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms);
+ uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) {
+ irq_wait_method_t wm = irq_wait_method[i];
+
+ test_timer_cval(timer, cval, wm, true, DEF_CNT);
+ test_timer_tval(timer, tval, wm, true, DEF_CNT);
+ }
+}
+
+static void guest_run_iteration(enum arch_timer timer)
+{
+ test_basic_functionality(timer);
+ test_timers_sanity_checks(timer);
+
+ test_timers_above_tval_max(timer);
+ test_timers_in_the_past(timer);
+
+ test_move_counters_ahead_of_timers(timer);
+ test_move_counters_behind_timers(timer);
+ test_reprogram_timers(timer);
+
+ test_timers_fired_multiple_times(timer);
+
+ test_timer_control_mask_then_unmask(timer);
+ test_timer_control_masks(timer);
+}
+
+static void guest_code(enum arch_timer timer)
+{
+ int i;
+
+ local_irq_disable();
+
+ gic_init(GIC_V3, 1);
+
+ timer_set_ctl(VIRTUAL, CTL_IMASK);
+ timer_set_ctl(PHYSICAL, CTL_IMASK);
+
+ gic_irq_enable(vtimer_irq);
+ gic_irq_enable(ptimer_irq);
+ local_irq_enable();
+
+ for (i = 0; i < test_args.iterations; i++) {
+ GUEST_SYNC(i);
+ guest_run_iteration(timer);
+ }
+
+ test_long_timer_delays(timer);
+ GUEST_DONE();
+}
+
+static uint32_t next_pcpu(void)
+{
+ uint32_t max = get_nprocs();
+ uint32_t cur = sched_getcpu();
+ uint32_t next = cur;
+ cpu_set_t cpuset;
+
+ TEST_ASSERT(max > 1, "Need at least two physical cpus");
+
+ sched_getaffinity(0, sizeof(cpuset), &cpuset);
+
+ do {
+ next = (next + 1) % CPU_SETSIZE;
+ } while (!CPU_ISSET(next, &cpuset));
+
+ return next;
+}
+
+static void migrate_self(uint32_t new_pcpu)
+{
+ int ret;
+ cpu_set_t cpuset;
+ pthread_t thread;
+
+ thread = pthread_self();
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(new_pcpu, &cpuset);
+
+ pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu);
+
+ ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
+
+ TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n",
+ new_pcpu, ret);
+}
+
+static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt,
+ enum arch_timer timer)
+{
+ if (timer == PHYSICAL)
+ vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt);
+ else
+ vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt);
+}
+
+static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+ enum sync_cmd cmd = uc->args[1];
+ uint64_t val = uc->args[2];
+ enum arch_timer timer = uc->args[3];
+
+ switch (cmd) {
+ case SET_COUNTER_VALUE:
+ kvm_set_cntxct(vcpu, val, timer);
+ break;
+ case USERSPACE_USLEEP:
+ usleep(val);
+ break;
+ case USERSPACE_SCHED_YIELD:
+ sched_yield();
+ break;
+ case USERSPACE_MIGRATE_SELF:
+ migrate_self(next_pcpu());
+ break;
+ default:
+ break;
+ }
+}
+
+static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ /* Start on CPU 0 */
+ migrate_self(0);
+
+ while (true) {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ handle_sync(vcpu, &uc);
+ break;
+ case UCALL_DONE:
+ goto out;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ goto out;
+ default:
+ TEST_FAIL("Unexpected guest exit\n");
+ }
+ }
+
+ out:
+ return;
+}
+
+static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
+ KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
+ vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL,
+ KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+
+ sync_global_to_guest(vm, ptimer_irq);
+ sync_global_to_guest(vm, vtimer_irq);
+
+ pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
+}
+
+static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu,
+ enum arch_timer timer)
+{
+ *vm = vm_create_with_one_vcpu(vcpu, guest_code);
+ TEST_ASSERT(*vm, "Failed to create the test VM\n");
+
+ vm_init_descriptor_tables(*vm);
+ vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT,
+ guest_irq_handler);
+
+ vcpu_init_descriptor_tables(*vcpu);
+ vcpu_args_set(*vcpu, 1, timer);
+
+ test_init_timer_irq(*vm, *vcpu);
+ vgic_v3_setup(*vm, 1, 64);
+ sync_global_to_guest(*vm, test_args);
+}
+
+static void test_print_help(char *name)
+{
+ pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v]\n"
+ , name);
+ pr_info("\t-i: Number of iterations (default: %u)\n",
+ NR_TEST_ITERS_DEF);
+ pr_info("\t-b: Test both physical and virtual timers (default: true)\n");
+ pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n",
+ LONG_WAIT_TEST_MS);
+ pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n",
+ WAIT_TEST_MS);
+ pr_info("\t-p: Test physical timer (default: true)\n");
+ pr_info("\t-v: Test virtual timer (default: true)\n");
+ pr_info("\t-h: Print this help message\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) {
+ switch (opt) {
+ case 'b':
+ test_args.test_physical = true;
+ test_args.test_virtual = true;
+ break;
+ case 'i':
+ test_args.iterations =
+ atoi_positive("Number of iterations", optarg);
+ break;
+ case 'l':
+ test_args.long_wait_ms =
+ atoi_positive("Long wait time", optarg);
+ break;
+ case 'p':
+ test_args.test_physical = true;
+ test_args.test_virtual = false;
+ break;
+ case 'v':
+ test_args.test_virtual = true;
+ test_args.test_physical = false;
+ break;
+ case 'w':
+ test_args.wait_ms = atoi_positive("Wait time", optarg);
+ break;
+ case 'h':
+ default:
+ goto err;
+ }
+ }
+
+ return true;
+
+ err:
+ test_print_help(argv[0]);
+ return false;
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ if (!parse_args(argc, argv))
+ exit(KSFT_SKIP);
+
+ if (test_args.test_virtual) {
+ test_vm_create(&vm, &vcpu, VIRTUAL);
+ test_run(vm, vcpu);
+ kvm_vm_free(vm);
+ }
+
+ if (test_args.test_physical) {
+ test_vm_create(&vm, &vcpu, PHYSICAL);
+ test_run(vm, vcpu);
+ kvm_vm_free(vm);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
index 709d7d721760..d43fb3f49050 100644
--- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -32,13 +32,25 @@ static struct feature_id_reg feat_id_regs[] = {
{
ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 4,
+ 8,
1
},
{
ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
- 4,
+ 8,
+ 1
+ },
+ {
+ ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ 16,
+ 1
+ },
+ {
+ ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ 16,
1
}
};
@@ -468,6 +480,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */
ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */
ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */
ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */
ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */
@@ -475,6 +488,7 @@ static __u64 base_regs[] = {
ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */
ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */
ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */
+ ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */
ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */
diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c
new file mode 100644
index 000000000000..943d65fc6b0b
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+// Check that, on a GICv3 system, not configuring GICv3 correctly
+// results in all of the sysregs generating an UNDEF exception.
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+static volatile bool handled;
+
+#define __check_sr_read(r) \
+ ({ \
+ uint64_t val; \
+ \
+ handled = false; \
+ dsb(sy); \
+ val = read_sysreg_s(SYS_ ## r); \
+ val; \
+ })
+
+#define __check_sr_write(r) \
+ do { \
+ handled = false; \
+ dsb(sy); \
+ write_sysreg_s(0, SYS_ ## r); \
+ isb(); \
+ } while(0)
+
+/* Fatal checks */
+#define check_sr_read(r) \
+ do { \
+ __check_sr_read(r); \
+ __GUEST_ASSERT(handled, #r " no read trap"); \
+ } while(0)
+
+#define check_sr_write(r) \
+ do { \
+ __check_sr_write(r); \
+ __GUEST_ASSERT(handled, #r " no write trap"); \
+ } while(0)
+
+#define check_sr_rw(r) \
+ do { \
+ check_sr_read(r); \
+ check_sr_write(r); \
+ } while(0)
+
+static void guest_code(void)
+{
+ uint64_t val;
+
+ /*
+ * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having
+ * hidden the feature at runtime without any other userspace action.
+ */
+ __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC),
+ read_sysreg(id_aa64pfr0_el1)) == 0,
+ "GICv3 wrongly advertised");
+
+ /*
+ * Access all GICv3 registers, and fail if we don't get an UNDEF.
+ * Note that we happily access all the APxRn registers without
+ * checking their existance, as all we want to see is a failure.
+ */
+ check_sr_rw(ICC_PMR_EL1);
+ check_sr_read(ICC_IAR0_EL1);
+ check_sr_write(ICC_EOIR0_EL1);
+ check_sr_rw(ICC_HPPIR0_EL1);
+ check_sr_rw(ICC_BPR0_EL1);
+ check_sr_rw(ICC_AP0R0_EL1);
+ check_sr_rw(ICC_AP0R1_EL1);
+ check_sr_rw(ICC_AP0R2_EL1);
+ check_sr_rw(ICC_AP0R3_EL1);
+ check_sr_rw(ICC_AP1R0_EL1);
+ check_sr_rw(ICC_AP1R1_EL1);
+ check_sr_rw(ICC_AP1R2_EL1);
+ check_sr_rw(ICC_AP1R3_EL1);
+ check_sr_write(ICC_DIR_EL1);
+ check_sr_read(ICC_RPR_EL1);
+ check_sr_write(ICC_SGI1R_EL1);
+ check_sr_write(ICC_ASGI1R_EL1);
+ check_sr_write(ICC_SGI0R_EL1);
+ check_sr_read(ICC_IAR1_EL1);
+ check_sr_write(ICC_EOIR1_EL1);
+ check_sr_rw(ICC_HPPIR1_EL1);
+ check_sr_rw(ICC_BPR1_EL1);
+ check_sr_rw(ICC_CTLR_EL1);
+ check_sr_rw(ICC_IGRPEN0_EL1);
+ check_sr_rw(ICC_IGRPEN1_EL1);
+
+ /*
+ * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can
+ * be RAO/WI. Engage in non-fatal accesses, starting with a
+ * write of 0 to try and disable SRE, and let's see if it
+ * sticks.
+ */
+ __check_sr_write(ICC_SRE_EL1);
+ if (!handled)
+ GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n");
+
+ val = __check_sr_read(ICC_SRE_EL1);
+ if (!handled) {
+ __GUEST_ASSERT((val & BIT(0)),
+ "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n");
+ GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n");
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_undef_handler(struct ex_regs *regs)
+{
+ /* Success, we've gracefully exploded! */
+ handled = true;
+ regs->pc += 4;
+}
+
+static void test_run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ do {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ printf("%s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ } while (uc.cmd != UCALL_DONE);
+}
+
+static void test_guest_no_gicv3(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* Create a VM without a GICv3 */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_UNKNOWN, guest_undef_handler);
+
+ test_run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t pfr0;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &pfr0);
+ __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0),
+ "GICv3 not supported.");
+ kvm_vm_free(vm);
+
+ test_guest_no_gicv3();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
index d20981663831..2a3fe7914b72 100644
--- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
@@ -126,6 +126,7 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
+ REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
index a51dbd2a5f84..f4ac28d53747 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -269,13 +269,12 @@ static void guest_inject(struct test_args *args,
KVM_INJECT_MULTI(cmd, first_intid, num);
while (irq_handled < num) {
- asm volatile("wfi\n"
- "msr daifclr, #2\n"
- /* handle IRQ */
- "msr daifset, #2\n"
- : : : "memory");
+ wfi();
+ local_irq_enable();
+ isb(); /* handle IRQ */
+ local_irq_disable();
}
- asm volatile("msr daifclr, #2" : : : "memory");
+ local_irq_enable();
GUEST_ASSERT_EQ(irq_handled, num);
for (i = first_intid; i < num + first_intid; i++)
diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
index b3e97525cb55..bf461de34785 100644
--- a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
+++ b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
@@ -79,7 +79,7 @@ static inline uint64_t timer_get_cval(enum arch_timer timer)
return 0;
}
-static inline void timer_set_tval(enum arch_timer timer, uint32_t tval)
+static inline void timer_set_tval(enum arch_timer timer, int32_t tval)
{
switch (timer) {
case VIRTUAL:
@@ -95,6 +95,22 @@ static inline void timer_set_tval(enum arch_timer timer, uint32_t tval)
isb();
}
+static inline int32_t timer_get_tval(enum arch_timer timer)
+{
+ isb();
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntv_tval_el0);
+ case PHYSICAL:
+ return read_sysreg(cntp_tval_el0);
+ default:
+ GUEST_FAIL("Could not get timer %d\n", timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
{
switch (timer) {
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index 9b20a355d81a..de977d131082 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -243,4 +243,7 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
uint64_t arg6, struct arm_smccc_res *res);
+/* Execute a Wait For Interrupt instruction. */
+void wfi(void);
+
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 0ac7cc89f38c..fe4dc3693112 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -639,3 +639,9 @@ void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
sparsebit_set_num(vm->vpages_valid, 0,
(1ULL << vm->va_bits) >> vm->page_shift);
}
+
+/* Helper to call wfi instruction. */
+void wfi(void)
+{
+ asm volatile("wfi");
+}
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index f92c2fb23fcd..8e34f7fa44e9 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -961,10 +961,10 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB);
KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC);
KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX);
KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS);
-KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA),
-KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB),
-KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD),
-KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF),
+KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF);
KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP);
KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA);
KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH);
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
index 69849acd95b0..618cd2442390 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
@@ -184,6 +184,33 @@ static void test_apic_id(void)
kvm_vm_free(vm);
}
+static void test_x2apic_id(void)
+{
+ struct kvm_lapic_state lapic = {};
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int i;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+ vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE);
+
+ /*
+ * Try stuffing a modified x2APIC ID, KVM should ignore the value and
+ * always return the vCPU's default/readonly x2APIC ID.
+ */
+ for (i = 0; i <= 0xff; i++) {
+ *(u32 *)(lapic.regs + APIC_ID) = i << 24;
+ *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED;
+ vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic);
+ TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24,
+ "x2APIC ID should be fully readonly");
+ }
+
+ kvm_vm_free(vm);
+}
+
int main(int argc, char *argv[])
{
struct xapic_vcpu x = {
@@ -211,4 +238,5 @@ int main(int argc, char *argv[])
kvm_vm_free(vm);
test_apic_id();
+ test_x2apic_id();
}
diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh
index 65c9c058458d..bd13257bfdfe 100755
--- a/tools/testing/selftests/livepatch/test-livepatch.sh
+++ b/tools/testing/selftests/livepatch/test-livepatch.sh
@@ -139,11 +139,8 @@ load_lp $MOD_REPLACE replace=1
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
-mods=(/sys/kernel/livepatch/*)
-nmods=${#mods[@]}
-if [ "$nmods" -ne 1 ]; then
- die "Expecting only one moduled listed, found $nmods"
-fi
+loop_until 'mods=(/sys/kernel/livepatch/*); nmods=${#mods[@]}; [[ "$nmods" -eq 1 ]]' ||
+ die "Expecting only one moduled listed, found $nmods"
# These modules were disabled by the atomic replace
for mod in $MOD_LIVEPATCH3 $MOD_LIVEPATCH2 $MOD_LIVEPATCH1; do
diff --git a/tools/testing/selftests/lsm/lsm_list_modules_test.c b/tools/testing/selftests/lsm/lsm_list_modules_test.c
index 06d24d4679a6..1cc8a977c711 100644
--- a/tools/testing/selftests/lsm/lsm_list_modules_test.c
+++ b/tools/testing/selftests/lsm/lsm_list_modules_test.c
@@ -128,6 +128,9 @@ TEST(correct_lsm_list_modules)
case LSM_ID_EVM:
name = "evm";
break;
+ case LSM_ID_IPE:
+ name = "ipe";
+ break;
default:
name = "INVALID";
break;
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index 901e0d07765b..5f2ca591c956 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -53,7 +53,9 @@ TEST_GEN_FILES += madv_populate
TEST_GEN_FILES += map_fixed_noreplace
TEST_GEN_FILES += map_hugetlb
TEST_GEN_FILES += map_populate
+ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64))
TEST_GEN_FILES += memfd_secret
+endif
TEST_GEN_FILES += migration
TEST_GEN_FILES += mkdirty
TEST_GEN_FILES += mlock-random-test
@@ -104,13 +106,13 @@ TEST_GEN_FILES += $(BINARIES_64)
endif
else
-ifneq (,$(findstring $(ARCH),powerpc))
+ifneq (,$(filter $(ARCH),arm64 powerpc))
TEST_GEN_FILES += protection_keys
endif
endif
-ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64))
+ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390))
TEST_GEN_FILES += va_high_addr_switch
TEST_GEN_FILES += virtual_address_range
TEST_GEN_FILES += write_to_hugetlbfs
diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c
index e140558e6f53..2c3a0eb6b22d 100644
--- a/tools/testing/selftests/mm/compaction_test.c
+++ b/tools/testing/selftests/mm/compaction_test.c
@@ -89,9 +89,10 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
int fd, ret = -1;
int compaction_index = 0;
char nr_hugepages[20] = {0};
- char init_nr_hugepages[20] = {0};
+ char init_nr_hugepages[24] = {0};
- sprintf(init_nr_hugepages, "%lu", initial_nr_hugepages);
+ snprintf(init_nr_hugepages, sizeof(init_nr_hugepages),
+ "%lu", initial_nr_hugepages);
/* We want to test with 80% of available memory. Else, OOM killer comes
in to play */
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
index 1b03bcfaefdf..5a3a9bcba640 100644
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -22,8 +22,10 @@
#define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */
#define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */
+#ifndef MIN
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
#define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
+#endif
#define SIZE_MB(m) ((size_t)m * (1024 * 1024))
#define SIZE_KB(k) ((size_t)k * 1024)
diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c
index a818f010de47..bfcea5cf9a48 100644
--- a/tools/testing/selftests/mm/mseal_test.c
+++ b/tools/testing/selftests/mm/mseal_test.c
@@ -81,17 +81,6 @@ static int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
return sret;
}
-static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
- unsigned long flags, unsigned long fd, unsigned long offset)
-{
- void *sret;
-
- errno = 0;
- sret = (void *) syscall(__NR_mmap, addr, len, prot,
- flags, fd, offset);
- return sret;
-}
-
static int sys_munmap(void *ptr, size_t size)
{
int sret;
@@ -172,7 +161,7 @@ static void setup_single_address(int size, void **ptrOut)
{
void *ptr;
- ptr = sys_mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ ptr = mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
*ptrOut = ptr;
}
@@ -181,7 +170,7 @@ static void setup_single_address_rw(int size, void **ptrOut)
void *ptr;
unsigned long mapflags = MAP_ANONYMOUS | MAP_PRIVATE;
- ptr = sys_mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0);
+ ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0);
*ptrOut = ptr;
}
@@ -205,7 +194,7 @@ bool seal_support(void)
void *ptr;
unsigned long page_size = getpagesize();
- ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (ptr == (void *) -1)
return false;
@@ -481,8 +470,8 @@ static void test_seal_zero_address(void)
int prot;
/* use mmap to change protection. */
- ptr = sys_mmap(0, size, PROT_NONE,
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ptr = mmap(0, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
FAIL_TEST_IF_FALSE(ptr == 0);
size = get_vma_size(ptr, &prot);
@@ -1209,8 +1198,8 @@ static void test_seal_mmap_overwrite_prot(bool seal)
}
/* use mmap to change protection. */
- ret2 = sys_mmap(ptr, size, PROT_NONE,
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ret2 = mmap(ptr, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
if (seal) {
FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
FAIL_TEST_IF_FALSE(errno == EPERM);
@@ -1240,8 +1229,8 @@ static void test_seal_mmap_expand(bool seal)
}
/* use mmap to expand. */
- ret2 = sys_mmap(ptr, size, PROT_READ,
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ret2 = mmap(ptr, size, PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
if (seal) {
FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
FAIL_TEST_IF_FALSE(errno == EPERM);
@@ -1268,8 +1257,8 @@ static void test_seal_mmap_shrink(bool seal)
}
/* use mmap to shrink. */
- ret2 = sys_mmap(ptr, 8 * page_size, PROT_READ,
- MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ ret2 = mmap(ptr, 8 * page_size, PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
if (seal) {
FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
FAIL_TEST_IF_FALSE(errno == EPERM);
@@ -1650,7 +1639,7 @@ static void test_seal_discard_ro_anon_on_filebacked(bool seal)
ret = fallocate(fd, 0, 0, size);
FAIL_TEST_IF_FALSE(!ret);
- ptr = sys_mmap(NULL, size, PROT_READ, mapflags, fd, 0);
+ ptr = mmap(NULL, size, PROT_READ, mapflags, fd, 0);
FAIL_TEST_IF_FALSE(ptr != MAP_FAILED);
if (seal) {
@@ -1680,7 +1669,7 @@ static void test_seal_discard_ro_anon_on_shared(bool seal)
int ret;
unsigned long mapflags = MAP_ANONYMOUS | MAP_SHARED;
- ptr = sys_mmap(NULL, size, PROT_READ, mapflags, -1, 0);
+ ptr = mmap(NULL, size, PROT_READ, mapflags, -1, 0);
FAIL_TEST_IF_FALSE(ptr != (void *)-1);
if (seal) {
diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h
new file mode 100644
index 000000000000..580e1b0bb38e
--- /dev/null
+++ b/tools/testing/selftests/mm/pkey-arm64.h
@@ -0,0 +1,139 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Arm Ltd.
+ */
+
+#ifndef _PKEYS_ARM64_H
+#define _PKEYS_ARM64_H
+
+#include "vm_util.h"
+/* for signal frame parsing */
+#include "../arm64/signal/testcases/testcases.h"
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 288
+#endif
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 289
+# define SYS_pkey_free 290
+#endif
+#define MCONTEXT_IP(mc) mc.pc
+#define MCONTEXT_TRAPNO(mc) -1
+
+#define PKEY_MASK 0xf
+
+#define POE_NONE 0x0
+#define POE_X 0x2
+#define POE_RX 0x3
+#define POE_RWX 0x7
+
+#define NR_PKEYS 8
+#define NR_RESERVED_PKEYS 1 /* pkey-0 */
+
+#define PKEY_ALLOW_ALL 0x77777777
+
+#define PKEY_BITS_PER_PKEY 4
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+#undef HPAGE_SIZE
+#define HPAGE_SIZE default_huge_page_size()
+
+/* 4-byte instructions * 16384 = 64K page */
+#define __page_o_noops() asm(".rept 16384 ; nop; .endr")
+
+static inline u64 __read_pkey_reg(void)
+{
+ u64 pkey_reg = 0;
+
+ // POR_EL0
+ asm volatile("mrs %0, S3_3_c10_c2_4" : "=r" (pkey_reg));
+
+ return pkey_reg;
+}
+
+static inline void __write_pkey_reg(u64 pkey_reg)
+{
+ u64 por = pkey_reg;
+
+ dprintf4("%s() changing %016llx to %016llx\n",
+ __func__, __read_pkey_reg(), pkey_reg);
+
+ // POR_EL0
+ asm volatile("msr S3_3_c10_c2_4, %0\nisb" :: "r" (por) :);
+
+ dprintf4("%s() pkey register after changing %016llx to %016llx\n",
+ __func__, __read_pkey_reg(), pkey_reg);
+}
+
+static inline int cpu_has_pkeys(void)
+{
+ /* No simple way to determine this */
+ return 1;
+}
+
+static inline u32 pkey_bit_position(int pkey)
+{
+ return pkey * PKEY_BITS_PER_PKEY;
+}
+
+static inline int get_arch_reserved_keys(void)
+{
+ return NR_RESERVED_PKEYS;
+}
+
+void expect_fault_on_read_execonly_key(void *p1, int pkey)
+{
+}
+
+void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+{
+ return PTR_ERR_ENOTSUP;
+}
+
+#define set_pkey_bits set_pkey_bits
+static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
+{
+ u32 shift = pkey_bit_position(pkey);
+ u64 new_val = POE_RWX;
+
+ /* mask out bits from pkey in old value */
+ reg &= ~((u64)PKEY_MASK << shift);
+
+ if (flags & PKEY_DISABLE_ACCESS)
+ new_val = POE_X;
+ else if (flags & PKEY_DISABLE_WRITE)
+ new_val = POE_RX;
+
+ /* OR in new bits for pkey */
+ reg |= new_val << shift;
+
+ return reg;
+}
+
+#define get_pkey_bits get_pkey_bits
+static inline u64 get_pkey_bits(u64 reg, int pkey)
+{
+ u32 shift = pkey_bit_position(pkey);
+ /*
+ * shift down the relevant bits to the lowest four, then
+ * mask off all the other higher bits
+ */
+ u32 perm = (reg >> shift) & PKEY_MASK;
+
+ if (perm == POE_X)
+ return PKEY_DISABLE_ACCESS;
+ if (perm == POE_RX)
+ return PKEY_DISABLE_WRITE;
+ return 0;
+}
+
+static void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey)
+{
+ struct _aarch64_ctx *ctx = GET_UC_RESV_HEAD(uctxt);
+ struct poe_context *poe_ctx =
+ (struct poe_context *) get_header(ctx, POE_MAGIC,
+ sizeof(uctxt->uc_mcontext), NULL);
+ if (poe_ctx)
+ poe_ctx->por_el0 = pkey;
+}
+
+#endif /* _PKEYS_ARM64_H */
diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index 1af3156a9db8..15608350fc01 100644
--- a/tools/testing/selftests/mm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -91,12 +91,17 @@ void record_pkey_malloc(void *ptr, long size, int prot);
#include "pkey-x86.h"
#elif defined(__powerpc64__) /* arch */
#include "pkey-powerpc.h"
+#elif defined(__aarch64__) /* arch */
+#include "pkey-arm64.h"
#else /* arch */
#error Architecture not supported
#endif /* arch */
+#ifndef PKEY_MASK
#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
+#endif
+#ifndef set_pkey_bits
static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
{
u32 shift = pkey_bit_position(pkey);
@@ -106,7 +111,9 @@ static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
reg |= (flags & PKEY_MASK) << shift;
return reg;
}
+#endif
+#ifndef get_pkey_bits
static inline u64 get_pkey_bits(u64 reg, int pkey)
{
u32 shift = pkey_bit_position(pkey);
@@ -116,6 +123,7 @@ static inline u64 get_pkey_bits(u64 reg, int pkey)
*/
return ((reg >> shift) & PKEY_MASK);
}
+#endif
extern u64 shadow_pkey_reg;
diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h
index ae5df26104e5..3d0c0bdae5bc 100644
--- a/tools/testing/selftests/mm/pkey-powerpc.h
+++ b/tools/testing/selftests/mm/pkey-powerpc.h
@@ -8,7 +8,10 @@
# define SYS_pkey_free 385
#endif
#define REG_IP_IDX PT_NIP
+#define MCONTEXT_IP(mc) mc.gp_regs[REG_IP_IDX]
+#define MCONTEXT_TRAPNO(mc) mc.gp_regs[REG_TRAPNO]
#define REG_TRAPNO PT_TRAP
+#define MCONTEXT_FPREGS
#define gregs gp_regs
#define fpregs fp_regs
#define si_pkey_offset 0x20
diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h
index 814758e109c0..5f28e26a2511 100644
--- a/tools/testing/selftests/mm/pkey-x86.h
+++ b/tools/testing/selftests/mm/pkey-x86.h
@@ -15,6 +15,10 @@
#endif
+#define MCONTEXT_IP(mc) mc.gregs[REG_IP_IDX]
+#define MCONTEXT_TRAPNO(mc) mc.gregs[REG_TRAPNO]
+#define MCONTEXT_FPREGS
+
#ifndef PKEY_DISABLE_ACCESS
# define PKEY_DISABLE_ACCESS 0x1
#endif
diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c
index eaa6d1fc5328..0789981b72b9 100644
--- a/tools/testing/selftests/mm/protection_keys.c
+++ b/tools/testing/selftests/mm/protection_keys.c
@@ -147,7 +147,7 @@ void abort_hooks(void)
* will then fault, which makes sure that the fault code handles
* execute-only memory properly.
*/
-#ifdef __powerpc64__
+#if defined(__powerpc64__) || defined(__aarch64__)
/* This way, both 4K and 64K alignment are maintained */
__attribute__((__aligned__(65536)))
#else
@@ -212,7 +212,6 @@ void pkey_disable_set(int pkey, int flags)
unsigned long syscall_flags = 0;
int ret;
int pkey_rights;
- u64 orig_pkey_reg = read_pkey_reg();
dprintf1("START->%s(%d, 0x%x)\n", __func__,
pkey, flags);
@@ -242,8 +241,6 @@ void pkey_disable_set(int pkey, int flags)
dprintf1("%s(%d) pkey_reg: 0x%016llx\n",
__func__, pkey, read_pkey_reg());
- if (flags)
- pkey_assert(read_pkey_reg() >= orig_pkey_reg);
dprintf1("END<---%s(%d, 0x%x)\n", __func__,
pkey, flags);
}
@@ -253,7 +250,6 @@ void pkey_disable_clear(int pkey, int flags)
unsigned long syscall_flags = 0;
int ret;
int pkey_rights = hw_pkey_get(pkey, syscall_flags);
- u64 orig_pkey_reg = read_pkey_reg();
pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
@@ -273,8 +269,6 @@ void pkey_disable_clear(int pkey, int flags)
dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__,
pkey, read_pkey_reg());
- if (flags)
- assert(read_pkey_reg() <= orig_pkey_reg);
}
void pkey_write_allow(int pkey)
@@ -314,7 +308,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
ucontext_t *uctxt = vucontext;
int trapno;
unsigned long ip;
+#ifdef MCONTEXT_FPREGS
char *fpregs;
+#endif
#if defined(__i386__) || defined(__x86_64__) /* arch */
u32 *pkey_reg_ptr;
int pkey_reg_offset;
@@ -328,9 +324,11 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
__func__, __LINE__,
__read_pkey_reg(), shadow_pkey_reg);
- trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
- ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
+ trapno = MCONTEXT_TRAPNO(uctxt->uc_mcontext);
+ ip = MCONTEXT_IP(uctxt->uc_mcontext);
+#ifdef MCONTEXT_FPREGS
fpregs = (char *) uctxt->uc_mcontext.fpregs;
+#endif
dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n",
__func__, trapno, ip, si_code_str(si->si_code),
@@ -359,7 +357,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
#endif /* arch */
dprintf1("siginfo: %p\n", si);
+#ifdef MCONTEXT_FPREGS
dprintf1(" fpregs: %p\n", fpregs);
+#endif
if ((si->si_code == SEGV_MAPERR) ||
(si->si_code == SEGV_ACCERR) ||
@@ -389,6 +389,8 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
#elif defined(__powerpc64__) /* arch */
/* restore access and let the faulting instruction continue */
pkey_access_allow(siginfo_pkey);
+#elif defined(__aarch64__)
+ aarch64_write_signal_pkey(uctxt, PKEY_ALLOW_ALL);
#endif /* arch */
pkey_faults++;
dprintf1("<<<<==================================================\n");
@@ -902,7 +904,9 @@ void expected_pkey_fault(int pkey)
* test program continue. We now have to restore it.
*/
if (__read_pkey_reg() != 0)
-#else /* arch */
+#elif defined(__aarch64__)
+ if (__read_pkey_reg() != PKEY_ALLOW_ALL)
+#else
if (__read_pkey_reg() != shadow_pkey_reg)
#endif /* arch */
pkey_assert(0);
@@ -1492,6 +1496,11 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
lots_o_noops_around_write(&scratch);
do_not_expect_pkey_fault("executing on PROT_EXEC memory");
expect_fault_on_read_execonly_key(p1, pkey);
+
+ // Reset back to PROT_EXEC | PROT_READ for architectures that support
+ // non-PKEY execute-only permissions.
+ ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC | PROT_READ, (u64)pkey);
+ pkey_assert(!ret);
}
void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
@@ -1665,6 +1674,84 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
}
#endif
+#if defined(__aarch64__)
+void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
+{
+ pid_t child;
+ int status, ret;
+ struct iovec iov;
+ u64 trace_pkey;
+ /* Just a random pkey value.. */
+ u64 new_pkey = (POE_X << PKEY_BITS_PER_PKEY * 2) |
+ (POE_NONE << PKEY_BITS_PER_PKEY) |
+ POE_RWX;
+
+ child = fork();
+ pkey_assert(child >= 0);
+ dprintf3("[%d] fork() ret: %d\n", getpid(), child);
+ if (!child) {
+ ptrace(PTRACE_TRACEME, 0, 0, 0);
+
+ /* Stop and allow the tracer to modify PKRU directly */
+ raise(SIGSTOP);
+
+ /*
+ * need __read_pkey_reg() version so we do not do shadow_pkey_reg
+ * checking
+ */
+ if (__read_pkey_reg() != new_pkey)
+ exit(1);
+
+ raise(SIGSTOP);
+
+ exit(0);
+ }
+
+ pkey_assert(child == waitpid(child, &status, 0));
+ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+ iov.iov_base = &trace_pkey;
+ iov.iov_len = 8;
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
+ pkey_assert(ret == 0);
+ pkey_assert(trace_pkey == read_pkey_reg());
+
+ trace_pkey = new_pkey;
+
+ ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_ARM_POE, &iov);
+ pkey_assert(ret == 0);
+
+ /* Test that the modification is visible in ptrace before any execution */
+ memset(&trace_pkey, 0, sizeof(trace_pkey));
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
+ pkey_assert(ret == 0);
+ pkey_assert(trace_pkey == new_pkey);
+
+ /* Execute the tracee */
+ ret = ptrace(PTRACE_CONT, child, 0, 0);
+ pkey_assert(ret == 0);
+
+ /* Test that the tracee saw the PKRU value change */
+ pkey_assert(child == waitpid(child, &status, 0));
+ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+ /* Test that the modification is visible in ptrace after execution */
+ memset(&trace_pkey, 0, sizeof(trace_pkey));
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
+ pkey_assert(ret == 0);
+ pkey_assert(trace_pkey == new_pkey);
+
+ ret = ptrace(PTRACE_CONT, child, 0, 0);
+ pkey_assert(ret == 0);
+ pkey_assert(child == waitpid(child, &status, 0));
+ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+ pkey_assert(WIFEXITED(status));
+ pkey_assert(WEXITSTATUS(status) == 0);
+}
+#endif
+
void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
{
int size = PAGE_SIZE;
@@ -1700,7 +1787,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
test_pkey_syscalls_bad_args,
test_pkey_alloc_exhaust,
test_pkey_alloc_free_attach_pkey0,
-#if defined(__i386__) || defined(__x86_64__)
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
test_ptrace_modifies_pkru,
#endif
};
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 03ac4f2e1cce..36045edb10de 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -374,8 +374,11 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke
# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
CATEGORY="madv_populate" run_test ./madv_populate
+if [ -x ./memfd_secret ]
+then
(echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix
CATEGORY="memfd_secret" run_test ./memfd_secret
+fi
# KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100
CATEGORY="ksm" run_test ./ksm_tests -H -s 100
diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c
index 7aa1366063e4..d9f8ba8d5050 100644
--- a/tools/testing/selftests/mm/seal_elf.c
+++ b/tools/testing/selftests/mm/seal_elf.c
@@ -30,17 +30,6 @@ static int sys_mseal(void *start, size_t len)
return sret;
}
-static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
- unsigned long flags, unsigned long fd, unsigned long offset)
-{
- void *sret;
-
- errno = 0;
- sret = (void *) syscall(__NR_mmap, addr, len, prot,
- flags, fd, offset);
- return sret;
-}
-
static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot)
{
int sret;
@@ -56,7 +45,7 @@ static bool seal_support(void)
void *ptr;
unsigned long page_size = getpagesize();
- ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (ptr == (void *) -1)
return false;
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 666ab7d9390b..1c04c780db66 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -17,6 +17,7 @@ ipv6_flowlabel
ipv6_flowlabel_mgr
log.txt
msg_zerocopy
+ncdevmem
nettest
psock_fanout
psock_snd
@@ -34,6 +35,7 @@ scm_pidfd
scm_rights
sk_bind_sendto_listen
sk_connect_zero_addr
+sk_so_peek_off
socket
so_incoming_cpu
so_netns_cookie
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 8eaffd7a641c..649f1fe0dc46 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -56,7 +56,7 @@ TEST_PROGS += ip_local_port_range.sh
TEST_PROGS += rps_default_mask.sh
TEST_PROGS += big_tcp.sh
TEST_PROGS += netns-sysctl.sh
-TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh
+TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
@@ -80,12 +80,14 @@ TEST_PROGS += io_uring_zerocopy_tx.sh
TEST_GEN_FILES += bind_bhash
TEST_GEN_PROGS += sk_bind_sendto_listen
TEST_GEN_PROGS += sk_connect_zero_addr
+TEST_GEN_PROGS += sk_so_peek_off
TEST_PROGS += test_ingress_egress_chaining.sh
TEST_GEN_PROGS += so_incoming_cpu
TEST_PROGS += sctp_vrf.sh
TEST_GEN_FILES += sctp_hello
TEST_GEN_FILES += ip_local_port_range
-TEST_GEN_FILES += bind_wildcard
+TEST_GEN_PROGS += bind_wildcard
+TEST_GEN_PROGS += bind_timewait
TEST_PROGS += test_vxlan_mdb.sh
TEST_PROGS += test_bridge_neigh_suppress.sh
TEST_PROGS += test_vxlan_nolocalbypass.sh
@@ -95,6 +97,11 @@ TEST_PROGS += fq_band_pktlimit.sh
TEST_PROGS += vlan_hw_filter.sh
TEST_PROGS += bpf_offload.py
+# YNL files, must be before "include ..lib.mk"
+EXTRA_CLEAN += $(OUTPUT)/libynl.a
+YNL_GEN_FILES := ncdevmem
+TEST_GEN_FILES += $(YNL_GEN_FILES)
+
TEST_FILES := settings
TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
@@ -104,6 +111,10 @@ TEST_INCLUDES := forwarding/lib.sh
include ../lib.mk
+# YNL build
+YNL_GENS := netdev
+include ynl.mk
+
$(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto
diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c
index 16d0c172eaeb..3ed3882a93b8 100644
--- a/tools/testing/selftests/net/af_unix/msg_oob.c
+++ b/tools/testing/selftests/net/af_unix/msg_oob.c
@@ -209,7 +209,7 @@ static void __sendpair(struct __test_metadata *_metadata,
static void __recvpair(struct __test_metadata *_metadata,
FIXTURE_DATA(msg_oob) *self,
- const void *expected_buf, int expected_len,
+ const char *expected_buf, int expected_len,
int buf_len, int flags)
{
int i, ret[2], recv_errno[2], expected_errno = 0;
@@ -525,6 +525,29 @@ TEST_F(msg_oob, ex_oob_drop_2)
}
}
+TEST_F(msg_oob, ex_oob_oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("", -EAGAIN, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
TEST_F(msg_oob, ex_oob_ahead_break)
{
sendpair("hello", 5, MSG_OOB);
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 386ebd829df5..899dbad0104b 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -4304,14 +4304,7 @@ elif [ "$TESTS" = "ipv6" ]; then
TESTS="$TESTS_IPV6"
fi
-# nettest can be run from PATH or from same directory as this selftest
-if ! which nettest >/dev/null; then
- PATH=$PWD:$PATH
- if ! which nettest >/dev/null; then
- echo "'nettest' command not found; skipping tests"
- exit $ksft_skip
- fi
-fi
+check_gen_prog "nettest"
declare -i nfail=0
declare -i nsuccess=0
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index ac0b2c6a5761..77c83d9508d3 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -78,7 +78,12 @@ log_test()
else
ret=1
nfail=$((nfail+1))
- printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [[ $rc -eq $ksft_skip ]]; then
+ printf "TEST: %-60s [SKIP]\n" "${msg}"
+ else
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ fi
+
if [ "$VERBOSE" = "1" ]; then
echo " rc=$rc, expected $expected"
fi
@@ -923,6 +928,29 @@ ipv6_grp_fcnal()
ipv6_grp_refs
log_test $? 0 "Nexthop group replace refcounts"
+
+ #
+ # 16-bit weights.
+ #
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+ run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1"
+ run_cmd "$IP nexthop add id 66 dev veth1"
+
+ run_cmd "$IP nexthop add id 103 group 62,1000"
+ if [[ $? == 0 ]]; then
+ local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535"
+ run_cmd "$IP nexthop replace $GRP"
+ check_nexthop "id 103" "$GRP"
+ rc=$?
+ else
+ rc=$ksft_skip
+ fi
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ log_test $rc 0 "16-bit weights"
}
ipv6_res_grp_fcnal()
@@ -987,6 +1015,31 @@ ipv6_res_grp_fcnal()
check_nexthop_bucket "list id 102" \
"id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+
+ #
+ # 16-bit weights.
+ #
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+ run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1"
+ run_cmd "$IP nexthop add id 66 dev veth1"
+
+ run_cmd "$IP nexthop add id 103 group 62,1000 type resilient buckets 32"
+ if [[ $? == 0 ]]; then
+ local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535 $(:
+ )type resilient buckets 32 idle_timer 0 $(:
+ )unbalanced_timer 0"
+ run_cmd "$IP nexthop replace $GRP"
+ check_nexthop "id 103" "$GRP unbalanced_time 0"
+ rc=$?
+ else
+ rc=$ksft_skip
+ fi
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ log_test $rc 0 "16-bit weights"
}
ipv6_fcnal_runtime()
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 7c01f58a20de..1d58b3b87465 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -35,18 +35,13 @@ log_test()
local expected=$2
local msg="$3"
- $IP rule show | grep -q l3mdev
- if [ $? -eq 0 ]; then
- msg="$msg (VRF)"
- fi
-
if [ ${rc} -eq ${expected} ]; then
nsuccess=$((nsuccess+1))
- printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
else
ret=1
nfail=$((nfail+1))
- printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
@@ -56,39 +51,6 @@ log_test()
fi
}
-log_section()
-{
- echo
- echo "######################################################################"
- echo "TEST SECTION: $*"
- echo "######################################################################"
-}
-
-check_nettest()
-{
- if which nettest > /dev/null 2>&1; then
- return 0
- fi
-
- # Add the selftest directory to PATH if not already done
- if [ "${SELFTEST_PATH}" = "" ]; then
- SELFTEST_PATH="$(dirname $0)"
- PATH="${PATH}:${SELFTEST_PATH}"
-
- # Now retry with the new path
- if which nettest > /dev/null 2>&1; then
- return 0
- fi
-
- if [ "${ret}" -eq 0 ]; then
- ret="${ksft_skip}"
- fi
- echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')"
- fi
-
- return 1
-}
-
setup()
{
set -e
@@ -187,12 +149,17 @@ fib_rule6_test_match_n_redirect()
{
local match="$1"
local getmatch="$2"
- local description="$3"
+ local getnomatch="$3"
+ local description="$4"
+ local nomatch_description="$5"
$IP -6 rule add $match table $RTABLE
$IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
log_test $? 0 "rule6 check: $description"
+ $IP -6 route get $GW_IP6 $getnomatch 2>&1 | grep -q "table $RTABLE"
+ log_test $? 1 "rule6 check: $nomatch_description"
+
fib_rule6_del_by_pref "$match"
log_test $? 0 "rule6 del by pref: $description"
}
@@ -213,18 +180,27 @@ fib_rule6_test_reject()
fib_rule6_test()
{
+ local ext_name=$1; shift
+ local getnomatch
local getmatch
local match
local cnt
+ echo
+ echo "IPv6 FIB rule tests $ext_name"
+
# setup the fib rule redirect route
$IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
match="oif $DEV"
- fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table"
+ getnomatch="oif lo"
+ fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \
+ "oif redirect to table" "oif no redirect to table"
match="from $SRC_IP6 iif $DEV"
- fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
+ getnomatch="from $SRC_IP6 iif lo"
+ fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \
+ "iif redirect to table" "iif no redirect to table"
# Reject dsfield (tos) options which have ECN bits set
for cnt in $(seq 1 3); do
@@ -238,44 +214,89 @@ fib_rule6_test()
# Using option 'tos' instead of 'dsfield' as old iproute2
# versions don't support 'dsfield' in ip rule show.
getmatch="tos $cnt"
+ getnomatch="tos 0x20"
fib_rule6_test_match_n_redirect "$match" "$getmatch" \
- "$getmatch redirect to table"
+ "$getnomatch" "$getmatch redirect to table" \
+ "$getnomatch no redirect to table"
+ done
+
+ # Re-test TOS matching, but with input routes since they are handled
+ # differently from output routes.
+ match="tos 0x10"
+ for cnt in "0x10" "0x11" "0x12" "0x13"; do
+ getmatch="tos $cnt"
+ getnomatch="tos 0x20"
+ fib_rule6_test_match_n_redirect "$match" \
+ "from $SRC_IP6 iif $DEV $getmatch" \
+ "from $SRC_IP6 iif $DEV $getnomatch" \
+ "iif $getmatch redirect to table" \
+ "iif $getnomatch no redirect to table"
done
match="fwmark 0x64"
getmatch="mark 0x64"
- fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+ getnomatch="mark 0x63"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \
+ "fwmark redirect to table" "fwmark no redirect to table"
fib_check_iproute_support "uidrange" "uid"
if [ $? -eq 0 ]; then
match="uidrange 100-100"
getmatch="uid 100"
- fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+ getnomatch="uid 101"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "uid redirect to table" \
+ "uid no redirect to table"
fi
fib_check_iproute_support "sport" "sport"
if [ $? -eq 0 ]; then
match="sport 666 dport 777"
- fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+ getnomatch="sport 667 dport 778"
+ fib_rule6_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "sport and dport redirect to table" \
+ "sport and dport no redirect to table"
fi
fib_check_iproute_support "ipproto" "ipproto"
if [ $? -eq 0 ]; then
match="ipproto tcp"
- fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match"
+ getnomatch="ipproto udp"
+ fib_rule6_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "ipproto tcp match" "ipproto udp no match"
fi
fib_check_iproute_support "ipproto" "ipproto"
if [ $? -eq 0 ]; then
match="ipproto ipv6-icmp"
- fib_rule6_test_match_n_redirect "$match" "$match" "ipproto ipv6-icmp match"
+ getnomatch="ipproto tcp"
+ fib_rule6_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "ipproto ipv6-icmp match" \
+ "ipproto ipv6-tcp no match"
+ fi
+
+ fib_check_iproute_support "dscp" "tos"
+ if [ $? -eq 0 ]; then
+ match="dscp 0x3f"
+ getmatch="tos 0xfc"
+ getnomatch="tos 0xf4"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "dscp redirect to table" \
+ "dscp no redirect to table"
+
+ match="dscp 0x3f"
+ getmatch="from $SRC_IP6 iif $DEV tos 0xfc"
+ getnomatch="from $SRC_IP6 iif $DEV tos 0xf4"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif dscp redirect to table" \
+ "iif dscp no redirect to table"
fi
}
fib_rule6_vrf_test()
{
setup_vrf
- fib_rule6_test
+ fib_rule6_test "- with VRF"
cleanup_vrf
}
@@ -285,10 +306,8 @@ fib_rule6_connect_test()
{
local dsfield
- if ! check_nettest; then
- echo "SKIP: Could not run test without nettest tool"
- return
- fi
+ echo
+ echo "IPv6 FIB rule connect tests"
setup_peer
$IP -6 rule add dsfield 0x04 table $RTABLE_PEER
@@ -306,7 +325,45 @@ fib_rule6_connect_test()
log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})"
done
+ # Check that UDP and TCP connections fail when using a DS Field that
+ # does not match the previously configured FIB rule.
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D \
+ -Q 0x20 -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 1 "rule6 dsfield udp no connect (dsfield 0x20)"
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0x20 \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 1 "rule6 dsfield tcp no connect (dsfield 0x20)"
+
$IP -6 rule del dsfield 0x04 table $RTABLE_PEER
+
+ ip rule help 2>&1 | grep -q dscp
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 iprule too old, missing dscp match"
+ cleanup_peer
+ return
+ fi
+
+ $IP -6 rule add dscp 0x3f table $RTABLE_PEER
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xfc \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 0 "rule6 dscp udp connect"
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xfc \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 0 "rule6 dscp tcp connect"
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xf4 \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 1 "rule6 dscp udp no connect"
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xf4 \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 1 "rule6 dscp tcp no connect"
+
+ $IP -6 rule del dscp 0x3f table $RTABLE_PEER
+
cleanup_peer
}
@@ -326,12 +383,17 @@ fib_rule4_test_match_n_redirect()
{
local match="$1"
local getmatch="$2"
- local description="$3"
+ local getnomatch="$3"
+ local description="$4"
+ local nomatch_description="$5"
$IP rule add $match table $RTABLE
$IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
log_test $? 0 "rule4 check: $description"
+ $IP route get $GW_IP4 $getnomatch 2>&1 | grep -q "table $RTABLE"
+ log_test $? 1 "rule4 check: $nomatch_description"
+
fib_rule4_del_by_pref "$match"
log_test $? 0 "rule4 del by pref: $description"
}
@@ -352,23 +414,31 @@ fib_rule4_test_reject()
fib_rule4_test()
{
+ local ext_name=$1; shift
+ local getnomatch
local getmatch
local match
local cnt
+ echo
+ echo "IPv4 FIB rule tests $ext_name"
+
# setup the fib rule redirect route
$IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
match="oif $DEV"
- fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table"
+ getnomatch="oif lo"
+ fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
+ "oif redirect to table" "oif no redirect to table"
- # need enable forwarding and disable rp_filter temporarily as all the
- # addresses are in the same subnet and egress device == ingress device.
+ # Enable forwarding and disable rp_filter as all the addresses are in
+ # the same subnet and egress device == ingress device.
ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1
ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
match="from $SRC_IP iif $DEV"
- fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
- ip netns exec $testns sysctl -qw net.ipv4.ip_forward=0
+ getnomatch="from $SRC_IP iif lo"
+ fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
+ "iif redirect to table" "iif no redirect to table"
# Reject dsfield (tos) options which have ECN bits set
for cnt in $(seq 1 3); do
@@ -382,44 +452,90 @@ fib_rule4_test()
# Using option 'tos' instead of 'dsfield' as old iproute2
# versions don't support 'dsfield' in ip rule show.
getmatch="tos $cnt"
+ getnomatch="tos 0x20"
fib_rule4_test_match_n_redirect "$match" "$getmatch" \
- "$getmatch redirect to table"
+ "$getnomatch" "$getmatch redirect to table" \
+ "$getnomatch no redirect to table"
+ done
+
+ # Re-test TOS matching, but with input routes since they are handled
+ # differently from output routes.
+ match="tos 0x10"
+ for cnt in "0x10" "0x11" "0x12" "0x13"; do
+ getmatch="tos $cnt"
+ getnomatch="tos 0x20"
+ fib_rule4_test_match_n_redirect "$match" \
+ "from $SRC_IP iif $DEV $getmatch" \
+ "from $SRC_IP iif $DEV $getnomatch" \
+ "iif $getmatch redirect to table" \
+ "iif $getnomatch no redirect to table"
done
match="fwmark 0x64"
getmatch="mark 0x64"
- fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+ getnomatch="mark 0x63"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \
+ "fwmark redirect to table" "fwmark no redirect to table"
fib_check_iproute_support "uidrange" "uid"
if [ $? -eq 0 ]; then
match="uidrange 100-100"
getmatch="uid 100"
- fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+ getnomatch="uid 101"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "uid redirect to table" \
+ "uid no redirect to table"
fi
fib_check_iproute_support "sport" "sport"
if [ $? -eq 0 ]; then
match="sport 666 dport 777"
- fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+ getnomatch="sport 667 dport 778"
+ fib_rule4_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "sport and dport redirect to table" \
+ "sport and dport no redirect to table"
fi
fib_check_iproute_support "ipproto" "ipproto"
if [ $? -eq 0 ]; then
match="ipproto tcp"
- fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match"
+ getnomatch="ipproto udp"
+ fib_rule4_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "ipproto tcp match" \
+ "ipproto udp no match"
fi
fib_check_iproute_support "ipproto" "ipproto"
if [ $? -eq 0 ]; then
match="ipproto icmp"
- fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+ getnomatch="ipproto tcp"
+ fib_rule4_test_match_n_redirect "$match" "$match" \
+ "$getnomatch" "ipproto icmp match" \
+ "ipproto tcp no match"
+ fi
+
+ fib_check_iproute_support "dscp" "tos"
+ if [ $? -eq 0 ]; then
+ match="dscp 0x3f"
+ getmatch="tos 0xfc"
+ getnomatch="tos 0xf4"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "dscp redirect to table" \
+ "dscp no redirect to table"
+
+ match="dscp 0x3f"
+ getmatch="from $SRC_IP iif $DEV tos 0xfc"
+ getnomatch="from $SRC_IP iif $DEV tos 0xf4"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif dscp redirect to table" \
+ "iif dscp no redirect to table"
fi
}
fib_rule4_vrf_test()
{
setup_vrf
- fib_rule4_test
+ fib_rule4_test "- with VRF"
cleanup_vrf
}
@@ -429,10 +545,8 @@ fib_rule4_connect_test()
{
local dsfield
- if ! check_nettest; then
- echo "SKIP: Could not run test without nettest tool"
- return
- fi
+ echo
+ echo "IPv4 FIB rule connect tests"
setup_peer
$IP -4 rule add dsfield 0x04 table $RTABLE_PEER
@@ -450,16 +564,46 @@ fib_rule4_connect_test()
log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})"
done
+ # Check that UDP and TCP connections fail when using a DS Field that
+ # does not match the previously configured FIB rule.
+ nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0x20 \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 1 "rule4 dsfield udp no connect (dsfield 0x20)"
+
+ nettest -q -B -t 5 -N $testns -O $peerns -Q 0x20 \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 1 "rule4 dsfield tcp no connect (dsfield 0x20)"
+
$IP -4 rule del dsfield 0x04 table $RTABLE_PEER
- cleanup_peer
-}
-run_fibrule_tests()
-{
- log_section "IPv4 fib rule"
- fib_rule4_test
- log_section "IPv6 fib rule"
- fib_rule6_test
+ ip rule help 2>&1 | grep -q dscp
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 iprule too old, missing dscp match"
+ cleanup_peer
+ return
+ fi
+
+ $IP -4 rule add dscp 0x3f table $RTABLE_PEER
+
+ nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xfc \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 0 "rule4 dscp udp connect"
+
+ nettest -q -B -t 5 -N $testns -O $peerns -Q 0xfc \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 0 "rule4 dscp tcp connect"
+
+ nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xf4 \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 1 "rule4 dscp udp no connect"
+
+ nettest -q -B -t 5 -N $testns -O $peerns -Q 0xf4 \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 1 "rule4 dscp tcp no connect"
+
+ $IP -4 rule del dscp 0x3f table $RTABLE_PEER
+
+ cleanup_peer
}
################################################################################
# usage
@@ -495,6 +639,8 @@ if [ ! -x "$(command -v ip)" ]; then
exit $ksft_skip
fi
+check_gen_prog "nettest"
+
# start clean
cleanup &> /dev/null
setup
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
index 7fdb6a9ca543..a652429bfd53 100644
--- a/tools/testing/selftests/net/forwarding/README
+++ b/tools/testing/selftests/net/forwarding/README
@@ -6,7 +6,7 @@ to easily create and test complex environments.
Unfortunately, these namespaces can not be used with actual switching
ASICs, as their ports can not be migrated to other network namespaces
-(NETIF_F_NETNS_LOCAL) and most of them probably do not support the
+(dev->netns_local) and most of them probably do not support the
L1-separation provided by namespaces.
However, a similar kind of flexibility can be achieved by using VRFs and
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index 64bd00fe9a4f..90f8a244ea90 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn"
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid"
NUM_NETIFS=4
CHECK_TC="yes"
source lib.sh
@@ -142,6 +142,58 @@ extern_learn()
bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null
}
+other_tpid()
+{
+ local mac=de:ad:be:ef:13:37
+
+ # Test that packets with TPID 802.1ad VID 3 + TPID 802.1Q VID 5 are
+ # classified as untagged by a bridge with vlan_protocol 802.1Q, and
+ # are processed in the PVID of the ingress port (here 1). Not VID 3,
+ # and not VID 5.
+ RET=0
+
+ tc qdisc add dev $h2 clsact
+ tc filter add dev $h2 ingress protocol all pref 1 handle 101 \
+ flower dst_mac $mac action drop
+ ip link set $h2 promisc on
+ ethtool -K $h2 rx-vlan-filter off rx-vlan-stag-filter off
+
+ $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa"
+ sleep 1
+
+ # Match on 'self' addresses as well, for those drivers which
+ # do not push their learned addresses to the bridge software
+ # database
+ bridge -j fdb show $swp1 | \
+ jq -e ".[] | select(.mac == \"$(mac_get $h1)\") | select(.vlan == 1)" &> /dev/null
+ check_err $? "FDB entry was not learned when it should"
+
+ log_test "FDB entry in PVID for VLAN-tagged with other TPID"
+
+ RET=0
+ tc -j -s filter show dev $h2 ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err $? "Packet was not forwarded when it should"
+ log_test "Reception of VLAN with other TPID as untagged"
+
+ bridge vlan del dev $swp1 vid 1
+
+ $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa"
+ sleep 1
+
+ RET=0
+ tc -j -s filter show dev $h2 ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err $? "Packet was forwarded when should not"
+ log_test "Reception of VLAN with other TPID as untagged (no PVID)"
+
+ bridge vlan add dev $swp1 vid 1 pvid untagged
+ ip link set $h2 promisc off
+ tc qdisc del dev $h2 clsact
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
index 1783c10215e5..7d531f7091e6 100755
--- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
@@ -224,10 +224,10 @@ send_dst_ipv6()
send_flowlabel()
{
# Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec v$h1 \
- $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1
- done
+ ip vrf exec v$h1 sh -c \
+ "for _ in {1..16384}; do \
+ $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1; \
+ done"
}
send_src_udp6()
diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
index 9788bd0f6e8b..dda11a4a9450 100755
--- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
@@ -319,10 +319,10 @@ send_dst_ipv6()
send_flowlabel()
{
# Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec v$h1 \
- $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
- done
+ ip vrf exec v$h1 sh -c \
+ "for _ in {1..16384}; do \
+ $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \
+ done"
}
send_src_udp6()
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
index 2ab9eaaa5532..e28b4a079e52 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
@@ -321,10 +321,10 @@ send_dst_ipv6()
send_flowlabel()
{
# Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec v$h1 \
- $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
- done
+ ip vrf exec v$h1 sh -c \
+ "for _ in {1..16384}; do \
+ $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \
+ done"
}
send_src_udp6()
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index ff96bb7535ff..c992e385159c 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -500,6 +500,11 @@ check_err_fail()
fi
}
+xfail()
+{
+ FAIL_TO_XFAIL=yes "$@"
+}
+
xfail_on_slow()
{
if [[ $KSFT_MACHINE_SLOW = yes ]]; then
@@ -509,6 +514,13 @@ xfail_on_slow()
fi
}
+omit_on_slow()
+{
+ if [[ $KSFT_MACHINE_SLOW != yes ]]; then
+ "$@"
+ fi
+}
+
xfail_on_veth()
{
local dev=$1; shift
@@ -1113,6 +1125,39 @@ mac_get()
ip -j link show dev $if_name | jq -r '.[]["address"]'
}
+ether_addr_to_u64()
+{
+ local addr="$1"
+ local order="$((1 << 40))"
+ local val=0
+ local byte
+
+ addr="${addr//:/ }"
+
+ for byte in $addr; do
+ byte="0x$byte"
+ val=$((val + order * byte))
+ order=$((order >> 8))
+ done
+
+ printf "0x%x" $val
+}
+
+u64_to_ether_addr()
+{
+ local val=$1
+ local byte
+ local i
+
+ for ((i = 40; i >= 0; i -= 8)); do
+ byte=$(((val & (0xff << i)) >> i))
+ printf "%02x" $byte
+ if [ $i -ne 0 ]; then
+ printf ":"
+ fi
+ done
+}
+
ipv6_lladdr_get()
{
local if_name=$1
@@ -2229,3 +2274,22 @@ absval()
echo $((v > 0 ? v : -v))
}
+
+has_unicast_flt()
+{
+ local dev=$1; shift
+ local mac_addr=$(mac_get $dev)
+ local tmp=$(ether_addr_to_u64 $mac_addr)
+ local promisc
+
+ ip link set $dev up
+ ip link add link $dev name macvlan-tmp type macvlan mode private
+ ip link set macvlan-tmp address $(u64_to_ether_addr $((tmp + 1)))
+ ip link set macvlan-tmp up
+
+ promisc=$(ip -j -d link show dev $dev | jq -r '.[].promiscuity')
+
+ ip link del macvlan-tmp
+
+ [[ $promisc == 1 ]] && echo "no" || echo "yes"
+}
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
index 4b364cdf3ef0..c35548767756 100755
--- a/tools/testing/selftests/net/forwarding/local_termination.sh
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -1,7 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="standalone bridge"
+ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \
+ vlan_over_vlan_unaware_bridged_port vlan_over_vlan_aware_bridged_port \
+ vlan_over_vlan_unaware_bridge vlan_over_vlan_aware_bridge"
NUM_NETIFS=2
PING_COUNT=1
REQUIRE_MTOOLS=yes
@@ -37,9 +39,68 @@ UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05"
UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06"
UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07"
-NON_IP_MC="01:02:03:04:05:06"
-NON_IP_PKT="00:04 48:45:4c:4f"
-BC="ff:ff:ff:ff:ff:ff"
+PTP_1588_L2_SYNC=" \
+01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 00 02 \
+00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \
+00 00 00 00 00 00 00 00 00 00"
+PTP_1588_L2_FOLLOW_UP=" \
+01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 08 02 \
+00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \
+00 00 66 83 c5 f1 17 97 ed f0"
+PTP_1588_L2_PDELAY_REQ=" \
+01:80:c2:00:00:0e 00:00:de:ad:be:ef 88:f7 02 02 \
+00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 06 05 7f \
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 00 00"
+PTP_1588_IPV4_SYNC=" \
+01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \
+00 48 0a 9a 40 00 01 11 cb 88 c0 00 02 01 e0 00 \
+01 81 01 3f 01 3f 00 34 a3 c8 00 02 00 2c 00 00 \
+02 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \
+63 ff fe cf 17 0e 00 01 00 00 00 00 00 00 00 00 \
+00 00 00 00 00 00"
+PTP_1588_IPV4_FOLLOW_UP="
+01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \
+00 48 0a 9b 40 00 01 11 cb 87 c0 00 02 01 e0 00 \
+01 81 01 40 01 40 00 34 a3 c8 08 02 00 2c 00 00 \
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \
+63 ff fe cf 17 0e 00 01 00 00 02 00 00 00 66 83 \
+c6 0f 1d 9a 61 87"
+PTP_1588_IPV4_PDELAY_REQ=" \
+01:00:5e:00:00:6b 00:00:de:ad:be:ef 08:00 45 00 \
+00 52 35 a9 40 00 01 11 a1 85 c0 00 02 01 e0 00 \
+00 6b 01 3f 01 3f 00 3e a2 bc 02 02 00 36 00 00 \
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \
+63 ff fe cf 17 0e 00 01 00 01 05 7f 00 00 00 00 \
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00"
+PTP_1588_IPV6_SYNC=" \
+33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 06 \
+7c 2f 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \
+00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \
+00 00 00 00 01 81 01 3f 01 3f 00 36 2e 92 00 02 \
+00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \
+00 00 00 00 00 00 00 00 00 00 00 00"
+PTP_1588_IPV6_FOLLOW_UP=" \
+33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 0a \
+00 bc 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \
+00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \
+00 00 00 00 01 81 01 40 01 40 00 36 2e 92 08 02 \
+00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \
+00 00 66 83 c6 2a 32 09 bd 74 00 00"
+PTP_1588_IPV6_PDELAY_REQ=" \
+33:33:00:00:00:6b 00:00:de:ad:be:ef 86:dd 60 0c \
+5c fd 00 40 11 01 fe 80 00 00 00 00 00 00 3c 37 \
+63 ff fe cf 17 0e ff 02 00 00 00 00 00 00 00 00 \
+00 00 00 00 00 6b 01 3f 01 3f 00 40 b4 54 02 02 \
+00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 3e 37 63 ff fe cf 17 0e 00 01 00 01 05 7f \
+00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \
+00 00 00 00 00 00"
# Disable promisc to ensure we don't receive unknown MAC DA packets
export TCPDUMP_EXTRA_FLAGS="-pl"
@@ -47,13 +108,15 @@ export TCPDUMP_EXTRA_FLAGS="-pl"
h1=${NETIFS[p1]}
h2=${NETIFS[p2]}
-send_non_ip()
+send_raw()
{
- local if_name=$1
- local smac=$2
- local dmac=$3
+ local if_name=$1; shift
+ local pkt="$1"; shift
+ local smac=$(mac_get $if_name)
+
+ pkt="${pkt/00:00:de:ad:be:ef/$smac}"
- $MZ -q $if_name "$dmac $smac $NON_IP_PKT"
+ $MZ -q $if_name "$pkt"
}
send_uc_ipv4()
@@ -68,10 +131,11 @@ send_uc_ipv4()
check_rcv()
{
- local if_name=$1
- local type=$2
- local pattern=$3
- local should_receive=$4
+ local if_name=$1; shift
+ local type=$1; shift
+ local pattern=$1; shift
+ local should_receive=$1; shift
+ local test_name="$1"; shift
local should_fail=
[ $should_receive = true ] && should_fail=0 || should_fail=1
@@ -81,7 +145,7 @@ check_rcv()
check_err_fail "$should_fail" "$?" "reception"
- log_test "$if_name: $type"
+ log_test "$test_name: $type"
}
mc_route_prepare()
@@ -104,44 +168,78 @@ mc_route_destroy()
run_test()
{
- local rcv_if_name=$1
- local smac=$(mac_get $h1)
+ local send_if_name=$1; shift
+ local rcv_if_name=$1; shift
+ local skip_ptp=$1; shift
+ local no_unicast_flt=$1; shift
+ local test_name="$1"; shift
+ local smac=$(mac_get $send_if_name)
local rcv_dmac=$(mac_get $rcv_if_name)
+ local should_receive
tcpdump_start $rcv_if_name
- mc_route_prepare $h1
+ mc_route_prepare $send_if_name
mc_route_prepare $rcv_if_name
- send_uc_ipv4 $h1 $rcv_dmac
- send_uc_ipv4 $h1 $MACVLAN_ADDR
- send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1
+ send_uc_ipv4 $send_if_name $rcv_dmac
+ send_uc_ipv4 $send_if_name $MACVLAN_ADDR
+ send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR1
ip link set dev $rcv_if_name promisc on
- send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2
- mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2
- mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2
+ send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR2
+ mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR2
+ mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR2
ip link set dev $rcv_if_name promisc off
mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR
- mc_send $h1 $JOINED_IPV4_MC_ADDR
+ mc_send $send_if_name $JOINED_IPV4_MC_ADDR
mc_leave
mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR
- mc_send $h1 $JOINED_IPV6_MC_ADDR
+ mc_send $send_if_name $JOINED_IPV6_MC_ADDR
mc_leave
- mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1
- mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1
+ mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR1
+ mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR1
ip link set dev $rcv_if_name allmulticast on
- send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3
- mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3
- mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3
+ send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR3
+ mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR3
+ mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR3
ip link set dev $rcv_if_name allmulticast off
mc_route_destroy $rcv_if_name
- mc_route_destroy $h1
+ mc_route_destroy $send_if_name
+
+ if [ $skip_ptp = false ]; then
+ ip maddress add 01:1b:19:00:00:00 dev $rcv_if_name
+ send_raw $send_if_name "$PTP_1588_L2_SYNC"
+ send_raw $send_if_name "$PTP_1588_L2_FOLLOW_UP"
+ ip maddress del 01:1b:19:00:00:00 dev $rcv_if_name
+
+ ip maddress add 01:80:c2:00:00:0e dev $rcv_if_name
+ send_raw $send_if_name "$PTP_1588_L2_PDELAY_REQ"
+ ip maddress del 01:80:c2:00:00:0e dev $rcv_if_name
+
+ mc_join $rcv_if_name 224.0.1.129
+ send_raw $send_if_name "$PTP_1588_IPV4_SYNC"
+ send_raw $send_if_name "$PTP_1588_IPV4_FOLLOW_UP"
+ mc_leave
+
+ mc_join $rcv_if_name 224.0.0.107
+ send_raw $send_if_name "$PTP_1588_IPV4_PDELAY_REQ"
+ mc_leave
+
+ mc_join $rcv_if_name ff0e::181
+ send_raw $send_if_name "$PTP_1588_IPV6_SYNC"
+ send_raw $send_if_name "$PTP_1588_IPV6_FOLLOW_UP"
+ mc_leave
+
+ mc_join $rcv_if_name ff02::6b
+ send_raw $send_if_name "$PTP_1588_IPV6_PDELAY_REQ"
+ mc_leave
+ fi
sleep 1
@@ -149,61 +247,99 @@ run_test()
check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \
"$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \
"$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
- xfail_on_veth $h1 \
- check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
- "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
- false
+ [ $no_unicast_flt = true ] && should_receive=true || should_receive=false
+ check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
+ "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
+ $should_receive "$test_name"
check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \
"$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
- xfail_on_veth $h1 \
- check_rcv $rcv_if_name \
- "Unicast IPv4 to unknown MAC address, allmulti" \
- "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
- false
+ [ $no_unicast_flt = true ] && should_receive=true || should_receive=false
+ check_rcv $rcv_if_name \
+ "Unicast IPv4 to unknown MAC address, allmulti" \
+ "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
+ $should_receive "$test_name"
check_rcv $rcv_if_name "Multicast IPv4 to joined group" \
"$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
- xfail_on_veth $h1 \
+ xfail \
check_rcv $rcv_if_name \
"Multicast IPv4 to unknown group" \
"$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \
- false
+ false "$test_name"
check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \
"$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \
"$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \
- true
+ true "$test_name"
check_rcv $rcv_if_name "Multicast IPv6 to joined group" \
"$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \
- true
+ true "$test_name"
- xfail_on_veth $h1 \
+ xfail \
check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \
"$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \
- false
+ false "$test_name"
check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \
"$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \
- true
+ true "$test_name"
check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \
"$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \
- true
+ true "$test_name"
+
+ if [ $skip_ptp = false ]; then
+ check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \
+ "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \
+ "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \
+ true "$test_name"
+
+ check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \
+ "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \
+ true "$test_name"
+ fi
tcpdump_cleanup $rcv_if_name
}
@@ -228,62 +364,217 @@ h2_destroy()
simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
}
+h1_vlan_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 100 v$h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_vlan_destroy()
+{
+ vlan_destroy $h1 100
+ simple_if_fini $h1
+}
+
+h2_vlan_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 100 v$h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_vlan_destroy()
+{
+ vlan_destroy $h2 100
+ simple_if_fini $h2
+}
+
bridge_create()
{
- ip link add br0 type bridge
+ local vlan_filtering=$1
+
+ ip link add br0 type bridge vlan_filtering $vlan_filtering
ip link set br0 address $BRIDGE_ADDR
ip link set br0 up
ip link set $h2 master br0
ip link set $h2 up
-
- simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64
}
bridge_destroy()
{
- simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64
-
ip link del br0
}
-standalone()
+macvlan_create()
{
- h1_create
- h2_create
+ local lower=$1
- ip link add link $h2 name macvlan0 type macvlan mode private
+ ip link add link $lower name macvlan0 type macvlan mode private
ip link set macvlan0 address $MACVLAN_ADDR
ip link set macvlan0 up
+}
- run_test $h2
-
+macvlan_destroy()
+{
ip link del macvlan0
+}
+
+standalone()
+{
+ local no_unicast_flt=true
+ local skip_ptp=false
+ if [ $(has_unicast_flt $h2) = yes ]; then
+ no_unicast_flt=false
+ fi
+
+ h1_create
+ h2_create
+ macvlan_create $h2
+
+ run_test $h1 $h2 $skip_ptp $no_unicast_flt "$h2"
+
+ macvlan_destroy
h2_destroy
h1_destroy
}
-bridge()
+test_bridge()
{
+ local no_unicast_flt=true
+ local vlan_filtering=$1
+ local skip_ptp=true
+
h1_create
- bridge_create
+ bridge_create $vlan_filtering
+ simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64
+ macvlan_create br0
- ip link add link br0 name macvlan0 type macvlan mode private
- ip link set macvlan0 address $MACVLAN_ADDR
- ip link set macvlan0 up
+ run_test $h1 br0 $skip_ptp $no_unicast_flt \
+ "vlan_filtering=$vlan_filtering bridge"
- run_test br0
+ macvlan_destroy
+ simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64
+ bridge_destroy
+ h1_destroy
+}
- ip link del macvlan0
+vlan_unaware_bridge()
+{
+ test_bridge 0
+}
+
+vlan_aware_bridge()
+{
+ test_bridge 1
+}
+
+test_vlan()
+{
+ local no_unicast_flt=true
+ local skip_ptp=false
+
+ if [ $(has_unicast_flt $h2) = yes ]; then
+ no_unicast_flt=false
+ fi
+
+ h1_vlan_create
+ h2_vlan_create
+ macvlan_create $h2.100
+ run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt "VLAN upper"
+
+ macvlan_destroy
+ h2_vlan_destroy
+ h1_vlan_destroy
+}
+
+vlan_over_bridged_port()
+{
+ local no_unicast_flt=true
+ local vlan_filtering=$1
+ local skip_ptp=false
+
+ # br_manage_promisc() will not force a single vlan_filtering port to
+ # promiscuous mode, so we should still expect unicast filtering to take
+ # place if the device can do it.
+ if [ $(has_unicast_flt $h2) = yes ] && [ $vlan_filtering = 1 ]; then
+ no_unicast_flt=false
+ fi
+
+ h1_vlan_create
+ h2_vlan_create
+ bridge_create $vlan_filtering
+ macvlan_create $h2.100
+
+ run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt \
+ "VLAN over vlan_filtering=$vlan_filtering bridged port"
+
+ macvlan_destroy
bridge_destroy
- h1_destroy
+ h2_vlan_destroy
+ h1_vlan_destroy
+}
+
+vlan_over_vlan_unaware_bridged_port()
+{
+ vlan_over_bridged_port 0
+}
+
+vlan_over_vlan_aware_bridged_port()
+{
+ vlan_over_bridged_port 1
+}
+
+vlan_over_bridge()
+{
+ local no_unicast_flt=true
+ local vlan_filtering=$1
+ local skip_ptp=true
+
+ h1_vlan_create
+ bridge_create $vlan_filtering
+ simple_if_init br0
+ vlan_create br0 100 vbr0 $H2_IPV4/24 $H2_IPV6/64
+ macvlan_create br0.100
+
+ if [ $vlan_filtering = 1 ]; then
+ bridge vlan add dev $h2 vid 100 master
+ bridge vlan add dev br0 vid 100 self
+ fi
+
+ run_test $h1.100 br0.100 $skip_ptp $no_unicast_flt \
+ "VLAN over vlan_filtering=$vlan_filtering bridge"
+
+ if [ $vlan_filtering = 1 ]; then
+ bridge vlan del dev br0 vid 100 self
+ bridge vlan del dev $h2 vid 100 master
+ fi
+
+ macvlan_destroy
+ vlan_destroy br0 100
+ simple_if_fini br0
+ bridge_destroy
+ h1_vlan_destroy
+}
+
+vlan_over_vlan_unaware_bridge()
+{
+ vlan_over_bridge 0
+}
+
+vlan_over_vlan_aware_bridge()
+{
+ vlan_over_bridge 1
}
cleanup()
{
pre_cleanup
+
+ ip link set $h2 down
+ ip link set $h1 down
+
vrf_cleanup
}
diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh
index af3b398d13f0..9e677aa64a06 100755
--- a/tools/testing/selftests/net/forwarding/no_forwarding.sh
+++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh
@@ -233,6 +233,9 @@ cleanup()
{
pre_cleanup
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
h2_destroy
h1_destroy
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index 2ba44247c60a..a7d8399c8d4f 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -40,6 +40,7 @@ ALL_TESTS="
ping_ipv4
ping_ipv6
multipath_test
+ multipath16_test
ping_ipv4_blackhole
ping_ipv6_blackhole
nh_stats_test_v4
@@ -226,9 +227,11 @@ routing_nh_obj()
multipath4_test()
{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
+ local desc=$1; shift
+ local weight_rp12=$1; shift
+ local weight_rp13=$1; shift
+ local ports=${1-sp=1024,dp=0-32768}; shift
+
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
@@ -242,7 +245,8 @@ multipath4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "$ports"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -258,9 +262,11 @@ multipath4_test()
multipath6_test()
{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
+ local desc=$1; shift
+ local weight_rp12=$1; shift
+ local weight_rp13=$1; shift
+ local ports=${1-sp=1024,dp=0-32768}; shift
+
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
@@ -275,7 +281,8 @@ multipath6_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "$ports"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -313,6 +320,23 @@ multipath_test()
multipath6_test "Weighted MP 11:45" 11 45
}
+multipath16_test()
+{
+ check_nhgw16 104 || return
+
+ log_info "Running 16-bit IPv4 multipath tests"
+ multipath4_test "65535:65535" 65535 65535
+ multipath4_test "128:512" 128 512
+ omit_on_slow \
+ multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+
+ log_info "Running 16-bit IPv6 multipath tests"
+ multipath6_test "65535:65535" 65535 65535
+ multipath6_test "128:512" 128 512
+ omit_on_slow \
+ multipath6_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+}
+
ping_ipv4_blackhole()
{
RET=0
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
index 2903294d8bca..507b2852dabe 100644
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
@@ -117,3 +117,16 @@ __nh_stats_test_v6()
$MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2
sysctl_restore net.ipv6.fib_multipath_hash_policy
}
+
+check_nhgw16()
+{
+ local nhid=$1; shift
+
+ ip nexthop replace id 9999 group "$nhid,65535" &>/dev/null
+ if (( $? )); then
+ log_test_skip "16-bit multipath tests" \
+ "iproute2 or the kernel do not support 16-bit next hop weights"
+ return 1
+ fi
+ ip nexthop del id 9999 ||:
+}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index cd9e346436fc..88ddae05b39d 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -40,6 +40,7 @@ ALL_TESTS="
ping_ipv4
ping_ipv6
multipath_test
+ multipath16_test
nh_stats_test_v4
nh_stats_test_v6
"
@@ -228,9 +229,11 @@ routing_nh_obj()
multipath4_test()
{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
+ local desc=$1; shift
+ local weight_rp12=$1; shift
+ local weight_rp13=$1; shift
+ local ports=${1-sp=1024,dp=0-32768}; shift
+
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
@@ -243,7 +246,8 @@ multipath4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "$ports"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -258,9 +262,11 @@ multipath4_test()
multipath6_l4_test()
{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
+ local desc=$1; shift
+ local weight_rp12=$1; shift
+ local weight_rp13=$1; shift
+ local ports=${1-sp=1024,dp=0-32768}; shift
+
local t0_rp12 t0_rp13 t1_rp12 t1_rp13
local packets_rp12 packets_rp13
@@ -273,7 +279,8 @@ multipath6_l4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "$ports"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -371,6 +378,41 @@ multipath_test()
ip nexthop replace id 106 group 104,1/105,1 type resilient
}
+multipath16_test()
+{
+ check_nhgw16 104 || return
+
+ log_info "Running 16-bit IPv4 multipath tests"
+ ip nexthop replace id 103 group 101/102 type resilient idle_timer 0
+
+ ip nexthop replace id 103 group 101,65535/102,65535 type resilient
+ multipath4_test "65535:65535" 65535 65535
+
+ ip nexthop replace id 103 group 101,128/102,512 type resilient
+ multipath4_test "128:512" 128 512
+
+ ip nexthop replace id 103 group 101,255/102,65535 type resilient
+ omit_on_slow \
+ multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+ log_info "Running 16-bit IPv6 L4 hash multipath tests"
+ ip nexthop replace id 106 group 104/105 type resilient idle_timer 0
+
+ ip nexthop replace id 106 group 104,65535/105,65535 type resilient
+ multipath6_l4_test "65535:65535" 65535 65535
+
+ ip nexthop replace id 106 group 104,128/105,512 type resilient
+ multipath6_l4_test "128:512" 128 512
+
+ ip nexthop replace id 106 group 104,255/105,65535 type resilient
+ omit_on_slow \
+ multipath6_l4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535
+
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+}
+
nh_stats_test_v4()
{
__nh_stats_test_v4 resilient
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index e2be354167a1..46f365b557b7 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -180,6 +180,7 @@ multipath4_test()
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -217,6 +218,7 @@ multipath6_test()
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
-d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+ sleep 1
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 589629636502..ea89e558672d 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -4,7 +4,8 @@
ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
gact_trap_test mirred_egress_to_ingress_test \
- mirred_egress_to_ingress_tcp_test"
+ mirred_egress_to_ingress_tcp_test \
+ ingress_2nd_vlan_push egress_2nd_vlan_push"
NUM_NETIFS=4
source tc_common.sh
source lib.sh
@@ -244,6 +245,49 @@ mirred_egress_to_ingress_tcp_test()
log_test "mirred_egress_to_ingress_tcp ($tcflags)"
}
+ingress_2nd_vlan_push()
+{
+ tc filter add dev $swp1 ingress pref 20 chain 0 handle 20 flower \
+ $tcflags num_of_vlans 1 \
+ action vlan push id 100 protocol 0x8100 action goto chain 5
+ tc filter add dev $swp1 ingress pref 30 chain 5 handle 30 flower \
+ $tcflags num_of_vlans 2 \
+ cvlan_ethtype 0x800 action pass
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -Q 10 -q
+
+ tc_check_packets "dev $swp1 ingress" 30 1
+ check_err $? "No double-vlan packets received"
+
+ tc filter del dev $swp1 ingress pref 20 chain 0 handle 20 flower
+ tc filter del dev $swp1 ingress pref 30 chain 5 handle 30 flower
+
+ log_test "ingress_2nd_vlan_push ($tcflags)"
+}
+
+egress_2nd_vlan_push()
+{
+ tc filter add dev $h1 egress pref 20 chain 0 handle 20 flower \
+ $tcflags num_of_vlans 0 \
+ action vlan push id 10 protocol 0x8100 \
+ pipe action vlan push id 100 protocol 0x8100 action goto chain 5
+ tc filter add dev $h1 egress pref 30 chain 5 handle 30 flower \
+ $tcflags num_of_vlans 2 \
+ cvlan_ethtype 0x800 action pass
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $h1 egress" 30 1
+ check_err $? "No double-vlan packets received"
+
+ tc filter del dev $h1 egress pref 20 chain 0 handle 20 flower
+ tc filter del dev $h1 egress pref 30 chain 5 handle 30 flower
+
+ log_test "egress_2nd_vlan_push ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index d0219032f773..be8707bfb46e 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -125,6 +125,21 @@ slowwait_for_counter()
slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@"
}
+# Check for existence of tools which are built as part of selftests
+# but may also already exist in $PATH
+check_gen_prog()
+{
+ local prog_name=$1; shift
+
+ if ! which $prog_name >/dev/null 2>/dev/null; then
+ PATH=$PWD:$PATH
+ if ! which $prog_name >/dev/null; then
+ echo "'$prog_name' command not found; skipping tests"
+ exit $ksft_skip
+ fi
+ fi
+}
+
remove_ns_list()
{
local item=$1
@@ -146,6 +161,7 @@ cleanup_ns()
for ns in "$@"; do
[ -z "${ns}" ] && continue
+ ip netns pids "${ns}" 2> /dev/null | xargs -r kill || true
ip netns delete "${ns}" &> /dev/null || true
if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then
echo "Warn: Failed to remove namespace $ns"
diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c
index b9f3fc3c3426..e0a34e5e8dd5 100644
--- a/tools/testing/selftests/net/lib/csum.c
+++ b/tools/testing/selftests/net/lib/csum.c
@@ -654,10 +654,16 @@ static int recv_verify_packet_ipv4(void *nh, int len)
{
struct iphdr *iph = nh;
uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+ uint16_t ip_len;
if (len < sizeof(*iph) || iph->protocol != proto)
return -1;
+ ip_len = ntohs(iph->tot_len);
+ if (ip_len > len || ip_len < sizeof(*iph))
+ return -1;
+
+ len = ip_len;
iph_addr_p = &iph->saddr;
if (proto == IPPROTO_TCP)
return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph));
@@ -669,16 +675,22 @@ static int recv_verify_packet_ipv6(void *nh, int len)
{
struct ipv6hdr *ip6h = nh;
uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+ uint16_t ip_len;
if (len < sizeof(*ip6h) || ip6h->nexthdr != proto)
return -1;
+ ip_len = ntohs(ip6h->payload_len);
+ if (ip_len > len - sizeof(*ip6h))
+ return -1;
+
+ len = ip_len;
iph_addr_p = &ip6h->saddr;
if (proto == IPPROTO_TCP)
- return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h));
+ return recv_verify_packet_tcp(ip6h + 1, len);
else
- return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h));
+ return recv_verify_packet_udp(ip6h + 1, len);
}
/* return whether auxdata includes TP_STATUS_CSUM_VALID */
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index f26c20df9db4..477ae76de93d 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
import builtins
+import functools
import inspect
import sys
import time
@@ -10,6 +11,7 @@ from .utils import global_defer_queue
KSFT_RESULT = None
KSFT_RESULT_ALL = True
+KSFT_DISRUPTIVE = True
class KsftFailEx(Exception):
@@ -32,8 +34,18 @@ def _fail(*args):
global KSFT_RESULT
KSFT_RESULT = False
- frame = inspect.stack()[2]
- ksft_pr("At " + frame.filename + " line " + str(frame.lineno) + ":")
+ stack = inspect.stack()
+ started = False
+ for frame in reversed(stack[2:]):
+ # Start printing from the test case function
+ if not started:
+ if frame.function == 'ksft_run':
+ started = True
+ continue
+
+ ksft_pr("Check| At " + frame.filename + ", line " + str(frame.lineno) +
+ ", in " + frame.function + ":")
+ ksft_pr("Check| " + frame.code_context[0].strip())
ksft_pr(*args)
@@ -43,6 +55,12 @@ def ksft_eq(a, b, comment=""):
_fail("Check failed", a, "!=", b, comment)
+def ksft_ne(a, b, comment=""):
+ global KSFT_RESULT
+ if a == b:
+ _fail("Check failed", a, "==", b, comment)
+
+
def ksft_true(a, comment=""):
if not a:
_fail("Check failed", a, "does not eval to True", comment)
@@ -127,6 +145,44 @@ def ksft_flush_defer():
KSFT_RESULT = False
+def ksft_disruptive(func):
+ """
+ Decorator that marks the test as disruptive (e.g. the test
+ that can down the interface). Disruptive tests can be skipped
+ by passing DISRUPTIVE=False environment variable.
+ """
+
+ @functools.wraps(func)
+ def wrapper(*args, **kwargs):
+ if not KSFT_DISRUPTIVE:
+ raise KsftSkipEx(f"marked as disruptive")
+ return func(*args, **kwargs)
+ return wrapper
+
+
+def ksft_setup(env):
+ """
+ Setup test framework global state from the environment.
+ """
+
+ def get_bool(env, name):
+ value = env.get(name, "").lower()
+ if value in ["yes", "true"]:
+ return True
+ if value in ["no", "false"]:
+ return False
+ try:
+ return bool(int(value))
+ except:
+ raise Exception(f"failed to parse {name}")
+
+ if "DISRUPTIVE" in env:
+ global KSFT_DISRUPTIVE
+ KSFT_DISRUPTIVE = get_bool(env, "DISRUPTIVE")
+
+ return env
+
+
def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
cases = cases or []
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 7b936a926859..5d796622e730 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -11,6 +11,8 @@ TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
TEST_FILES := mptcp_lib.sh settings
+TEST_INCLUDES := ../lib.sh ../net_helper.sh
+
EXTRA_CLEAN := *.pcap
include ../../lib.mk
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 776d43a6922d..2bd0c1eb70c5 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -284,7 +284,7 @@ echo "b" | \
./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \
127.0.0.1 >/dev/null &
wait_connected $ns 10000
-chk_msk_nr 2 "after MPC handshake "
+chk_msk_nr 2 "after MPC handshake"
chk_last_time_info 10000
chk_msk_remote_key_nr 2 "....chk remote_key"
chk_msk_fallback_nr 0 "....chk no fallback"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index d2043ec3bf6d..4209b9569039 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -1115,11 +1115,11 @@ again:
return 1;
}
- if (--cfg_repeat > 0) {
- if (cfg_input)
- close(fd);
+ if (cfg_input)
+ close(fd);
+
+ if (--cfg_repeat > 0)
goto again;
- }
return 0;
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index b77fb7065bfb..57325d57e4c6 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -345,9 +345,11 @@ do_transfer()
local addr_port
addr_port=$(printf "%s:%d" ${connect_addr} ${port})
- local result_msg
- result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})"
- mptcp_lib_print_title "${result_msg}"
+ local pretty_title
+ pretty_title="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})"
+ mptcp_lib_print_title "${pretty_title}"
+
+ local tap_title="${connector_ns:0:3} ${cl_proto} -> ${listener_ns:0:3} (${addr_port}) ${srv_proto}"
if $capture; then
local capuser
@@ -431,7 +433,6 @@ do_transfer()
local duration
duration=$((stop-start))
- result_msg+=" # time=${duration}ms"
printf "(duration %05sms) " "${duration}"
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
mptcp_lib_pr_fail "client exit code $retc, server $rets"
@@ -444,7 +445,7 @@ do_transfer()
echo
cat "$capout"
- mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
+ mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}"
return 1
fi
@@ -544,12 +545,12 @@ do_transfer()
if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
mptcp_lib_pr_ok "${extra:1}"
- mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}"
+ mptcp_lib_result_pass "${TEST_GROUP}: ${tap_title}"
else
if [ -n "${extra}" ]; then
mptcp_lib_print_warn "${extra:1}"
fi
- mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
+ mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}"
fi
cat "$capout"
@@ -848,6 +849,8 @@ stop_if_error()
make_file "$cin" "client"
make_file "$sin" "server"
+mptcp_lib_subtests_last_ts_reset
+
check_mptcp_disabled
stop_if_error "The kernel configuration is not valid for MPTCP"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 108aeeb84ef1..e8d0a01b4144 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -61,6 +61,16 @@ unset sflags
unset fastclose
unset fullmesh
unset speed
+unset join_csum_ns1
+unset join_csum_ns2
+unset join_fail_nr
+unset join_rst_nr
+unset join_infi_nr
+unset join_corrupted_pkts
+unset join_syn_tx
+unset join_create_err
+unset join_bind_err
+unset join_connect_err
# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
# (ip6 && (ip6[74] & 0xf0) == 0x30)'"
@@ -196,6 +206,22 @@ print_skip()
mptcp_lib_pr_skip "${@}"
}
+# $1: check name; $2: rc
+print_results()
+{
+ local check="${1}"
+ local rc=${2}
+
+ print_check "${check}"
+ if [ ${rc} = ${KSFT_PASS} ]; then
+ print_ok
+ elif [ ${rc} = ${KSFT_SKIP} ]; then
+ print_skip
+ else
+ fail_test "see above"
+ fi
+}
+
# [ $1: fail msg ]
mark_as_skipped()
{
@@ -337,7 +363,7 @@ reset_with_checksum()
local ns1_enable=$1
local ns2_enable=$2
- reset "checksum test ${1} ${2}" || return 1
+ reset "checksum test ${ns1_enable} ${ns2_enable}" || return 1
ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
@@ -420,12 +446,17 @@ reset_with_fail()
fi
}
+start_events()
+{
+ mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid
+ mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid
+}
+
reset_with_events()
{
reset "${1}" || return 1
- mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid
- mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid
+ start_events
}
reset_with_tcp_filter()
@@ -436,9 +467,10 @@ reset_with_tcp_filter()
local ns="${!1}"
local src="${2}"
local target="${3}"
+ local chain="${4:-INPUT}"
if ! ip netns exec "${ns}" ${iptables} \
- -A INPUT \
+ -A "${chain}" \
-s "${src}" \
-p tcp \
-j "${target}"; then
@@ -661,7 +693,7 @@ pm_nl_check_endpoint()
done
if [ -z "${id}" ]; then
- test_fail "bad test - missing endpoint id"
+ fail_test "bad test - missing endpoint id"
return
fi
@@ -833,7 +865,7 @@ chk_cestab_nr()
local cestab=$2
local count
- print_check "cestab $cestab"
+ print_check "currently established: $cestab"
count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab")
if [ -z "$count" ]; then
print_skip
@@ -1109,28 +1141,29 @@ chk_csum_nr()
csum_ns2=${csum_ns2:1}
fi
- print_check "sum"
+ print_check "checksum server"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr")
- if [ "$count" != "$csum_ns1" ]; then
+ if [ -n "$count" ] && [ "$count" != "$csum_ns1" ]; then
extra_msg+=" ns1=$count"
fi
if [ -z "$count" ]; then
print_skip
elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } ||
- { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
+ { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
fail_test "got $count data checksum error[s] expected $csum_ns1"
else
print_ok
fi
- print_check "csum"
+
+ print_check "checksum client"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr")
- if [ "$count" != "$csum_ns2" ]; then
+ if [ -n "$count" ] && [ "$count" != "$csum_ns2" ]; then
extra_msg+=" ns2=$count"
fi
if [ -z "$count" ]; then
print_skip
elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } ||
- { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
+ { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
fail_test "got $count data checksum error[s] expected $csum_ns2"
else
print_ok
@@ -1147,6 +1180,8 @@ chk_fail_nr()
local count
local ns_tx=$ns1
local ns_rx=$ns2
+ local tx="server"
+ local rx="client"
local extra_msg=""
local allow_tx_lost=0
local allow_rx_lost=0
@@ -1154,7 +1189,8 @@ chk_fail_nr()
if [[ $ns_invert = "invert" ]]; then
ns_tx=$ns2
ns_rx=$ns1
- extra_msg="invert"
+ tx="client"
+ rx="server"
fi
if [[ "${fail_tx}" = "-"* ]]; then
@@ -1166,29 +1202,29 @@ chk_fail_nr()
fail_rx=${fail_rx:1}
fi
- print_check "ftx"
+ print_check "fail tx ${tx}"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx")
- if [ "$count" != "$fail_tx" ]; then
- extra_msg+=",tx=$count"
+ if [ -n "$count" ] && [ "$count" != "$fail_tx" ]; then
+ extra_msg+=" tx=$count"
fi
if [ -z "$count" ]; then
print_skip
elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } ||
- { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
+ { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
fail_test "got $count MP_FAIL[s] TX expected $fail_tx"
else
print_ok
fi
- print_check "failrx"
+ print_check "fail rx ${rx}"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx")
- if [ "$count" != "$fail_rx" ]; then
- extra_msg+=",rx=$count"
+ if [ -n "$count" ] && [ "$count" != "$fail_rx" ]; then
+ extra_msg+=" rx=$count"
fi
if [ -z "$count" ]; then
print_skip
elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } ||
- { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
+ { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
fail_test "got $count MP_FAIL[s] RX expected $fail_rx"
else
print_ok
@@ -1205,37 +1241,35 @@ chk_fclose_nr()
local count
local ns_tx=$ns2
local ns_rx=$ns1
- local extra_msg=""
+ local tx="client"
+ local rx="server"
if [[ $ns_invert = "invert" ]]; then
ns_tx=$ns1
ns_rx=$ns2
- extra_msg="invert"
+ tx="server"
+ rx="client"
fi
- print_check "ctx"
+ print_check "fast close tx ${tx}"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_tx" ]; then
- extra_msg+=",tx=$count"
fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
else
print_ok
fi
- print_check "fclzrx"
+ print_check "fast close rx ${rx}"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_rx" ]; then
- extra_msg+=",rx=$count"
fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
else
print_ok
fi
-
- print_info "$extra_msg"
}
chk_rst_nr()
@@ -1246,15 +1280,17 @@ chk_rst_nr()
local count
local ns_tx=$ns1
local ns_rx=$ns2
- local extra_msg=""
+ local tx="server"
+ local rx="client"
if [[ $ns_invert = "invert" ]]; then
ns_tx=$ns2
ns_rx=$ns1
- extra_msg="invert"
+ tx="client"
+ rx="server"
fi
- print_check "rtx"
+ print_check "reset tx ${tx}"
count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx")
if [ -z "$count" ]; then
print_skip
@@ -1266,7 +1302,7 @@ chk_rst_nr()
print_ok
fi
- print_check "rstrx"
+ print_check "reset rx ${rx}"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx")
if [ -z "$count" ]; then
print_skip
@@ -1277,8 +1313,6 @@ chk_rst_nr()
else
print_ok
fi
-
- print_info "$extra_msg"
}
chk_infi_nr()
@@ -1287,7 +1321,7 @@ chk_infi_nr()
local infi_rx=$2
local count
- print_check "itx"
+ print_check "infi tx client"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx")
if [ -z "$count" ]; then
print_skip
@@ -1297,7 +1331,7 @@ chk_infi_nr()
print_ok
fi
- print_check "infirx"
+ print_check "infi rx server"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx")
if [ -z "$count" ]; then
print_skip
@@ -1308,17 +1342,66 @@ chk_infi_nr()
fi
}
+chk_join_tx_nr()
+{
+ local syn_tx=${join_syn_tx:-0}
+ local create=${join_create_err:-0}
+ local bind=${join_bind_err:-0}
+ local connect=${join_connect_err:-0}
+ local rc=${KSFT_PASS}
+ local count
+
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTx")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$syn_tx" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn tx"
+ fail_test "got $count JOIN[s] syn tx expected $syn_tx"
+ fi
+
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxCreatSkErr")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$create" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn tx create socket error"
+ fail_test "got $count JOIN[s] syn tx create socket error expected $create"
+ fi
+
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxBindErr")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$bind" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn tx bind error"
+ fail_test "got $count JOIN[s] syn tx bind error expected $bind"
+ fi
+
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxConnectErr")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$connect" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn tx connect error"
+ fail_test "got $count JOIN[s] syn tx connect error expected $connect"
+ fi
+
+ print_results "join Tx" ${rc}
+}
+
chk_join_nr()
{
local syn_nr=$1
local syn_ack_nr=$2
local ack_nr=$3
- local csum_ns1=${4:-0}
- local csum_ns2=${5:-0}
- local fail_nr=${6:-0}
- local rst_nr=${7:-0}
- local infi_nr=${8:-0}
- local corrupted_pkts=${9:-0}
+ local csum_ns1=${join_csum_ns1:-0}
+ local csum_ns2=${join_csum_ns2:-0}
+ local fail_nr=${join_fail_nr:-0}
+ local rst_nr=${join_rst_nr:-0}
+ local infi_nr=${join_infi_nr:-0}
+ local corrupted_pkts=${join_corrupted_pkts:-0}
+ local rc=${KSFT_PASS}
local count
local with_cookie
@@ -1326,43 +1409,44 @@ chk_join_nr()
print_info "${corrupted_pkts} corrupted pkts"
fi
- print_check "syn"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx")
if [ -z "$count" ]; then
- print_skip
+ rc=${KSFT_SKIP}
elif [ "$count" != "$syn_nr" ]; then
- fail_test "got $count JOIN[s] syn expected $syn_nr"
- else
- print_ok
+ rc=${KSFT_FAIL}
+ print_check "syn rx"
+ fail_test "got $count JOIN[s] syn rx expected $syn_nr"
fi
- print_check "synack"
with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies)
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx")
if [ -z "$count" ]; then
- print_skip
+ rc=${KSFT_SKIP}
elif [ "$count" != "$syn_ack_nr" ]; then
# simult connections exceeding the limit with cookie enabled could go up to
# synack validation as the conn limit can be enforced reliably only after
# the subflow creation
- if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then
- print_ok
- else
- fail_test "got $count JOIN[s] synack expected $syn_ack_nr"
+ if [ "$with_cookie" != 2 ] || [ "$count" -le "$syn_ack_nr" ] || [ "$count" -gt "$syn_nr" ]; then
+ rc=${KSFT_FAIL}
+ print_check "synack rx"
+ fail_test "got $count JOIN[s] synack rx expected $syn_ack_nr"
fi
- else
- print_ok
fi
- print_check "ack"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
if [ -z "$count" ]; then
- print_skip
+ rc=${KSFT_SKIP}
elif [ "$count" != "$ack_nr" ]; then
- fail_test "got $count JOIN[s] ack expected $ack_nr"
- else
- print_ok
+ rc=${KSFT_FAIL}
+ print_check "ack rx"
+ fail_test "got $count JOIN[s] ack rx expected $ack_nr"
fi
+
+ print_results "join Rx" ${rc}
+
+ join_syn_tx="${join_syn_tx:-${syn_nr}}" \
+ chk_join_tx_nr
+
if $validate_checksum; then
chk_csum_nr $csum_ns1 $csum_ns2
chk_fail_nr $fail_nr $fail_nr
@@ -1415,18 +1499,30 @@ chk_add_nr()
local add_nr=$1
local echo_nr=$2
local port_nr=${3:-0}
- local syn_nr=${4:-$port_nr}
- local syn_ack_nr=${5:-$port_nr}
- local ack_nr=${6:-$port_nr}
- local mis_syn_nr=${7:-0}
- local mis_ack_nr=${8:-0}
+ local ns_invert=${4:-""}
+ local syn_nr=$port_nr
+ local syn_ack_nr=$port_nr
+ local ack_nr=$port_nr
+ local mis_syn_nr=0
+ local mis_ack_nr=0
+ local ns_tx=$ns1
+ local ns_rx=$ns2
+ local tx=""
+ local rx=""
local count
local timeout
- timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
+ if [[ $ns_invert = "invert" ]]; then
+ ns_tx=$ns2
+ ns_rx=$ns1
+ tx=" client"
+ rx=" server"
+ fi
+
+ timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout)
- print_check "add"
- count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr")
+ print_check "add addr rx${rx}"
+ count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr")
if [ -z "$count" ]; then
print_skip
# if the test configured a short timeout tolerate greater then expected
@@ -1437,8 +1533,8 @@ chk_add_nr()
print_ok
fi
- print_check "echo"
- count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd")
+ print_check "add addr echo rx${tx}"
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtEchoAdd")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$echo_nr" ]; then
@@ -1448,8 +1544,8 @@ chk_add_nr()
fi
if [ $port_nr -gt 0 ]; then
- print_check "pt"
- count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd")
+ print_check "add addr rx with port${rx}"
+ count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtPortAdd")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$port_nr" ]; then
@@ -1458,8 +1554,8 @@ chk_add_nr()
print_ok
fi
- print_check "syn"
- count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx")
+ print_check "syn rx port${tx}"
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortSynRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$syn_nr" ]; then
@@ -1469,8 +1565,8 @@ chk_add_nr()
print_ok
fi
- print_check "synack"
- count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx")
+ print_check "synack rx port${rx}"
+ count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPJoinPortSynAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$syn_ack_nr" ]; then
@@ -1480,8 +1576,8 @@ chk_add_nr()
print_ok
fi
- print_check "ack"
- count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx")
+ print_check "ack rx port${tx}"
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$ack_nr" ]; then
@@ -1491,8 +1587,8 @@ chk_add_nr()
print_ok
fi
- print_check "syn"
- count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx")
+ print_check "syn rx port mismatch${tx}"
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortSynRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mis_syn_nr" ]; then
@@ -1502,8 +1598,8 @@ chk_add_nr()
print_ok
fi
- print_check "ack"
- count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx")
+ print_check "ack rx port mismatch${tx}"
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mis_ack_nr" ]; then
@@ -1524,7 +1620,7 @@ chk_add_tx_nr()
timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
- print_check "add TX"
+ print_check "add addr tx"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx")
if [ -z "$count" ]; then
print_skip
@@ -1536,7 +1632,7 @@ chk_add_tx_nr()
print_ok
fi
- print_check "echo TX"
+ print_check "add addr echo tx"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx")
if [ -z "$count" ]; then
print_skip
@@ -1556,6 +1652,8 @@ chk_rm_nr()
local count
local addr_ns=$ns1
local subflow_ns=$ns2
+ local addr="server"
+ local subflow="client"
local extra_msg=""
shift 2
@@ -1565,16 +1663,14 @@ chk_rm_nr()
shift
done
- if [ -z $invert ]; then
- addr_ns=$ns1
- subflow_ns=$ns2
- elif [ $invert = "true" ]; then
+ if [ "$invert" = "true" ]; then
addr_ns=$ns2
subflow_ns=$ns1
- extra_msg="invert"
+ addr="client"
+ subflow="server"
fi
- print_check "rm"
+ print_check "rm addr rx ${addr}"
count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr")
if [ -z "$count" ]; then
print_skip
@@ -1584,7 +1680,7 @@ chk_rm_nr()
print_ok
fi
- print_check "rmsf"
+ print_check "rm subflow ${subflow}"
count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow")
if [ -z "$count" ]; then
print_skip
@@ -1598,7 +1694,7 @@ chk_rm_nr()
count=$((count + cnt))
if [ "$count" != "$rm_subflow_nr" ]; then
suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
- extra_msg+=" simult"
+ extra_msg="simult"
fi
if [ $count -ge "$rm_subflow_nr" ] && \
[ "$count" -le "$((rm_subflow_nr *2 ))" ]; then
@@ -1619,7 +1715,7 @@ chk_rm_tx_nr()
{
local rm_addr_tx_nr=$1
- print_check "rm TX"
+ print_check "rm addr tx client"
count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx")
if [ -z "$count" ]; then
print_skip
@@ -1634,9 +1730,11 @@ chk_prio_nr()
{
local mp_prio_nr_tx=$1
local mp_prio_nr_rx=$2
+ local mpj_syn=$3
+ local mpj_syn_ack=$4
local count
- print_check "ptx"
+ print_check "mp_prio tx server"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx")
if [ -z "$count" ]; then
print_skip
@@ -1646,7 +1744,7 @@ chk_prio_nr()
print_ok
fi
- print_check "prx"
+ print_check "mp_prio rx client"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx")
if [ -z "$count" ]; then
print_skip
@@ -1655,6 +1753,26 @@ chk_prio_nr()
else
print_ok
fi
+
+ print_check "syn backup"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynBackupRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$mpj_syn" ]; then
+ fail_test "got $count JOIN[s] syn with Backup expected $mpj_syn"
+ else
+ print_ok
+ fi
+
+ print_check "synack backup"
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckBackupRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$mpj_syn_ack" ]; then
+ fail_test "got $count JOIN[s] synack with Backup expected $mpj_syn_ack"
+ else
+ print_ok
+ fi
}
chk_subflow_nr()
@@ -1869,9 +1987,11 @@ subflows_error_tests()
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.12.2 flags subflow
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
+ join_bind_err=1 \
+ chk_join_nr 0 0 0
fi
# multiple subflows, with subflow creation error
@@ -1883,7 +2003,8 @@ subflows_error_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=2 \
+ chk_join_nr 1 1 1
fi
# multiple subflows, with subflow timeout on MPJ
@@ -1895,7 +2016,8 @@ subflows_error_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=2 \
+ chk_join_nr 1 1 1
fi
# multiple subflows, check that the endpoint corresponding to
@@ -1916,7 +2038,8 @@ subflows_error_tests()
# additional subflow could be created only if the PM select
# the later endpoint, skipping the already used one
- chk_join_nr 1 1 1
+ join_syn_tx=2 \
+ chk_join_nr 1 1 1
fi
}
@@ -1955,6 +2078,21 @@ signal_address_tests()
chk_add_nr 1 1
fi
+ # uncommon: subflow and signal flags on the same endpoint
+ # or because the user wrongly picked both, but still expects the client
+ # to create additional subflows
+ if reset "subflow and signal together"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags signal,subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1 0 invert # only initiated by ns2
+ chk_add_nr 0 0 0 # none initiated by ns1
+ chk_rst_nr 0 0 invert # no RST sent by the client
+ chk_rst_nr 0 0 # no RST sent by the server
+ fi
+
# accept and use add_addr with additional subflows
if reset "multiple subflows and signal"; then
pm_nl_set_limits $ns1 0 3
@@ -1987,7 +2125,8 @@ signal_address_tests()
pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
pm_nl_set_limits $ns2 3 3
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=3 \
+ chk_join_nr 1 1 1
chk_add_nr 3 3
fi
@@ -2155,7 +2294,8 @@ add_addr_timeout_tests()
pm_nl_set_limits $ns2 2 2
speed=10 \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=2 \
+ chk_join_nr 1 1 1
chk_add_nr 8 0
fi
}
@@ -2255,7 +2395,8 @@ remove_tests()
pm_nl_set_limits $ns2 2 2
addr_nr_ns1=-3 speed=10 \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=2 join_connect_err=1 \
+ chk_join_nr 1 1 1
chk_add_nr 3 3
chk_rm_nr 3 1 invert
chk_rst_nr 0 0
@@ -2320,7 +2461,8 @@ remove_tests()
pm_nl_set_limits $ns2 3 3
addr_nr_ns1=-8 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1
+ join_syn_tx=3 \
+ chk_join_nr 1 1 1
chk_add_nr 3 3
chk_rm_nr 3 1 invert
chk_rst_nr 0 0
@@ -2612,33 +2754,46 @@ backup_tests()
sflags=nobackup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
- chk_prio_nr 0 1
+ chk_prio_nr 0 1 1 0
fi
# single address, backup
if reset "single address, backup" &&
continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
+ pm_nl_set_limits $ns2 1 1
+ sflags=nobackup speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ chk_prio_nr 1 0 0 1
+ fi
+
+ # single address, switch to backup
+ if reset "single address, switch to backup" &&
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_set_limits $ns2 1 1
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
- chk_prio_nr 1 1
+ chk_prio_nr 1 1 0 0
fi
# single address with port, backup
if reset "single address with port, backup" &&
continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 0 1
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup port 10100
pm_nl_set_limits $ns2 1 1
- sflags=backup speed=slow \
+ sflags=nobackup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
- chk_prio_nr 1 1
+ chk_prio_nr 1 0 0 1
fi
if reset "mpc backup" &&
@@ -2647,17 +2802,26 @@ backup_tests()
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
- chk_prio_nr 0 1
+ chk_prio_nr 0 1 0 0
fi
if reset "mpc backup both sides" &&
continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
- pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns1 10.0.1.1 flags signal,backup
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
+
+ # 10.0.2.2 (non-backup) -> 10.0.1.1 (backup)
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+ # 10.0.1.2 (backup) -> 10.0.2.1 (non-backup)
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ ip -net "$ns2" route add 10.0.2.1 via 10.0.1.1 dev ns2eth1 # force this path
+
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
- chk_prio_nr 1 1
+ chk_join_nr 2 2 2
+ chk_prio_nr 1 1 1 1
fi
if reset "mpc switch to backup" &&
@@ -2666,7 +2830,7 @@ backup_tests()
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
- chk_prio_nr 0 1
+ chk_prio_nr 0 1 0 0
fi
if reset "mpc switch to backup both sides" &&
@@ -2676,7 +2840,7 @@ backup_tests()
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
- chk_prio_nr 1 1
+ chk_prio_nr 1 1 0 0
fi
}
@@ -2868,37 +3032,16 @@ syncookies_tests()
checksum_tests()
{
- # checksum test 0 0
- if reset_with_checksum 0 0; then
- pm_nl_set_limits $ns1 0 1
- pm_nl_set_limits $ns2 0 1
- run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
- fi
-
- # checksum test 1 1
- if reset_with_checksum 1 1; then
- pm_nl_set_limits $ns1 0 1
- pm_nl_set_limits $ns2 0 1
- run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
- fi
-
- # checksum test 0 1
- if reset_with_checksum 0 1; then
- pm_nl_set_limits $ns1 0 1
- pm_nl_set_limits $ns2 0 1
- run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
- fi
-
- # checksum test 1 0
- if reset_with_checksum 1 0; then
- pm_nl_set_limits $ns1 0 1
- pm_nl_set_limits $ns2 0 1
- run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
- fi
+ local checksum_enable
+ for checksum_enable in "0 0" "1 1" "0 1" "1 0"; do
+ # checksum test 0 0, 1 1, 0 1, 1 0
+ if reset_with_checksum ${checksum_enable}; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+ done
}
deny_join_id0_tests()
@@ -2987,6 +3130,9 @@ fullmesh_tests()
pm_nl_set_limits $ns1 1 3
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh
+ fi
fullmesh=1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
@@ -3053,7 +3199,7 @@ fullmesh_tests()
addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
- chk_prio_nr 0 1
+ chk_prio_nr 0 1 1 0
chk_rm_nr 0 1
fi
@@ -3066,7 +3212,7 @@ fullmesh_tests()
sflags=nobackup,nofullmesh speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
- chk_prio_nr 0 1
+ chk_prio_nr 0 1 1 0
chk_rm_nr 0 1
fi
}
@@ -3086,7 +3232,8 @@ fastclose_tests()
MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=1024 fastclose=server \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0 0 0 0 1
+ join_rst_nr=1 \
+ chk_join_nr 0 0 0
chk_fclose_nr 1 1 invert
chk_rst_nr 1 1
fi
@@ -3105,7 +3252,10 @@ fail_tests()
MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=128 \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
+ join_csum_ns1=+1 join_csum_ns2=+0 \
+ join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \
+ join_corrupted_pkts="$(pedit_action_pkts)" \
+ chk_join_nr 0 0 0
chk_fail_nr 1 -1 invert
fi
@@ -3118,7 +3268,10 @@ fail_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
test_linkfail=1024 \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)"
+ join_csum_ns1=1 join_csum_ns2=0 \
+ join_fail_nr=1 join_rst_nr=1 join_infi_nr=0 \
+ join_corrupted_pkts="$(pedit_action_pkts)" \
+ chk_join_nr 1 1 1
fi
}
@@ -3260,6 +3413,36 @@ userspace_pm_chk_get_addr()
fi
}
+# $1: ns ; $2: event type ; $3: count
+chk_evt_nr()
+{
+ local ns=${1}
+ local evt_name="${2}"
+ local exp="${3}"
+
+ local evts="${evts_ns1}"
+ local evt="${!evt_name}"
+ local count
+
+ evt_name="${evt_name:16}" # without MPTCP_LIB_EVENT_
+ [ "${ns}" == "ns2" ] && evts="${evts_ns2}"
+
+ print_check "event ${ns} ${evt_name} (${exp})"
+
+ if [[ "${evt_name}" = "LISTENER_"* ]] &&
+ ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then
+ print_skip "event not supported"
+ return
+ fi
+
+ count=$(grep -cw "type:${evt}" "${evts}")
+ if [ "${count}" != "${exp}" ]; then
+ fail_test "got ${count} events, expected ${exp}"
+ else
+ print_ok
+ fi
+}
+
userspace_tests()
{
# userspace pm type prevents add_addr
@@ -3318,7 +3501,7 @@ userspace_tests()
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 0
- chk_prio_nr 0 0
+ chk_prio_nr 0 0 0 0
fi
# userspace pm type prevents rm_addr
@@ -3340,8 +3523,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns2 2 2
- speed=5 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns1
userspace_pm_add_addr $ns1 10.0.2.1 10
@@ -3356,14 +3539,12 @@ userspace_tests()
"signal"
userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1"
userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1"
- userspace_pm_rm_addr $ns1 10
userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED
userspace_pm_chk_dump_addr "${ns1}" \
- "id 20 flags signal 10.0.3.1" "after rm_addr 10"
+ "id 20 flags signal 10.0.3.1" "after rm_sf 10"
userspace_pm_rm_addr $ns1 20
- userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20"
- chk_rm_nr 2 2 invert
+ chk_rm_nr 1 1 invert
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
kill_events_pids
@@ -3375,8 +3556,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- speed=5 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
userspace_pm_add_sf $ns2 10.0.3.2 20
@@ -3387,12 +3568,11 @@ userspace_tests()
"id 20 flags subflow 10.0.3.2" \
"subflow"
userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2"
- userspace_pm_rm_addr $ns2 20
userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
userspace_pm_chk_dump_addr "${ns2}" \
"" \
- "after rm_addr 20"
- chk_rm_nr 1 1
+ "after rm_sf 20"
+ chk_rm_nr 0 1
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
kill_events_pids
@@ -3404,8 +3584,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- speed=5 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
chk_mptcp_info subflows 0 subflows 0
@@ -3425,8 +3605,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- speed=5 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
userspace_pm_add_sf $ns2 10.0.3.2 20
@@ -3449,8 +3629,8 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns2 1 1
- speed=5 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns1
userspace_pm_add_addr $ns1 10.0.2.1 10
@@ -3480,8 +3660,8 @@ endpoint_tests()
pm_nl_set_limits $ns1 2 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- speed=slow \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns1
@@ -3500,31 +3680,185 @@ endpoint_tests()
mptcp_lib_kill_wait $tests_pid
fi
- if reset "delete and re-add" &&
+ if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT &&
mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
- pm_nl_set_limits $ns1 1 1
- pm_nl_set_limits $ns2 1 1
+ start_events
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_set_limits $ns2 0 3
+ pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
- test_linkfail=4 speed=20 \
- run_tests $ns1 $ns2 10.0.1.1 &
+ { test_linkfail=4 speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
pm_nl_check_endpoint "creation" \
$ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
- chk_subflow_nr "before delete" 2
+ chk_subflow_nr "before delete id 2" 2
chk_mptcp_info subflows 1 subflows 1
pm_nl_del_endpoint $ns2 2 10.0.2.2
sleep 0.5
- chk_subflow_nr "after delete" 1
+ chk_subflow_nr "after delete id 2" 1
chk_mptcp_info subflows 0 subflows 0
- pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
wait_mpj $ns2
- chk_subflow_nr "after re-add" 2
+ chk_subflow_nr "after re-add id 2" 2
chk_mptcp_info subflows 1 subflows 1
+
+ pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
+ wait_attempt_fail $ns2
+ chk_subflow_nr "after new reject" 2
+ chk_mptcp_info subflows 1 subflows 1
+
+ ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
+ pm_nl_del_endpoint $ns2 3 10.0.3.2
+ pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
+ wait_mpj $ns2
+ chk_subflow_nr "after no reject" 3
+ chk_mptcp_info subflows 2 subflows 2
+
+ local i
+ for i in $(seq 3); do
+ pm_nl_del_endpoint $ns2 1 10.0.1.2
+ sleep 0.5
+ chk_subflow_nr "after delete id 0 ($i)" 2
+ chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf
+
+ pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
+ wait_mpj $ns2
+ chk_subflow_nr "after re-add id 0 ($i)" 3
+ chk_mptcp_info subflows 3 subflows 3
+ done
+
+ mptcp_lib_kill_wait $tests_pid
+
+ kill_events_pids
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 4
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 4
+
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 0
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 0
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 5 # one has been closed before estab
+
+ join_syn_tx=7 \
+ chk_join_nr 6 6 6
+ chk_rm_nr 4 4
+ fi
+
+ # remove and re-add
+ if reset_with_events "delete re-add signal" &&
+ mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_set_limits $ns2 3 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
+ # broadcast IP: no packet for this address will be received on ns1
+ pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
+ pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal
+ { test_linkfail=4 speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
+ local tests_pid=$!
+
+ wait_mpj $ns2
+ pm_nl_check_endpoint "creation" \
+ $ns1 10.0.2.1 id 1 flags signal
+ chk_subflow_nr "before delete" 2
+ chk_mptcp_info subflows 1 subflows 1
+
+ pm_nl_del_endpoint $ns1 1 10.0.2.1
+ pm_nl_del_endpoint $ns1 2 224.0.0.1
+ sleep 0.5
+ chk_subflow_nr "after delete" 1
+ chk_mptcp_info subflows 0 subflows 0
+
+ pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
+ wait_mpj $ns2
+ chk_subflow_nr "after re-add" 3
+ chk_mptcp_info subflows 2 subflows 2
+
+ pm_nl_del_endpoint $ns1 42 10.0.1.1
+ sleep 0.5
+ chk_subflow_nr "after delete ID 0" 2
+ chk_mptcp_info subflows 2 subflows 2
+
+ pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
+ wait_mpj $ns2
+ chk_subflow_nr "after re-add ID 0" 3
+ chk_mptcp_info subflows 3 subflows 3
+
+ pm_nl_del_endpoint $ns1 99 10.0.1.1
+ sleep 0.5
+ chk_subflow_nr "after re-delete ID 0" 2
+ chk_mptcp_info subflows 2 subflows 2
+
+ pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal
+ wait_mpj $ns2
+ chk_subflow_nr "after re-re-add ID 0" 3
+ chk_mptcp_info subflows 3 subflows 3
+ mptcp_lib_kill_wait $tests_pid
+
+ kill_events_pids
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 0
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5
+ chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 3
+
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 6
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 4
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5
+ chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 3
+
+ join_connect_err=1 \
+ chk_join_nr 5 5 5
+ chk_add_nr 6 6
+ chk_rm_nr 4 3 invert
+ fi
+
+ # flush and re-add
+ if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT &&
+ mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 1 2
+ # broadcast IP: no packet for this address will be received on ns1
+ pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
+ pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
+ { test_linkfail=4 speed=20 \
+ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
+ local tests_pid=$!
+
+ wait_attempt_fail $ns2
+ chk_subflow_nr "before flush" 1
+ chk_mptcp_info subflows 0 subflows 0
+
+ pm_nl_flush_endpoint $ns2
+ pm_nl_flush_endpoint $ns1
+ wait_rm_addr $ns2 0
+ ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT
+ pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
+ wait_mpj $ns2
+ pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
+ wait_mpj $ns2
mptcp_lib_kill_wait $tests_pid
+
+ join_syn_tx=3 join_connect_err=1 \
+ chk_join_nr 2 2 2
+ chk_add_nr 2 2
+ chk_rm_nr 1 0 invert
fi
}
@@ -3627,9 +3961,11 @@ if [ ${#tests[@]} -eq 0 ]; then
tests=("${all_tests_names[@]}")
fi
+mptcp_lib_subtests_last_ts_reset
for subtests in "${tests[@]}"; do
"${subtests}"
done
+append_prev_results
if [ ${ret} -ne 0 ]; then
echo
@@ -3640,7 +3976,6 @@ if [ ${ret} -ne 0 ]; then
echo
fi
-append_prev_results
mptcp_lib_result_print_all_tap
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 438280e68434..975d4d4c862a 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -12,10 +12,14 @@ readonly KSFT_SKIP=4
readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}"
# These variables are used in some selftests, read-only
+declare -rx MPTCP_LIB_EVENT_CREATED=1 # MPTCP_EVENT_CREATED
+declare -rx MPTCP_LIB_EVENT_ESTABLISHED=2 # MPTCP_EVENT_ESTABLISHED
+declare -rx MPTCP_LIB_EVENT_CLOSED=3 # MPTCP_EVENT_CLOSED
declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED
declare -rx MPTCP_LIB_EVENT_REMOVED=7 # MPTCP_EVENT_REMOVED
declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED
+declare -rx MPTCP_LIB_EVENT_SUB_PRIORITY=13 # MPTCP_EVENT_SUB_PRIORITY
declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED
declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16 # MPTCP_EVENT_LISTENER_CLOSED
@@ -25,6 +29,7 @@ declare -rx MPTCP_LIB_AF_INET6=10
MPTCP_LIB_SUBTESTS=()
MPTCP_LIB_SUBTESTS_DUPLICATED=0
MPTCP_LIB_SUBTEST_FLAKY=0
+MPTCP_LIB_SUBTESTS_LAST_TS_MS=
MPTCP_LIB_TEST_COUNTER=0
MPTCP_LIB_TEST_FORMAT="%02u %-50s"
MPTCP_LIB_IP_MPTCP=0
@@ -201,6 +206,11 @@ mptcp_lib_kversion_ge() {
mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}"
}
+mptcp_lib_subtests_last_ts_reset() {
+ MPTCP_LIB_SUBTESTS_LAST_TS_MS="$(date +%s%3N)"
+}
+mptcp_lib_subtests_last_ts_reset
+
__mptcp_lib_result_check_duplicated() {
local subtest
@@ -215,13 +225,22 @@ __mptcp_lib_result_check_duplicated() {
__mptcp_lib_result_add() {
local result="${1}"
+ local time="time="
+ local ts_prev_ms
shift
local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1))
__mptcp_lib_result_check_duplicated "${*}"
- MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}")
+ # not to add two '#'
+ [[ "${*}" != *"#"* ]] && time="# ${time}"
+
+ ts_prev_ms="${MPTCP_LIB_SUBTESTS_LAST_TS_MS}"
+ mptcp_lib_subtests_last_ts_reset
+ time+="$((MPTCP_LIB_SUBTESTS_LAST_TS_MS - ts_prev_ms))ms"
+
+ MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*} ${time}")
}
# $1: test name
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 68899a303a1a..5e8d5b83e2d0 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -349,6 +349,7 @@ init
make_file "$cin" "client" 1
make_file "$sin" "server" 1
trap cleanup EXIT
+mptcp_lib_subtests_last_ts_reset
run_tests $ns1 $ns2 10.0.1.1
run_tests $ns1 $ns2 dead:beef:1::1
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 2757378b1b13..2e6648a2b2c0 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -137,6 +137,8 @@ check()
fi
}
+mptcp_lib_subtests_last_ts_reset
+
check "show_endpoints" "" "defaults addr list"
default_limits="$(get_limits)"
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 7ad5a59adff2..994a556f46c1 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -19,12 +19,6 @@
#include "linux/mptcp.h"
-#ifndef MPTCP_PM_NAME
-#define MPTCP_PM_NAME "mptcp_pm"
-#endif
-#ifndef MPTCP_PM_EVENTS
-#define MPTCP_PM_EVENTS "mptcp_pm_events"
-#endif
#ifndef IPPROTO_MPTCP
#define IPPROTO_MPTCP 262
#endif
@@ -116,7 +110,7 @@ static int capture_events(int fd, int event_group)
if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
&event_group, sizeof(event_group)) < 0)
- error(1, errno, "could not join the " MPTCP_PM_EVENTS " mcast group");
+ error(1, errno, "could not join the " MPTCP_PM_EV_GRP_NAME " mcast group");
do {
FD_ZERO(&rfds);
@@ -288,7 +282,7 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh, int *pm_family,
if (grp->rta_type == CTRL_ATTR_MCAST_GRP_ID)
*events_mcast_grp = *(__u32 *)RTA_DATA(grp);
else if (grp->rta_type == CTRL_ATTR_MCAST_GRP_NAME &&
- !strcmp(RTA_DATA(grp), MPTCP_PM_EVENTS))
+ !strcmp(RTA_DATA(grp), MPTCP_PM_EV_GRP_NAME))
got_events_grp = 1;
grp = RTA_NEXT(grp, grp_len);
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index f74e1c3c126d..8fa77c8e9b65 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -286,6 +286,7 @@ while getopts "bcdhi" option;do
done
setup
+mptcp_lib_subtests_last_ts_reset
run_test 10 10 0 0 "balanced bwidth"
run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 9cb05978269d..3651f73451cf 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -150,6 +150,7 @@ mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid
server_evts=$(mktemp)
mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid
sleep 0.5
+mptcp_lib_subtests_last_ts_reset
print_title "Init"
print_test "Created network namespaces ns1, ns2"
diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c
new file mode 100644
index 000000000000..64d6805381c5
--- /dev/null
+++ b/tools/testing/selftests/net/ncdevmem.c
@@ -0,0 +1,570 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
+#include <linux/uio.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#define __iovec_defined
+#include <fcntl.h>
+#include <malloc.h>
+#include <error.h>
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+
+#include <linux/memfd.h>
+#include <linux/dma-buf.h>
+#include <linux/udmabuf.h>
+#include <libmnl/libmnl.h>
+#include <linux/types.h>
+#include <linux/netlink.h>
+#include <linux/genetlink.h>
+#include <linux/netdev.h>
+#include <time.h>
+#include <net/if.h>
+
+#include "netdev-user.h"
+#include <ynl.h>
+
+#define PAGE_SHIFT 12
+#define TEST_PREFIX "ncdevmem"
+#define NUM_PAGES 16000
+
+#ifndef MSG_SOCK_DEVMEM
+#define MSG_SOCK_DEVMEM 0x2000000
+#endif
+
+/*
+ * tcpdevmem netcat. Works similarly to netcat but does device memory TCP
+ * instead of regular TCP. Uses udmabuf to mock a dmabuf provider.
+ *
+ * Usage:
+ *
+ * On server:
+ * ncdevmem -s <server IP> -c <client IP> -f eth1 -l -p 5201 -v 7
+ *
+ * On client:
+ * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \
+ * tr \\n \\0 | \
+ * head -c 5G | \
+ * nc <server IP> 5201 -p 5201
+ *
+ * Note this is compatible with regular netcat. i.e. the sender or receiver can
+ * be replaced with regular netcat to test the RX or TX path in isolation.
+ */
+
+static char *server_ip = "192.168.1.4";
+static char *client_ip = "192.168.1.2";
+static char *port = "5201";
+static size_t do_validation;
+static int start_queue = 8;
+static int num_queues = 8;
+static char *ifname = "eth1";
+static unsigned int ifindex;
+static unsigned int dmabuf_id;
+
+void print_bytes(void *ptr, size_t size)
+{
+ unsigned char *p = ptr;
+ int i;
+
+ for (i = 0; i < size; i++)
+ printf("%02hhX ", p[i]);
+ printf("\n");
+}
+
+void print_nonzero_bytes(void *ptr, size_t size)
+{
+ unsigned char *p = ptr;
+ unsigned int i;
+
+ for (i = 0; i < size; i++)
+ putchar(p[i]);
+ printf("\n");
+}
+
+void validate_buffer(void *line, size_t size)
+{
+ static unsigned char seed = 1;
+ unsigned char *ptr = line;
+ int errors = 0;
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ if (ptr[i] != seed) {
+ fprintf(stderr,
+ "Failed validation: expected=%u, actual=%u, index=%lu\n",
+ seed, ptr[i], i);
+ errors++;
+ if (errors > 20)
+ error(1, 0, "validation failed.");
+ }
+ seed++;
+ if (seed == do_validation)
+ seed = 0;
+ }
+
+ fprintf(stdout, "Validated buffer\n");
+}
+
+#define run_command(cmd, ...) \
+ ({ \
+ char command[256]; \
+ memset(command, 0, sizeof(command)); \
+ snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \
+ printf("Running: %s\n", command); \
+ system(command); \
+ })
+
+static int reset_flow_steering(void)
+{
+ int ret = 0;
+
+ ret = run_command("sudo ethtool -K %s ntuple off", ifname);
+ if (ret)
+ return ret;
+
+ return run_command("sudo ethtool -K %s ntuple on", ifname);
+}
+
+static int configure_headersplit(bool on)
+{
+ return run_command("sudo ethtool -G %s tcp-data-split %s", ifname,
+ on ? "on" : "off");
+}
+
+static int configure_rss(void)
+{
+ return run_command("sudo ethtool -X %s equal %d", ifname, start_queue);
+}
+
+static int configure_channels(unsigned int rx, unsigned int tx)
+{
+ return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx);
+}
+
+static int configure_flow_steering(void)
+{
+ return run_command("sudo ethtool -N %s flow-type tcp4 src-ip %s dst-ip %s src-port %s dst-port %s queue %d",
+ ifname, client_ip, server_ip, port, port, start_queue);
+}
+
+static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
+ struct netdev_queue_id *queues,
+ unsigned int n_queue_index, struct ynl_sock **ys)
+{
+ struct netdev_bind_rx_req *req = NULL;
+ struct netdev_bind_rx_rsp *rsp = NULL;
+ struct ynl_error yerr;
+
+ *ys = ynl_sock_create(&ynl_netdev_family, &yerr);
+ if (!*ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return -1;
+ }
+
+ req = netdev_bind_rx_req_alloc();
+ netdev_bind_rx_req_set_ifindex(req, ifindex);
+ netdev_bind_rx_req_set_fd(req, dmabuf_fd);
+ __netdev_bind_rx_req_set_queues(req, queues, n_queue_index);
+
+ rsp = netdev_bind_rx(*ys, req);
+ if (!rsp) {
+ perror("netdev_bind_rx");
+ goto err_close;
+ }
+
+ if (!rsp->_present.id) {
+ perror("id not present");
+ goto err_close;
+ }
+
+ printf("got dmabuf id=%d\n", rsp->id);
+ dmabuf_id = rsp->id;
+
+ netdev_bind_rx_req_free(req);
+ netdev_bind_rx_rsp_free(rsp);
+
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
+ netdev_bind_rx_req_free(req);
+ ynl_sock_destroy(*ys);
+ return -1;
+}
+
+static void create_udmabuf(int *devfd, int *memfd, int *buf, size_t dmabuf_size)
+{
+ struct udmabuf_create create;
+ int ret;
+
+ *devfd = open("/dev/udmabuf", O_RDWR);
+ if (*devfd < 0) {
+ error(70, 0,
+ "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n",
+ TEST_PREFIX);
+ }
+
+ *memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
+ if (*memfd < 0)
+ error(70, 0, "%s: [skip,no-memfd]\n", TEST_PREFIX);
+
+ /* Required for udmabuf */
+ ret = fcntl(*memfd, F_ADD_SEALS, F_SEAL_SHRINK);
+ if (ret < 0)
+ error(73, 0, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
+
+ ret = ftruncate(*memfd, dmabuf_size);
+ if (ret == -1)
+ error(74, 0, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
+
+ memset(&create, 0, sizeof(create));
+
+ create.memfd = *memfd;
+ create.offset = 0;
+ create.size = dmabuf_size;
+ *buf = ioctl(*devfd, UDMABUF_CREATE, &create);
+ if (*buf < 0)
+ error(75, 0, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX);
+}
+
+int do_server(void)
+{
+ char ctrl_data[sizeof(int) * 20000];
+ struct netdev_queue_id *queues;
+ size_t non_page_aligned_frags = 0;
+ struct sockaddr_in client_addr;
+ struct sockaddr_in server_sin;
+ size_t page_aligned_frags = 0;
+ int devfd, memfd, buf, ret;
+ size_t total_received = 0;
+ socklen_t client_addr_len;
+ bool is_devmem = false;
+ char *buf_mem = NULL;
+ struct ynl_sock *ys;
+ size_t dmabuf_size;
+ char iobuf[819200];
+ char buffer[256];
+ int socket_fd;
+ int client_fd;
+ size_t i = 0;
+ int opt = 1;
+
+ dmabuf_size = getpagesize() * NUM_PAGES;
+
+ create_udmabuf(&devfd, &memfd, &buf, dmabuf_size);
+
+ if (reset_flow_steering())
+ error(1, 0, "Failed to reset flow steering\n");
+
+ /* Configure RSS to divert all traffic from our devmem queues */
+ if (configure_rss())
+ error(1, 0, "Failed to configure rss\n");
+
+ /* Flow steer our devmem flows to start_queue */
+ if (configure_flow_steering())
+ error(1, 0, "Failed to configure flow steering\n");
+
+ sleep(1);
+
+ queues = malloc(sizeof(*queues) * num_queues);
+
+ for (i = 0; i < num_queues; i++) {
+ queues[i]._present.type = 1;
+ queues[i]._present.id = 1;
+ queues[i].type = NETDEV_QUEUE_TYPE_RX;
+ queues[i].id = start_queue + i;
+ }
+
+ if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys))
+ error(1, 0, "Failed to bind\n");
+
+ buf_mem = mmap(NULL, dmabuf_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ buf, 0);
+ if (buf_mem == MAP_FAILED)
+ error(1, 0, "mmap()");
+
+ server_sin.sin_family = AF_INET;
+ server_sin.sin_port = htons(atoi(port));
+
+ ret = inet_pton(server_sin.sin_family, server_ip, &server_sin.sin_addr);
+ if (socket < 0)
+ error(79, 0, "%s: [FAIL, create socket]\n", TEST_PREFIX);
+
+ socket_fd = socket(server_sin.sin_family, SOCK_STREAM, 0);
+ if (socket < 0)
+ error(errno, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX);
+
+ ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt));
+ if (ret)
+ error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX);
+
+ ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &opt,
+ sizeof(opt));
+ if (ret)
+ error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX);
+
+ printf("binding to address %s:%d\n", server_ip,
+ ntohs(server_sin.sin_port));
+
+ ret = bind(socket_fd, &server_sin, sizeof(server_sin));
+ if (ret)
+ error(errno, errno, "%s: [FAIL, bind]\n", TEST_PREFIX);
+
+ ret = listen(socket_fd, 1);
+ if (ret)
+ error(errno, errno, "%s: [FAIL, listen]\n", TEST_PREFIX);
+
+ client_addr_len = sizeof(client_addr);
+
+ inet_ntop(server_sin.sin_family, &server_sin.sin_addr, buffer,
+ sizeof(buffer));
+ printf("Waiting or connection on %s:%d\n", buffer,
+ ntohs(server_sin.sin_port));
+ client_fd = accept(socket_fd, &client_addr, &client_addr_len);
+
+ inet_ntop(client_addr.sin_family, &client_addr.sin_addr, buffer,
+ sizeof(buffer));
+ printf("Got connection from %s:%d\n", buffer,
+ ntohs(client_addr.sin_port));
+
+ while (1) {
+ struct iovec iov = { .iov_base = iobuf,
+ .iov_len = sizeof(iobuf) };
+ struct dmabuf_cmsg *dmabuf_cmsg = NULL;
+ struct dma_buf_sync sync = { 0 };
+ struct cmsghdr *cm = NULL;
+ struct msghdr msg = { 0 };
+ struct dmabuf_token token;
+ ssize_t ret;
+
+ is_devmem = false;
+ printf("\n\n");
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = ctrl_data;
+ msg.msg_controllen = sizeof(ctrl_data);
+ ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM);
+ printf("recvmsg ret=%ld\n", ret);
+ if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK))
+ continue;
+ if (ret < 0) {
+ perror("recvmsg");
+ continue;
+ }
+ if (ret == 0) {
+ printf("client exited\n");
+ goto cleanup;
+ }
+
+ i++;
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+ if (cm->cmsg_level != SOL_SOCKET ||
+ (cm->cmsg_type != SCM_DEVMEM_DMABUF &&
+ cm->cmsg_type != SCM_DEVMEM_LINEAR)) {
+ fprintf(stdout, "skipping non-devmem cmsg\n");
+ continue;
+ }
+
+ dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm);
+ is_devmem = true;
+
+ if (cm->cmsg_type == SCM_DEVMEM_LINEAR) {
+ /* TODO: process data copied from skb's linear
+ * buffer.
+ */
+ fprintf(stdout,
+ "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n",
+ dmabuf_cmsg->frag_size);
+
+ continue;
+ }
+
+ token.token_start = dmabuf_cmsg->frag_token;
+ token.token_count = 1;
+
+ total_received += dmabuf_cmsg->frag_size;
+ printf("received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n",
+ dmabuf_cmsg->frag_offset >> PAGE_SHIFT,
+ dmabuf_cmsg->frag_offset % getpagesize(),
+ dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size,
+ dmabuf_cmsg->frag_token, total_received,
+ dmabuf_cmsg->dmabuf_id);
+
+ if (dmabuf_cmsg->dmabuf_id != dmabuf_id)
+ error(1, 0,
+ "received on wrong dmabuf_id: flow steering error\n");
+
+ if (dmabuf_cmsg->frag_size % getpagesize())
+ non_page_aligned_frags++;
+ else
+ page_aligned_frags++;
+
+ sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_START;
+ ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync);
+
+ if (do_validation)
+ validate_buffer(
+ ((unsigned char *)buf_mem) +
+ dmabuf_cmsg->frag_offset,
+ dmabuf_cmsg->frag_size);
+ else
+ print_nonzero_bytes(
+ ((unsigned char *)buf_mem) +
+ dmabuf_cmsg->frag_offset,
+ dmabuf_cmsg->frag_size);
+
+ sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_END;
+ ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync);
+
+ ret = setsockopt(client_fd, SOL_SOCKET,
+ SO_DEVMEM_DONTNEED, &token,
+ sizeof(token));
+ if (ret != 1)
+ error(1, 0,
+ "SO_DEVMEM_DONTNEED not enough tokens");
+ }
+ if (!is_devmem)
+ error(1, 0, "flow steering error\n");
+
+ printf("total_received=%lu\n", total_received);
+ }
+
+ fprintf(stdout, "%s: ok\n", TEST_PREFIX);
+
+ fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
+ page_aligned_frags, non_page_aligned_frags);
+
+ fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
+ page_aligned_frags, non_page_aligned_frags);
+
+cleanup:
+
+ munmap(buf_mem, dmabuf_size);
+ close(client_fd);
+ close(socket_fd);
+ close(buf);
+ close(memfd);
+ close(devfd);
+ ynl_sock_destroy(ys);
+
+ return 0;
+}
+
+void run_devmem_tests(void)
+{
+ struct netdev_queue_id *queues;
+ int devfd, memfd, buf;
+ struct ynl_sock *ys;
+ size_t dmabuf_size;
+ size_t i = 0;
+
+ dmabuf_size = getpagesize() * NUM_PAGES;
+
+ create_udmabuf(&devfd, &memfd, &buf, dmabuf_size);
+
+ /* Configure RSS to divert all traffic from our devmem queues */
+ if (configure_rss())
+ error(1, 0, "rss error\n");
+
+ queues = calloc(num_queues, sizeof(*queues));
+
+ if (configure_headersplit(1))
+ error(1, 0, "Failed to configure header split\n");
+
+ if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys))
+ error(1, 0, "Binding empty queues array should have failed\n");
+
+ for (i = 0; i < num_queues; i++) {
+ queues[i]._present.type = 1;
+ queues[i]._present.id = 1;
+ queues[i].type = NETDEV_QUEUE_TYPE_RX;
+ queues[i].id = start_queue + i;
+ }
+
+ if (configure_headersplit(0))
+ error(1, 0, "Failed to configure header split\n");
+
+ if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys))
+ error(1, 0, "Configure dmabuf with header split off should have failed\n");
+
+ if (configure_headersplit(1))
+ error(1, 0, "Failed to configure header split\n");
+
+ for (i = 0; i < num_queues; i++) {
+ queues[i]._present.type = 1;
+ queues[i]._present.id = 1;
+ queues[i].type = NETDEV_QUEUE_TYPE_RX;
+ queues[i].id = start_queue + i;
+ }
+
+ if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys))
+ error(1, 0, "Failed to bind\n");
+
+ /* Deactivating a bound queue should not be legal */
+ if (!configure_channels(num_queues, num_queues - 1))
+ error(1, 0, "Deactivating a bound queue should be illegal.\n");
+
+ /* Closing the netlink socket does an implicit unbind */
+ ynl_sock_destroy(ys);
+}
+
+int main(int argc, char *argv[])
+{
+ int is_server = 0, opt;
+
+ while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) {
+ switch (opt) {
+ case 'l':
+ is_server = 1;
+ break;
+ case 's':
+ server_ip = optarg;
+ break;
+ case 'c':
+ client_ip = optarg;
+ break;
+ case 'p':
+ port = optarg;
+ break;
+ case 'v':
+ do_validation = atoll(optarg);
+ break;
+ case 'q':
+ num_queues = atoi(optarg);
+ break;
+ case 't':
+ start_queue = atoi(optarg);
+ break;
+ case 'f':
+ ifname = optarg;
+ break;
+ case '?':
+ printf("unknown option: %c\n", optopt);
+ break;
+ }
+ }
+
+ ifindex = if_nametoindex(ifname);
+
+ for (; optind < argc; optind++)
+ printf("extra arguments: %s\n", argv[optind]);
+
+ run_devmem_tests();
+
+ if (is_server)
+ return do_server();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh
index e3afcb424710..438f7b2acc5f 100755
--- a/tools/testing/selftests/net/netdevice.sh
+++ b/tools/testing/selftests/net/netdevice.sh
@@ -67,8 +67,12 @@ kci_net_setup()
return $ksft_skip
fi
- # TODO what ipaddr to set ? DHCP ?
- echo "SKIP: $netdev: set IP address"
+ if [ "$veth_created" ]; then
+ echo "XFAIL: $netdev: set IP address unsupported for veth*"
+ else
+ # TODO what ipaddr to set ? DHCP ?
+ echo "SKIP: $netdev: set IP address"
+ fi
return $ksft_skip
}
@@ -86,7 +90,7 @@ kci_netdev_ethtool_test()
ret=$?
if [ $ret -ne 0 ];then
if [ $ret -eq "$1" ];then
- echo "SKIP: $netdev: ethtool $2 not supported"
+ echo "XFAIL: $netdev: ethtool $2 not supported"
return $ksft_skip
else
echo "FAIL: $netdev: ethtool $2"
@@ -124,11 +128,45 @@ kci_netdev_ethtool()
return 1
fi
echo "PASS: $netdev: ethtool list features"
- #TODO for each non fixed features, try to turn them on/off
+
+ while read -r FEATURE VALUE FIXED; do
+ [ "$FEATURE" != "Features" ] || continue # Skip "Features"
+ [ "$FIXED" != "[fixed]" ] || continue # Skip fixed features
+ feature="${FEATURE%:*}"
+
+ ethtool --offload "$netdev" "$feature" off
+ if [ $? -eq 0 ]; then
+ echo "PASS: $netdev: Turned off feature: $feature"
+ else
+ echo "FAIL: $netdev: Failed to turn off feature:" \
+ "$feature"
+ fi
+
+ ethtool --offload "$netdev" "$feature" on
+ if [ $? -eq 0 ]; then
+ echo "PASS: $netdev: Turned on feature: $feature"
+ else
+ echo "FAIL: $netdev: Failed to turn on feature:" \
+ "$feature"
+ fi
+
+ #restore the feature to its initial state
+ ethtool --offload "$netdev" "$feature" "$VALUE"
+ if [ $? -eq 0 ]; then
+ echo "PASS: $netdev: Restore feature $feature" \
+ "to initial state $VALUE"
+ else
+ echo "FAIL: $netdev: Failed to restore feature" \
+ "$feature to initial state $VALUE"
+ fi
+
+ done < "$TMP_ETHTOOL_FEATURES"
+
rm "$TMP_ETHTOOL_FEATURES"
kci_netdev_ethtool_test 74 'dump' "ethtool -d $netdev"
kci_netdev_ethtool_test 94 'stats' "ethtool -S $netdev"
+
return 0
}
@@ -196,10 +234,24 @@ if [ ! -e "$TMP_LIST_NETDEV" ];then
fi
ip link show |grep '^[0-9]' | grep -oE '[[:space:]].*eth[0-9]*:|[[:space:]].*enp[0-9]s[0-9]:' | cut -d\ -f2 | cut -d: -f1> "$TMP_LIST_NETDEV"
+
+if [ ! -s "$TMP_LIST_NETDEV" ]; then
+ echo "No valid network device found, creating veth pair"
+ ip link add veth0 type veth peer name veth1
+ echo "veth0" > "$TMP_LIST_NETDEV"
+ veth_created=1
+fi
+
while read netdev
do
kci_test_netdev "$netdev"
done < "$TMP_LIST_NETDEV"
+#clean up veth interface pair if it was created
+if [ "$veth_created" ]; then
+ ip link delete veth0
+ echo "Removed veth pair"
+fi
+
rm "$TMP_LIST_NETDEV"
exit 0
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
index 47945b2b3f92..d13fb5ea3e89 100644
--- a/tools/testing/selftests/net/netfilter/Makefile
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -7,6 +7,7 @@ MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
TEST_PROGS := br_netfilter.sh bridge_brouter.sh
+TEST_PROGS += br_netfilter_queue.sh
TEST_PROGS += conntrack_icmp_related.sh
TEST_PROGS += conntrack_ipip_mtu.sh
TEST_PROGS += conntrack_tcp_unreplied.sh
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
new file mode 100755
index 000000000000..6a764d70ab06
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+source lib.sh
+
+checktool "nft --version" "run test without nft tool"
+
+cleanup() {
+ cleanup_all_ns
+}
+
+setup_ns c1 c2 c3 sender
+
+trap cleanup EXIT
+
+nf_queue_wait()
+{
+ grep -q "^ *$1 " "/proc/self/net/netfilter/nfnetlink_queue"
+}
+
+port_add() {
+ ns="$1"
+ dev="$2"
+ a="$3"
+
+ ip link add name "$dev" type veth peer name "$dev" netns "$ns"
+
+ ip -net "$ns" addr add 192.168.1."$a"/24 dev "$dev"
+ ip -net "$ns" link set "$dev" up
+
+ ip link set "$dev" master br0
+ ip link set "$dev" up
+}
+
+[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
+
+ip link add br0 type bridge
+ip addr add 192.168.1.254/24 dev br0
+
+port_add "$c1" "c1" 1
+port_add "$c2" "c2" 2
+port_add "$c3" "c3" 3
+port_add "$sender" "sender" 253
+
+ip link set br0 up
+
+modprobe -q br_netfilter
+
+sysctl net.bridge.bridge-nf-call-iptables=1 || exit 1
+
+ip netns exec "$sender" ping -I sender -c1 192.168.1.1 || exit 1
+ip netns exec "$sender" ping -I sender -c1 192.168.1.2 || exit 2
+ip netns exec "$sender" ping -I sender -c1 192.168.1.3 || exit 3
+
+nft -f /dev/stdin <<EOF
+table ip filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ ct state new counter
+ ip protocol icmp counter queue num 0 bypass
+ }
+}
+EOF
+./nf_queue -t 5 > /dev/null &
+
+busywait 5000 nf_queue_wait
+
+for i in $(seq 1 5); do conntrack -F > /dev/null 2> /dev/null; sleep 0.1 ; done &
+ip netns exec "$sender" ping -I sender -f -c 50 -b 192.168.1.255
+
+read t < /proc/sys/kernel/tainted
+if [ "$t" -eq 0 ];then
+ echo PASS: kernel not tainted
+else
+ echo ERROR: kernel is tainted
+ exit 1
+fi
+
+exit 0
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 63ef80ef47a4..b2dd4db45215 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -87,3 +87,5 @@ CONFIG_XFRM_USER=m
CONFIG_XFRM_STATISTICS=y
CONFIG_NET_PKTGEN=m
CONFIG_TUN=m
+CONFIG_INET_DIAG=m
+CONFIG_SCTP_DIAG=m
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index c61d23a8c88d..d66e3c4dfec6 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -8,7 +8,7 @@
source lib.sh
ret=0
-timeout=2
+timeout=5
cleanup()
{
@@ -25,6 +25,9 @@ cleanup()
}
checktool "nft --version" "test without nft tool"
+checktool "socat -h" "run test without socat"
+
+modprobe -q sctp
trap cleanup EXIT
@@ -36,7 +39,9 @@ TMPFILE2=$(mktemp)
TMPFILE3=$(mktemp)
TMPINPUT=$(mktemp)
-dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
+COUNT=200
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=25
+dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT"
if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
echo "SKIP: No virtual ethernet pair device support in kernel"
@@ -250,45 +255,49 @@ listener_ready()
test_tcp_forward()
{
- ip netns exec "$nsrouter" ./nf_queue -q 2 -t "$timeout" &
+ ip netns exec "$nsrouter" ./nf_queue -q 2 &
local nfqpid=$!
timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
local rpid=$!
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2
ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain"
+ kill "$nfqpid"
}
test_tcp_localhost()
{
- dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
local rpid=$!
- ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+ ip netns exec "$nsrouter" ./nf_queue -q 3 &
local nfqpid=$!
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null
wait "$rpid" && echo "PASS: tcp via loopback"
- wait 2>/dev/null
+ kill "$nfqpid"
}
test_tcp_localhost_connectclose()
{
- ip netns exec "$nsrouter" ./connect_close -p 23456 -t "$timeout" &
- ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+ ip netns exec "$nsrouter" ./nf_queue -q 3 &
+ local nfqpid=$!
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
+ timeout 10 ip netns exec "$nsrouter" ./connect_close -p 23456 -t 3
+
+ kill "$nfqpid"
wait && echo "PASS: tcp via loopback with connect/close"
- wait 2>/dev/null
}
test_tcp_localhost_requeue()
@@ -353,7 +362,7 @@ table inet filter {
}
}
EOF
- ip netns exec "$ns1" ./nf_queue -q 1 -t "$timeout" &
+ ip netns exec "$ns1" ./nf_queue -q 1 &
local nfqpid=$!
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1
@@ -363,6 +372,7 @@ EOF
for n in output post; do
for d in tvrf eth0; do
if ! ip netns exec "$ns1" nft list chain inet filter "$n" | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"; then
+ kill "$nfqpid"
echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
ip netns exec "$ns1" nft list ruleset
ret=1
@@ -371,8 +381,96 @@ EOF
done
done
- wait "$nfqpid" && echo "PASS: icmp+nfqueue via vrf"
- wait 2>/dev/null
+ kill "$nfqpid"
+ echo "PASS: icmp+nfqueue via vrf"
+}
+
+sctp_listener_ready()
+{
+ ss -S -N "$1" -ln -o "sport = :12345" | grep -q 12345
+}
+
+check_output_files()
+{
+ local f1="$1"
+ local f2="$2"
+ local err="$3"
+
+ if ! cmp "$f1" "$f2" ; then
+ echo "FAIL: $err: input and output file differ" 1>&2
+ echo -n " Input file" 1>&2
+ ls -l "$f1" 1>&2
+ echo -n "Output file" 1>&2
+ ls -l "$f2" 1>&2
+ ret=1
+ fi
+}
+
+test_sctp_forward()
+{
+ ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet sctpq {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ sctp dport 12345 queue num 10
+ }
+}
+EOF
+ timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+ local rpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
+
+ ip netns exec "$nsrouter" ./nf_queue -q 10 -G &
+ local nfqpid=$!
+
+ ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
+
+ if ! ip netns exec "$nsrouter" nft delete table inet sctpq; then
+ echo "FAIL: Could not delete sctpq table"
+ exit 1
+ fi
+
+ wait "$rpid" && echo "PASS: sctp and nfqueue in forward chain"
+ kill "$nfqpid"
+
+ check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward"
+}
+
+test_sctp_output()
+{
+ ip netns exec "$ns1" nft -f /dev/stdin <<EOF
+table inet sctpq {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ sctp dport 12345 queue num 11
+ }
+}
+EOF
+ # reduce test file size, software segmentation causes sk wmem increase.
+ dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT"
+
+ timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+ local rpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
+
+ ip netns exec "$ns1" ./nf_queue -q 11 &
+ local nfqpid=$!
+
+ ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
+
+ if ! ip netns exec "$ns1" nft delete table inet sctpq; then
+ echo "FAIL: Could not delete sctpq table"
+ exit 1
+ fi
+
+ # must wait before checking completeness of output file.
+ wait "$rpid" && echo "PASS: sctp and nfqueue in output chain with GSO"
+ kill "$nfqpid"
+
+ check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output"
}
test_queue_removal()
@@ -388,7 +486,7 @@ table ip filter {
}
}
EOF
- ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 -t "$timeout" &
+ ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 &
local nfqpid=$!
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0
@@ -443,11 +541,16 @@ test_queue 10
# same. We queue to a second program as well.
load_ruleset "filter2" 20
test_queue 20
+ip netns exec "$ns1" nft flush ruleset
test_tcp_forward
test_tcp_localhost
test_tcp_localhost_connectclose
test_tcp_localhost_requeue
+test_sctp_forward
+test_sctp_output
+
+# should be last, adds vrf device in ns1 and changes routes
test_icmp_vrf
test_queue_removal
diff --git a/tools/testing/selftests/net/packetdrill/Makefile b/tools/testing/selftests/net/packetdrill/Makefile
new file mode 100644
index 000000000000..31cfb666ba8b
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_INCLUDES := ksft_runner.sh \
+ defaults.sh \
+ set_sysctls.py \
+ ../../kselftest/ktap_helpers.sh
+
+TEST_PROGS := $(wildcard *.pkt)
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/packetdrill/config b/tools/testing/selftests/net/packetdrill/config
new file mode 100644
index 000000000000..0237ed98f3c0
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/config
@@ -0,0 +1,11 @@
+CONFIG_IPV6=y
+CONFIG_HZ_1000=y
+CONFIG_HZ=1000
+CONFIG_NET_NS=y
+CONFIG_NET_SCH_FIFO=y
+CONFIG_NET_SCH_FQ=y
+CONFIG_PROC_SYSCTL=y
+CONFIG_SYN_COOKIES=y
+CONFIG_TCP_CONG_CUBIC=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_TUN=y
diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh
new file mode 100755
index 000000000000..1095a7b22f44
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/defaults.sh
@@ -0,0 +1,63 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Set standard production config values that relate to TCP behavior.
+
+# Flush old cached data (fastopen cookies).
+ip tcp_metrics flush all > /dev/null 2>&1
+
+# TCP min, default, and max receive and send buffer sizes.
+sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))"
+sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304"
+
+# TCP timestamps.
+sysctl -q net.ipv4.tcp_timestamps=1
+
+# TCP SYN(ACK) retry thresholds
+sysctl -q net.ipv4.tcp_syn_retries=5
+sysctl -q net.ipv4.tcp_synack_retries=5
+
+# TCP Forward RTO-Recovery, RFC 5682.
+sysctl -q net.ipv4.tcp_frto=2
+
+# TCP Selective Acknowledgements (SACK)
+sysctl -q net.ipv4.tcp_sack=1
+
+# TCP Duplicate Selective Acknowledgements (DSACK)
+sysctl -q net.ipv4.tcp_dsack=1
+
+# TCP FACK (Forward Acknowldgement)
+sysctl -q net.ipv4.tcp_fack=0
+
+# TCP reordering degree ("dupthresh" threshold for entering Fast Recovery).
+sysctl -q net.ipv4.tcp_reordering=3
+
+# TCP congestion control.
+sysctl -q net.ipv4.tcp_congestion_control=cubic
+
+# TCP slow start after idle.
+sysctl -q net.ipv4.tcp_slow_start_after_idle=0
+
+# TCP RACK and TLP.
+sysctl -q net.ipv4.tcp_early_retrans=4 net.ipv4.tcp_recovery=1
+
+# TCP method for deciding when to defer sending to accumulate big TSO packets.
+sysctl -q net.ipv4.tcp_tso_win_divisor=3
+
+# TCP Explicit Congestion Notification (ECN)
+sysctl -q net.ipv4.tcp_ecn=0
+
+sysctl -q net.ipv4.tcp_pacing_ss_ratio=200
+sysctl -q net.ipv4.tcp_pacing_ca_ratio=120
+sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1
+
+sysctl -q net.ipv4.tcp_fastopen=0x70403
+sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4
+
+sysctl -q net.ipv4.tcp_syncookies=1
+
+# Override the default qdisc on the tun device.
+# Many tests fail with timing errors if the default
+# is FQ and that paces their flows.
+tc qdisc add dev tun0 root pfifo
+
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
new file mode 100755
index 000000000000..7478c0c0c9aa
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh"
+
+readonly ipv4_args=('--ip_version=ipv4 '
+ '--local_ip=192.168.0.1 '
+ '--gateway_ip=192.168.0.1 '
+ '--netmask_ip=255.255.0.0 '
+ '--remote_ip=192.0.2.1 '
+ '-D CMSG_LEVEL_IP=SOL_IP '
+ '-D CMSG_TYPE_RECVERR=IP_RECVERR ')
+
+readonly ipv6_args=('--ip_version=ipv6 '
+ '--mtu=1520 '
+ '--local_ip=fd3d:0a0b:17d6::1 '
+ '--gateway_ip=fd3d:0a0b:17d6:8888::1 '
+ '--remote_ip=fd3d:fa7b:d17d::1 '
+ '-D CMSG_LEVEL_IP=SOL_IPV6 '
+ '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ')
+
+if [ $# -ne 1 ]; then
+ ktap_exit_fail_msg "usage: $0 <script>"
+ exit "$KSFT_FAIL"
+fi
+script="$1"
+
+if [ -z "$(which packetdrill)" ]; then
+ ktap_skip_all "packetdrill not found in PATH"
+ exit "$KSFT_SKIP"
+fi
+
+ktap_print_header
+ktap_set_plan 2
+
+unshare -n packetdrill ${ipv4_args[@]} $(basename $script) > /dev/null \
+ && ktap_test_pass "ipv4" || ktap_test_fail "ipv4"
+unshare -n packetdrill ${ipv6_args[@]} $(basename $script) > /dev/null \
+ && ktap_test_pass "ipv6" || ktap_test_fail "ipv6"
+
+ktap_finished
diff --git a/tools/testing/selftests/net/packetdrill/set_sysctls.py b/tools/testing/selftests/net/packetdrill/set_sysctls.py
new file mode 100755
index 000000000000..5ddf456ae973
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/set_sysctls.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Sets sysctl values and writes a file that restores them.
+
+The arguments are of the form "<proc-file>=<val>" separated by spaces.
+The program first reads the current value of the proc-file and creates
+a shell script named "/tmp/sysctl_restore_${PACKETDRILL_PID}.sh" which
+restores the values when executed. It then sets the new values.
+
+PACKETDRILL_PID is set by packetdrill to the pid of itself, so a .pkt
+file could restore sysctls by running `/tmp/sysctl_restore_${PPID}.sh`
+at the end.
+"""
+
+import os
+import subprocess
+import sys
+
+filename = '/tmp/sysctl_restore_%s.sh' % os.environ['PACKETDRILL_PID']
+
+# Open file for restoring sysctl values
+restore_file = open(filename, 'w')
+print('#!/bin/bash', file=restore_file)
+
+for a in sys.argv[1:]:
+ sysctl = a.split('=')
+ # sysctl[0] contains the proc-file name, sysctl[1] the new value
+
+ # read current value and add restore command to file
+ cur_val = subprocess.check_output(['cat', sysctl[0]], universal_newlines=True)
+ print('echo "%s" > %s' % (cur_val.strip(), sysctl[0]), file=restore_file)
+
+ # set new value
+ cmd = 'echo "%s" > %s' % (sysctl[1], sysctl[0])
+ os.system(cmd)
+
+os.system('chmod u+x %s' % filename)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
new file mode 100644
index 000000000000..df49c67645ac
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP_INQ and TCP_CM_INQ on the client side.
+`./defaults.sh
+`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Connect to the server and enable TCP_INQ.
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 setsockopt(3, SOL_TCP, TCP_INQ, [1], 4) = 0
+
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 5792 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 7>
+ +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700>
+
+// Now we have 10K of data ready on the socket.
+ +0 < . 1:10001(10000) ack 1 win 514
+ +0 > . 1:1(0) ack 10001 <nop,nop,TS val 200 ecr 700>
+
+// We read 1K and we should have 9K ready to read.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{..., 1000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=9000}]}, 0) = 1000
+// We read 9K and we should have no further data ready to read.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{..., 9000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=0}]}, 0) = 9000
+
+// Server sends more data and closes the connections.
+ +0 < F. 10001:20001(10000) ack 1 win 514
+ +0 > . 1:1(0) ack 20002 <nop,nop,TS val 200 ecr 700>
+
+// We read 10K and we should have one "fake" byte because the connection is
+// closed.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{..., 10000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=1}]}, 0) = 10000
+// Now, receive EOF.
+ +0 read(3, ..., 2000) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
new file mode 100644
index 000000000000..04a5e2590c62
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP_INQ and TCP_CM_INQ on the server side.
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+// Accept the connection and enable TCP_INQ.
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_TCP, TCP_INQ, [1], 4) = 0
+
+// Now we have 10K of data ready on the socket.
+ +0 < . 1:10001(10000) ack 1 win 514
+ +0 > . 1:1(0) ack 10001
+
+// We read 2K and we should have 8K ready to read.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 2000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=8000}]}, 0) = 2000
+// We read 8K and we should have no further data ready to read.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 8000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=0}]}, 0) = 8000
+// Client sends more data and closes the connections.
+ +0 < F. 10001:20001(10000) ack 1 win 514
+ +0 > . 1:1(0) ack 20002
+
+// We read 10K and we should have one "fake" byte because the connection is
+// closed.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 10000}],
+ msg_flags=0,
+ msg_control=[{cmsg_level=SOL_TCP,
+ cmsg_type=TCP_CM_INQ,
+ cmsg_data=1}]}, 0) = 10000
+// Now, receive error.
+ +0 read(3, ..., 2000) = -1 ENOTCONN (Transport endpoint is not connected)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt
new file mode 100644
index 000000000000..25dfef95d3f8
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test what happens when client does not provide MD5 on SYN,
+// but then does on the ACK that completes the three-way handshake.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// Ooh, weird: client provides MD5 option on the ACK:
+ +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop>
+ +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop>
+
+// The TCP listener refcount should be 2, but on buggy kernels it can be 0:
+ +0 `grep " 0A " /proc/net/tcp /proc/net/tcp6 | grep ":1F90"`
+
+// Now here comes the legit ACK:
+ +.01 < . 1:1(0) ack 1 win 514
+
+// Make sure the connection is OK:
+ +0 accept(3, ..., ...) = 4
+
+ +.01 write(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt
new file mode 100644
index 000000000000..795c476d222d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow.
+// In this variant, the receiver ACKs every packet.
+
+// Set up config. To keep things simple, disable the
+// mechanism that defers sending in order to send bigger TSO packets.
+`./defaults.sh
+sysctl -q net.ipv4.tcp_tso_win_divisor=100`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 30000) = 30000
+ +0 > P. 1:10001(10000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 1001 win 257
+ +0 > P. 10001:12001(2000) ack 1
+
+ +0 < . 1:1(0) ack 2001 win 257
+ +0 > P. 12001:14001(2000) ack 1
+
++.005 < . 1:1(0) ack 3001 win 257
+ +0 > P. 14001:16001(2000) ack 1
+
+ +0 < . 1:1(0) ack 4001 win 257
+ +0 > P. 16001:18001(2000) ack 1
+
++.005 < . 1:1(0) ack 5001 win 257
+ +0 > P. 18001:20001(2000) ack 1
+
+ +0 < . 1:1(0) ack 6001 win 257
+ +0 > P. 20001:22001(2000) ack 1
+
++.005 < . 1:1(0) ack 7001 win 257
+ +0 > P. 22001:24001(2000) ack 1
+
+ +0 < . 1:1(0) ack 8001 win 257
+ +0 > P. 24001:26001(2000) ack 1
+
++.005 < . 1:1(0) ack 9001 win 257
+ +0 > P. 26001:28001(2000) ack 1
+
+ +0 < . 1:1(0) ack 10001 win 257
+ +0 > P. 28001:30001(2000) ack 1
+
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt
new file mode 100644
index 000000000000..9212ae1fd0f2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when an outstanding flight of packets is
+// less than the current cwnd, and not big enough to bump up cwnd.
+//
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Set up config.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+
+// Only send 5 packets.
+ +0 write(4, ..., 5000) = 5000
+ +0 > P. 1:5001(5000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 2001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 4001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 5001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt
new file mode 100644
index 000000000000..416c901ddf51
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when an outstanding flight of packets is
+// less than the current cwnd, but still big enough that in slow
+// start we want to increase our cwnd a little.
+//
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Set up config.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+
+// Only send 6 packets.
+ +0 write(4, ..., 6000) = 6000
+ +0 > P. 1:6001(6000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 2001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 4001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }%
+
+ +0 < . 1:1(0) ack 6001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt
new file mode 100644
index 000000000000..a894b7d4559c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow.
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Set up config. To keep things simple, disable the
+// mechanism that defers sending in order to send bigger TSO packets.
+`./defaults.sh
+sysctl -q net.ipv4.tcp_tso_win_divisor=100`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 30000) = 30000
+ +0 > P. 1:10001(10000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2001 win 257
+ +0 > P. 10001:14001(4000) ack 1
+
++.005 < . 1:1(0) ack 4001 win 257
+ +0 > P. 14001:18001(4000) ack 1
+
++.005 < . 1:1(0) ack 6001 win 257
+ +0 > P. 18001:22001(4000) ack 1
+
++.005 < . 1:1(0) ack 8001 win 257
+ +0 > P. 22001:26001(4000) ack 1
+
++.005 < . 1:1(0) ack 10001 win 257
+ +0 > P. 26001:30001(4000) ack 1
+
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt
new file mode 100644
index 000000000000..065fae9e9abd
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow.
+// In this variant, the receiver sends one ACK per 4 packets.
+
+// Set up config. To keep things simple, disable the
+// mechanism that defers sending in order to send bigger TSO packets.
+`./defaults.sh
+sysctl -q net.ipv4.tcp_tso_win_divisor=100`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 30000) = 30000
+ +0 > P. 1:10001(10000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +.11 < . 1:1(0) ack 4001 win 257
+ +0 > P. 10001:18001(8000) ack 1
+
+ +.01 < . 1:1(0) ack 8001 win 257
+ +0 > P. 18001:26001(8000) ack 1
+
++.005 < . 1:1(0) ack 10001 win 257
+ +0 > P. 26001:30001(4000) ack 1
+
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt
new file mode 100644
index 000000000000..11b213be1138
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start after idle
+// This test expects tso size to be at least initial cwnd * mss
+
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \
+ /proc/sys/net/ipv4/tcp_min_tso_segs=10`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 511
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 26000) = 26000
+ +0 > P. 1:5001(5000) ack 1
+ +0 > P. 5001:10001(5000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +.1 < . 1:1(0) ack 10001 win 511
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
+ +0 > P. 10001:20001(10000) ack 1
+ +0 > P. 20001:26001(6000) ack 1
+
+ +.1 < . 1:1(0) ack 26001 win 511
+ +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+ +2 write(4, ..., 20000) = 20000
+// If slow start after idle works properly, we should send 5 MSS here (cwnd/2)
+ +0 > P. 26001:31001(5000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+// Reset sysctls
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt
new file mode 100644
index 000000000000..577ed8c8852c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start after window update
+// This test expects tso size to be at least initial cwnd * mss
+
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \
+ /proc/sys/net/ipv4/tcp_min_tso_segs=10`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 511
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 26000) = 26000
+ +0 > P. 1:5001(5000) ack 1
+ +0 > P. 5001:10001(5000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +.1 < . 1:1(0) ack 10001 win 511
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
+ +0 > P. 10001:20001(10000) ack 1
+ +0 > P. 20001:26001(6000) ack 1
+
+ +.1 < . 1:1(0) ack 26001 win 0
+ +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+ +0 write(4, ..., 20000) = 20000
+// 1st win0 probe
++.3~+.310 > . 26000:26000(0) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+// 2nd win0 probe
++.6~+.620 > . 26000:26000(0) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+// 3rd win0 probe
++1.2~+1.240 > . 26000:26000(0) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }%
+
+ +.9 < . 1:1(0) ack 26001 win 511
+ +0 > P. 26001:31001(5000) ack 1
+
+// Reset sysctls
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt
new file mode 100644
index 000000000000..869f32c35a2a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when application-limited: in this case,
+// with IW10, if we don't fully use our cwnd but instead
+// send just 9 packets, then cwnd should grow to twice that
+// value, or 18 packets.
+
+// Set up config.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 9000) = 9000
+ +0 > P. 1:9001(9000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 4001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 6001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 8001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 9001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt
new file mode 100644
index 000000000000..0f77b7955db6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when application-limited: in this case,
+// with IW10, if we send exactly 10 packets then cwnd should grow to 20.
+
+// Set up config.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 4001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 6001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 8001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
+
++.005 < . 1:1(0) ack 10001 win 257
+ +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt
new file mode 100644
index 000000000000..7e9c83d617c2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test of slow start when not application-limited, so that
+// the cwnd continues to grow, even if TSQ triggers.
+// In this variant, the receiver ACKs every other packet,
+// approximating standard delayed ACKs.
+
+// Note we use FQ/pacing to check if TCP Small Queues is not hurting
+
+`./defaults.sh
+tc qdisc replace dev tun0 root fq
+sysctl -q net/ipv4/tcp_pacing_ss_ratio=200
+sysctl -e -q net.ipv4.tcp_min_tso_segs=2`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 500
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0
+
+ +0 write(4, ..., 40000) = 40000
+// This might change if we cook the initial packet with 10 MSS.
+ +0 > P. 1:2921(2920) ack 1
+ +0 > P. 2921:5841(2920) ack 1
+ +0 > P. 5841:8761(2920) ack 1
+ +0 > P. 8761:11681(2920) ack 1
+ +0 > P. 11681:14601(2920) ack 1
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
++.105 < . 1:1(0) ack 2921 win 500
+ +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }%
+
+// Note: after this commit : "net_sched: sch_fq: account for schedule/timers drifts"
+// FQ notices that this packet missed the 'time to send next packet' computed
+// when prior packet (11681:14601(2920)) was sent.
+// So FQ will allow following packet to be sent a bit earlier (quantum/2)
+// (FQ commit allows an application/cwnd limited flow to get at most quantum/2 extra credit)
+ +0 > P. 14601:17521(2920) ack 1
+
++.003 < . 1:1(0) ack 5841 win 500
+ +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }%
+
++.001 > P. 17521:20441(2920) ack 1
+
++.001 < . 1:1(0) ack 8761 win 500
+ +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }%
+
+// remaining packets are delivered at a constant rate.
++.007 > P. 20441:23361(2920) ack 1
+
++.002 < . 1:1(0) ack 11681 win 500
+ +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }%
++.001 < . 1:1(0) ack 14601 win 500
+
++.004 > P. 23361:26281(2920) ack 1
+
++.007 > P. 26281:29201(2920) ack 1
+
+ +0 %{ assert tcpi_snd_cwnd == 20, 'cwnd=%d' % tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
new file mode 100644
index 000000000000..a82c8899d36b
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+// basic zerocopy test:
+//
+// send a packet with MSG_ZEROCOPY and receive the notification ID
+// repeat and verify IDs are consecutive
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 4001:8001(4000) ack 1
+ +0 < . 1:1(0) ack 8001 win 257
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=1,
+ ee_data=1}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
new file mode 100644
index 000000000000..c01915e7f4a1
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+// batch zerocopy test:
+//
+// send multiple packets, then read one range of all notifications.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_MARK, [666], 4) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 4001:8001(4000) ack 1
+ +0 < . 1:1(0) ack 8001 win 257
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=1}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
new file mode 100644
index 000000000000..6509882932e9
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Minimal client-side zerocopy test
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0...0 connect(4, ..., ...) = 0
+
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8>
+ +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > . 1:1(0) ack 1
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
new file mode 100644
index 000000000000..2cd78755cb2a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+// send with MSG_ZEROCOPY on a non-established socket
+//
+// verify that a send in state TCP_CLOSE correctly aborts the zerocopy
+// operation, specifically it does not increment the zerocopy counter.
+//
+// First send on a closed socket and wait for (absent) notification.
+// Then connect and send and verify that notification nr. is zero.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = -1 EPIPE (Broken pipe)
+
+ +0.1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable)
+
+ +0...0 connect(4, ..., ...) = 0
+
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8>
+ +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > . 1:1(0) ack 1
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
new file mode 100644
index 000000000000..7671c20e01cf
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+// epoll zerocopy test:
+//
+// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but
+// it is not level-triggered either.
+//
+// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
+// is correctly fired only once, when EPOLLET is set. send another packet with
+// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 epoll_create(1) = 5
+ +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, {events=EPOLLOUT|EPOLLET, fd=4}) = 0
+ +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 4001:8001(4000) ack 1
+ +0 < . 1:1(0) ack 8001 win 257
+
+// receive only one EPOLLERR for the two sends above.
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 8001:12001(4000) ack 1
+ +0 < . 1:1(0) ack 12001 win 257
+
+// receive only one EPOLLERR for the third send above.
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=2}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
new file mode 100644
index 000000000000..fadc480fdb7f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+// epoll zerocopy test:
+//
+// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but
+// it is not level-triggered either. this tests verify that the same behavior is
+// maintained when we have EPOLLEXCLUSIVE.
+//
+// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
+// is correctly fired only once, when EPOLLET is set. send another packet with
+// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 epoll_create(1) = 5
+ +0 epoll_ctl(5, EPOLL_CTL_ADD, 4,
+ {events=EPOLLOUT|EPOLLET|EPOLLEXCLUSIVE, fd=4}) = 0
+ +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 4001:8001(4000) ack 1
+ +0 < . 1:1(0) ack 8001 win 257
+
+// receive only one EPOLLERR for the two sends above.
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 8001:12001(4000) ack 1
+ +0 < . 1:1(0) ack 12001 win 257
+
+// receive only one EPOLLERR for the third send above.
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=2}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
new file mode 100644
index 000000000000..5bfa0d1d2f4a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// epoll zerocopy test:
+//
+// This is a test to confirm that EPOLLERR is only fired once for an FD when
+// EPOLLONESHOT is set.
+//
+// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
+// is correctly fired only once, when EPOLLONESHOT is set. send another packet
+// with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and
+// confirm that EPOLLERR is correctly set.
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 epoll_create(1) = 5
+ +0 epoll_ctl(5, EPOLL_CTL_ADD, 4,
+ {events=EPOLLOUT|EPOLLET|EPOLLONESHOT, fd=4}) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 1:4001(4000) ack 1
+ +0 < . 1:1(0) ack 4001 win 257
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 4001:8001(4000) ack 1
+ +0 < . 1:1(0) ack 8001 win 257
+
+// receive only one EPOLLERR for the two sends above.
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000
+ +0 > P. 8001:12001(4000) ack 1
+ +0 < . 1:1(0) ack 12001 win 257
+
+// receive no EPOLLERR for the third send above.
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+// rearm the FD and verify the EPOLLERR is fired again.
+ +0 epoll_ctl(5, EPOLL_CTL_MOD, 4, {events=EPOLLOUT|EPOLLONESHOT, fd=4}) = 0
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1
+ +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0
+
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=2}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
new file mode 100644
index 000000000000..4a73bbf46961
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+// Fastopen client zerocopy test:
+//
+// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the
+// kernel returns the notification ID.
+//
+// Fastopen requires a stored cookie. Create two sockets. The first
+// one will have no data in the initial send. On return 0 the
+// zerocopy notification counter is not incremented. Verify this too.
+
+`./defaults.sh`
+
+// Send a FastOpen request, no cookie yet so no data in SYN
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 sendto(3, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 1000 ecr 0,nop,wscale 8,FO,nop,nop>
+ +.01 < S. 123:123(0) ack 1 win 14600 <mss 940,TS val 2000 ecr 1000,sackOK,nop,wscale 6, FO abcd1234,nop,nop>
+ +0 > . 1:1(0) ack 1 <nop,nop,TS val 1001 ecr 2000>
+
+// Read from error queue: no zerocopy notification
+ +1 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable)
+
+ +.01 close(3) = 0
+ +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1002 ecr 2000>
+ +.01 < F. 1:1(0) ack 2 win 92 <nop,nop,TS val 2001 ecr 1002>
+ +0 > . 2:2(0) ack 2 <nop,nop,TS val 1003 ecr 2001>
+
+// Send another Fastopen request, now SYN will have data
+ +.07 `sysctl -q net.ipv4.tcp_timestamps=0`
+ +.1 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5
+ +0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 setsockopt(5, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 sendto(5, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = 500
+ +0 > S 0:500(500) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO abcd1234,nop,nop>
+ +.05 < S. 5678:5678(0) ack 501 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6>
+ +0 > . 501:501(0) ack 1
+
+// Read from error queue: now has first zerocopy notification
+ +0.5 recvmsg(5, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
new file mode 100644
index 000000000000..36086c5877ce
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+// Fastopen server zerocopy test:
+//
+// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the
+// kernel returns the notification ID.
+
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207`
+
+// Set up a TFO server listening socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +.1 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [2], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+
+// Client sends a SYN with data.
+ +.1 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK>
+
+// Server accepts and replies with data.
++.005 accept(3, ..., ...) = 4
+ +0 read(4, ..., 1024) = 1000
+ +0 sendto(4, ..., 1000, MSG_ZEROCOPY, ..., ...) = 1000
+ +0 > P. 1:1001(1000) ack 1001
+ +.05 < . 1001:1001(0) ack 1001 win 32792
+
+// Read from error queue: now has first zerocopy notification
+ +0.1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
+
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
new file mode 100644
index 000000000000..672f817faca0
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+// tcp_MAX_SKB_FRAGS test
+//
+// Verify that sending an iovec of tcp_MAX_SKB_FRAGS + 1 elements will
+// 1) fit in a single packet without zerocopy
+// 2) spill over into a second packet with zerocopy,
+// because each iovec element becomes a frag
+// 3) the PSH bit is set on an skb when it runs out of fragments
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+
+ // Each pinned zerocopy page is fully accounted to skb->truesize.
+ // This test generates a worst case packet with each frag storing
+ // one byte, but increasing truesize with a page (64KB on PPC).
+ +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [2000000], 4) = 0
+
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ // send an iov of 18 elements: just becomes a linear skb
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}],
+ msg_flags=0}, 0) = 18
+
+ +0 > P. 1:19(18) ack 1
+ +0 < . 1:1(0) ack 19 win 257
+
+ // send a zerocopy iov of 18 elements:
+ +1 sendmsg(4, {msg_name(...)=...,
+ msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}],
+ msg_flags=0}, MSG_ZEROCOPY) = 18
+
+ // verify that it is split in one skb of 17 frags + 1 of 1 frag
+ // verify that both have the PSH bit set
+ +0 > P. 19:36(17) ack 1
+ +0 < . 1:1(0) ack 36 win 257
+
+ +0 > P. 36:37(1) ack 1
+ +0 < . 1:1(0) ack 37 win 257
+
+ +1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
+
+ // send a zerocopy iov of 64 elements:
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(64)=[{..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1},
+ {..., 1}, {..., 1}, {..., 1}, {..., 1}],
+ msg_flags=0}, MSG_ZEROCOPY) = 64
+
+ // verify that it is split in skbs with 17 frags
+ +0 > P. 37:54(17) ack 1
+ +0 < . 1:1(0) ack 54 win 257
+
+ +0 > P. 54:71(17) ack 1
+ +0 < . 1:1(0) ack 71 win 257
+
+ +0 > P. 71:88(17) ack 1
+ +0 < . 1:1(0) ack 88 win 257
+
+ +0 > P. 88:101(13) ack 1
+ +0 < . 1:1(0) ack 101 win 257
+
+ +1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=1,
+ ee_data=1}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
new file mode 100644
index 000000000000..a9a1ac0aea4f
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+// small packet zerocopy test:
+//
+// verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy
+// packets of all sizes, including the smallest payload, 1B.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ // send 1B
+ +0 send(4, ..., 1, MSG_ZEROCOPY) = 1
+ +0 > P. 1:2(1) ack 1
+ +0 < . 1:1(0) ack 2 win 257
+
+ +1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=0,
+ ee_data=0}}
+ ]}, MSG_ERRQUEUE) = 0
+
+ // send 1B again
+ +0 send(4, ..., 1, MSG_ZEROCOPY) = 1
+ +0 > P. 2:3(1) ack 1
+ +0 < . 1:1(0) ack 3 win 257
+
+ +1 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE,
+ msg_control=[
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=0,
+ ee_origin=SO_EE_ORIGIN_ZEROCOPY,
+ ee_type=0,
+ ee_code=SO_EE_CODE_ZEROCOPY_COPIED,
+ ee_info=1,
+ ee_data=1}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 5175c0c83a23..569bce8b6383 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -681,13 +681,7 @@ setup_xfrm() {
}
setup_nettest_xfrm() {
- if ! which nettest >/dev/null; then
- PATH=$PWD:$PATH
- if ! which nettest >/dev/null; then
- echo "'nettest' command not found; skipping tests"
- return 1
- fi
- fi
+ check_gen_prog "nettest"
[ ${1} -eq 6 ] && proto="-6" || proto=""
port=${2}
@@ -1447,7 +1441,7 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
size=$(du -sb $tmpoutfile)
size=${size%%/tmp/*}
- [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1
+ [ $size -ne 1048576 ] && err "File size $size mismatches expected value in locally bridged vxlan test" && return 1
done
rm -f "$tmpoutfile"
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 1a736f700be4..4f31e92ebd96 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -165,9 +165,9 @@ static void sock_fanout_set_ebpf(int fd)
attr.insns = (unsigned long) prog;
attr.insn_cnt = ARRAY_SIZE(prog);
attr.license = (unsigned long) "GPL";
- attr.log_buf = (unsigned long) log_buf,
- attr.log_size = sizeof(log_buf),
- attr.log_level = 1,
+ attr.log_buf = (unsigned long) log_buf;
+ attr.log_size = sizeof(log_buf);
+ attr.log_level = 1;
pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
if (pfd < 0) {
diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile
new file mode 100644
index 000000000000..da9714bc7aad
--- /dev/null
+++ b/tools/testing/selftests/net/rds/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+
+all:
+ @echo mk_build_dir="$(shell pwd)" > include.sh
+
+TEST_PROGS := run.sh \
+ include.sh \
+ test.py
+
+EXTRA_CLEAN := /tmp/rds_logs
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/rds/README.txt b/tools/testing/selftests/net/rds/README.txt
new file mode 100644
index 000000000000..cbde2951ab13
--- /dev/null
+++ b/tools/testing/selftests/net/rds/README.txt
@@ -0,0 +1,41 @@
+RDS self-tests
+==============
+
+These scripts provide a coverage test for RDS-TCP by creating two
+network namespaces and running rds packets between them. A loopback
+network is provisioned with optional probability of packet loss or
+corruption. A workload of 50000 hashes, each 64 characters in size,
+are passed over an RDS socket on this test network. A passing test means
+the RDS-TCP stack was able to recover properly. The provided config.sh
+can be used to compile the kernel with the necessary gcov options. The
+kernel may optionally be configured to omit the coverage report as well.
+
+USAGE:
+ run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]
+ [-u packet_duplcate]
+
+OPTIONS:
+ -d Log directory. Defaults to tools/testing/selftests/net/rds/rds_logs
+
+ -l Simulates a percentage of packet loss
+
+ -c Simulates a percentage of packet corruption
+
+ -u Simulates a percentage of packet duplication.
+
+EXAMPLE:
+
+ # Create a suitable gcov enabled .config
+ tools/testing/selftests/net/rds/config.sh -g
+
+ # Alternatly create a gcov disabled .config
+ tools/testing/selftests/net/rds/config.sh
+
+ # build the kernel
+ vng --build --config tools/testing/selftests/net/config
+
+ # launch the tests in a VM
+ vng -v --rwdir ./ --run . --user root --cpus 4 -- \
+ "export PYTHONPATH=tools/testing/selftests/net/; tools/testing/selftests/net/rds/run.sh"
+
+An HTML coverage report will be output in tools/testing/selftests/net/rds/rds_logs/coverage/.
diff --git a/tools/testing/selftests/net/rds/config.sh b/tools/testing/selftests/net/rds/config.sh
new file mode 100755
index 000000000000..791c8dbe1095
--- /dev/null
+++ b/tools/testing/selftests/net/rds/config.sh
@@ -0,0 +1,53 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -u
+set -x
+
+unset KBUILD_OUTPUT
+
+GENERATE_GCOV_REPORT=0
+while getopts "g" opt; do
+ case ${opt} in
+ g)
+ GENERATE_GCOV_REPORT=1
+ ;;
+ :)
+ echo "USAGE: config.sh [-g]"
+ exit 1
+ ;;
+ ?)
+ echo "Invalid option: -${OPTARG}."
+ exit 1
+ ;;
+ esac
+done
+
+CONF_FILE="tools/testing/selftests/net/config"
+
+# no modules
+scripts/config --file "$CONF_FILE" --disable CONFIG_MODULES
+
+# enable RDS
+scripts/config --file "$CONF_FILE" --enable CONFIG_RDS
+scripts/config --file "$CONF_FILE" --enable CONFIG_RDS_TCP
+
+if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then
+ # instrument RDS and only RDS
+ scripts/config --file "$CONF_FILE" --enable CONFIG_GCOV_KERNEL
+ scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL
+ scripts/config --file "$CONF_FILE" --enable GCOV_PROFILE_RDS
+else
+ scripts/config --file "$CONF_FILE" --disable CONFIG_GCOV_KERNEL
+ scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL
+ scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_RDS
+fi
+
+# need network namespaces to run tests with veth network interfaces
+scripts/config --file "$CONF_FILE" --enable CONFIG_NET_NS
+scripts/config --file "$CONF_FILE" --enable CONFIG_VETH
+
+# simulate packet loss
+scripts/config --file "$CONF_FILE" --enable CONFIG_NET_SCH_NETEM
+
diff --git a/tools/testing/selftests/net/rds/run.sh b/tools/testing/selftests/net/rds/run.sh
new file mode 100755
index 000000000000..8aee244f582a
--- /dev/null
+++ b/tools/testing/selftests/net/rds/run.sh
@@ -0,0 +1,224 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -u
+
+unset KBUILD_OUTPUT
+
+current_dir="$(realpath "$(dirname "$0")")"
+build_dir="$current_dir"
+
+build_include="$current_dir/include.sh"
+if test -f "$build_include"; then
+ # this include will define "$mk_build_dir" as the location the test was
+ # built. We will need this if the tests are installed in a location
+ # other than the kernel source
+
+ source "$build_include"
+ build_dir="$mk_build_dir"
+fi
+
+# This test requires kernel source and the *.gcda data therein
+# Locate the top level of the kernel source, and the net/rds
+# subfolder with the appropriate *.gcno object files
+ksrc_dir="$(realpath "$build_dir"/../../../../../)"
+kconfig="$ksrc_dir/.config"
+obj_dir="$ksrc_dir/net/rds"
+
+GCOV_CMD=gcov
+
+#check to see if the host has the required packages to generate a gcov report
+check_gcov_env()
+{
+ if ! which "$GCOV_CMD" > /dev/null 2>&1; then
+ echo "Warning: Could not find gcov. "
+ GENERATE_GCOV_REPORT=0
+ return
+ fi
+
+ # the gcov version must match the gcc version
+ GCC_VER=$(gcc -dumpfullversion)
+ GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| awk 'BEGIN {FS="-"}{print $1}')
+ if [ "$GCOV_VER" != "$GCC_VER" ]; then
+ #attempt to find a matching gcov version
+ GCOV_CMD=gcov-$(gcc -dumpversion)
+
+ if ! which "$GCOV_CMD" > /dev/null 2>&1; then
+ echo "Warning: Could not find an appropriate gcov installation. \
+ gcov version must match gcc version"
+ GENERATE_GCOV_REPORT=0
+ return
+ fi
+
+ #recheck version number of found gcov executable
+ GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| \
+ awk 'BEGIN {FS="-"}{print $1}')
+ if [ "$GCOV_VER" != "$GCC_VER" ]; then
+ echo "Warning: Could not find an appropriate gcov installation. \
+ gcov version must match gcc version"
+ GENERATE_GCOV_REPORT=0
+ else
+ echo "Warning: Mismatched gcc and gcov detected. Using $GCOV_CMD"
+ fi
+ fi
+}
+
+# Check to see if the kconfig has the required configs to generate a coverage report
+check_gcov_conf()
+{
+ if ! grep -x "CONFIG_GCOV_PROFILE_RDS=y" "$kconfig" > /dev/null 2>&1; then
+ echo "INFO: CONFIG_GCOV_PROFILE_RDS should be enabled for coverage reports"
+ GENERATE_GCOV_REPORT=0
+ fi
+ if ! grep -x "CONFIG_GCOV_KERNEL=y" "$kconfig" > /dev/null 2>&1; then
+ echo "INFO: CONFIG_GCOV_KERNEL should be enabled for coverage reports"
+ GENERATE_GCOV_REPORT=0
+ fi
+ if grep -x "CONFIG_GCOV_PROFILE_ALL=y" "$kconfig" > /dev/null 2>&1; then
+ echo "INFO: CONFIG_GCOV_PROFILE_ALL should be disabled for coverage reports"
+ GENERATE_GCOV_REPORT=0
+ fi
+
+ if [ "$GENERATE_GCOV_REPORT" -eq 0 ]; then
+ echo "To enable gcov reports, please run "\
+ "\"tools/testing/selftests/net/rds/config.sh -g\" and rebuild the kernel"
+ else
+ # if we have the required kernel configs, proceed to check the environment to
+ # ensure we have the required gcov packages
+ check_gcov_env
+ fi
+}
+
+# Kselftest framework requirement - SKIP code is 4.
+check_conf_enabled() {
+ if ! grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then
+ echo "selftests: [SKIP] This test requires $1 enabled"
+ echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel"
+ exit 4
+ fi
+}
+check_conf_disabled() {
+ if grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then
+ echo "selftests: [SKIP] This test requires $1 disabled"
+ echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel"
+ exit 4
+ fi
+}
+check_conf() {
+ check_conf_enabled CONFIG_NET_SCH_NETEM
+ check_conf_enabled CONFIG_VETH
+ check_conf_enabled CONFIG_NET_NS
+ check_conf_enabled CONFIG_RDS_TCP
+ check_conf_enabled CONFIG_RDS
+ check_conf_disabled CONFIG_MODULES
+}
+
+check_env()
+{
+ if ! test -d "$obj_dir"; then
+ echo "selftests: [SKIP] This test requires a kernel source tree"
+ exit 4
+ fi
+ if ! test -e "$kconfig"; then
+ echo "selftests: [SKIP] This test requires a configured kernel source tree"
+ exit 4
+ fi
+ if ! which strace > /dev/null 2>&1; then
+ echo "selftests: [SKIP] Could not run test without strace"
+ exit 4
+ fi
+ if ! which tcpdump > /dev/null 2>&1; then
+ echo "selftests: [SKIP] Could not run test without tcpdump"
+ exit 4
+ fi
+
+ if ! which python3 > /dev/null 2>&1; then
+ echo "selftests: [SKIP] Could not run test without python3"
+ exit 4
+ fi
+
+ python_major=$(python3 -c "import sys; print(sys.version_info[0])")
+ python_minor=$(python3 -c "import sys; print(sys.version_info[1])")
+ if [[ python_major -lt 3 || ( python_major -eq 3 && python_minor -lt 9 ) ]] ; then
+ echo "selftests: [SKIP] Could not run test without at least python3.9"
+ python3 -V
+ exit 4
+ fi
+}
+
+LOG_DIR="$current_dir"/rds_logs
+PLOSS=0
+PCORRUPT=0
+PDUP=0
+GENERATE_GCOV_REPORT=1
+while getopts "d:l:c:u:" opt; do
+ case ${opt} in
+ d)
+ LOG_DIR=${OPTARG}
+ ;;
+ l)
+ PLOSS=${OPTARG}
+ ;;
+ c)
+ PCORRUPT=${OPTARG}
+ ;;
+ u)
+ PDUP=${OPTARG}
+ ;;
+ :)
+ echo "USAGE: run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]" \
+ "[-u packet_duplcate] [-g]"
+ exit 1
+ ;;
+ ?)
+ echo "Invalid option: -${OPTARG}."
+ exit 1
+ ;;
+ esac
+done
+
+
+check_env
+check_conf
+check_gcov_conf
+
+
+rm -fr "$LOG_DIR"
+TRACE_FILE="${LOG_DIR}/rds-strace.txt"
+COVR_DIR="${LOG_DIR}/coverage/"
+mkdir -p "$LOG_DIR"
+mkdir -p "$COVR_DIR"
+
+set +e
+echo running RDS tests...
+echo Traces will be logged to "$TRACE_FILE"
+rm -f "$TRACE_FILE"
+strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" --timeout 400 -d "$LOG_DIR" \
+ -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP"
+
+test_rc=$?
+dmesg > "${LOG_DIR}/dmesg.out"
+
+if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then
+ echo saving coverage data...
+ (set +x; cd /sys/kernel/debug/gcov; find ./* -name '*.gcda' | \
+ while read -r f
+ do
+ cat < "/sys/kernel/debug/gcov/$f" > "/$f"
+ done)
+
+ echo running gcovr...
+ gcovr -s --html-details --gcov-executable "$GCOV_CMD" --gcov-ignore-parse-errors \
+ -o "${COVR_DIR}/gcovr" "${ksrc_dir}/net/rds/"
+else
+ echo "Coverage report will be skipped"
+fi
+
+if [ "$test_rc" -eq 0 ]; then
+ echo "PASS: Test completed successfully"
+else
+ echo "FAIL: Test failed"
+fi
+
+exit "$test_rc"
diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py
new file mode 100644
index 000000000000..e6bb109bcead
--- /dev/null
+++ b/tools/testing/selftests/net/rds/test.py
@@ -0,0 +1,262 @@
+#! /usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import argparse
+import ctypes
+import errno
+import hashlib
+import os
+import select
+import signal
+import socket
+import subprocess
+import sys
+import atexit
+from pwd import getpwuid
+from os import stat
+from lib.py import ip
+
+
+libc = ctypes.cdll.LoadLibrary('libc.so.6')
+setns = libc.setns
+
+net0 = 'net0'
+net1 = 'net1'
+
+veth0 = 'veth0'
+veth1 = 'veth1'
+
+# Helper function for creating a socket inside a network namespace.
+# We need this because otherwise RDS will detect that the two TCP
+# sockets are on the same interface and use the loop transport instead
+# of the TCP transport.
+def netns_socket(netns, *args):
+ u0, u1 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET)
+
+ child = os.fork()
+ if child == 0:
+ # change network namespace
+ with open(f'/var/run/netns/{netns}') as f:
+ try:
+ ret = setns(f.fileno(), 0)
+ except IOError as e:
+ print(e.errno)
+ print(e)
+
+ # create socket in target namespace
+ s = socket.socket(*args)
+
+ # send resulting socket to parent
+ socket.send_fds(u0, [], [s.fileno()])
+
+ sys.exit(0)
+
+ # receive socket from child
+ _, s, _, _ = socket.recv_fds(u1, 0, 1)
+ os.waitpid(child, 0)
+ u0.close()
+ u1.close()
+ return socket.fromfd(s[0], *args)
+
+def signal_handler(sig, frame):
+ print('Test timed out')
+ sys.exit(1)
+
+#Parse out command line arguments. We take an optional
+# timeout parameter and an optional log output folder
+parser = argparse.ArgumentParser(description="init script args",
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument("-d", "--logdir", action="store",
+ help="directory to store logs", default="/tmp")
+parser.add_argument('--timeout', help="timeout to terminate hung test",
+ type=int, default=0)
+parser.add_argument('-l', '--loss', help="Simulate tcp packet loss",
+ type=int, default=0)
+parser.add_argument('-c', '--corruption', help="Simulate tcp packet corruption",
+ type=int, default=0)
+parser.add_argument('-u', '--duplicate', help="Simulate tcp packet duplication",
+ type=int, default=0)
+args = parser.parse_args()
+logdir=args.logdir
+packet_loss=str(args.loss)+'%'
+packet_corruption=str(args.corruption)+'%'
+packet_duplicate=str(args.duplicate)+'%'
+
+ip(f"netns add {net0}")
+ip(f"netns add {net1}")
+ip(f"link add type veth")
+
+addrs = [
+ # we technically don't need different port numbers, but this will
+ # help identify traffic in the network analyzer
+ ('10.0.0.1', 10000),
+ ('10.0.0.2', 20000),
+]
+
+# move interfaces to separate namespaces so they can no longer be
+# bound directly; this prevents rds from switching over from the tcp
+# transport to the loop transport.
+ip(f"link set {veth0} netns {net0} up")
+ip(f"link set {veth1} netns {net1} up")
+
+
+
+# add addresses
+ip(f"-n {net0} addr add {addrs[0][0]}/32 dev {veth0}")
+ip(f"-n {net1} addr add {addrs[1][0]}/32 dev {veth1}")
+
+# add routes
+ip(f"-n {net0} route add {addrs[1][0]}/32 dev {veth0}")
+ip(f"-n {net1} route add {addrs[0][0]}/32 dev {veth1}")
+
+# sanity check that our two interfaces/addresses are correctly set up
+# and communicating by doing a single ping
+ip(f"netns exec {net0} ping -c 1 {addrs[1][0]}")
+
+# Start a packet capture on each network
+for net in [net0, net1]:
+ tcpdump_pid = os.fork()
+ if tcpdump_pid == 0:
+ pcap = logdir+'/'+net+'.pcap'
+ subprocess.check_call(['touch', pcap])
+ user = getpwuid(stat(pcap).st_uid).pw_name
+ ip(f"netns exec {net} /usr/sbin/tcpdump -Z {user} -i any -w {pcap}")
+ sys.exit(0)
+
+# simulate packet loss, duplication and corruption
+for net, iface in [(net0, veth0), (net1, veth1)]:
+ ip(f"netns exec {net} /usr/sbin/tc qdisc add dev {iface} root netem \
+ corrupt {packet_corruption} loss {packet_loss} duplicate \
+ {packet_duplicate}")
+
+# add a timeout
+if args.timeout > 0:
+ signal.alarm(args.timeout)
+ signal.signal(signal.SIGALRM, signal_handler)
+
+sockets = [
+ netns_socket(net0, socket.AF_RDS, socket.SOCK_SEQPACKET),
+ netns_socket(net1, socket.AF_RDS, socket.SOCK_SEQPACKET),
+]
+
+for s, addr in zip(sockets, addrs):
+ s.bind(addr)
+ s.setblocking(0)
+
+fileno_to_socket = {
+ s.fileno(): s for s in sockets
+}
+
+addr_to_socket = {
+ addr: s for addr, s in zip(addrs, sockets)
+}
+
+socket_to_addr = {
+ s: addr for addr, s in zip(addrs, sockets)
+}
+
+send_hashes = {}
+recv_hashes = {}
+
+ep = select.epoll()
+
+for s in sockets:
+ ep.register(s, select.EPOLLRDNORM)
+
+n = 50000
+nr_send = 0
+nr_recv = 0
+
+while nr_send < n:
+ # Send as much as we can without blocking
+ print("sending...", nr_send, nr_recv)
+ while nr_send < n:
+ send_data = hashlib.sha256(
+ f'packet {nr_send}'.encode('utf-8')).hexdigest().encode('utf-8')
+
+ # pseudo-random send/receive pattern
+ sender = sockets[nr_send % 2]
+ receiver = sockets[1 - (nr_send % 3) % 2]
+
+ try:
+ sender.sendto(send_data, socket_to_addr[receiver])
+ send_hashes.setdefault((sender.fileno(), receiver.fileno()),
+ hashlib.sha256()).update(f'<{send_data}>'.encode('utf-8'))
+ nr_send = nr_send + 1
+ except BlockingIOError as e:
+ break
+ except OSError as e:
+ if e.errno in [errno.ENOBUFS, errno.ECONNRESET, errno.EPIPE]:
+ break
+ raise
+
+ # Receive as much as we can without blocking
+ print("receiving...", nr_send, nr_recv)
+ while nr_recv < nr_send:
+ for fileno, eventmask in ep.poll():
+ receiver = fileno_to_socket[fileno]
+
+ if eventmask & select.EPOLLRDNORM:
+ while True:
+ try:
+ recv_data, address = receiver.recvfrom(1024)
+ sender = addr_to_socket[address]
+ recv_hashes.setdefault((sender.fileno(),
+ receiver.fileno()), hashlib.sha256()).update(
+ f'<{recv_data}>'.encode('utf-8'))
+ nr_recv = nr_recv + 1
+ except BlockingIOError as e:
+ break
+
+ # exercise net/rds/tcp.c:rds_tcp_sysctl_reset()
+ for net in [net0, net1]:
+ ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_rcvbuf=10000")
+ ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_sndbuf=10000")
+
+print("done", nr_send, nr_recv)
+
+# the Python socket module doesn't know these
+RDS_INFO_FIRST = 10000
+RDS_INFO_LAST = 10017
+
+nr_success = 0
+nr_error = 0
+
+for s in sockets:
+ for optname in range(RDS_INFO_FIRST, RDS_INFO_LAST + 1):
+ # Sigh, the Python socket module doesn't allow us to pass
+ # buffer lengths greater than 1024 for some reason. RDS
+ # wants multiple pages.
+ try:
+ s.getsockopt(socket.SOL_RDS, optname, 1024)
+ nr_success = nr_success + 1
+ except OSError as e:
+ nr_error = nr_error + 1
+ if e.errno == errno.ENOSPC:
+ # ignore
+ pass
+
+print(f"getsockopt(): {nr_success}/{nr_error}")
+
+print("Stopping network packet captures")
+subprocess.check_call(['killall', '-q', 'tcpdump'])
+
+# We're done sending and receiving stuff, now let's check if what
+# we received is what we sent.
+for (sender, receiver), send_hash in send_hashes.items():
+ recv_hash = recv_hashes.get((sender, receiver))
+
+ if recv_hash is None:
+ print("FAIL: No data received")
+ sys.exit(1)
+
+ if send_hash.hexdigest() != recv_hash.hexdigest():
+ print("FAIL: Send/recv mismatch")
+ print("hash expected:", send_hash.hexdigest())
+ print("hash received:", recv_hash.hexdigest())
+ sys.exit(1)
+
+ print(f"{sender}/{receiver}: ok")
+
+print("Success")
+sys.exit(0)
diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index 9eb42570294d..16ac4df55fdb 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -57,6 +57,8 @@ static struct sof_flag sof_flags[] = {
SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE),
SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE),
SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE),
+ SOF_FLAG(SOF_TIMESTAMPING_OPT_RX_FILTER),
+ SOF_FLAG(SOF_TIMESTAMPING_RAW_HARDWARE),
};
static struct socket_type socket_types[] = {
@@ -98,6 +100,22 @@ static struct test_case test_cases[] = {
{}
},
{
+ { .so_timestamping = SOF_TIMESTAMPING_RAW_HARDWARE
+ | SOF_TIMESTAMPING_OPT_RX_FILTER },
+ {}
+ },
+ {
+ { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
+ | SOF_TIMESTAMPING_OPT_RX_FILTER },
+ {}
+ },
+ {
+ { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
+ | SOF_TIMESTAMPING_RX_SOFTWARE
+ | SOF_TIMESTAMPING_OPT_RX_FILTER },
+ { .swtstamp = true }
+ },
+ {
{ .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
| SOF_TIMESTAMPING_RX_SOFTWARE },
{ .swtstamp = true }
diff --git a/tools/testing/selftests/net/sk_so_peek_off.c b/tools/testing/selftests/net/sk_so_peek_off.c
new file mode 100644
index 000000000000..d87dd8d8d491
--- /dev/null
+++ b/tools/testing/selftests/net/sk_so_peek_off.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include "../kselftest.h"
+
+static char *afstr(int af, int proto)
+{
+ if (proto == IPPROTO_TCP)
+ return af == AF_INET ? "TCP/IPv4" : "TCP/IPv6";
+ else
+ return af == AF_INET ? "UDP/IPv4" : "UDP/IPv6";
+}
+
+int sk_peek_offset_probe(sa_family_t af, int proto)
+{
+ int type = (proto == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM);
+ int optv = 0;
+ int ret = 0;
+ int s;
+
+ s = socket(af, type, proto);
+ if (s < 0) {
+ ksft_perror("Temporary TCP socket creation failed");
+ } else {
+ if (!setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &optv, sizeof(int)))
+ ret = 1;
+ else
+ printf("%s does not support SO_PEEK_OFF\n", afstr(af, proto));
+ close(s);
+ }
+ return ret;
+}
+
+static void sk_peek_offset_set(int s, int offset)
+{
+ if (setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, sizeof(offset)))
+ ksft_perror("Failed to set SO_PEEK_OFF value\n");
+}
+
+static int sk_peek_offset_get(int s)
+{
+ int offset;
+ socklen_t len = sizeof(offset);
+
+ if (getsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, &len))
+ ksft_perror("Failed to get SO_PEEK_OFF value\n");
+ return offset;
+}
+
+static int sk_peek_offset_test(sa_family_t af, int proto)
+{
+ int type = (proto == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM);
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in a4;
+ struct sockaddr_in6 a6;
+ } a;
+ int res = 0;
+ int s[2] = {0, 0};
+ int recv_sock = 0;
+ int offset = 0;
+ ssize_t len;
+ char buf[2];
+
+ memset(&a, 0, sizeof(a));
+ a.sa.sa_family = af;
+
+ s[0] = recv_sock = socket(af, type, proto);
+ s[1] = socket(af, type, proto);
+
+ if (s[0] < 0 || s[1] < 0) {
+ ksft_perror("Temporary socket creation failed\n");
+ goto out;
+ }
+ if (bind(s[0], &a.sa, sizeof(a)) < 0) {
+ ksft_perror("Temporary socket bind() failed\n");
+ goto out;
+ }
+ if (getsockname(s[0], &a.sa, &((socklen_t) { sizeof(a) })) < 0) {
+ ksft_perror("Temporary socket getsockname() failed\n");
+ goto out;
+ }
+ if (proto == IPPROTO_TCP && listen(s[0], 0) < 0) {
+ ksft_perror("Temporary socket listen() failed\n");
+ goto out;
+ }
+ if (connect(s[1], &a.sa, sizeof(a)) < 0) {
+ ksft_perror("Temporary socket connect() failed\n");
+ goto out;
+ }
+ if (proto == IPPROTO_TCP) {
+ recv_sock = accept(s[0], NULL, NULL);
+ if (recv_sock <= 0) {
+ ksft_perror("Temporary socket accept() failed\n");
+ goto out;
+ }
+ }
+
+ /* Some basic tests of getting/setting offset */
+ offset = sk_peek_offset_get(recv_sock);
+ if (offset != -1) {
+ ksft_perror("Initial value of socket offset not -1\n");
+ goto out;
+ }
+ sk_peek_offset_set(recv_sock, 0);
+ offset = sk_peek_offset_get(recv_sock);
+ if (offset != 0) {
+ ksft_perror("Failed to set socket offset to 0\n");
+ goto out;
+ }
+
+ /* Transfer a message */
+ if (send(s[1], (char *)("ab"), 2, 0) != 2) {
+ ksft_perror("Temporary probe socket send() failed\n");
+ goto out;
+ }
+ /* Read first byte */
+ len = recv(recv_sock, buf, 1, MSG_PEEK);
+ if (len != 1 || buf[0] != 'a') {
+ ksft_perror("Failed to read first byte of message\n");
+ goto out;
+ }
+ offset = sk_peek_offset_get(recv_sock);
+ if (offset != 1) {
+ ksft_perror("Offset not forwarded correctly at first byte\n");
+ goto out;
+ }
+ /* Try to read beyond last byte */
+ len = recv(recv_sock, buf, 2, MSG_PEEK);
+ if (len != 1 || buf[0] != 'b') {
+ ksft_perror("Failed to read last byte of message\n");
+ goto out;
+ }
+ offset = sk_peek_offset_get(recv_sock);
+ if (offset != 2) {
+ ksft_perror("Offset not forwarded correctly at last byte\n");
+ goto out;
+ }
+ /* Flush message */
+ len = recv(recv_sock, buf, 2, MSG_TRUNC);
+ if (len != 2) {
+ ksft_perror("Failed to flush message\n");
+ goto out;
+ }
+ offset = sk_peek_offset_get(recv_sock);
+ if (offset != 0) {
+ ksft_perror("Offset not reverted correctly after flush\n");
+ goto out;
+ }
+
+ printf("%s with MSG_PEEK_OFF works correctly\n", afstr(af, proto));
+ res = 1;
+out:
+ if (proto == IPPROTO_TCP && recv_sock >= 0)
+ close(recv_sock);
+ if (s[1] >= 0)
+ close(s[1]);
+ if (s[0] >= 0)
+ close(s[0]);
+ return res;
+}
+
+static int do_test(int proto)
+{
+ int res4, res6;
+
+ res4 = sk_peek_offset_probe(AF_INET, proto);
+ res6 = sk_peek_offset_probe(AF_INET6, proto);
+
+ if (!res4 && !res6)
+ return KSFT_SKIP;
+
+ if (res4)
+ res4 = sk_peek_offset_test(AF_INET, proto);
+
+ if (res6)
+ res6 = sk_peek_offset_test(AF_INET6, proto);
+
+ if (!res4 || !res6)
+ return KSFT_FAIL;
+
+ return KSFT_PASS;
+}
+
+int main(void)
+{
+ int restcp, resudp;
+
+ restcp = do_test(IPPROTO_TCP);
+ resudp = do_test(IPPROTO_UDP);
+ if (restcp == KSFT_FAIL || resudp == KSFT_FAIL)
+ return KSFT_FAIL;
+
+ return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile
index bd88b90b902b..5b0205c70c39 100644
--- a/tools/testing/selftests/net/tcp_ao/Makefile
+++ b/tools/testing/selftests/net/tcp_ao/Makefile
@@ -31,7 +31,8 @@ CFLAGS += $(KHDR_INCLUDES)
CFLAGS += -iquote ./lib/ -I ../../../../include/
# Library
-LIBSRC := kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c
+LIBSRC := ftrace.c ftrace-tcp.c kconfig.c netlink.c
+LIBSRC += proc.c repair.c setup.c sock.c utils.c
LIBOBJ := $(LIBSRC:%.c=$(LIBDIR)/%.o)
EXTRA_CLEAN += $(LIBOBJ) $(LIB)
diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c
index a1e6e007c291..6736484996a3 100644
--- a/tools/testing/selftests/net/tcp_ao/bench-lookups.c
+++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c
@@ -355,6 +355,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(30, server_fn, client_fn);
+ test_init(31, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config
index d3277a9de987..3605e38711cb 100644
--- a/tools/testing/selftests/net/tcp_ao/config
+++ b/tools/testing/selftests/net/tcp_ao/config
@@ -7,4 +7,5 @@ CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NET_VRF=y
CONFIG_TCP_AO=y
CONFIG_TCP_MD5SIG=y
+CONFIG_TRACEPOINTS=y
CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c
index 185a2f6e5ff3..d418162d335f 100644
--- a/tools/testing/selftests/net/tcp_ao/connect-deny.c
+++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c
@@ -71,10 +71,12 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
}
}
+ synchronize_threads(); /* before counter checks */
if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2))
test_error("test_get_tcp_ao_counters()");
close(lsk);
+
if (pwd)
test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
@@ -84,10 +86,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
after_cnt = netstat_get_one(cnt_name, NULL);
if (after_cnt <= before_cnt) {
- test_fail("%s: %s counter did not increase: %zu <= %zu",
+ test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
tst_name, cnt_name, after_cnt, before_cnt);
} else {
- test_ok("%s: counter %s increased %zu => %zu",
+ test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
tst_name, cnt_name, before_cnt, after_cnt);
}
@@ -180,6 +182,7 @@ static void try_connect(const char *tst_name, unsigned int port,
timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+ synchronize_threads(); /* before counter checks */
if (ret < 0) {
if (fault(KEYREJECT) && ret == -EKEYREJECTED) {
test_ok("%s: connect() was prevented", tst_name);
@@ -212,30 +215,44 @@ out:
static void *client_fn(void *arg)
{
- union tcp_addr wrong_addr, network_addr;
+ union tcp_addr wrong_addr, network_addr, addr_any = {};
unsigned int port = test_server_port;
if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1)
test_error("Can't convert ip address %s", TEST_WRONG_IP);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("Non-AO server + AO client", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO server + Non-AO client", port++, NULL,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("Wrong password", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_ao_event_sk_expect(TCP_AO_SYNACK_NO_KEY, this_ip_dest, addr_any,
+ port, 0, 100, 100);
try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_ao_event_expect(TCP_AO_WRONG_MACLEN, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("Different maclen", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
@@ -259,6 +276,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(21, server_fn, client_fn);
+ test_init(22, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c
index 81653b47f303..f1d8d29e393f 100644
--- a/tools/testing/selftests/net/tcp_ao/connect.c
+++ b/tools/testing/selftests/net/tcp_ao/connect.c
@@ -67,14 +67,14 @@ static void *client_fn(void *arg)
netstat_free(ns_after);
if (nr_packets > (after_aogood - before_aogood)) {
- test_fail("TCPAOGood counter mismatch: %zu > (%zu - %zu)",
+ test_fail("TCPAOGood counter mismatch: %zu > (%" PRIu64 " - %" PRIu64 ")",
nr_packets, after_aogood, before_aogood);
return NULL;
}
if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD))
return NULL;
- test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %" PRIu64,
+ test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %zu",
before_aogood, ao1.ao_info_pkt_good,
ao1.key_cnts[0].pkt_good,
after_aogood, ao2.ao_info_pkt_good,
@@ -85,6 +85,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(1, server_fn, client_fn);
+ test_init(2, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
index d69bcba3c929..a1614f0d8c44 100644
--- a/tools/testing/selftests/net/tcp_ao/icmps-discard.c
+++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
@@ -444,6 +444,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(3, server_fn, client_fn);
+ test_init(4, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c
index 24e62120b792..d4385b52c10b 100644
--- a/tools/testing/selftests/net/tcp_ao/key-management.c
+++ b/tools/testing/selftests/net/tcp_ao/key-management.c
@@ -965,7 +965,7 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys,
synchronize_threads(); /* 5: counters */
}
-static void try_unmatched_keys(int sk, int *rnext_index)
+static void try_unmatched_keys(int sk, int *rnext_index, unsigned int port)
{
struct test_key *key;
unsigned int i = 0;
@@ -1013,6 +1013,9 @@ static void try_unmatched_keys(int sk, int *rnext_index)
test_error("all keys on server match the client");
if (test_set_key(sk, -1, key->server_keyid))
test_error("Can't change the current key");
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_addr, this_ip_dest,
+ -1, port, 0, -1, -1, -1, -1, -1,
+ -1, key->server_keyid, -1);
if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
test_fail("verify failed");
*rnext_index = i;
@@ -1054,6 +1057,10 @@ static void check_current_back(const char *tst_name, unsigned int port,
return;
if (test_set_key(sk, collection.keys[rotate_to_index].client_keyid, -1))
test_error("Can't change the current key");
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_dest, this_ip_addr,
+ port, -1, 0, -1, -1, -1, -1, -1,
+ collection.keys[rotate_to_index].client_keyid,
+ collection.keys[current_index].client_keyid, -1);
if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
test_fail("verify failed");
/* There is a race here: between setting the current_key with
@@ -1085,6 +1092,11 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
for (i = rnext_index + 1; rotations > 0; i++, rotations--) {
if (i >= collection.nr_keys)
i = 0;
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST,
+ this_ip_addr, this_ip_dest,
+ -1, port, 0, -1, -1, -1, -1, -1,
+ i == 0 ? -1 : collection.keys[i - 1].server_keyid,
+ collection.keys[i].server_keyid, -1);
if (test_set_key(sk, -1, collection.keys[i].server_keyid))
test_error("Can't change the Rnext key");
if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
@@ -1124,7 +1136,7 @@ static void try_client_match(const char *tst_name, unsigned int port,
rnext_index, msg_len, nr_packets);
if (sk < 0)
return;
- try_unmatched_keys(sk, &rnext_index);
+ try_unmatched_keys(sk, &rnext_index, port);
end_client(tst_name, sk, nr_keys, current_index, rnext_index, NULL);
}
@@ -1181,6 +1193,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(120, server_fn, client_fn);
+ test_init(121, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
index fbc7f6111815..db44e77428dd 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/aolib.h
+++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
@@ -37,17 +37,58 @@ extern void __test_xfail(const char *buf);
extern void __test_error(const char *buf);
extern void __test_skip(const char *buf);
-__attribute__((__format__(__printf__, 2, 3)))
-static inline void __test_print(void (*fn)(const char *), const char *fmt, ...)
+static inline char *test_snprintf(const char *fmt, va_list vargs)
{
-#define TEST_MSG_BUFFER_SIZE 4096
- char buf[TEST_MSG_BUFFER_SIZE];
- va_list arg;
-
- va_start(arg, fmt);
- vsnprintf(buf, sizeof(buf), fmt, arg);
- va_end(arg);
- fn(buf);
+ char *ret = NULL;
+ size_t size = 0;
+ va_list tmp;
+ int n = 0;
+
+ va_copy(tmp, vargs);
+ n = vsnprintf(ret, size, fmt, tmp);
+ if (n < 0)
+ return NULL;
+
+ size = n + 1;
+ ret = malloc(size);
+ if (!ret)
+ return NULL;
+
+ n = vsnprintf(ret, size, fmt, vargs);
+ if (n < 0 || n > size - 1) {
+ free(ret);
+ return NULL;
+ }
+ return ret;
+}
+
+static __printf(1, 2) inline char *test_sprintf(const char *fmt, ...)
+{
+ va_list vargs;
+ char *ret;
+
+ va_start(vargs, fmt);
+ ret = test_snprintf(fmt, vargs);
+ va_end(vargs);
+
+ return ret;
+}
+
+static __printf(2, 3) inline void __test_print(void (*fn)(const char *),
+ const char *fmt, ...)
+{
+ va_list vargs;
+ char *msg;
+
+ va_start(vargs, fmt);
+ msg = test_snprintf(fmt, vargs);
+ va_end(vargs);
+
+ if (!msg)
+ return;
+
+ fn(msg);
+ free(msg);
}
#define test_print(fmt, ...) \
@@ -103,6 +144,7 @@ enum test_needs_kconfig {
KCONFIG_TCP_AO, /* required */
KCONFIG_TCP_MD5, /* optional, for TCP-MD5 features */
KCONFIG_NET_VRF, /* optional, for L3/VRF testing */
+ KCONFIG_FTRACE, /* optional, for tracepoints checks */
__KCONFIG_LAST__
};
extern bool kernel_config_has(enum test_needs_kconfig k);
@@ -142,6 +184,8 @@ static inline void test_init2(unsigned int ntests,
__test_init(ntests, family, prefix, taddr1, taddr2, peer1, peer2);
}
extern void test_add_destructor(void (*d)(void));
+extern void test_init_ftrace(int nsfd1, int nsfd2);
+extern int test_setup_tracing(void);
/* To adjust optmem socket limit, approximately estimate a number,
* that is bigger than sizeof(struct tcp_ao_key).
@@ -216,12 +260,17 @@ static inline void test_init(unsigned int ntests,
}
extern void synchronize_threads(void);
extern void switch_ns(int fd);
+extern int switch_save_ns(int fd);
+extern void switch_close_ns(int fd);
extern __thread union tcp_addr this_ip_addr;
extern __thread union tcp_addr this_ip_dest;
extern int test_family;
extern void randomize_buffer(void *buf, size_t buflen);
+extern __printf(3, 4) int test_echo(const char *fname, bool append,
+ const char *fmt, ...);
+
extern int open_netns(void);
extern int unshare_open_netns(void);
extern const char veth_name[];
@@ -602,4 +651,115 @@ static inline int test_add_repaired_key(int sk,
return test_verify_socket_key(sk, &tmp);
}
+#define DEFAULT_FTRACE_BUFFER_KB 10000
+#define DEFAULT_TRACER_LINES_ARR 200
+struct test_ftracer;
+extern uint64_t ns_cookie1, ns_cookie2;
+
+enum ftracer_op {
+ FTRACER_LINE_DISCARD = 0,
+ FTRACER_LINE_PRESERVE,
+ FTRACER_EXIT,
+};
+
+extern struct test_ftracer *create_ftracer(const char *name,
+ enum ftracer_op (*process_line)(const char *line),
+ void (*destructor)(struct test_ftracer *tracer),
+ bool (*expecting_more)(void),
+ size_t lines_buf_sz, size_t buffer_size_kb);
+extern int setup_trace_event(struct test_ftracer *tracer,
+ const char *event, const char *filter);
+extern void destroy_ftracer(struct test_ftracer *tracer);
+extern const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer);
+extern const char **tracer_get_savedlines(struct test_ftracer *tracer);
+
+enum trace_events {
+ /* TCP_HASH_EVENT */
+ TCP_HASH_BAD_HEADER = 0,
+ TCP_HASH_MD5_REQUIRED,
+ TCP_HASH_MD5_UNEXPECTED,
+ TCP_HASH_MD5_MISMATCH,
+ TCP_HASH_AO_REQUIRED,
+ /* TCP_AO_EVENT */
+ TCP_AO_HANDSHAKE_FAILURE,
+ TCP_AO_WRONG_MACLEN,
+ TCP_AO_MISMATCH,
+ TCP_AO_KEY_NOT_FOUND,
+ TCP_AO_RNEXT_REQUEST,
+ /* TCP_AO_EVENT_SK */
+ TCP_AO_SYNACK_NO_KEY,
+ /* TCP_AO_EVENT_SNE */
+ TCP_AO_SND_SNE_UPDATE,
+ TCP_AO_RCV_SNE_UPDATE,
+ __MAX_TRACE_EVENTS
+};
+
+extern int __trace_event_expect(enum trace_events type, int family,
+ union tcp_addr src, union tcp_addr dst,
+ int src_port, int dst_port, int L3index,
+ int fin, int syn, int rst, int psh, int ack,
+ int keyid, int rnext, int maclen, int sne);
+
+static inline void trace_hash_event_expect(enum trace_events type,
+ union tcp_addr src, union tcp_addr dst,
+ int src_port, int dst_port, int L3index,
+ int fin, int syn, int rst, int psh, int ack)
+{
+ int err;
+
+ err = __trace_event_expect(type, TEST_FAMILY, src, dst,
+ src_port, dst_port, L3index,
+ fin, syn, rst, psh, ack,
+ -1, -1, -1, -1);
+ if (err)
+ test_error("Couldn't add a trace event: %d", err);
+}
+
+static inline void trace_ao_event_expect(enum trace_events type,
+ union tcp_addr src, union tcp_addr dst,
+ int src_port, int dst_port, int L3index,
+ int fin, int syn, int rst, int psh, int ack,
+ int keyid, int rnext, int maclen)
+{
+ int err;
+
+ err = __trace_event_expect(type, TEST_FAMILY, src, dst,
+ src_port, dst_port, L3index,
+ fin, syn, rst, psh, ack,
+ keyid, rnext, maclen, -1);
+ if (err)
+ test_error("Couldn't add a trace event: %d", err);
+}
+
+static inline void trace_ao_event_sk_expect(enum trace_events type,
+ union tcp_addr src, union tcp_addr dst,
+ int src_port, int dst_port,
+ int keyid, int rnext)
+{
+ int err;
+
+ err = __trace_event_expect(type, TEST_FAMILY, src, dst,
+ src_port, dst_port, -1,
+ -1, -1, -1, -1, -1,
+ keyid, rnext, -1, -1);
+ if (err)
+ test_error("Couldn't add a trace event: %d", err);
+}
+
+static inline void trace_ao_event_sne_expect(enum trace_events type,
+ union tcp_addr src, union tcp_addr dst,
+ int src_port, int dst_port, int sne)
+{
+ int err;
+
+ err = __trace_event_expect(type, TEST_FAMILY, src, dst,
+ src_port, dst_port, -1,
+ -1, -1, -1, -1, -1,
+ -1, -1, -1, sne);
+ if (err)
+ test_error("Couldn't add a trace event: %d", err);
+}
+
+extern int setup_aolib_ftracer(void);
+
#endif /* _AOLIB_H_ */
diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
new file mode 100644
index 000000000000..24380c68fec6
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <pthread.h>
+#include "aolib.h"
+
+static const char *trace_event_names[__MAX_TRACE_EVENTS] = {
+ /* TCP_HASH_EVENT */
+ "tcp_hash_bad_header",
+ "tcp_hash_md5_required",
+ "tcp_hash_md5_unexpected",
+ "tcp_hash_md5_mismatch",
+ "tcp_hash_ao_required",
+ /* TCP_AO_EVENT */
+ "tcp_ao_handshake_failure",
+ "tcp_ao_wrong_maclen",
+ "tcp_ao_mismatch",
+ "tcp_ao_key_not_found",
+ "tcp_ao_rnext_request",
+ /* TCP_AO_EVENT_SK */
+ "tcp_ao_synack_no_key",
+ /* TCP_AO_EVENT_SNE */
+ "tcp_ao_snd_sne_update",
+ "tcp_ao_rcv_sne_update"
+};
+
+struct expected_trace_point {
+ /* required */
+ enum trace_events type;
+ int family;
+ union tcp_addr src;
+ union tcp_addr dst;
+
+ /* optional */
+ int src_port;
+ int dst_port;
+ int L3index;
+
+ int fin;
+ int syn;
+ int rst;
+ int psh;
+ int ack;
+
+ int keyid;
+ int rnext;
+ int maclen;
+ int sne;
+
+ size_t matched;
+};
+
+static struct expected_trace_point *exp_tps;
+static size_t exp_tps_nr;
+static size_t exp_tps_size;
+static pthread_mutex_t exp_tps_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+int __trace_event_expect(enum trace_events type, int family,
+ union tcp_addr src, union tcp_addr dst,
+ int src_port, int dst_port, int L3index,
+ int fin, int syn, int rst, int psh, int ack,
+ int keyid, int rnext, int maclen, int sne)
+{
+ struct expected_trace_point new_tp = {
+ .type = type,
+ .family = family,
+ .src = src,
+ .dst = dst,
+ .src_port = src_port,
+ .dst_port = dst_port,
+ .L3index = L3index,
+ .fin = fin,
+ .syn = syn,
+ .rst = rst,
+ .psh = psh,
+ .ack = ack,
+ .keyid = keyid,
+ .rnext = rnext,
+ .maclen = maclen,
+ .sne = sne,
+ .matched = 0,
+ };
+ int ret = 0;
+
+ if (!kernel_config_has(KCONFIG_FTRACE))
+ return 0;
+
+ pthread_mutex_lock(&exp_tps_mutex);
+ if (exp_tps_nr == exp_tps_size) {
+ struct expected_trace_point *tmp;
+
+ if (exp_tps_size == 0)
+ exp_tps_size = 10;
+ else
+ exp_tps_size = exp_tps_size * 1.6;
+
+ tmp = reallocarray(exp_tps, exp_tps_size, sizeof(exp_tps[0]));
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ exp_tps = tmp;
+ }
+ exp_tps[exp_tps_nr] = new_tp;
+ exp_tps_nr++;
+out:
+ pthread_mutex_unlock(&exp_tps_mutex);
+ return ret;
+}
+
+static void free_expected_events(void)
+{
+ /* We're from the process destructor - not taking the mutex */
+ exp_tps_size = 0;
+ exp_tps = NULL;
+ free(exp_tps);
+}
+
+struct trace_point {
+ int family;
+ union tcp_addr src;
+ union tcp_addr dst;
+ unsigned int src_port;
+ unsigned int dst_port;
+ int L3index;
+ unsigned int fin:1,
+ syn:1,
+ rst:1,
+ psh:1,
+ ack:1;
+
+ unsigned int keyid;
+ unsigned int rnext;
+ unsigned int maclen;
+
+ unsigned int sne;
+};
+
+static bool lookup_expected_event(int event_type, struct trace_point *e)
+{
+ size_t i;
+
+ pthread_mutex_lock(&exp_tps_mutex);
+ for (i = 0; i < exp_tps_nr; i++) {
+ struct expected_trace_point *p = &exp_tps[i];
+ size_t sk_size;
+
+ if (p->type != event_type)
+ continue;
+ if (p->family != e->family)
+ continue;
+ if (p->family == AF_INET)
+ sk_size = sizeof(p->src.a4);
+ else
+ sk_size = sizeof(p->src.a6);
+ if (memcmp(&p->src, &e->src, sk_size))
+ continue;
+ if (memcmp(&p->dst, &e->dst, sk_size))
+ continue;
+ if (p->src_port >= 0 && p->src_port != e->src_port)
+ continue;
+ if (p->dst_port >= 0 && p->dst_port != e->dst_port)
+ continue;
+ if (p->L3index >= 0 && p->L3index != e->L3index)
+ continue;
+
+ if (p->fin >= 0 && p->fin != e->fin)
+ continue;
+ if (p->syn >= 0 && p->syn != e->syn)
+ continue;
+ if (p->rst >= 0 && p->rst != e->rst)
+ continue;
+ if (p->psh >= 0 && p->psh != e->psh)
+ continue;
+ if (p->ack >= 0 && p->ack != e->ack)
+ continue;
+
+ if (p->keyid >= 0 && p->keyid != e->keyid)
+ continue;
+ if (p->rnext >= 0 && p->rnext != e->rnext)
+ continue;
+ if (p->maclen >= 0 && p->maclen != e->maclen)
+ continue;
+ if (p->sne >= 0 && p->sne != e->sne)
+ continue;
+ p->matched++;
+ pthread_mutex_unlock(&exp_tps_mutex);
+ return true;
+ }
+ pthread_mutex_unlock(&exp_tps_mutex);
+ return false;
+}
+
+static int check_event_type(const char *line)
+{
+ size_t i;
+
+ /*
+ * This should have been a set or hashmap, but it's a selftest,
+ * so... KISS.
+ */
+ for (i = 0; i < __MAX_TRACE_EVENTS; i++) {
+ if (!strncmp(trace_event_names[i], line, strlen(trace_event_names[i])))
+ return i;
+ }
+ return -1;
+}
+
+static bool event_has_flags(enum trace_events event)
+{
+ switch (event) {
+ case TCP_HASH_BAD_HEADER:
+ case TCP_HASH_MD5_REQUIRED:
+ case TCP_HASH_MD5_UNEXPECTED:
+ case TCP_HASH_MD5_MISMATCH:
+ case TCP_HASH_AO_REQUIRED:
+ case TCP_AO_HANDSHAKE_FAILURE:
+ case TCP_AO_WRONG_MACLEN:
+ case TCP_AO_MISMATCH:
+ case TCP_AO_KEY_NOT_FOUND:
+ case TCP_AO_RNEXT_REQUEST:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int tracer_ip_split(int family, char *src, char **addr, char **port)
+{
+ char *p;
+
+ if (family == AF_INET) {
+ /* fomat is <addr>:port, i.e.: 10.0.254.1:7015 */
+ *addr = src;
+ p = strchr(src, ':');
+ if (!p) {
+ test_print("Couldn't parse trace event addr:port %s", src);
+ return -EINVAL;
+ }
+ *p++ = '\0';
+ *port = p;
+ return 0;
+ }
+ if (family != AF_INET6)
+ return -EAFNOSUPPORT;
+
+ /* format is [<addr>]:port, i.e.: [2001:db8:254::1]:7013 */
+ *addr = strchr(src, '[');
+ p = strchr(src, ']');
+
+ if (!p || !*addr) {
+ test_print("Couldn't parse trace event [addr]:port %s", src);
+ return -EINVAL;
+ }
+
+ *addr = *addr + 1; /* '[' */
+ *p++ = '\0'; /* ']' */
+ if (*p != ':') {
+ test_print("Couldn't parse trace event :port %s", p);
+ return -EINVAL;
+ }
+ *p++ = '\0'; /* ':' */
+ *port = p;
+ return 0;
+}
+
+static int tracer_scan_address(int family, char *src,
+ union tcp_addr *dst, unsigned int *port)
+{
+ char *addr, *port_str;
+ int ret;
+
+ ret = tracer_ip_split(family, src, &addr, &port_str);
+ if (ret)
+ return ret;
+
+ if (inet_pton(family, addr, dst) != 1) {
+ test_print("Couldn't parse trace event addr %s", addr);
+ return -EINVAL;
+ }
+ errno = 0;
+ *port = (unsigned int)strtoul(port_str, NULL, 10);
+ if (errno != 0) {
+ test_print("Couldn't parse trace event port %s", port_str);
+ return -errno;
+ }
+ return 0;
+}
+
+static int tracer_scan_event(const char *line, enum trace_events event,
+ struct trace_point *out)
+{
+ char *src = NULL, *dst = NULL, *family = NULL;
+ char fin, syn, rst, psh, ack;
+ int nr_matched, ret = 0;
+ uint64_t netns_cookie;
+
+ switch (event) {
+ case TCP_HASH_BAD_HEADER:
+ case TCP_HASH_MD5_REQUIRED:
+ case TCP_HASH_MD5_UNEXPECTED:
+ case TCP_HASH_MD5_MISMATCH:
+ case TCP_HASH_AO_REQUIRED: {
+ nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c]",
+ &netns_cookie, &family,
+ &src, &dst, &out->L3index,
+ &fin, &syn, &rst, &psh, &ack);
+ if (nr_matched != 10)
+ test_print("Couldn't parse trace event, matched = %d/10",
+ nr_matched);
+ break;
+ }
+ case TCP_AO_HANDSHAKE_FAILURE:
+ case TCP_AO_WRONG_MACLEN:
+ case TCP_AO_MISMATCH:
+ case TCP_AO_KEY_NOT_FOUND:
+ case TCP_AO_RNEXT_REQUEST: {
+ nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u",
+ &netns_cookie, &family,
+ &src, &dst, &out->L3index,
+ &fin, &syn, &rst, &psh, &ack,
+ &out->keyid, &out->rnext, &out->maclen);
+ if (nr_matched != 13)
+ test_print("Couldn't parse trace event, matched = %d/13",
+ nr_matched);
+ break;
+ }
+ case TCP_AO_SYNACK_NO_KEY: {
+ nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms keyid=%u rnext=%u",
+ &netns_cookie, &family,
+ &src, &dst, &out->keyid, &out->rnext);
+ if (nr_matched != 6)
+ test_print("Couldn't parse trace event, matched = %d/6",
+ nr_matched);
+ break;
+ }
+ case TCP_AO_SND_SNE_UPDATE:
+ case TCP_AO_RCV_SNE_UPDATE: {
+ nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms sne=%u",
+ &netns_cookie, &family,
+ &src, &dst, &out->sne);
+ if (nr_matched != 5)
+ test_print("Couldn't parse trace event, matched = %d/5",
+ nr_matched);
+ break;
+ }
+ default:
+ return -1;
+ }
+
+ if (family) {
+ if (!strcmp(family, "AF_INET")) {
+ out->family = AF_INET;
+ } else if (!strcmp(family, "AF_INET6")) {
+ out->family = AF_INET6;
+ } else {
+ test_print("Couldn't parse trace event family %s", family);
+ ret = -EINVAL;
+ goto out_free;
+ }
+ }
+
+ if (event_has_flags(event)) {
+ out->fin = (fin == 'F');
+ out->syn = (syn == 'S');
+ out->rst = (rst == 'R');
+ out->psh = (psh == 'P');
+ out->ack = (ack == '.');
+
+ if ((fin != 'F' && fin != ' ') ||
+ (syn != 'S' && syn != ' ') ||
+ (rst != 'R' && rst != ' ') ||
+ (psh != 'P' && psh != ' ') ||
+ (ack != '.' && ack != ' ')) {
+ test_print("Couldn't parse trace event flags %c%c%c%c%c",
+ fin, syn, rst, psh, ack);
+ ret = -EINVAL;
+ goto out_free;
+ }
+ }
+
+ if (src && tracer_scan_address(out->family, src, &out->src, &out->src_port)) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ if (dst && tracer_scan_address(out->family, dst, &out->dst, &out->dst_port)) {
+ ret = -EINVAL;
+ goto out_free;
+ }
+
+ if (netns_cookie != ns_cookie1 && netns_cookie != ns_cookie2) {
+ test_print("Net namespace filter for trace event didn't work: %" PRIu64 " != %" PRIu64 " OR %" PRIu64,
+ netns_cookie, ns_cookie1, ns_cookie2);
+ ret = -EINVAL;
+ }
+
+out_free:
+ free(src);
+ free(dst);
+ free(family);
+ return ret;
+}
+
+static enum ftracer_op aolib_tracer_process_event(const char *line)
+{
+ int event_type = check_event_type(line);
+ struct trace_point tmp = {};
+
+ if (event_type < 0)
+ return FTRACER_LINE_PRESERVE;
+
+ if (tracer_scan_event(line, event_type, &tmp))
+ return FTRACER_LINE_PRESERVE;
+
+ return lookup_expected_event(event_type, &tmp) ?
+ FTRACER_LINE_DISCARD : FTRACER_LINE_PRESERVE;
+}
+
+static void dump_trace_event(struct expected_trace_point *e)
+{
+ char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN];
+
+ if (!inet_ntop(e->family, &e->src, src, INET6_ADDRSTRLEN))
+ test_error("inet_ntop()");
+ if (!inet_ntop(e->family, &e->dst, dst, INET6_ADDRSTRLEN))
+ test_error("inet_ntop()");
+ test_print("trace event filter %s [%s:%d => %s:%d, L3index %d, flags: %s%s%s%s%s, keyid: %d, rnext: %d, maclen: %d, sne: %d] = %zu",
+ trace_event_names[e->type],
+ src, e->src_port, dst, e->dst_port, e->L3index,
+ (e->fin > 0) ? "F" : (e->fin == 0) ? "!F" : "",
+ (e->syn > 0) ? "S" : (e->syn == 0) ? "!S" : "",
+ (e->rst > 0) ? "R" : (e->rst == 0) ? "!R" : "",
+ (e->psh > 0) ? "P" : (e->psh == 0) ? "!P" : "",
+ (e->ack > 0) ? "." : (e->ack == 0) ? "!." : "",
+ e->keyid, e->rnext, e->maclen, e->sne, e->matched);
+}
+
+static void print_match_stats(bool unexpected_events)
+{
+ size_t matches_per_type[__MAX_TRACE_EVENTS] = {};
+ bool expected_but_none = false;
+ size_t i, total_matched = 0;
+ char *stat_line = NULL;
+
+ for (i = 0; i < exp_tps_nr; i++) {
+ struct expected_trace_point *e = &exp_tps[i];
+
+ total_matched += e->matched;
+ matches_per_type[e->type] += e->matched;
+ if (!e->matched)
+ expected_but_none = true;
+ }
+ for (i = 0; i < __MAX_TRACE_EVENTS; i++) {
+ if (!matches_per_type[i])
+ continue;
+ stat_line = test_sprintf("%s%s[%zu] ", stat_line ?: "",
+ trace_event_names[i],
+ matches_per_type[i]);
+ if (!stat_line)
+ test_error("test_sprintf()");
+ }
+
+ if (unexpected_events || expected_but_none) {
+ for (i = 0; i < exp_tps_nr; i++)
+ dump_trace_event(&exp_tps[i]);
+ }
+
+ if (unexpected_events)
+ return;
+
+ if (expected_but_none)
+ test_fail("Some trace events were expected, but didn't occur");
+ else if (total_matched)
+ test_ok("Trace events matched expectations: %zu %s",
+ total_matched, stat_line);
+ else
+ test_ok("No unexpected trace events during the test run");
+}
+
+#define dump_events(fmt, ...) \
+ __test_print(__test_msg, fmt, ##__VA_ARGS__)
+static void check_free_events(struct test_ftracer *tracer)
+{
+ const char **lines;
+ size_t nr;
+
+ if (!kernel_config_has(KCONFIG_FTRACE)) {
+ test_skip("kernel config doesn't have ftrace - no checks");
+ return;
+ }
+
+ nr = tracer_get_savedlines_nr(tracer);
+ lines = tracer_get_savedlines(tracer);
+ print_match_stats(!!nr);
+ if (!nr)
+ return;
+
+ errno = 0;
+ test_xfail("Trace events [%zu] were not expected:", nr);
+ while (nr)
+ dump_events("\t%s", lines[--nr]);
+}
+
+static int setup_tcp_trace_events(struct test_ftracer *tracer)
+{
+ char *filter;
+ size_t i;
+ int ret;
+
+ filter = test_sprintf("net_cookie == %zu || net_cookie == %zu",
+ ns_cookie1, ns_cookie2);
+ if (!filter)
+ return -ENOMEM;
+
+ for (i = 0; i < __MAX_TRACE_EVENTS; i++) {
+ char *event_name = test_sprintf("tcp/%s", trace_event_names[i]);
+
+ if (!event_name) {
+ ret = -ENOMEM;
+ break;
+ }
+ ret = setup_trace_event(tracer, event_name, filter);
+ free(event_name);
+ if (ret)
+ break;
+ }
+
+ free(filter);
+ return ret;
+}
+
+static void aolib_tracer_destroy(struct test_ftracer *tracer)
+{
+ check_free_events(tracer);
+ free_expected_events();
+}
+
+static bool aolib_tracer_expecting_more(void)
+{
+ size_t i;
+
+ for (i = 0; i < exp_tps_nr; i++)
+ if (!exp_tps[i].matched)
+ return true;
+ return false;
+}
+
+int setup_aolib_ftracer(void)
+{
+ struct test_ftracer *f;
+
+ f = create_ftracer("aolib", aolib_tracer_process_event,
+ aolib_tracer_destroy, aolib_tracer_expecting_more,
+ DEFAULT_FTRACE_BUFFER_KB, DEFAULT_TRACER_LINES_ARR);
+ if (!f)
+ return -1;
+
+ return setup_tcp_trace_events(f);
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c
new file mode 100644
index 000000000000..e4d0b173bc94
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c
@@ -0,0 +1,543 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include "../../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+static char ftrace_path[] = "ksft-ftrace-XXXXXX";
+static bool ftrace_mounted;
+uint64_t ns_cookie1, ns_cookie2;
+
+struct test_ftracer {
+ pthread_t tracer_thread;
+ int error;
+ char *instance_path;
+ FILE *trace_pipe;
+
+ enum ftracer_op (*process_line)(const char *line);
+ void (*destructor)(struct test_ftracer *tracer);
+ bool (*expecting_more)(void);
+
+ char **saved_lines;
+ size_t saved_lines_size;
+ size_t next_line_ind;
+
+ pthread_cond_t met_all_expected;
+ pthread_mutex_t met_all_expected_lock;
+
+ struct test_ftracer *next;
+};
+
+static struct test_ftracer *ftracers;
+static pthread_mutex_t ftracers_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static int mount_ftrace(void)
+{
+ if (!mkdtemp(ftrace_path))
+ test_error("Can't create temp dir");
+
+ if (mount("tracefs", ftrace_path, "tracefs", 0, "rw"))
+ return -errno;
+
+ ftrace_mounted = true;
+
+ return 0;
+}
+
+static void unmount_ftrace(void)
+{
+ if (ftrace_mounted && umount(ftrace_path))
+ test_print("Failed on cleanup: can't unmount tracefs: %m");
+
+ if (rmdir(ftrace_path))
+ test_error("Failed on cleanup: can't remove ftrace dir %s",
+ ftrace_path);
+}
+
+struct opts_list_t {
+ char *opt_name;
+ struct opts_list_t *next;
+};
+
+static int disable_trace_options(const char *ftrace_path)
+{
+ struct opts_list_t *opts_list = NULL;
+ char *fopts, *line = NULL;
+ size_t buf_len = 0;
+ ssize_t line_len;
+ int ret = 0;
+ FILE *opts;
+
+ fopts = test_sprintf("%s/%s", ftrace_path, "trace_options");
+ if (!fopts)
+ return -ENOMEM;
+
+ opts = fopen(fopts, "r+");
+ if (!opts) {
+ ret = -errno;
+ goto out_free;
+ }
+
+ while ((line_len = getline(&line, &buf_len, opts)) != -1) {
+ struct opts_list_t *tmp;
+
+ if (!strncmp(line, "no", 2))
+ continue;
+
+ tmp = malloc(sizeof(*tmp));
+ if (!tmp) {
+ ret = -ENOMEM;
+ goto out_free_opts_list;
+ }
+ tmp->next = opts_list;
+ tmp->opt_name = test_sprintf("no%s", line);
+ if (!tmp->opt_name) {
+ ret = -ENOMEM;
+ free(tmp);
+ goto out_free_opts_list;
+ }
+ opts_list = tmp;
+ }
+
+ while (opts_list) {
+ struct opts_list_t *tmp = opts_list;
+
+ fseek(opts, 0, SEEK_SET);
+ fwrite(tmp->opt_name, 1, strlen(tmp->opt_name), opts);
+
+ opts_list = opts_list->next;
+ free(tmp->opt_name);
+ free(tmp);
+ }
+
+out_free_opts_list:
+ while (opts_list) {
+ struct opts_list_t *tmp = opts_list;
+
+ opts_list = opts_list->next;
+ free(tmp->opt_name);
+ free(tmp);
+ }
+ free(line);
+ fclose(opts);
+out_free:
+ free(fopts);
+ return ret;
+}
+
+static int setup_buffer_size(const char *ftrace_path, size_t sz)
+{
+ char *fbuf_size = test_sprintf("%s/buffer_size_kb", ftrace_path);
+ int ret;
+
+ if (!fbuf_size)
+ return -1;
+
+ ret = test_echo(fbuf_size, 0, "%zu", sz);
+ free(fbuf_size);
+ return ret;
+}
+
+static int setup_ftrace_instance(struct test_ftracer *tracer, const char *name)
+{
+ char *tmp;
+
+ tmp = test_sprintf("%s/instances/ksft-%s-XXXXXX", ftrace_path, name);
+ if (!tmp)
+ return -ENOMEM;
+
+ tracer->instance_path = mkdtemp(tmp);
+ if (!tracer->instance_path) {
+ free(tmp);
+ return -errno;
+ }
+
+ return 0;
+}
+
+static void remove_ftrace_instance(struct test_ftracer *tracer)
+{
+ if (rmdir(tracer->instance_path))
+ test_print("Failed on cleanup: can't remove ftrace instance %s",
+ tracer->instance_path);
+ free(tracer->instance_path);
+}
+
+static void tracer_cleanup(void *arg)
+{
+ struct test_ftracer *tracer = arg;
+
+ fclose(tracer->trace_pipe);
+}
+
+static void tracer_set_error(struct test_ftracer *tracer, int error)
+{
+ if (!tracer->error)
+ tracer->error = error;
+}
+
+const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer)
+{
+ return tracer->next_line_ind;
+}
+
+const char **tracer_get_savedlines(struct test_ftracer *tracer)
+{
+ return (const char **)tracer->saved_lines;
+}
+
+static void *tracer_thread_func(void *arg)
+{
+ struct test_ftracer *tracer = arg;
+
+ pthread_cleanup_push(tracer_cleanup, arg);
+
+ while (tracer->next_line_ind < tracer->saved_lines_size) {
+ char **lp = &tracer->saved_lines[tracer->next_line_ind];
+ enum ftracer_op op;
+ size_t buf_len = 0;
+ ssize_t line_len;
+
+ line_len = getline(lp, &buf_len, tracer->trace_pipe);
+ if (line_len == -1)
+ break;
+
+ pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+ op = tracer->process_line(*lp);
+ pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+
+ if (tracer->expecting_more) {
+ pthread_mutex_lock(&tracer->met_all_expected_lock);
+ if (!tracer->expecting_more())
+ pthread_cond_signal(&tracer->met_all_expected);
+ pthread_mutex_unlock(&tracer->met_all_expected_lock);
+ }
+
+ if (op == FTRACER_LINE_DISCARD)
+ continue;
+ if (op == FTRACER_EXIT)
+ break;
+ if (op != FTRACER_LINE_PRESERVE)
+ test_error("unexpected tracer command %d", op);
+
+ tracer->next_line_ind++;
+ buf_len = 0;
+ }
+ test_print("too many lines in ftracer buffer %zu, exiting tracer",
+ tracer->next_line_ind);
+
+ pthread_cleanup_pop(1);
+ return NULL;
+}
+
+static int setup_trace_thread(struct test_ftracer *tracer)
+{
+ int ret = 0;
+ char *path;
+
+ path = test_sprintf("%s/trace_pipe", tracer->instance_path);
+ if (!path)
+ return -ENOMEM;
+
+ tracer->trace_pipe = fopen(path, "r");
+ if (!tracer->trace_pipe) {
+ ret = -errno;
+ goto out_free;
+ }
+
+ if (pthread_create(&tracer->tracer_thread, NULL,
+ tracer_thread_func, (void *)tracer)) {
+ ret = -errno;
+ fclose(tracer->trace_pipe);
+ }
+
+out_free:
+ free(path);
+ return ret;
+}
+
+static void stop_trace_thread(struct test_ftracer *tracer)
+{
+ void *res;
+
+ if (pthread_cancel(tracer->tracer_thread)) {
+ test_print("Can't stop tracer pthread: %m");
+ tracer_set_error(tracer, -errno);
+ }
+ if (pthread_join(tracer->tracer_thread, &res)) {
+ test_print("Can't join tracer pthread: %m");
+ tracer_set_error(tracer, -errno);
+ }
+ if (res != PTHREAD_CANCELED) {
+ test_print("Tracer thread wasn't canceled");
+ tracer_set_error(tracer, -errno);
+ }
+ if (tracer->error)
+ test_fail("tracer errored by %s", strerror(tracer->error));
+}
+
+static void final_wait_for_events(struct test_ftracer *tracer,
+ unsigned timeout_sec)
+{
+ struct timespec timeout;
+ struct timeval now;
+ int ret = 0;
+
+ if (!tracer->expecting_more)
+ return;
+
+ pthread_mutex_lock(&tracer->met_all_expected_lock);
+ gettimeofday(&now, NULL);
+ timeout.tv_sec = now.tv_sec + timeout_sec;
+ timeout.tv_nsec = now.tv_usec * 1000;
+
+ while (tracer->expecting_more() && ret != ETIMEDOUT)
+ ret = pthread_cond_timedwait(&tracer->met_all_expected,
+ &tracer->met_all_expected_lock, &timeout);
+ pthread_mutex_unlock(&tracer->met_all_expected_lock);
+}
+
+int setup_trace_event(struct test_ftracer *tracer,
+ const char *event, const char *filter)
+{
+ char *enable_path, *filter_path, *instance = tracer->instance_path;
+ int ret;
+
+ enable_path = test_sprintf("%s/events/%s/enable", instance, event);
+ if (!enable_path)
+ return -ENOMEM;
+
+ filter_path = test_sprintf("%s/events/%s/filter", instance, event);
+ if (!filter_path) {
+ ret = -ENOMEM;
+ goto out_free;
+ }
+
+ ret = test_echo(filter_path, 0, "%s", filter);
+ if (!ret)
+ ret = test_echo(enable_path, 0, "1");
+
+out_free:
+ free(filter_path);
+ free(enable_path);
+ return ret;
+}
+
+struct test_ftracer *create_ftracer(const char *name,
+ enum ftracer_op (*process_line)(const char *line),
+ void (*destructor)(struct test_ftracer *tracer),
+ bool (*expecting_more)(void),
+ size_t lines_buf_sz, size_t buffer_size_kb)
+{
+ struct test_ftracer *tracer;
+ int err;
+
+ /* XXX: separate __create_ftracer() helper and do here
+ * if (!kernel_config_has(KCONFIG_FTRACE))
+ * return NULL;
+ */
+
+ tracer = malloc(sizeof(*tracer));
+ if (!tracer) {
+ test_print("malloc()");
+ return NULL;
+ }
+
+ memset(tracer, 0, sizeof(*tracer));
+
+ err = setup_ftrace_instance(tracer, name);
+ if (err) {
+ test_print("setup_ftrace_instance(): %d", err);
+ goto err_free;
+ }
+
+ err = disable_trace_options(tracer->instance_path);
+ if (err) {
+ test_print("disable_trace_options(): %d", err);
+ goto err_remove;
+ }
+
+ err = setup_buffer_size(tracer->instance_path, buffer_size_kb);
+ if (err) {
+ test_print("disable_trace_options(): %d", err);
+ goto err_remove;
+ }
+
+ tracer->saved_lines = calloc(lines_buf_sz, sizeof(tracer->saved_lines[0]));
+ if (!tracer->saved_lines) {
+ test_print("calloc()");
+ goto err_remove;
+ }
+ tracer->saved_lines_size = lines_buf_sz;
+
+ tracer->process_line = process_line;
+ tracer->destructor = destructor;
+ tracer->expecting_more = expecting_more;
+
+ err = pthread_cond_init(&tracer->met_all_expected, NULL);
+ if (err) {
+ test_print("pthread_cond_init(): %d", err);
+ goto err_free_lines;
+ }
+
+ err = pthread_mutex_init(&tracer->met_all_expected_lock, NULL);
+ if (err) {
+ test_print("pthread_mutex_init(): %d", err);
+ goto err_cond_destroy;
+ }
+
+ err = setup_trace_thread(tracer);
+ if (err) {
+ test_print("setup_trace_thread(): %d", err);
+ goto err_mutex_destroy;
+ }
+
+ pthread_mutex_lock(&ftracers_lock);
+ tracer->next = ftracers;
+ ftracers = tracer;
+ pthread_mutex_unlock(&ftracers_lock);
+
+ return tracer;
+
+err_mutex_destroy:
+ pthread_mutex_destroy(&tracer->met_all_expected_lock);
+err_cond_destroy:
+ pthread_cond_destroy(&tracer->met_all_expected);
+err_free_lines:
+ free(tracer->saved_lines);
+err_remove:
+ remove_ftrace_instance(tracer);
+err_free:
+ free(tracer);
+ return NULL;
+}
+
+static void __destroy_ftracer(struct test_ftracer *tracer)
+{
+ size_t i;
+
+ final_wait_for_events(tracer, TEST_TIMEOUT_SEC);
+ stop_trace_thread(tracer);
+ remove_ftrace_instance(tracer);
+ if (tracer->destructor)
+ tracer->destructor(tracer);
+ for (i = 0; i < tracer->saved_lines_size; i++)
+ free(tracer->saved_lines[i]);
+ pthread_cond_destroy(&tracer->met_all_expected);
+ pthread_mutex_destroy(&tracer->met_all_expected_lock);
+ free(tracer);
+}
+
+void destroy_ftracer(struct test_ftracer *tracer)
+{
+ pthread_mutex_lock(&ftracers_lock);
+ if (tracer == ftracers) {
+ ftracers = tracer->next;
+ } else {
+ struct test_ftracer *f = ftracers;
+
+ while (f->next != tracer) {
+ if (!f->next)
+ test_error("tracers list corruption or double free %p", tracer);
+ f = f->next;
+ }
+ f->next = tracer->next;
+ }
+ tracer->next = NULL;
+ pthread_mutex_unlock(&ftracers_lock);
+ __destroy_ftracer(tracer);
+}
+
+static void destroy_all_ftracers(void)
+{
+ struct test_ftracer *f;
+
+ pthread_mutex_lock(&ftracers_lock);
+ f = ftracers;
+ ftracers = NULL;
+ pthread_mutex_unlock(&ftracers_lock);
+
+ while (f) {
+ struct test_ftracer *n = f->next;
+
+ f->next = NULL;
+ __destroy_ftracer(f);
+ f = n;
+ }
+}
+
+static void test_unset_tracing(void)
+{
+ destroy_all_ftracers();
+ unmount_ftrace();
+}
+
+int test_setup_tracing(void)
+{
+ /*
+ * Just a basic protection - this should be called only once from
+ * lib/kconfig. Not thread safe, which is fine as it's early, before
+ * threads are created.
+ */
+ static int already_set;
+ int err;
+
+ if (already_set)
+ return -1;
+
+ /* Needs net-namespace cookies for filters */
+ if (ns_cookie1 == ns_cookie2) {
+ test_print("net-namespace cookies: %" PRIu64 " == %" PRIu64 ", can't set up tracing",
+ ns_cookie1, ns_cookie2);
+ return -1;
+ }
+
+ already_set = 1;
+
+ test_add_destructor(test_unset_tracing);
+
+ err = mount_ftrace();
+ if (err) {
+ test_print("failed to mount_ftrace(): %d", err);
+ return err;
+ }
+
+ return setup_aolib_ftracer();
+}
+
+static int get_ns_cookie(int nsfd, uint64_t *out)
+{
+ int old_ns = switch_save_ns(nsfd);
+ socklen_t size = sizeof(*out);
+ int sk;
+
+ sk = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0) {
+ test_print("socket(): %m");
+ return -errno;
+ }
+
+ if (getsockopt(sk, SOL_SOCKET, SO_NETNS_COOKIE, out, &size)) {
+ test_print("getsockopt(SO_NETNS_COOKIE): %m");
+ close(sk);
+ return -errno;
+ }
+
+ close(sk);
+ switch_close_ns(old_ns);
+ return 0;
+}
+
+void test_init_ftrace(int nsfd1, int nsfd2)
+{
+ get_ns_cookie(nsfd1, &ns_cookie1);
+ get_ns_cookie(nsfd2, &ns_cookie2);
+ /* Populate kernel config state */
+ kernel_config_has(KCONFIG_FTRACE);
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
index f279ffc3843b..9f1c175846f8 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
@@ -6,7 +6,7 @@
#include "aolib.h"
struct kconfig_t {
- int _errno; /* the returned error if not supported */
+ int _error; /* negative errno if not supported */
int (*check_kconfig)(int *error);
};
@@ -62,7 +62,7 @@ static int has_tcp_ao(int *err)
memcpy(&tmp.addr, &addr, sizeof(addr));
*err = 0;
if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) {
- *err = errno;
+ *err = -errno;
if (errno != ENOPROTOOPT)
ret = -errno;
}
@@ -87,7 +87,7 @@ static int has_tcp_md5(int *err)
*/
*err = 0;
if (test_set_md5(sk, addr_any, 0, -1, DEFAULT_TEST_PASSWORD)) {
- *err = errno;
+ *err = -errno;
if (errno != ENOPROTOOPT && errno == ENOMEM) {
test_print("setsockopt(TCP_MD5SIG_EXT): %m");
ret = -errno;
@@ -116,13 +116,21 @@ static int has_vrfs(int *err)
return ret;
}
+static int has_ftrace(int *err)
+{
+ *err = test_setup_tracing();
+ return 0;
+}
+
+#define KCONFIG_UNKNOWN 1
static pthread_mutex_t kconfig_lock = PTHREAD_MUTEX_INITIALIZER;
static struct kconfig_t kconfig[__KCONFIG_LAST__] = {
- { -1, has_net_ns },
- { -1, has_veth },
- { -1, has_tcp_ao },
- { -1, has_tcp_md5 },
- { -1, has_vrfs },
+ { KCONFIG_UNKNOWN, has_net_ns },
+ { KCONFIG_UNKNOWN, has_veth },
+ { KCONFIG_UNKNOWN, has_tcp_ao },
+ { KCONFIG_UNKNOWN, has_tcp_md5 },
+ { KCONFIG_UNKNOWN, has_vrfs },
+ { KCONFIG_UNKNOWN, has_ftrace },
};
const char *tests_skip_reason[__KCONFIG_LAST__] = {
@@ -131,6 +139,7 @@ const char *tests_skip_reason[__KCONFIG_LAST__] = {
"Tests require TCP-AO support (CONFIG_TCP_AO)",
"setsockopt(TCP_MD5SIG_EXT) is not supported (CONFIG_TCP_MD5)",
"VRFs are not supported (CONFIG_NET_VRF)",
+ "Ftrace points are not supported (CONFIG_TRACEPOINTS)",
};
bool kernel_config_has(enum test_needs_kconfig k)
@@ -138,11 +147,11 @@ bool kernel_config_has(enum test_needs_kconfig k)
bool ret;
pthread_mutex_lock(&kconfig_lock);
- if (kconfig[k]._errno == -1) {
- if (kconfig[k].check_kconfig(&kconfig[k]._errno))
+ if (kconfig[k]._error == KCONFIG_UNKNOWN) {
+ if (kconfig[k].check_kconfig(&kconfig[k]._error))
test_error("Failed to initialize kconfig %u", k);
}
- ret = kconfig[k]._errno == 0;
+ ret = kconfig[k]._error == 0;
pthread_mutex_unlock(&kconfig_lock);
return ret;
}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c
index e408b9243b2c..a27cc03c9fbd 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/setup.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c
@@ -111,7 +111,7 @@ static void sig_int(int signo)
int open_netns(void)
{
- const char *netns_path = "/proc/self/ns/net";
+ const char *netns_path = "/proc/thread-self/ns/net";
int fd;
fd = open(netns_path, O_RDONLY);
@@ -142,6 +142,13 @@ int switch_save_ns(int new_ns)
return ret;
}
+void switch_close_ns(int fd)
+{
+ if (setns(fd, CLONE_NEWNET))
+ test_error("setns()");
+ close(fd);
+}
+
static int nsfd_outside = -1;
static int nsfd_parent = -1;
static int nsfd_child = -1;
@@ -243,9 +250,9 @@ void __test_init(unsigned int ntests, int family, unsigned int prefix,
test_print("rand seed %u", (unsigned int)seed);
srand(seed);
-
ksft_print_header();
init_namespaces();
+ test_init_ftrace(nsfd_parent, nsfd_child);
if (add_veth(veth_name, nsfd_parent, nsfd_child))
test_error("Failed to add veth");
@@ -296,7 +303,7 @@ static bool is_optmem_namespaced(void)
int old_ns = switch_save_ns(nsfd_child);
optmem_ns = !access(optmem_file, F_OK);
- switch_ns(old_ns);
+ switch_close_ns(old_ns);
}
return !!optmem_ns;
}
@@ -317,7 +324,7 @@ size_t test_get_optmem(void)
test_error("can't read from %s", optmem_file);
fclose(foptmem);
if (!is_optmem_namespaced())
- switch_ns(old_ns);
+ switch_close_ns(old_ns);
return ret;
}
@@ -339,7 +346,7 @@ static void __test_set_optmem(size_t new, size_t *old)
test_error("can't write %zu to %s", new, optmem_file);
fclose(foptmem);
if (!is_optmem_namespaced())
- switch_ns(old_ns);
+ switch_close_ns(old_ns);
}
static void test_revert_optmem(void)
diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c
index 15aeb0963058..0ffda966c677 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/sock.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c
@@ -379,7 +379,6 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
key_dump[0].nkeys = nr_keys;
key_dump[0].get_all = 1;
- key_dump[0].get_all = 1;
err = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS,
key_dump, &key_dump_sz);
if (err) {
diff --git a/tools/testing/selftests/net/tcp_ao/lib/utils.c b/tools/testing/selftests/net/tcp_ao/lib/utils.c
index 372daca525f5..bdf5522c9213 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/utils.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/utils.c
@@ -21,6 +21,32 @@ void randomize_buffer(void *buf, size_t buflen)
}
}
+__printf(3, 4) int test_echo(const char *fname, bool append,
+ const char *fmt, ...)
+{
+ size_t len, written;
+ va_list vargs;
+ char *msg;
+ FILE *f;
+
+ f = fopen(fname, append ? "a" : "w");
+ if (!f)
+ return -errno;
+
+ va_start(vargs, fmt);
+ msg = test_snprintf(fmt, vargs);
+ va_end(vargs);
+ if (!msg) {
+ fclose(f);
+ return -1;
+ }
+ len = strlen(msg);
+ written = fwrite(msg, 1, len, f);
+ fclose(f);
+ free(msg);
+ return written == len ? 0 : -1;
+}
+
const struct sockaddr_in6 addr_any6 = {
.sin6_family = AF_INET6,
};
diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c
index 8fdc808df325..ecc6f1e3a414 100644
--- a/tools/testing/selftests/net/tcp_ao/restore.c
+++ b/tools/testing/selftests/net/tcp_ao/restore.c
@@ -64,6 +64,7 @@ static void try_server_run(const char *tst_name, unsigned int port,
else
test_ok("%s: server alive", tst_name);
}
+ synchronize_threads(); /* 3: counters checks */
if (test_get_tcp_ao_counters(sk, &ao2))
test_error("test_get_tcp_ao_counters()");
after_cnt = netstat_get_one(cnt_name, NULL);
@@ -71,10 +72,10 @@ static void try_server_run(const char *tst_name, unsigned int port,
test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
if (after_cnt <= before_cnt) {
- test_fail("%s: %s counter did not increase: %zu <= %zu",
+ test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
tst_name, cnt_name, after_cnt, before_cnt);
} else {
- test_ok("%s: counter %s increased %zu => %zu",
+ test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
tst_name, cnt_name, before_cnt, after_cnt);
}
@@ -82,7 +83,7 @@ static void try_server_run(const char *tst_name, unsigned int port,
* Before close() as that will send FIN and move the peer in TCP_CLOSE
* and that will prevent reading AO counters from the peer's socket.
*/
- synchronize_threads(); /* 3: verified => closed */
+ synchronize_threads(); /* 4: verified => closed */
out:
close(sk);
}
@@ -176,6 +177,7 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port,
else
test_ok("%s: post-migrate connection is alive", tst_name);
}
+ synchronize_threads(); /* 3: counters checks */
if (test_get_tcp_ao_counters(sk, &ao2))
test_error("test_get_tcp_ao_counters()");
after_cnt = netstat_get_one(cnt_name, NULL);
@@ -183,13 +185,13 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port,
test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
if (after_cnt <= before_cnt) {
- test_fail("%s: %s counter did not increase: %zu <= %zu",
+ test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
tst_name, cnt_name, after_cnt, before_cnt);
} else {
- test_ok("%s: counter %s increased %zu => %zu",
+ test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
tst_name, cnt_name, before_cnt, after_cnt);
}
- synchronize_threads(); /* 3: verified => closed */
+ synchronize_threads(); /* 4: verified => closed */
close(sk);
}
@@ -206,22 +208,36 @@ static void *client_fn(void *arg)
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
ao_img.snt_isn += 1;
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+ -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
+ port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
test_sk_restore("TCP-AO with wrong send ISN", port++,
&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
ao_img.rcv_isn += 1;
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+ -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
+ port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
test_sk_restore("TCP-AO with wrong receive ISN", port++,
&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
ao_img.snd_sne += 1;
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
+ -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
+ /* not expecting server => client mismatches as only snd sne is broken */
test_sk_restore("TCP-AO with wrong send SEQ ext number", port++,
&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
TEST_CNT_NS_BAD | TEST_CNT_GOOD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
ao_img.rcv_sne += 1;
+ /* not expecting client => server mismatches as only rcv sne is broken */
+ trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
+ port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++,
&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
TEST_CNT_NS_GOOD | TEST_CNT_BAD);
@@ -231,6 +247,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(20, server_fn, client_fn);
+ test_init(21, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c
index a2fe88d35ac0..6364facaa63e 100644
--- a/tools/testing/selftests/net/tcp_ao/rst.c
+++ b/tools/testing/selftests/net/tcp_ao/rst.c
@@ -455,6 +455,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(14, server_fn, client_fn);
+ test_init(15, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c
index a5698b0a3718..3ecd2b58de6a 100644
--- a/tools/testing/selftests/net/tcp_ao/self-connect.c
+++ b/tools/testing/selftests/net/tcp_ao/self-connect.c
@@ -87,7 +87,7 @@ static void tcp_self_connect(const char *tst, unsigned int port,
netstat_free(ns_after);
if (after_aogood <= before_aogood) {
- test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu",
+ test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64,
tst, after_aogood, before_aogood);
close(sk);
return;
@@ -148,7 +148,7 @@ static void tcp_self_connect(const char *tst, unsigned int port,
netstat_free(ns_after);
close(sk);
if (after_aogood <= before_aogood) {
- test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu",
+ test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64,
tst, after_aogood, before_aogood);
return;
}
@@ -163,17 +163,26 @@ static void *client_fn(void *arg)
setup_lo_intf("lo");
tcp_self_connect("self-connect(same keyids)", port++, false, false);
+
+ /* expecting rnext to change based on the first segment RNext != Current */
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr,
+ port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1);
tcp_self_connect("self-connect(different keyids)", port++, true, false);
tcp_self_connect("self-connect(restore)", port, false, true);
- port += 2;
+ port += 2; /* restore test restores over different port */
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr,
+ port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1);
+ /* intentionally on restore they are added to the socket in different order */
+ trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr,
+ port + 1, port + 1, 0, -1, -1, -1, -1, -1, 5, 7, -1);
tcp_self_connect("self-connect(restore, different keyids)", port, true, true);
- port += 2;
+ port += 2; /* restore test restores over different port */
return NULL;
}
int main(int argc, char *argv[])
{
- test_init(4, client_fn, NULL);
+ test_init(5, client_fn, NULL);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c
index ad4e77d6823e..8901a6785dc8 100644
--- a/tools/testing/selftests/net/tcp_ao/seq-ext.c
+++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c
@@ -116,7 +116,15 @@ static void *server_fn(void *arg)
sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
client_new_port, &ao1);
- synchronize_threads(); /* 5: verify counters during SEQ-number rollover */
+ trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_addr,
+ this_ip_dest, test_server_port + 1, client_new_port, 1);
+ trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_dest,
+ this_ip_addr, client_new_port, test_server_port + 1, 1);
+ trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_addr,
+ this_ip_dest, test_server_port + 1, client_new_port, 1);
+ trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_dest,
+ this_ip_addr, client_new_port, test_server_port + 1, 1);
+ synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */
bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
if (bytes != quota) {
if (bytes > 0)
@@ -127,6 +135,7 @@ static void *server_fn(void *arg)
test_ok("server alive");
}
+ synchronize_threads(); /* 6: verify counters after SEQ-number rollover */
if (test_get_tcp_ao_counters(sk, &ao2))
test_error("test_get_tcp_ao_counters()");
after_good = netstat_get_one("TCPAOGood", NULL);
@@ -134,15 +143,15 @@ static void *server_fn(void *arg)
test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
if (after_good <= before_good) {
- test_fail("TCPAOGood counter did not increase: %zu <= %zu",
+ test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64,
after_good, before_good);
} else {
- test_ok("TCPAOGood counter increased %zu => %zu",
+ test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64,
before_good, after_good);
}
after_bad = netstat_get_one("TCPAOBad", NULL);
if (after_bad)
- test_fail("TCPAOBad counter is non-zero: %zu", after_bad);
+ test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad);
else
test_ok("TCPAOBad counter didn't increase");
test_enable_repair(sk);
@@ -206,12 +215,13 @@ static void *client_fn(void *arg)
sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
test_server_port + 1, &ao1);
- synchronize_threads(); /* 5: verify counters during SEQ-number rollover */
+ synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */
if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
test_fail("post-migrate verify failed");
else
test_ok("post-migrate connection alive");
+ synchronize_threads(); /* 5: verify counters after SEQ-number rollover */
if (test_get_tcp_ao_counters(sk, &ao2))
test_error("test_get_tcp_ao_counters()");
after_good = netstat_get_one("TCPAOGood", NULL);
@@ -219,15 +229,15 @@ static void *client_fn(void *arg)
test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
if (after_good <= before_good) {
- test_fail("TCPAOGood counter did not increase: %zu <= %zu",
+ test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64,
after_good, before_good);
} else {
- test_ok("TCPAOGood counter increased %zu => %zu",
+ test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64,
before_good, after_good);
}
after_bad = netstat_get_one("TCPAOBad", NULL);
if (after_bad)
- test_fail("TCPAOBad counter is non-zero: %zu", after_bad);
+ test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad);
else
test_ok("TCPAOBad counter didn't increase");
@@ -240,6 +250,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(7, server_fn, client_fn);
+ test_init(8, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
index 517930f9721b..084db4ecdff6 100644
--- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
+++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
@@ -30,8 +30,8 @@ static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info,
#define __cmp_ao(member) \
do { \
if (info->member != tmp.member) { \
- test_fail("%s: getsockopt(): " __stringify(member) " %zu != %zu", \
- tst, (size_t)info->member, (size_t)tmp.member); \
+ test_fail("%s: getsockopt(): " __stringify(member) " %" PRIu64 " != %" PRIu64, \
+ tst, (uint64_t)info->member, (uint64_t)tmp.member); \
return; \
} \
} while(0)
@@ -830,6 +830,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(120, client_fn, NULL);
+ test_init(121, client_fn, NULL);
return 0;
}
diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
index 6b59a652159f..f779e5892bc1 100644
--- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
+++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
@@ -70,6 +70,7 @@ static void try_accept(const char *tst_name, unsigned int port,
timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
err = test_wait_fd(lsk, timeout, 0);
+ synchronize_threads(); /* connect()/accept() timeouts */
if (err == -ETIMEDOUT) {
if (!fault(TIMEOUT))
test_fail("timed out for accept()");
@@ -100,10 +101,10 @@ static void try_accept(const char *tst_name, unsigned int port,
after_cnt = netstat_get_one(cnt_name, NULL);
if (after_cnt <= before_cnt) {
- test_fail("%s: %s counter did not increase: %zu <= %zu",
+ test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
tst_name, cnt_name, after_cnt, before_cnt);
} else {
- test_ok("%s: counter %s increased %zu => %zu",
+ test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
tst_name, cnt_name, before_cnt, after_cnt);
}
if (ao_addr)
@@ -283,6 +284,7 @@ static void try_connect(const char *tst_name, unsigned int port,
timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+ synchronize_threads(); /* connect()/accept() timeouts */
if (ret < 0) {
if (fault(KEYREJECT) && ret == -EKEYREJECTED)
test_ok("%s: connect() was prevented", tst_name);
@@ -451,6 +453,7 @@ static void try_to_add(const char *tst_name, unsigned int port,
timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+ synchronize_threads(); /* connect()/accept() timeouts */
if (ret <= 0) {
test_error("%s: connect() returned %d", tst_name, ret);
goto out;
@@ -671,24 +674,38 @@ static void *client_fn(void *arg)
try_connect("AO server (INADDR_ANY): AO client", port++, NULL, 0,
&addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO server (INADDR_ANY): unsigned client", port++, NULL, 0,
NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr);
try_connect("AO server (AO_REQUIRED): AO client", port++, NULL, 0,
&addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &client2);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("MD5 server (INADDR_ANY): AO client", port++, NULL, 0,
&addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
try_connect("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("MD5 server (INADDR_ANY): no sign client", port++, NULL, 0,
NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("no sign server: AO client", port++, NULL, 0,
&addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("no sign server: MD5 client", port++, &addr_any, 0,
NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
try_connect("no sign server: no sign client", port++, NULL, 0,
@@ -696,25 +713,37 @@ static void *client_fn(void *arg)
try_connect("AO+MD5 server: AO client (matching)", port++, NULL, 0,
&addr_any, 0, 100, 100, 0, 0, 1, &client2);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("AO+MD5 server: AO client (misconfig, matching MD5)",
port++, NULL, 0, &addr_any, 0, 100, 100, 0,
FAULT_TIMEOUT, 1, &this_ip_addr);
+ trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, client3, this_ip_dest,
+ -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1);
try_connect("AO+MD5 server: AO client (misconfig, non-matching)",
port++, NULL, 0, &addr_any, 0, 100, 100, 0,
FAULT_TIMEOUT, 1, &client3);
try_connect("AO+MD5 server: MD5 client (matching)", port++, &addr_any, 0,
NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr);
+ trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client2,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO+MD5 server: MD5 client (misconfig, matching AO)",
port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
1, &client2);
+ trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client3,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO+MD5 server: MD5 client (misconfig, non-matching)",
port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
1, &client3);
try_connect("AO+MD5 server: no sign client (unmatched)",
port++, NULL, 0, NULL, 0, 100, 100, 0, 0, 1, &client3);
+ trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO+MD5 server: no sign client (misconfig, matching AO)",
port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
1, &client2);
+ trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr,
+ this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0);
try_connect("AO+MD5 server: no sign client (misconfig, matching MD5)",
port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
1, &this_ip_addr);
@@ -736,6 +765,6 @@ static void *client_fn(void *arg)
int main(int argc, char *argv[])
{
- test_init(72, server_fn, client_fn);
+ test_init(73, server_fn, client_fn);
return 0;
}
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index ec60a16c9307..d626f22f9550 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -356,8 +356,12 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
}
}
- if (batch > 1)
+ if (batch > 1) {
fprintf(stderr, "batched %d timestamps\n", batch);
+ } else if (!batch) {
+ fprintf(stderr, "Failed to report timestamps\n");
+ test_failed = true;
+ }
}
static int recv_errmsg(int fd)
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index 11a1ebda564f..d5ffd8c9172e 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -7,8 +7,6 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.bpf.o"
-
# set global exit status, but never reset nonzero one.
check_err()
{
@@ -38,7 +36,7 @@ cfg_veth() {
ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
ip -netns "${PEER_NS}" link set dev veth1 up
- ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
+ ip netns exec "${PEER_NS}" ethtool -K veth1 gro on
}
run_one() {
@@ -46,17 +44,19 @@ run_one() {
local -r all="$@"
local -r tx_args=${all%rx*}
local -r rx_args=${all#*rx}
+ local ret=0
cfg_veth
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} && \
- echo "ok" || \
- echo "failed" &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} &
+ local PID1=$!
wait_local_port_listen ${PEER_NS} 8000 udp
./udpgso_bench_tx ${tx_args}
- ret=$?
- wait $(jobs -p)
+ check_err $?
+ wait ${PID1}
+ check_err $?
+ [ "$ret" -eq 0 ] && echo "ok" || echo "failed"
return $ret
}
@@ -73,6 +73,7 @@ run_one_nat() {
local -r all="$@"
local -r tx_args=${all%rx*}
local -r rx_args=${all#*rx}
+ local ret=0
if [[ ${tx_args} = *-4* ]]; then
ipt_cmd=iptables
@@ -93,16 +94,17 @@ run_one_nat() {
# ... so that GRO will match the UDP_GRO enabled socket, but packets
# will land on the 'plain' one
ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 &
- pid=$!
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} && \
- echo "ok" || \
- echo "failed"&
+ local PID1=$!
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} &
+ local PID2=$!
wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
- ret=$?
- kill -INT $pid
- wait $(jobs -p)
+ check_err $?
+ kill -INT ${PID1}
+ wait ${PID2}
+ check_err $?
+ [ "$ret" -eq 0 ] && echo "ok" || echo "failed"
return $ret
}
@@ -111,20 +113,26 @@ run_one_2sock() {
local -r all="$@"
local -r tx_args=${all%rx*}
local -r rx_args=${all#*rx}
+ local ret=0
cfg_veth
ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 &
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} && \
- echo "ok" || \
- echo "failed" &
+ local PID1=$!
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} &
+ local PID2=$!
wait_local_port_listen "${PEER_NS}" 12345 udp
./udpgso_bench_tx ${tx_args} -p 12345
+ check_err $?
wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
- ret=$?
- wait $(jobs -p)
+ check_err $?
+ wait ${PID1}
+ check_err $?
+ wait ${PID2}
+ check_err $?
+ [ "$ret" -eq 0 ] && echo "ok" || echo "failed"
return $ret
}
@@ -196,11 +204,6 @@ run_all() {
return $ret
}
-if [ ! -f ${BPF_FILE} ]; then
- echo "Missing ${BPF_FILE}. Run 'make' first"
- exit -1
-fi
-
if [[ $# -eq 0 ]]; then
run_all
elif [[ $1 == "__subprocess" ]]; then
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 3e74cfa1a2bf..3f2fca02fec5 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -67,6 +67,7 @@ struct testcase {
int gso_len; /* mss after applying gso */
int r_num_mss; /* recv(): number of calls of full mss */
int r_len_last; /* recv(): size of last non-mss dgram, if any */
+ bool v6_ext_hdr; /* send() dgrams with IPv6 extension headers */
};
const struct in6_addr addr6 = {
@@ -77,6 +78,8 @@ const struct in_addr addr4 = {
__constant_htonl(0x0a000001), /* 10.0.0.1 */
};
+static const char ipv6_hopopts_pad1[8] = { 0 };
+
struct testcase testcases_v4[] = {
{
/* no GSO: send a single byte */
@@ -256,6 +259,13 @@ struct testcase testcases_v6[] = {
.r_num_mss = 2,
},
{
+ /* send 2 1B segments with extension headers */
+ .tlen = 2,
+ .gso_len = 1,
+ .r_num_mss = 2,
+ .v6_ext_hdr = true,
+ },
+ {
/* send 2B + 2B + 1B segments */
.tlen = 5,
.gso_len = 2,
@@ -396,11 +406,18 @@ static void run_one(struct testcase *test, int fdt, int fdr,
int i, ret, val, mss;
bool sent;
- fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
+ fprintf(stderr, "ipv%d tx:%d gso:%d %s%s\n",
addr->sa_family == AF_INET ? 4 : 6,
test->tlen, test->gso_len,
+ test->v6_ext_hdr ? "ext-hdr " : "",
test->tfail ? "(fail)" : "");
+ if (test->v6_ext_hdr) {
+ if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS,
+ ipv6_hopopts_pad1, sizeof(ipv6_hopopts_pad1)))
+ error(1, errno, "setsockopt ipv6 hopopts");
+ }
+
val = test->gso_len;
if (cfg_do_setsockopt) {
if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val)))
@@ -412,6 +429,12 @@ static void run_one(struct testcase *test, int fdt, int fdr,
error(1, 0, "send succeeded while expecting failure");
if (!sent && !test->tfail)
error(1, 0, "send failed while expecting success");
+
+ if (test->v6_ext_hdr) {
+ if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, NULL, 0))
+ error(1, errno, "setsockopt ipv6 hopopts clear");
+ }
+
if (!sent)
return;
diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
index f52aa5f7da52..3e751234ccfe 100755
--- a/tools/testing/selftests/net/unicast_extensions.sh
+++ b/tools/testing/selftests/net/unicast_extensions.sh
@@ -30,14 +30,7 @@
source lib.sh
-# nettest can be run from PATH or from same directory as this selftest
-if ! which nettest >/dev/null; then
- PATH=$PWD:$PATH
- if ! which nettest >/dev/null; then
- echo "'nettest' command not found; skipping tests"
- exit $ksft_skip
- fi
-fi
+check_gen_prog "nettest"
result=0
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
index 152171fb1fc8..e9c2f71da207 100755
--- a/tools/testing/selftests/net/vrf_route_leaking.sh
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -59,7 +59,6 @@
# while it is forwarded between different vrfs.
source lib.sh
-PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH
VERBOSE=0
PAUSE_ON_FAIL=no
DEFAULT_TTYPE=sym
@@ -636,6 +635,8 @@ EOF
# Some systems don't have a ping6 binary anymore
command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping)
+check_gen_prog "nettest"
+
TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_local ipv4_tcp_local
ipv4_udp_local ipv4_ping_ttl_asym ipv4_traceroute_asym"
TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_local ipv6_tcp_local ipv6_udp_local
diff --git a/tools/testing/selftests/net/xfrm_policy_add_speed.sh b/tools/testing/selftests/net/xfrm_policy_add_speed.sh
new file mode 100755
index 000000000000..2fab29d3cb91
--- /dev/null
+++ b/tools/testing/selftests/net/xfrm_policy_add_speed.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+source lib.sh
+
+timeout=4m
+ret=0
+tmp=$(mktemp)
+cleanup() {
+ cleanup_all_ns
+ rm -f "$tmp"
+}
+
+trap cleanup EXIT
+
+maxpolicies=100000
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && maxpolicies=10000
+
+do_dummies4() {
+ local dir="$1"
+ local max="$2"
+
+ local policies
+ local pfx
+ pfx=30
+ policies=0
+
+ ip netns exec "$ns" ip xfrm policy flush
+
+ for i in $(seq 1 100);do
+ local s
+ local d
+ for j in $(seq 1 255);do
+ s=$((i+0))
+ d=$((i+100))
+
+ for a in $(seq 1 8 255); do
+ policies=$((policies+1))
+ [ "$policies" -gt "$max" ] && return
+ echo xfrm policy add src 10.$s.$j.0/30 dst 10.$d.$j.$a/$pfx dir $dir action block
+ done
+ for a in $(seq 1 8 255); do
+ policies=$((policies+1))
+ [ "$policies" -gt "$max" ] && return
+ echo xfrm policy add src 10.$s.$j.$a/30 dst 10.$d.$j.0/$pfx dir $dir action block
+ done
+ done
+ done
+}
+
+setup_ns ns
+
+do_bench()
+{
+ local max="$1"
+
+ start=$(date +%s%3N)
+ do_dummies4 "out" "$max" > "$tmp"
+ if ! timeout "$timeout" ip netns exec "$ns" ip -batch "$tmp";then
+ echo "WARNING: policy insertion cancelled after $timeout"
+ ret=1
+ fi
+ stop=$(date +%s%3N)
+
+ result=$((stop-start))
+
+ policies=$(wc -l < "$tmp")
+ printf "Inserted %-06s policies in $result ms\n" $policies
+
+ have=$(ip netns exec "$ns" ip xfrm policy show | grep "action block" | wc -l)
+ if [ "$have" -ne "$policies" ]; then
+ echo "WARNING: mismatch, have $have policies, expected $policies"
+ ret=1
+ fi
+}
+
+p=100
+while [ $p -le "$maxpolicies" ]; do
+ do_bench "$p"
+ p="${p}0"
+done
+
+exit $ret
diff --git a/tools/testing/selftests/riscv/mm/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/mmap_bottomup.c
index 7f7d3eb8b9c9..f9ccae50349b 100644
--- a/tools/testing/selftests/riscv/mm/mmap_bottomup.c
+++ b/tools/testing/selftests/riscv/mm/mmap_bottomup.c
@@ -7,8 +7,6 @@
TEST(infinite_rlimit)
{
EXPECT_EQ(BOTTOM_UP, memory_layout());
-
- TEST_MMAPS;
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/mm/mmap_default.c b/tools/testing/selftests/riscv/mm/mmap_default.c
index 2ba3ec990006..3f53b6ecc326 100644
--- a/tools/testing/selftests/riscv/mm/mmap_default.c
+++ b/tools/testing/selftests/riscv/mm/mmap_default.c
@@ -7,8 +7,6 @@
TEST(default_rlimit)
{
EXPECT_EQ(TOP_DOWN, memory_layout());
-
- TEST_MMAPS;
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/mm/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h
index 3b29ca3bb3d4..75918d15919f 100644
--- a/tools/testing/selftests/riscv/mm/mmap_test.h
+++ b/tools/testing/selftests/riscv/mm/mmap_test.h
@@ -10,76 +10,9 @@
#define TOP_DOWN 0
#define BOTTOM_UP 1
-#if __riscv_xlen == 64
-uint64_t random_addresses[] = {
- 0x19764f0d73b3a9f0, 0x016049584cecef59, 0x3580bdd3562f4acd,
- 0x1164219f20b17da0, 0x07d97fcb40ff2373, 0x76ec528921272ee7,
- 0x4dd48c38a3de3f70, 0x2e11415055f6997d, 0x14b43334ac476c02,
- 0x375a60795aff19f6, 0x47f3051725b8ee1a, 0x4e697cf240494a9f,
- 0x456b59b5c2f9e9d1, 0x101724379d63cb96, 0x7fe9ad31619528c1,
- 0x2f417247c495c2ea, 0x329a5a5b82943a5e, 0x06d7a9d6adcd3827,
- 0x327b0b9ee37f62d5, 0x17c7b1851dfd9b76, 0x006ebb6456ec2cd9,
- 0x00836cd14146a134, 0x00e5c4dcde7126db, 0x004c29feadf75753,
- 0x00d8b20149ed930c, 0x00d71574c269387a, 0x0006ebe4a82acb7a,
- 0x0016135df51f471b, 0x00758bdb55455160, 0x00d0bdd949b13b32,
- 0x00ecea01e7c5f54b, 0x00e37b071b9948b1, 0x0011fdd00ff57ab3,
- 0x00e407294b52f5ea, 0x00567748c200ed20, 0x000d073084651046,
- 0x00ac896f4365463c, 0x00eb0d49a0b26216, 0x0066a2564a982a31,
- 0x002e0d20237784ae, 0x0000554ff8a77a76, 0x00006ce07a54c012,
- 0x000009570516d799, 0x00000954ca15b84d, 0x0000684f0d453379,
- 0x00002ae5816302b5, 0x0000042403fb54bf, 0x00004bad7392bf30,
- 0x00003e73bfa4b5e3, 0x00005442c29978e0, 0x00002803f11286b6,
- 0x000073875d745fc6, 0x00007cede9cb8240, 0x000027df84cc6a4f,
- 0x00006d7e0e74242a, 0x00004afd0b836e02, 0x000047d0e837cd82,
- 0x00003b42405efeda, 0x00001531bafa4c95, 0x00007172cae34ac4,
-};
-#else
-uint32_t random_addresses[] = {
- 0x8dc302e0, 0x929ab1e0, 0xb47683ba, 0xea519c73, 0xa19f1c90, 0xc49ba213,
- 0x8f57c625, 0xadfe5137, 0x874d4d95, 0xaa20f09d, 0xcf21ebfc, 0xda7737f1,
- 0xcedf392a, 0x83026c14, 0xccedca52, 0xc6ccf826, 0xe0cd9415, 0x997472ca,
- 0xa21a44c1, 0xe82196f5, 0xa23fd66b, 0xc28d5590, 0xd009cdce, 0xcf0be646,
- 0x8fc8c7ff, 0xe2a85984, 0xa3d3236b, 0x89a0619d, 0xc03db924, 0xb5d4cc1b,
- 0xb96ee04c, 0xd191da48, 0xb432a000, 0xaa2bebbc, 0xa2fcb289, 0xb0cca89b,
- 0xb0c18d6a, 0x88f58deb, 0xa4d42d1c, 0xe4d74e86, 0x99902b09, 0x8f786d31,
- 0xbec5e381, 0x9a727e65, 0xa9a65040, 0xa880d789, 0x8f1b335e, 0xfc821c1e,
- 0x97e34be4, 0xbbef84ed, 0xf447d197, 0xfd7ceee2, 0xe632348d, 0xee4590f4,
- 0x958992a5, 0xd57e05d6, 0xfd240970, 0xc5b0dcff, 0xd96da2c2, 0xa7ae041d,
-};
-#endif
-
-// Only works on 64 bit
-#if __riscv_xlen == 64
#define PROT (PROT_READ | PROT_WRITE)
#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
-/* mmap must return a value that doesn't use more bits than the hint address. */
-static inline unsigned long get_max_value(unsigned long input)
-{
- unsigned long max_bit = (1UL << (((sizeof(unsigned long) * 8) - 1 -
- __builtin_clzl(input))));
-
- return max_bit + (max_bit - 1);
-}
-
-#define TEST_MMAPS \
- ({ \
- void *mmap_addr; \
- for (int i = 0; i < ARRAY_SIZE(random_addresses); i++) { \
- mmap_addr = mmap((void *)random_addresses[i], \
- 5 * sizeof(int), PROT, FLAGS, 0, 0); \
- EXPECT_NE(MAP_FAILED, mmap_addr); \
- EXPECT_GE((void *)get_max_value(random_addresses[i]), \
- mmap_addr); \
- mmap_addr = mmap((void *)random_addresses[i], \
- 5 * sizeof(int), PROT, FLAGS, 0, 0); \
- EXPECT_NE(MAP_FAILED, mmap_addr); \
- EXPECT_GE((void *)get_max_value(random_addresses[i]), \
- mmap_addr); \
- } \
- })
-#endif /* __riscv_xlen == 64 */
-
static inline int memory_layout(void)
{
void *value1 = mmap(NULL, sizeof(int), PROT, FLAGS, 0, 0);
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index e3f97f90d8db..8c3a73461475 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -60,7 +60,9 @@
#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
#endif
+#ifndef MIN
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#endif
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index ee349187636f..4f255cec0c22 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -143,7 +143,6 @@ class PluginMgr:
except Exception as ee:
print('exception {} in call to pre_case for {} plugin'.
format(ee, pgn_inst.__class__))
- print('test_ordinal is {}'.format(test_ordinal))
print('testid is {}'.format(caseinfo['id']))
raise
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index 07c81c0093c0..16bd49492efa 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -6,10 +6,13 @@
*
* Kernel loop code stolen from Steven Rostedt <srostedt@redhat.com>
*/
-
+#define _GNU_SOURCE
#include <sys/time.h>
+#include <sys/types.h>
#include <stdio.h>
#include <signal.h>
+#include <stdint.h>
+#include <string.h>
#include <unistd.h>
#include <time.h>
#include <pthread.h>
@@ -18,6 +21,21 @@
#define DELAY 2
#define USECS_PER_SEC 1000000
+#define NSECS_PER_SEC 1000000000
+
+static void __fatal_error(const char *test, const char *name, const char *what)
+{
+ char buf[64];
+
+ strerror_r(errno, buf, sizeof(buf));
+
+ if (name && strlen(name))
+ ksft_exit_fail_msg("%s %s %s %s\n", test, name, what, buf);
+ else
+ ksft_exit_fail_msg("%s %s %s\n", test, what, buf);
+}
+
+#define fatal_error(name, what) __fatal_error(__func__, name, what)
static volatile int done;
@@ -74,24 +92,13 @@ static int check_diff(struct timeval start, struct timeval end)
return 0;
}
-static int check_itimer(int which)
+static void check_itimer(int which, const char *name)
{
- const char *name;
- int err;
struct timeval start, end;
struct itimerval val = {
.it_value.tv_sec = DELAY,
};
- if (which == ITIMER_VIRTUAL)
- name = "ITIMER_VIRTUAL";
- else if (which == ITIMER_PROF)
- name = "ITIMER_PROF";
- else if (which == ITIMER_REAL)
- name = "ITIMER_REAL";
- else
- return -1;
-
done = 0;
if (which == ITIMER_VIRTUAL)
@@ -101,17 +108,11 @@ static int check_itimer(int which)
else if (which == ITIMER_REAL)
signal(SIGALRM, sig_handler);
- err = gettimeofday(&start, NULL);
- if (err < 0) {
- ksft_perror("Can't call gettimeofday()");
- return -1;
- }
+ if (gettimeofday(&start, NULL) < 0)
+ fatal_error(name, "gettimeofday()");
- err = setitimer(which, &val, NULL);
- if (err < 0) {
- ksft_perror("Can't set timer");
- return -1;
- }
+ if (setitimer(which, &val, NULL) < 0)
+ fatal_error(name, "setitimer()");
if (which == ITIMER_VIRTUAL)
user_loop();
@@ -120,68 +121,41 @@ static int check_itimer(int which)
else if (which == ITIMER_REAL)
idle_loop();
- err = gettimeofday(&end, NULL);
- if (err < 0) {
- ksft_perror("Can't call gettimeofday()");
- return -1;
- }
+ if (gettimeofday(&end, NULL) < 0)
+ fatal_error(name, "gettimeofday()");
ksft_test_result(check_diff(start, end) == 0, "%s\n", name);
-
- return 0;
}
-static int check_timer_create(int which)
+static void check_timer_create(int which, const char *name)
{
- const char *type;
- int err;
- timer_t id;
struct timeval start, end;
struct itimerspec val = {
.it_value.tv_sec = DELAY,
};
-
- if (which == CLOCK_THREAD_CPUTIME_ID) {
- type = "thread";
- } else if (which == CLOCK_PROCESS_CPUTIME_ID) {
- type = "process";
- } else {
- ksft_print_msg("Unknown timer_create() type %d\n", which);
- return -1;
- }
+ timer_t id;
done = 0;
- err = timer_create(which, NULL, &id);
- if (err < 0) {
- ksft_perror("Can't create timer");
- return -1;
- }
- signal(SIGALRM, sig_handler);
- err = gettimeofday(&start, NULL);
- if (err < 0) {
- ksft_perror("Can't call gettimeofday()");
- return -1;
- }
+ if (timer_create(which, NULL, &id) < 0)
+ fatal_error(name, "timer_create()");
- err = timer_settime(id, 0, &val, NULL);
- if (err < 0) {
- ksft_perror("Can't set timer");
- return -1;
- }
+ if (signal(SIGALRM, sig_handler) == SIG_ERR)
+ fatal_error(name, "signal()");
+
+ if (gettimeofday(&start, NULL) < 0)
+ fatal_error(name, "gettimeofday()");
+
+ if (timer_settime(id, 0, &val, NULL) < 0)
+ fatal_error(name, "timer_settime()");
user_loop();
- err = gettimeofday(&end, NULL);
- if (err < 0) {
- ksft_perror("Can't call gettimeofday()");
- return -1;
- }
+ if (gettimeofday(&end, NULL) < 0)
+ fatal_error(name, "gettimeofday()");
ksft_test_result(check_diff(start, end) == 0,
- "timer_create() per %s\n", type);
-
- return 0;
+ "timer_create() per %s\n", name);
}
static pthread_t ctd_thread;
@@ -209,15 +183,14 @@ static void *ctd_thread_func(void *arg)
ctd_count = 100;
if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id))
- return "Can't create timer\n";
+ fatal_error(NULL, "timer_create()");
if (timer_settime(id, 0, &val, NULL))
- return "Can't set timer\n";
-
+ fatal_error(NULL, "timer_settime()");
while (ctd_count > 0 && !ctd_failed)
;
if (timer_delete(id))
- return "Can't delete timer\n";
+ fatal_error(NULL, "timer_delete()");
return NULL;
}
@@ -225,19 +198,16 @@ static void *ctd_thread_func(void *arg)
/*
* Test that only the running thread receives the timer signal.
*/
-static int check_timer_distribution(void)
+static void check_timer_distribution(void)
{
- const char *errmsg;
+ if (signal(SIGALRM, ctd_sighandler) == SIG_ERR)
+ fatal_error(NULL, "signal()");
- signal(SIGALRM, ctd_sighandler);
-
- errmsg = "Can't create thread\n";
if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL))
- goto err;
+ fatal_error(NULL, "pthread_create()");
- errmsg = "Can't join thread\n";
- if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg)
- goto err;
+ if (pthread_join(ctd_thread, NULL))
+ fatal_error(NULL, "pthread_join()");
if (!ctd_failed)
ksft_test_result_pass("check signal distribution\n");
@@ -245,31 +215,399 @@ static int check_timer_distribution(void)
ksft_test_result_fail("check signal distribution\n");
else
ksft_test_result_skip("check signal distribution (old kernel)\n");
- return 0;
-err:
- ksft_print_msg("%s", errmsg);
- return -1;
+}
+
+struct tmrsig {
+ int signals;
+ int overruns;
+};
+
+static void siginfo_handler(int sig, siginfo_t *si, void *uc)
+{
+ struct tmrsig *tsig = si ? si->si_ptr : NULL;
+
+ if (tsig) {
+ tsig->signals++;
+ tsig->overruns += si->si_overrun;
+ }
+}
+
+static void *ignore_thread(void *arg)
+{
+ unsigned int *tid = arg;
+ sigset_t set;
+
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ if (sigprocmask(SIG_BLOCK, &set, NULL))
+ fatal_error(NULL, "sigprocmask(SIG_BLOCK)");
+
+ *tid = gettid();
+ sleep(100);
+
+ if (sigprocmask(SIG_UNBLOCK, &set, NULL))
+ fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)");
+ return NULL;
+}
+
+static void check_sig_ign(int thread)
+{
+ struct tmrsig tsig = { };
+ struct itimerspec its;
+ unsigned int tid = 0;
+ struct sigaction sa;
+ struct sigevent sev;
+ pthread_t pthread;
+ timer_t timerid;
+ sigset_t set;
+
+ if (thread) {
+ if (pthread_create(&pthread, NULL, ignore_thread, &tid))
+ fatal_error(NULL, "pthread_create()");
+ sleep(1);
+ }
+
+ sa.sa_flags = SA_SIGINFO;
+ sa.sa_sigaction = siginfo_handler;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGUSR1, &sa, NULL))
+ fatal_error(NULL, "sigaction()");
+
+ /* Block the signal */
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ if (sigprocmask(SIG_BLOCK, &set, NULL))
+ fatal_error(NULL, "sigprocmask(SIG_BLOCK)");
+
+ memset(&sev, 0, sizeof(sev));
+ sev.sigev_notify = SIGEV_SIGNAL;
+ sev.sigev_signo = SIGUSR1;
+ sev.sigev_value.sival_ptr = &tsig;
+ if (thread) {
+ sev.sigev_notify = SIGEV_THREAD_ID;
+ sev._sigev_un._tid = tid;
+ }
+
+ if (timer_create(CLOCK_MONOTONIC, &sev, &timerid))
+ fatal_error(NULL, "timer_create()");
+
+ /* Start the timer to expire in 100ms and 100ms intervals */
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 100000000;
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 100000000;
+ timer_settime(timerid, 0, &its, NULL);
+
+ sleep(1);
+
+ /* Set the signal to be ignored */
+ if (signal(SIGUSR1, SIG_IGN) == SIG_ERR)
+ fatal_error(NULL, "signal(SIG_IGN)");
+
+ sleep(1);
+
+ if (thread) {
+ /* Stop the thread first. No signal should be delivered to it */
+ if (pthread_cancel(pthread))
+ fatal_error(NULL, "pthread_cancel()");
+ if (pthread_join(pthread, NULL))
+ fatal_error(NULL, "pthread_join()");
+ }
+
+ /* Restore the handler */
+ if (sigaction(SIGUSR1, &sa, NULL))
+ fatal_error(NULL, "sigaction()");
+
+ sleep(1);
+
+ /* Unblock it, which should deliver the signal in the !thread case*/
+ if (sigprocmask(SIG_UNBLOCK, &set, NULL))
+ fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)");
+
+ if (timer_delete(timerid))
+ fatal_error(NULL, "timer_delete()");
+
+ if (!thread) {
+ ksft_test_result(tsig.signals == 1 && tsig.overruns == 29,
+ "check_sig_ign SIGEV_SIGNAL\n");
+ } else {
+ ksft_test_result(tsig.signals == 0 && tsig.overruns == 0,
+ "check_sig_ign SIGEV_THREAD_ID\n");
+ }
+}
+
+static void check_rearm(void)
+{
+ struct tmrsig tsig = { };
+ struct itimerspec its;
+ struct sigaction sa;
+ struct sigevent sev;
+ timer_t timerid;
+ sigset_t set;
+
+ sa.sa_flags = SA_SIGINFO;
+ sa.sa_sigaction = siginfo_handler;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGUSR1, &sa, NULL))
+ fatal_error(NULL, "sigaction()");
+
+ /* Block the signal */
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ if (sigprocmask(SIG_BLOCK, &set, NULL))
+ fatal_error(NULL, "sigprocmask(SIG_BLOCK)");
+
+ memset(&sev, 0, sizeof(sev));
+ sev.sigev_notify = SIGEV_SIGNAL;
+ sev.sigev_signo = SIGUSR1;
+ sev.sigev_value.sival_ptr = &tsig;
+ if (timer_create(CLOCK_MONOTONIC, &sev, &timerid))
+ fatal_error(NULL, "timer_create()");
+
+ /* Start the timer to expire in 100ms and 100ms intervals */
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 100000000;
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 100000000;
+ if (timer_settime(timerid, 0, &its, NULL))
+ fatal_error(NULL, "timer_settime()");
+
+ sleep(1);
+
+ /* Reprogram the timer to single shot */
+ its.it_value.tv_sec = 10;
+ its.it_value.tv_nsec = 0;
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 0;
+ if (timer_settime(timerid, 0, &its, NULL))
+ fatal_error(NULL, "timer_settime()");
+
+ /* Unblock it, which should not deliver a signal */
+ if (sigprocmask(SIG_UNBLOCK, &set, NULL))
+ fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)");
+
+ if (timer_delete(timerid))
+ fatal_error(NULL, "timer_delete()");
+
+ ksft_test_result(!tsig.signals, "check_rearm\n");
+}
+
+static void check_delete(void)
+{
+ struct tmrsig tsig = { };
+ struct itimerspec its;
+ struct sigaction sa;
+ struct sigevent sev;
+ timer_t timerid;
+ sigset_t set;
+
+ sa.sa_flags = SA_SIGINFO;
+ sa.sa_sigaction = siginfo_handler;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGUSR1, &sa, NULL))
+ fatal_error(NULL, "sigaction()");
+
+ /* Block the signal */
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ if (sigprocmask(SIG_BLOCK, &set, NULL))
+ fatal_error(NULL, "sigprocmask(SIG_BLOCK)");
+
+ memset(&sev, 0, sizeof(sev));
+ sev.sigev_notify = SIGEV_SIGNAL;
+ sev.sigev_signo = SIGUSR1;
+ sev.sigev_value.sival_ptr = &tsig;
+ if (timer_create(CLOCK_MONOTONIC, &sev, &timerid))
+ fatal_error(NULL, "timer_create()");
+
+ /* Start the timer to expire in 100ms and 100ms intervals */
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 100000000;
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 100000000;
+ if (timer_settime(timerid, 0, &its, NULL))
+ fatal_error(NULL, "timer_settime()");
+
+ sleep(1);
+
+ if (timer_delete(timerid))
+ fatal_error(NULL, "timer_delete()");
+
+ /* Unblock it, which should not deliver a signal */
+ if (sigprocmask(SIG_UNBLOCK, &set, NULL))
+ fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)");
+
+ ksft_test_result(!tsig.signals, "check_delete\n");
+}
+
+static inline int64_t calcdiff_ns(struct timespec t1, struct timespec t2)
+{
+ int64_t diff;
+
+ diff = NSECS_PER_SEC * (int64_t)((int) t1.tv_sec - (int) t2.tv_sec);
+ diff += ((int) t1.tv_nsec - (int) t2.tv_nsec);
+ return diff;
+}
+
+static void check_sigev_none(int which, const char *name)
+{
+ struct timespec start, now;
+ struct itimerspec its;
+ struct sigevent sev;
+ timer_t timerid;
+
+ memset(&sev, 0, sizeof(sev));
+ sev.sigev_notify = SIGEV_NONE;
+
+ if (timer_create(which, &sev, &timerid))
+ fatal_error(name, "timer_create()");
+
+ /* Start the timer to expire in 100ms and 100ms intervals */
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 100000000;
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 100000000;
+ timer_settime(timerid, 0, &its, NULL);
+
+ if (clock_gettime(which, &start))
+ fatal_error(name, "clock_gettime()");
+
+ do {
+ if (clock_gettime(which, &now))
+ fatal_error(name, "clock_gettime()");
+ } while (calcdiff_ns(now, start) < NSECS_PER_SEC);
+
+ if (timer_gettime(timerid, &its))
+ fatal_error(name, "timer_gettime()");
+
+ if (timer_delete(timerid))
+ fatal_error(name, "timer_delete()");
+
+ ksft_test_result(its.it_value.tv_sec || its.it_value.tv_nsec,
+ "check_sigev_none %s\n", name);
+}
+
+static void check_gettime(int which, const char *name)
+{
+ struct itimerspec its, prev;
+ struct timespec start, now;
+ struct sigevent sev;
+ timer_t timerid;
+ int wraps = 0;
+ sigset_t set;
+
+ /* Block the signal */
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ if (sigprocmask(SIG_BLOCK, &set, NULL))
+ fatal_error(name, "sigprocmask(SIG_BLOCK)");
+
+ memset(&sev, 0, sizeof(sev));
+ sev.sigev_notify = SIGEV_SIGNAL;
+ sev.sigev_signo = SIGUSR1;
+
+ if (timer_create(which, &sev, &timerid))
+ fatal_error(name, "timer_create()");
+
+ /* Start the timer to expire in 100ms and 100ms intervals */
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 100000000;
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 100000000;
+ if (timer_settime(timerid, 0, &its, NULL))
+ fatal_error(name, "timer_settime()");
+
+ if (timer_gettime(timerid, &prev))
+ fatal_error(name, "timer_gettime()");
+
+ if (clock_gettime(which, &start))
+ fatal_error(name, "clock_gettime()");
+
+ do {
+ if (clock_gettime(which, &now))
+ fatal_error(name, "clock_gettime()");
+ if (timer_gettime(timerid, &its))
+ fatal_error(name, "timer_gettime()");
+ if (its.it_value.tv_nsec > prev.it_value.tv_nsec)
+ wraps++;
+ prev = its;
+
+ } while (calcdiff_ns(now, start) < NSECS_PER_SEC);
+
+ if (timer_delete(timerid))
+ fatal_error(name, "timer_delete()");
+
+ ksft_test_result(wraps > 1, "check_gettime %s\n", name);
+}
+
+static void check_overrun(int which, const char *name)
+{
+ struct timespec start, now;
+ struct tmrsig tsig = { };
+ struct itimerspec its;
+ struct sigaction sa;
+ struct sigevent sev;
+ timer_t timerid;
+ sigset_t set;
+
+ sa.sa_flags = SA_SIGINFO;
+ sa.sa_sigaction = siginfo_handler;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGUSR1, &sa, NULL))
+ fatal_error(name, "sigaction()");
+
+ /* Block the signal */
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ if (sigprocmask(SIG_BLOCK, &set, NULL))
+ fatal_error(name, "sigprocmask(SIG_BLOCK)");
+
+ memset(&sev, 0, sizeof(sev));
+ sev.sigev_notify = SIGEV_SIGNAL;
+ sev.sigev_signo = SIGUSR1;
+ sev.sigev_value.sival_ptr = &tsig;
+ if (timer_create(which, &sev, &timerid))
+ fatal_error(name, "timer_create()");
+
+ /* Start the timer to expire in 100ms and 100ms intervals */
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 100000000;
+ its.it_interval.tv_sec = 0;
+ its.it_interval.tv_nsec = 100000000;
+ if (timer_settime(timerid, 0, &its, NULL))
+ fatal_error(name, "timer_settime()");
+
+ if (clock_gettime(which, &start))
+ fatal_error(name, "clock_gettime()");
+
+ do {
+ if (clock_gettime(which, &now))
+ fatal_error(name, "clock_gettime()");
+ } while (calcdiff_ns(now, start) < NSECS_PER_SEC);
+
+ /* Unblock it, which should deliver a signal */
+ if (sigprocmask(SIG_UNBLOCK, &set, NULL))
+ fatal_error(name, "sigprocmask(SIG_UNBLOCK)");
+
+ if (timer_delete(timerid))
+ fatal_error(name, "timer_delete()");
+
+ ksft_test_result(tsig.signals == 1 && tsig.overruns == 9,
+ "check_overrun %s\n", name);
}
int main(int argc, char **argv)
{
ksft_print_header();
- ksft_set_plan(6);
+ ksft_set_plan(18);
ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n");
ksft_print_msg("based timers if other threads run on the CPU...\n");
- if (check_itimer(ITIMER_VIRTUAL) < 0)
- ksft_exit_fail();
-
- if (check_itimer(ITIMER_PROF) < 0)
- ksft_exit_fail();
-
- if (check_itimer(ITIMER_REAL) < 0)
- ksft_exit_fail();
-
- if (check_timer_create(CLOCK_THREAD_CPUTIME_ID) < 0)
- ksft_exit_fail();
+ check_itimer(ITIMER_VIRTUAL, "ITIMER_VIRTUAL");
+ check_itimer(ITIMER_PROF, "ITIMER_PROF");
+ check_itimer(ITIMER_REAL, "ITIMER_REAL");
+ check_timer_create(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");
/*
* It's unfortunately hard to reliably test a timer expiration
@@ -280,11 +618,21 @@ int main(int argc, char **argv)
* to ensure true parallelism. So test only one thread until we
* find a better solution.
*/
- if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0)
- ksft_exit_fail();
-
- if (check_timer_distribution() < 0)
- ksft_exit_fail();
+ check_timer_create(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+ check_timer_distribution();
+
+ check_sig_ign(0);
+ check_sig_ign(1);
+ check_rearm();
+ check_delete();
+ check_sigev_none(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
+ check_sigev_none(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+ check_gettime(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
+ check_gettime(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+ check_gettime(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");
+ check_overrun(CLOCK_MONOTONIC, "CLOCK_MONOTONIC");
+ check_overrun(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID");
+ check_overrun(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID");
ksft_finished();
}
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 5c8757a25998..d51249f14e2f 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -77,7 +77,7 @@ all_32: $(BINARIES_32)
all_64: $(BINARIES_64)
-EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64)
+EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64) srso
$(BINARIES_32): $(OUTPUT)/%_32: %.c helpers.h
$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $< $(EXTRA_FILES) -lrt -ldl -lm
diff --git a/tools/testing/selftests/x86/srso.c b/tools/testing/selftests/x86/srso.c
new file mode 100644
index 000000000000..394ec8bdeb00
--- /dev/null
+++ b/tools/testing/selftests/x86/srso.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/perf_event.h>
+#include <cpuid.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+int main(void)
+{
+ struct perf_event_attr ret_attr, mret_attr;
+ long long count_rets, count_rets_mispred;
+ int rrets_fd, mrrets_fd;
+ unsigned int cpuid1_eax, b, c, d;
+
+ __cpuid(1, cpuid1_eax, b, c, d);
+
+ if (cpuid1_eax < 0x00800f00 ||
+ cpuid1_eax > 0x00afffff) {
+ fprintf(stderr, "This needs to run on a Zen[1-4] machine (CPUID(1).EAX: 0x%x). Exiting...\n", cpuid1_eax);
+ exit(EXIT_FAILURE);
+ }
+
+ memset(&ret_attr, 0, sizeof(struct perf_event_attr));
+ memset(&mret_attr, 0, sizeof(struct perf_event_attr));
+
+ ret_attr.type = mret_attr.type = PERF_TYPE_RAW;
+ ret_attr.size = mret_attr.size = sizeof(struct perf_event_attr);
+ ret_attr.config = 0xc8;
+ mret_attr.config = 0xc9;
+ ret_attr.disabled = mret_attr.disabled = 1;
+ ret_attr.exclude_user = mret_attr.exclude_user = 1;
+ ret_attr.exclude_hv = mret_attr.exclude_hv = 1;
+
+ rrets_fd = syscall(SYS_perf_event_open, &ret_attr, 0, -1, -1, 0);
+ if (rrets_fd == -1) {
+ perror("opening retired RETs fd");
+ exit(EXIT_FAILURE);
+ }
+
+ mrrets_fd = syscall(SYS_perf_event_open, &mret_attr, 0, -1, -1, 0);
+ if (mrrets_fd == -1) {
+ perror("opening retired mispredicted RETs fd");
+ exit(EXIT_FAILURE);
+ }
+
+ ioctl(rrets_fd, PERF_EVENT_IOC_RESET, 0);
+ ioctl(mrrets_fd, PERF_EVENT_IOC_RESET, 0);
+
+ ioctl(rrets_fd, PERF_EVENT_IOC_ENABLE, 0);
+ ioctl(mrrets_fd, PERF_EVENT_IOC_ENABLE, 0);
+
+ printf("Sleeping for 10 seconds\n");
+ sleep(10);
+
+ ioctl(rrets_fd, PERF_EVENT_IOC_DISABLE, 0);
+ ioctl(mrrets_fd, PERF_EVENT_IOC_DISABLE, 0);
+
+ read(rrets_fd, &count_rets, sizeof(long long));
+ read(mrrets_fd, &count_rets_mispred, sizeof(long long));
+
+ printf("RETs: (%lld retired <-> %lld mispredicted)\n",
+ count_rets, count_rets_mispred);
+ printf("SRSO Safe-RET mitigation works correctly if both counts are almost equal.\n");
+
+ return 0;
+}
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 554b290fefdc..a3d448a075e3 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -139,7 +139,7 @@ int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_po
}
/* Connect to <cid, port> and return the file descriptor. */
-static int vsock_connect(unsigned int cid, unsigned int port, int type)
+int vsock_connect(unsigned int cid, unsigned int port, int type)
{
union {
struct sockaddr sa;
@@ -226,8 +226,8 @@ static int vsock_listen(unsigned int cid, unsigned int port, int type)
/* Listen on <cid, port> and return the first incoming connection. The remote
* address is stored to clientaddrp. clientaddrp may be NULL.
*/
-static int vsock_accept(unsigned int cid, unsigned int port,
- struct sockaddr_vm *clientaddrp, int type)
+int vsock_accept(unsigned int cid, unsigned int port,
+ struct sockaddr_vm *clientaddrp, int type)
{
union {
struct sockaddr sa;
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index e95e62485959..fff22d4a14c0 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -39,6 +39,9 @@ struct test_case {
void init_signals(void);
unsigned int parse_cid(const char *str);
unsigned int parse_port(const char *str);
+int vsock_connect(unsigned int cid, unsigned int port, int type);
+int vsock_accept(unsigned int cid, unsigned int port,
+ struct sockaddr_vm *clientaddrp, int type);
int vsock_stream_connect(unsigned int cid, unsigned int port);
int vsock_bind_connect(unsigned int cid, unsigned int port,
unsigned int bind_port, int type);
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index f851f8961247..8d38dbf8f41f 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -20,6 +20,8 @@
#include <sys/mman.h>
#include <poll.h>
#include <signal.h>
+#include <sys/ioctl.h>
+#include <linux/sockios.h>
#include "vsock_test_zerocopy.h"
#include "timeout.h"
@@ -1238,6 +1240,79 @@ static void test_double_bind_connect_client(const struct test_opts *opts)
}
}
+#define MSG_BUF_IOCTL_LEN 64
+static void test_unsent_bytes_server(const struct test_opts *opts, int type)
+{
+ unsigned char buf[MSG_BUF_IOCTL_LEN];
+ int client_fd;
+
+ client_fd = vsock_accept(VMADDR_CID_ANY, opts->peer_port, NULL, type);
+ if (client_fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ recv_buf(client_fd, buf, sizeof(buf), 0, sizeof(buf));
+ control_writeln("RECEIVED");
+
+ close(client_fd);
+}
+
+static void test_unsent_bytes_client(const struct test_opts *opts, int type)
+{
+ unsigned char buf[MSG_BUF_IOCTL_LEN];
+ int ret, fd, sock_bytes_unsent;
+
+ fd = vsock_connect(opts->peer_cid, opts->peer_port, type);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ for (int i = 0; i < sizeof(buf); i++)
+ buf[i] = rand() & 0xFF;
+
+ send_buf(fd, buf, sizeof(buf), 0, sizeof(buf));
+ control_expectln("RECEIVED");
+
+ ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent);
+ if (ret < 0) {
+ if (errno == EOPNOTSUPP) {
+ fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n");
+ } else {
+ perror("ioctl");
+ exit(EXIT_FAILURE);
+ }
+ } else if (ret == 0 && sock_bytes_unsent != 0) {
+ fprintf(stderr,
+ "Unexpected 'SIOCOUTQ' value, expected 0, got %i\n",
+ sock_bytes_unsent);
+ exit(EXIT_FAILURE);
+ }
+
+ close(fd);
+}
+
+static void test_stream_unsent_bytes_client(const struct test_opts *opts)
+{
+ test_unsent_bytes_client(opts, SOCK_STREAM);
+}
+
+static void test_stream_unsent_bytes_server(const struct test_opts *opts)
+{
+ test_unsent_bytes_server(opts, SOCK_STREAM);
+}
+
+static void test_seqpacket_unsent_bytes_client(const struct test_opts *opts)
+{
+ test_unsent_bytes_client(opts, SOCK_SEQPACKET);
+}
+
+static void test_seqpacket_unsent_bytes_server(const struct test_opts *opts)
+{
+ test_unsent_bytes_server(opts, SOCK_SEQPACKET);
+}
+
#define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128)
/* This define is the same as in 'include/linux/virtio_vsock.h':
* it is used to decide when to send credit update message during
@@ -1523,6 +1598,16 @@ static struct test_case test_cases[] = {
.run_client = test_stream_rcvlowat_def_cred_upd_client,
.run_server = test_stream_cred_upd_on_low_rx_bytes,
},
+ {
+ .name = "SOCK_STREAM ioctl(SIOCOUTQ) 0 unsent bytes",
+ .run_client = test_stream_unsent_bytes_client,
+ .run_server = test_stream_unsent_bytes_server,
+ },
+ {
+ .name = "SOCK_SEQPACKET ioctl(SIOCOUTQ) 0 unsent bytes",
+ .run_client = test_seqpacket_unsent_bytes_client,
+ .run_server = test_seqpacket_unsent_bytes_server,
+ },
{},
};
diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c
index f594a44df840..2f756628613d 100644
--- a/tools/tracing/rtla/src/osnoise_top.c
+++ b/tools/tracing/rtla/src/osnoise_top.c
@@ -651,8 +651,10 @@ struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params)
return NULL;
tool->data = osnoise_alloc_top(nr_cpus);
- if (!tool->data)
- goto out_err;
+ if (!tool->data) {
+ osnoise_destroy_tool(tool);
+ return NULL;
+ }
tool->params = params;
@@ -660,11 +662,6 @@ struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params)
osnoise_top_handler, NULL);
return tool;
-
-out_err:
- osnoise_free_top(tool->data);
- osnoise_destroy_tool(tool);
- return NULL;
}
static int stop_tracing;