summaryrefslogtreecommitdiffstats
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/arm64/include/uapi/asm/kvm.h25
-rw-r--r--tools/arch/s390/include/uapi/asm/sie.h2
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h6
-rw-r--r--tools/arch/x86/include/asm/disabled-features.h9
-rw-r--r--tools/arch/x86/include/asm/msr-index.h10
-rw-r--r--tools/arch/x86/include/asm/required-features.h2
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h20
-rw-r--r--tools/arch/x86/include/uapi/asm/svm.h13
-rw-r--r--tools/bpf/bpftool/feature.c7
-rw-r--r--tools/bpf/bpftool/prog.c2
-rw-r--r--tools/bpf/bpftool/skeleton/profiler.bpf.c4
-rw-r--r--tools/build/feature/test-all.c1
-rw-r--r--tools/include/linux/compiler-gcc.h12
-rw-r--r--tools/include/linux/compiler.h3
-rw-r--r--tools/include/linux/objtool.h2
-rw-r--r--tools/include/uapi/asm-generic/unistd.h4
-rw-r--r--tools/include/uapi/drm/i915_drm.h59
-rw-r--r--tools/include/uapi/linux/fscrypt.h6
-rw-r--r--tools/include/uapi/linux/kvm.h19
-rw-r--r--tools/include/uapi/linux/mman.h1
-rw-r--r--tools/include/uapi/linux/mount.h1
-rw-r--r--tools/include/uapi/linux/perf_event.h2
-rw-r--r--tools/include/uapi/linux/prctl.h9
-rw-r--r--tools/include/uapi/linux/vhost.h4
-rw-r--r--tools/lib/bpf/hashmap.h15
-rw-r--r--tools/lib/bpf/xsk.c9
-rw-r--r--tools/perf/Makefile.config1
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl11
-rw-r--r--tools/perf/builtin-trace.c15
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json2
-rw-r--r--tools/perf/tests/dwarf-unwind.c10
-rw-r--r--tools/perf/ui/browsers/hists.c2
-rw-r--r--tools/perf/util/build-id.c2
-rw-r--r--tools/perf/util/hashmap.c3
-rw-r--r--tools/perf/util/hashmap.h12
-rw-r--r--tools/perf/util/machine.c11
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c7
-rw-r--r--tools/perf/util/session.c14
-rw-r--r--tools/perf/util/symbol.c7
-rw-r--r--tools/perf/util/symbol.h2
-rw-r--r--tools/power/cpupower/Makefile2
-rw-r--r--tools/power/cpupower/debug/i386/intel_gsic.c2
-rw-r--r--tools/power/x86/turbostat/Makefile3
-rw-r--r--tools/power/x86/turbostat/turbostat.82
-rw-r--r--tools/power/x86/turbostat/turbostat.c573
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c67
-rw-r--r--tools/testing/kunit/kunit_parser.py3
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py32
-rw-r--r--tools/testing/kunit/test_data/test_config_printk_time.log3
-rw-r--r--tools/testing/kunit/test_data/test_interrupted_tap_output.log3
-rw-r--r--tools/testing/kunit/test_data/test_kernel_panic_interrupt.log3
-rw-r--r--tools/testing/kunit/test_data/test_multiple_prefixes.log3
-rw-r--r--tools/testing/kunit/test_data/test_pound_no_prefix.log3
-rw-r--r--tools/testing/kunit/test_data/test_pound_sign.log1
-rw-r--r--tools/testing/selftests/arm64/mte/check_buffer_fill.c3
-rw-r--r--tools/testing/selftests/arm64/mte/check_child_memory.c3
-rw-r--r--tools/testing/selftests/arm64/mte/check_ksm_options.c4
-rw-r--r--tools/testing/selftests/arm64/mte/check_mmap_options.c4
-rw-r--r--tools/testing/selftests/arm64/mte/check_tags_inclusion.c3
-rw-r--r--tools/testing/selftests/arm64/mte/check_user_mem.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_init.c214
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h11
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_init.c33
-rw-r--r--tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c2
-rw-r--r--tools/testing/selftests/core/close_range_test.c8
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/nexthop.sh436
-rw-r--r--tools/testing/selftests/filesystems/binderfs/binderfs_test.c8
-rw-r--r--tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c95
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions7
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc10
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc14
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc12
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/profile.tc2
-rw-r--r--tools/testing/selftests/kselftest_harness.h46
-rw-r--r--tools/testing/selftests/kvm/.gitignore5
-rw-r--r--tools/testing/selftests/kvm/Makefile26
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c3
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c841
-rw-r--r--tools/testing/selftests/kvm/clear_dirty_log_test.c6
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c269
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c376
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c191
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h7
-rw-r--r--tools/testing/selftests/kvm/include/perf_test_util.h198
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h2
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h38
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h6
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c4
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/ucall.c3
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c90
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h2
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/processor.c4
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/ucall.c3
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c22
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/handlers.S81
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c146
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/ucall.c3
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c9
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_pv_test.c234
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c142
-rw-r--r--tools/testing/selftests/lib.mk2
-rw-r--r--tools/testing/selftests/mount/.gitignore1
-rw-r--r--tools/testing/selftests/mount/Makefile4
-rw-r--r--tools/testing/selftests/mount/nosymfollow-test.c218
-rwxr-xr-xtools/testing/selftests/mount/run_nosymfollow.sh4
-rwxr-xr-xtools/testing/selftests/mount/run_unprivileged_remount.sh (renamed from tools/testing/selftests/mount/run_tests.sh)0
-rw-r--r--tools/testing/selftests/net/Makefile1
-rwxr-xr-xtools/testing/selftests/net/bareudp.sh546
-rw-r--r--tools/testing/selftests/net/config7
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_igmp.sh485
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mld.sh558
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh107
-rw-r--r--tools/testing/selftests/net/mptcp/config10
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh94
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh79
-rw-r--r--tools/testing/selftests/net/psock_fanout.c72
-rw-r--r--tools/testing/selftests/net/timestamping.c47
-rw-r--r--tools/testing/selftests/pidfd/config1
-rw-r--r--tools/testing/selftests/pidfd/pidfd_getfd_test.c5
-rw-r--r--tools/testing/selftests/pidfd/pidfd_open_test.c1
-rw-r--r--tools/testing/selftests/pidfd/pidfd_poll_test.c1
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c1
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c2
-rw-r--r--tools/testing/selftests/powerpc/alignment/alignment_handler.c8
-rw-r--r--tools/testing/selftests/proc/proc-loadavg-001.c1
-rw-r--r--tools/testing/selftests/proc/proc-self-syscall.c1
-rw-r--r--tools/testing/selftests/proc/proc-uptime-002.c1
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/tests.json4
-rw-r--r--tools/testing/selftests/timens/Makefile2
-rw-r--r--tools/testing/selftests/timens/futex.c110
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh8
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config2
148 files changed, 6398 insertions, 703 deletions
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h
index ba85bb23f060..1c17c3a24411 100644
--- a/tools/arch/arm64/include/uapi/asm/kvm.h
+++ b/tools/arch/arm64/include/uapi/asm/kvm.h
@@ -159,6 +159,21 @@ struct kvm_sync_regs {
struct kvm_arch_memory_slot {
};
+/*
+ * PMU filter structure. Describe a range of events with a particular
+ * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
+ */
+struct kvm_pmu_event_filter {
+ __u16 base_event;
+ __u16 nevents;
+
+#define KVM_PMU_EVENT_ALLOW 0
+#define KVM_PMU_EVENT_DENY 1
+
+ __u8 action;
+ __u8 pad[3];
+};
+
/* for KVM_GET/SET_VCPU_EVENTS */
struct kvm_vcpu_events {
struct {
@@ -242,6 +257,15 @@ struct kvm_vcpu_events {
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2
+
+/*
+ * Only two states can be presented by the host kernel:
+ * - NOT_REQUIRED: the guest doesn't need to do anything
+ * - NOT_AVAIL: the guest isn't mitigated (it can still use SSBS if available)
+ *
+ * All the other values are deprecated. The host still accepts all
+ * values (they are ABI), but will narrow them to the above two.
+ */
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2)
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1
@@ -329,6 +353,7 @@ struct kvm_vcpu_events {
#define KVM_ARM_VCPU_PMU_V3_CTRL 0
#define KVM_ARM_VCPU_PMU_V3_IRQ 0
#define KVM_ARM_VCPU_PMU_V3_INIT 1
+#define KVM_ARM_VCPU_PMU_V3_FILTER 2
#define KVM_ARM_VCPU_TIMER_CTRL 1
#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0
#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1
diff --git a/tools/arch/s390/include/uapi/asm/sie.h b/tools/arch/s390/include/uapi/asm/sie.h
index 6ca1e68d7103..ede318653c87 100644
--- a/tools/arch/s390/include/uapi/asm/sie.h
+++ b/tools/arch/s390/include/uapi/asm/sie.h
@@ -29,7 +29,7 @@
{ 0x13, "SIGP conditional emergency signal" }, \
{ 0x15, "SIGP sense running" }, \
{ 0x16, "SIGP set multithreading"}, \
- { 0x17, "SIGP store additional status ait address"}
+ { 0x17, "SIGP store additional status at address"}
#define icpt_prog_codes \
{ 0x0001, "Prog Operation" }, \
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 2901d5df4366..dad350d42ecf 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -96,7 +96,7 @@
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
-/* free ( 3*32+17) */
+#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -236,6 +236,7 @@
#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
+#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -288,6 +289,7 @@
#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
+#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
@@ -353,6 +355,7 @@
#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */
#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */
#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */
+#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */
/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */
@@ -368,6 +371,7 @@
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
+#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h
index 4ea8584682f9..5861d34f9771 100644
--- a/tools/arch/x86/include/asm/disabled-features.h
+++ b/tools/arch/x86/include/asm/disabled-features.h
@@ -56,6 +56,12 @@
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
+#ifdef CONFIG_IOMMU_SUPPORT
+# define DISABLE_ENQCMD 0
+#else
+# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
@@ -75,7 +81,8 @@
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define DISABLED_MASK15 0
-#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP)
+#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \
+ DISABLE_ENQCMD)
#define DISABLED_MASK17 0
#define DISABLED_MASK18 0
#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19)
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 2859ee4f39a8..972a34d93505 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -257,6 +257,9 @@
#define MSR_IA32_LASTINTFROMIP 0x000001dd
#define MSR_IA32_LASTINTTOIP 0x000001de
+#define MSR_IA32_PASID 0x00000d93
+#define MSR_IA32_PASID_VALID BIT_ULL(31)
+
/* DEBUGCTLMSR bits (others vary by model): */
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
#define DEBUGCTLMSR_BTF_SHIFT 1
@@ -464,11 +467,15 @@
#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1)
#define MSR_AMD64_IBSCTL 0xc001103a
#define MSR_AMD64_IBSBRTARGET 0xc001103b
+#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_SEV_ES_GHCB 0xc0010130
#define MSR_AMD64_SEV 0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT 0
+#define MSR_AMD64_SEV_ES_ENABLED_BIT 1
#define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
+#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)
#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
@@ -857,11 +864,14 @@
#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
#define MSR_CORE_PERF_FIXED_CTR2 0x0000030b
+#define MSR_CORE_PERF_FIXED_CTR3 0x0000030c
#define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d
#define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e
#define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390
+#define MSR_PERF_METRICS 0x00000329
+
/* PERF_GLOBAL_OVF_CTL bits */
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55
#define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT)
diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h
index 6847d85400a8..3ff0d48469f2 100644
--- a/tools/arch/x86/include/asm/required-features.h
+++ b/tools/arch/x86/include/asm/required-features.h
@@ -54,7 +54,7 @@
#endif
#ifdef CONFIG_X86_64
-#ifdef CONFIG_PARAVIRT
+#ifdef CONFIG_PARAVIRT_XXL
/* Paravirtualized systems may not have PSE or PGE available */
#define NEED_PSE 0
#define NEED_PGE 0
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 0780f97c1850..89e5f3d1bba8 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -192,6 +192,26 @@ struct kvm_msr_list {
__u32 indices[0];
};
+/* Maximum size of any access bitmap in bytes */
+#define KVM_MSR_FILTER_MAX_BITMAP_SIZE 0x600
+
+/* for KVM_X86_SET_MSR_FILTER */
+struct kvm_msr_filter_range {
+#define KVM_MSR_FILTER_READ (1 << 0)
+#define KVM_MSR_FILTER_WRITE (1 << 1)
+ __u32 flags;
+ __u32 nmsrs; /* number of msrs in bitmap */
+ __u32 base; /* MSR index the bitmap starts at */
+ __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */
+};
+
+#define KVM_MSR_FILTER_MAX_RANGES 16
+struct kvm_msr_filter {
+#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
+#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0)
+ __u32 flags;
+ struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
+};
struct kvm_cpuid_entry {
__u32 function;
diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h
index 2e8a30f06c74..f1d8307454e0 100644
--- a/tools/arch/x86/include/uapi/asm/svm.h
+++ b/tools/arch/x86/include/uapi/asm/svm.h
@@ -29,6 +29,7 @@
#define SVM_EXIT_WRITE_DR6 0x036
#define SVM_EXIT_WRITE_DR7 0x037
#define SVM_EXIT_EXCP_BASE 0x040
+#define SVM_EXIT_LAST_EXCP 0x05f
#define SVM_EXIT_INTR 0x060
#define SVM_EXIT_NMI 0x061
#define SVM_EXIT_SMI 0x062
@@ -76,10 +77,21 @@
#define SVM_EXIT_MWAIT_COND 0x08c
#define SVM_EXIT_XSETBV 0x08d
#define SVM_EXIT_RDPRU 0x08e
+#define SVM_EXIT_INVPCID 0x0a2
#define SVM_EXIT_NPF 0x400
#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402
+/* SEV-ES software-defined VMGEXIT events */
+#define SVM_VMGEXIT_MMIO_READ 0x80000001
+#define SVM_VMGEXIT_MMIO_WRITE 0x80000002
+#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003
+#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004
+#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005
+#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0
+#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1
+#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff
+
#define SVM_EXIT_ERR -1
#define SVM_EXIT_REASONS \
@@ -171,6 +183,7 @@
{ SVM_EXIT_MONITOR, "monitor" }, \
{ SVM_EXIT_MWAIT, "mwait" }, \
{ SVM_EXIT_XSETBV, "xsetbv" }, \
+ { SVM_EXIT_INVPCID, "invpcid" }, \
{ SVM_EXIT_NPF, "npf" }, \
{ SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \
{ SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index a43a6f10b564..359960a8f1de 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -843,9 +843,14 @@ static int handle_perms(void)
else
p_err("missing %s%s%s%s%s%s%s%srequired for full feature probing; run as root or use 'unprivileged'",
capability_msg(bpf_caps, 0),
+#ifdef CAP_BPF
capability_msg(bpf_caps, 1),
capability_msg(bpf_caps, 2),
- capability_msg(bpf_caps, 3));
+ capability_msg(bpf_caps, 3)
+#else
+ "", "", "", "", "", ""
+#endif /* CAP_BPF */
+ );
goto exit_free;
}
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index d942c1e3372c..acdb2c245f0a 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -940,7 +940,7 @@ static int parse_attach_detach_args(int argc, char **argv, int *progfd,
}
if (*attach_type == BPF_FLOW_DISSECTOR) {
- *mapfd = -1;
+ *mapfd = 0;
return 0;
}
diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
index 4e3512f700c0..ce5b65e07ab1 100644
--- a/tools/bpf/bpftool/skeleton/profiler.bpf.c
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -70,7 +70,7 @@ int BPF_PROG(fentry_XXX)
static inline void
fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
{
- struct bpf_perf_event_value *before, diff, *accum;
+ struct bpf_perf_event_value *before, diff;
before = bpf_map_lookup_elem(&fentry_readings, &id);
/* only account samples with a valid fentry_reading */
@@ -95,7 +95,7 @@ int BPF_PROG(fexit_XXX)
{
struct bpf_perf_event_value readings[MAX_NUM_MATRICS];
u32 cpu = bpf_get_smp_processor_id();
- u32 i, one = 1, zero = 0;
+ u32 i, zero = 0;
int err;
u64 *count;
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index a04e81321c66..464873883396 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -185,7 +185,6 @@ int main(int argc, char *argv[])
main_test_libperl();
main_test_hello();
main_test_libelf();
- main_test_libelf_mmap();
main_test_get_current_dir_name();
main_test_gettid();
main_test_glibc();
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index b9d4322e1e65..95c072b70d0e 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -27,18 +27,6 @@
#define __pure __attribute__((pure))
#endif
#define noinline __attribute__((noinline))
-#ifdef __has_attribute
-#if __has_attribute(disable_tail_calls)
-#define __no_tail_call __attribute__((disable_tail_calls))
-#endif
-#endif
-#ifndef __no_tail_call
-#if GCC_VERSION > 40201
-#define __no_tail_call __attribute__((optimize("no-optimize-sibling-calls")))
-#else
-#define __no_tail_call
-#endif
-#endif
#ifndef __packed
#define __packed __attribute__((packed))
#endif
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 2b3f7353e891..d22a974372c0 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -47,9 +47,6 @@
#ifndef noinline
#define noinline
#endif
-#ifndef __no_tail_call
-#define __no_tail_call
-#endif
/* Are two types/vars the same type (ignoring qualifiers)? */
#ifndef __same_type
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
index ab82c793c897..577f51436cf9 100644
--- a/tools/include/linux/objtool.h
+++ b/tools/include/linux/objtool.h
@@ -60,7 +60,7 @@ struct unwind_hint {
* For more information, see tools/objtool/Documentation/stack-validation.txt.
*/
#define STACK_FRAME_NON_STANDARD(func) \
- static void __used __section(.discard.func_stack_frame_non_standard) \
+ static void __used __section(".discard.func_stack_frame_non_standard") \
*__func_stack_frame_non_standard_##func = func
#else /* __ASSEMBLY__ */
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index f2b5d72a46c2..2056318988f7 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -857,9 +857,11 @@ __SYSCALL(__NR_openat2, sys_openat2)
__SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd)
#define __NR_faccessat2 439
__SYSCALL(__NR_faccessat2, sys_faccessat2)
+#define __NR_process_madvise 440
+__SYSCALL(__NR_process_madvise, sys_process_madvise)
#undef __NR_syscalls
-#define __NR_syscalls 440
+#define __NR_syscalls 441
/*
* 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 00546062e023..fa1f3d62f9a6 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait {
*/
#define I915_PARAM_PERF_REVISION 54
+/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of
+ * timeline syncobj through drm_i915_gem_execbuffer_ext_timeline_fences. See
+ * I915_EXEC_USE_EXTENSIONS.
+ */
+#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55
+
/* Must be kept compact -- no holes and well documented */
typedef struct drm_i915_getparam {
@@ -1046,6 +1052,38 @@ struct drm_i915_gem_exec_fence {
__u32 flags;
};
+/**
+ * See drm_i915_gem_execbuffer_ext_timeline_fences.
+ */
+#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0
+
+/**
+ * This structure describes an array of drm_syncobj and associated points for
+ * timeline variants of drm_syncobj. It is invalid to append this structure to
+ * the execbuf if I915_EXEC_FENCE_ARRAY is set.
+ */
+struct drm_i915_gem_execbuffer_ext_timeline_fences {
+ struct i915_user_extension base;
+
+ /**
+ * Number of element in the handles_ptr & value_ptr arrays.
+ */
+ __u64 fence_count;
+
+ /**
+ * Pointer to an array of struct drm_i915_gem_exec_fence of length
+ * fence_count.
+ */
+ __u64 handles_ptr;
+
+ /**
+ * Pointer to an array of u64 values of length fence_count. Values
+ * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline
+ * drm_syncobj is invalid as it turns a drm_syncobj into a binary one.
+ */
+ __u64 values_ptr;
+};
+
struct drm_i915_gem_execbuffer2 {
/**
* List of gem_exec_object2 structs
@@ -1062,8 +1100,14 @@ struct drm_i915_gem_execbuffer2 {
__u32 num_cliprects;
/**
* This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY
- * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a
- * struct drm_i915_gem_exec_fence *fences.
+ * & I915_EXEC_USE_EXTENSIONS are not set.
+ *
+ * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array
+ * of struct drm_i915_gem_exec_fence and num_cliprects is the length
+ * of the array.
+ *
+ * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a
+ * single struct i915_user_extension and num_cliprects is 0.
*/
__u64 cliprects_ptr;
#define I915_EXEC_RING_MASK (0x3f)
@@ -1181,7 +1225,16 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_FENCE_SUBMIT (1 << 20)
-#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1))
+/*
+ * Setting I915_EXEC_USE_EXTENSIONS implies that
+ * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked
+ * list of i915_user_extension. Each i915_user_extension node is the base of a
+ * larger structure. The list of supported structures are listed in the
+ * drm_i915_gem_execbuffer_ext enum.
+ */
+#define I915_EXEC_USE_EXTENSIONS (1 << 21)
+
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS << 1))
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h
index 7875709ccfeb..e5de60336938 100644
--- a/tools/include/uapi/linux/fscrypt.h
+++ b/tools/include/uapi/linux/fscrypt.h
@@ -45,7 +45,6 @@ struct fscrypt_policy_v1 {
__u8 flags;
__u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE];
};
-#define fscrypt_policy fscrypt_policy_v1
/*
* Process-subscribed "logon" key description prefix and payload format.
@@ -156,9 +155,9 @@ struct fscrypt_get_key_status_arg {
__u32 __out_reserved[13];
};
-#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy)
+#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy_v1)
#define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16])
-#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy)
+#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy_v1)
#define FS_IOC_GET_ENCRYPTION_POLICY_EX _IOWR('f', 22, __u8[9]) /* size + version */
#define FS_IOC_ADD_ENCRYPTION_KEY _IOWR('f', 23, struct fscrypt_add_key_arg)
#define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg)
@@ -170,6 +169,7 @@ struct fscrypt_get_key_status_arg {
/* old names; don't add anything new here! */
#ifndef __KERNEL__
+#define fscrypt_policy fscrypt_policy_v1
#define FS_KEY_DESCRIPTOR_SIZE FSCRYPT_KEY_DESCRIPTOR_SIZE
#define FS_POLICY_FLAGS_PAD_4 FSCRYPT_POLICY_FLAGS_PAD_4
#define FS_POLICY_FLAGS_PAD_8 FSCRYPT_POLICY_FLAGS_PAD_8
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 7d8eced6f459..ca41220b40b8 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -248,6 +248,8 @@ struct kvm_hyperv_exit {
#define KVM_EXIT_IOAPIC_EOI 26
#define KVM_EXIT_HYPERV 27
#define KVM_EXIT_ARM_NISV 28
+#define KVM_EXIT_X86_RDMSR 29
+#define KVM_EXIT_X86_WRMSR 30
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -413,6 +415,17 @@ struct kvm_run {
__u64 esr_iss;
__u64 fault_ipa;
} arm_nisv;
+ /* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */
+ struct {
+ __u8 error; /* user -> kernel */
+ __u8 pad[7];
+#define KVM_MSR_EXIT_REASON_INVAL (1 << 0)
+#define KVM_MSR_EXIT_REASON_UNKNOWN (1 << 1)
+#define KVM_MSR_EXIT_REASON_FILTER (1 << 2)
+ __u32 reason; /* kernel -> user */
+ __u32 index; /* kernel -> user */
+ __u64 data; /* kernel <-> user */
+ } msr;
/* Fix the size of the union. */
char padding[256];
};
@@ -1037,6 +1050,9 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_SMALLER_MAXPHYADDR 185
#define KVM_CAP_S390_DIAG318 186
#define KVM_CAP_STEAL_TIME 187
+#define KVM_CAP_X86_USER_SPACE_MSR 188
+#define KVM_CAP_X86_MSR_FILTER 189
+#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1538,6 +1554,9 @@ struct kvm_pv_cmd {
/* Available with KVM_CAP_S390_PROTECTED */
#define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd)
+/* Available with KVM_CAP_X86_MSR_FILTER */
+#define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter)
+
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
/* Guest initialization commands */
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index 923cc162609c..f55bc680b5b0 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -27,6 +27,7 @@
#define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT
#define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK
+#define MAP_HUGE_16KB HUGETLB_FLAG_ENCODE_16KB
#define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB
#define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB
#define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB
diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h
index 96a0240f23fe..dd8306ea336c 100644
--- a/tools/include/uapi/linux/mount.h
+++ b/tools/include/uapi/linux/mount.h
@@ -16,6 +16,7 @@
#define MS_REMOUNT 32 /* Alter flags of a mounted FS */
#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
#define MS_DIRSYNC 128 /* Directory modifications are synchronous */
+#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */
#define MS_NOATIME 1024 /* Do not update access times. */
#define MS_NODIRATIME 2048 /* Do not update directory access times */
#define MS_BIND 4096
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 3e5dcdd48a49..b95d3c485d27 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -1196,7 +1196,7 @@ union perf_mem_data_src {
#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
/* 1 free */
-#define PERF_MEM_SNOOPX_SHIFT 38
+#define PERF_MEM_SNOOPX_SHIFT 38
/* locked instruction */
#define PERF_MEM_LOCK_NA 0x01 /* not available */
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 07b4f8131e36..7f0827705c9a 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -233,6 +233,15 @@ struct prctl_mm_map {
#define PR_SET_TAGGED_ADDR_CTRL 55
#define PR_GET_TAGGED_ADDR_CTRL 56
# define PR_TAGGED_ADDR_ENABLE (1UL << 0)
+/* MTE tag check fault modes */
+# define PR_MTE_TCF_SHIFT 1
+# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT)
+# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT)
+/* MTE tag inclusion mask */
+# define PR_MTE_TAG_SHIFT 3
+# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT)
/* Control reclaim behavior when allocating memory */
#define PR_SET_IO_FLUSHER 57
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index 75232185324a..c998860d7bbc 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -146,4 +146,8 @@
/* Set event fd for config interrupt*/
#define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int)
+
+/* Get the valid iova range */
+#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \
+ struct vhost_vdpa_iova_range)
#endif
diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
index d9b385fe808c..10a4c4cd13cf 100644
--- a/tools/lib/bpf/hashmap.h
+++ b/tools/lib/bpf/hashmap.h
@@ -15,6 +15,9 @@
static inline size_t hash_bits(size_t h, int bits)
{
/* shuffle bits and return requested number of upper bits */
+ if (bits == 0)
+ return 0;
+
#if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__)
/* LP64 case */
return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits);
@@ -174,17 +177,17 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value);
* @key: key to iterate entries for
*/
#define hashmap__for_each_key_entry(map, cur, _key) \
- for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
- map->cap_bits); \
- map->buckets ? map->buckets[bkt] : NULL; }); \
+ for (cur = map->buckets \
+ ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+ : NULL; \
cur; \
cur = cur->next) \
if (map->equal_fn(cur->key, (_key), map->ctx))
#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \
- for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
- map->cap_bits); \
- cur = map->buckets ? map->buckets[bkt] : NULL; }); \
+ for (cur = map->buckets \
+ ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \
+ : NULL; \
cur && ({ tmp = cur->next; true; }); \
cur = tmp) \
if (map->equal_fn(cur->key, (_key), map->ctx))
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index e3c98c007825..9bc537d0b92d 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -891,13 +891,16 @@ int xsk_umem__delete(struct xsk_umem *umem)
void xsk_socket__delete(struct xsk_socket *xsk)
{
size_t desc_sz = sizeof(struct xdp_desc);
- struct xsk_ctx *ctx = xsk->ctx;
struct xdp_mmap_offsets off;
+ struct xsk_umem *umem;
+ struct xsk_ctx *ctx;
int err;
if (!xsk)
return;
+ ctx = xsk->ctx;
+ umem = ctx->umem;
if (ctx->prog_fd != -1) {
xsk_delete_bpf_maps(xsk);
close(ctx->prog_fd);
@@ -917,11 +920,11 @@ void xsk_socket__delete(struct xsk_socket *xsk)
xsk_put_ctx(ctx);
- ctx->umem->refcount--;
+ umem->refcount--;
/* Do not close an fd that also has an associated umem connected
* to it.
*/
- if (xsk->fd != ctx->umem->fd)
+ if (xsk->fd != umem->fd)
close(xsk->fd);
free(xsk);
}
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 6890fc4b063a..ce8516e4de34 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -749,6 +749,7 @@ else
PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null)
PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS))
+ PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS))
PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS))
FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 347809649ba2..379819244b91 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -361,12 +361,13 @@
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
+440 common process_madvise sys_process_madvise
#
-# x32-specific system call numbers start at 512 to avoid cache impact
-# for native 64-bit operation. The __x32_compat_sys stubs are created
-# on-the-fly for compat_sys_*() compatibility system calls if X86_X32
-# is defined.
+# Due to a historical design error, certain syscalls are numbered differently
+# in x32 as compared to native x86_64. These syscalls have numbers 512-547.
+# Do not add new syscalls to this range. Numbers 548 and above are available
+# for non-x32 use.
#
512 x32 rt_sigaction compat_sys_rt_sigaction
513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn
@@ -404,3 +405,5 @@
545 x32 execveat compat_sys_execveat
546 x32 preadv2 compat_sys_preadv64v2
547 x32 pwritev2 compat_sys_pwritev64v2
+# This is the end of the legacy x32 range. Numbers 548 and above are
+# not special and are not to be used for x32-specific syscalls.
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 44a75f234db1..de80534473af 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -4639,9 +4639,9 @@ do_concat:
err = 0;
if (lists[0]) {
- struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event",
- "event selector. use 'perf list' to list available events",
- parse_events_option);
+ struct option o = {
+ .value = &trace->evlist,
+ };
err = parse_events_option(&o, lists[0], 0);
}
out:
@@ -4655,9 +4655,12 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u
{
struct trace *trace = opt->value;
- if (!list_empty(&trace->evlist->core.entries))
- return parse_cgroups(opt, str, unset);
-
+ if (!list_empty(&trace->evlist->core.entries)) {
+ struct option o = {
+ .value = &trace->evlist,
+ };
+ return parse_cgroups(&o, str, unset);
+ }
trace->cgroup = evlist__findnew_cgroup(trace->evlist, str);
return 0;
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index de3193552277..00f4fcffa815 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -329,7 +329,7 @@
},
{
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
+ "MetricExpr": "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time",
"MetricGroup": "Memory_BW;SoC",
"MetricName": "DRAM_BW_Use"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index f31794d3b926..0dd8b13b5cfb 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -323,7 +323,7 @@
},
{
"BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
- "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time",
+ "MetricExpr": "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time",
"MetricGroup": "Memory_BW;SoC",
"MetricName": "DRAM_BW_Use"
},
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 2491d167bf76..83638097c3bc 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -95,7 +95,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
return strcmp((const char *) symbol, funcs[idx]);
}
-__no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread)
+noinline int test_dwarf_unwind__thread(struct thread *thread)
{
struct perf_sample sample;
unsigned long cnt = 0;
@@ -126,7 +126,7 @@ __no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread)
static int global_unwind_retval = -INT_MAX;
-__no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2)
+noinline int test_dwarf_unwind__compare(void *p1, void *p2)
{
/* Any possible value should be 'thread' */
struct thread *thread = *(struct thread **)p1;
@@ -145,7 +145,7 @@ __no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2)
return p1 - p2;
}
-__no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread)
+noinline int test_dwarf_unwind__krava_3(struct thread *thread)
{
struct thread *array[2] = {thread, thread};
void *fp = &bsearch;
@@ -164,12 +164,12 @@ __no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread)
return global_unwind_retval;
}
-__no_tail_call noinline int test_dwarf_unwind__krava_2(struct thread *thread)
+noinline int test_dwarf_unwind__krava_2(struct thread *thread)
{
return test_dwarf_unwind__krava_3(thread);
}
-__no_tail_call noinline int test_dwarf_unwind__krava_1(struct thread *thread)
+noinline int test_dwarf_unwind__krava_1(struct thread *thread)
{
return test_dwarf_unwind__krava_2(thread);
}
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index a07626f07208..b0e1880cf992 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -2963,7 +2963,7 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events,
struct popup_action actions[MAX_OPTIONS];
int nr_options = 0;
int key = -1;
- char buf[64];
+ char buf[128];
int delay_secs = hbt ? hbt->refresh : 0;
#define HIST_BROWSER_HELP_COMMON \
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 8763772f1095..6b410c3d52dc 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -102,6 +102,8 @@ int build_id__sprintf(const struct build_id *build_id, char *bf)
const u8 *raw = build_id->data;
size_t i;
+ bf[0] = 0x0;
+
for (i = 0; i < build_id->size; ++i) {
sprintf(bid, "%02x", *raw);
++raw;
diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c
index a405dad068f5..3c20b126d60d 100644
--- a/tools/perf/util/hashmap.c
+++ b/tools/perf/util/hashmap.c
@@ -15,6 +15,9 @@
/* make sure libbpf doesn't use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+/* prevent accidental re-addition of reallocarray() */
+#pragma GCC poison reallocarray
+
/* start with 4 buckets */
#define HASHMAP_MIN_CAP_BITS 2
diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h
index e0af36b0e5d8..d9b385fe808c 100644
--- a/tools/perf/util/hashmap.h
+++ b/tools/perf/util/hashmap.h
@@ -25,6 +25,18 @@ static inline size_t hash_bits(size_t h, int bits)
#endif
}
+/* generic C-string hashing function */
+static inline size_t str_hash(const char *s)
+{
+ size_t h = 0;
+
+ while (*s) {
+ h = h * 31 + *s;
+ s++;
+ }
+ return h;
+}
+
typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 7d4194ffc5b0..15385ea00190 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -786,11 +786,20 @@ static int machine__process_ksymbol_unregister(struct machine *machine,
union perf_event *event,
struct perf_sample *sample __maybe_unused)
{
+ struct symbol *sym;
struct map *map;
map = maps__find(&machine->kmaps, event->ksymbol.addr);
- if (map)
+ if (!map)
+ return 0;
+
+ if (map != machine->vmlinux_map)
maps__remove(&machine->kmaps, map);
+ else {
+ sym = dso__find_symbol(map->dso, map->map_ip(map, map->start));
+ if (sym)
+ dso__delete_symbol(map->dso, sym);
+ }
return 0;
}
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 7cbd024e3e63..c83c2c6564e0 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1592,7 +1592,6 @@ static void _free_command_line(wchar_t **command_line, int num)
static int python_start_script(const char *script, int argc, const char **argv)
{
struct tables *tables = &tables_global;
- PyMODINIT_FUNC (*initfunc)(void);
#if PY_MAJOR_VERSION < 3
const char **command_line;
#else
@@ -1607,20 +1606,18 @@ static int python_start_script(const char *script, int argc, const char **argv)
FILE *fp;
#if PY_MAJOR_VERSION < 3
- initfunc = initperf_trace_context;
command_line = malloc((argc + 1) * sizeof(const char *));
command_line[0] = script;
for (i = 1; i < argc + 1; i++)
command_line[i] = argv[i - 1];
+ PyImport_AppendInittab(name, initperf_trace_context);
#else
- initfunc = PyInit_perf_trace_context;
command_line = malloc((argc + 1) * sizeof(wchar_t *));
command_line[0] = Py_DecodeLocale(script, NULL);
for (i = 1; i < argc + 1; i++)
command_line[i] = Py_DecodeLocale(argv[i - 1], NULL);
+ PyImport_AppendInittab(name, PyInit_perf_trace_context);
#endif
-
- PyImport_AppendInittab(name, initfunc);
Py_Initialize();
#if PY_MAJOR_VERSION < 3
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 7a5f03764702..098080287c68 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -595,6 +595,7 @@ static void perf_event__mmap2_swap(union perf_event *event,
event->mmap2.maj = bswap_32(event->mmap2.maj);
event->mmap2.min = bswap_32(event->mmap2.min);
event->mmap2.ino = bswap_64(event->mmap2.ino);
+ event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation);
if (sample_id_all) {
void *data = &event->mmap2.filename;
@@ -710,6 +711,18 @@ static void perf_event__namespaces_swap(union perf_event *event,
swap_sample_id_all(event, &event->namespaces.link_info[i]);
}
+static void perf_event__cgroup_swap(union perf_event *event, bool sample_id_all)
+{
+ event->cgroup.id = bswap_64(event->cgroup.id);
+
+ if (sample_id_all) {
+ void *data = &event->cgroup.path;
+
+ data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
+ swap_sample_id_all(event, data);
+ }
+}
+
static u8 revbyte(u8 b)
{
int rev = (b >> 4) | ((b & 0xf) << 4);
@@ -952,6 +965,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
[PERF_RECORD_SWITCH] = perf_event__switch_swap,
[PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap,
[PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap,
+ [PERF_RECORD_CGROUP] = perf_event__cgroup_swap,
[PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap,
[PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
[PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 6138866665df..0d14abdf3d72 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -515,6 +515,13 @@ void dso__insert_symbol(struct dso *dso, struct symbol *sym)
}
}
+void dso__delete_symbol(struct dso *dso, struct symbol *sym)
+{
+ rb_erase_cached(&sym->rb_node, &dso->symbols);
+ symbol__delete(sym);
+ dso__reset_find_symbol_cache(dso);
+}
+
struct symbol *dso__find_symbol(struct dso *dso, u64 addr)
{
if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) {
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index f4801c488def..954d6a049ee2 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -131,6 +131,8 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map);
void dso__insert_symbol(struct dso *dso,
struct symbol *sym);
+void dso__delete_symbol(struct dso *dso,
+ struct symbol *sym);
struct symbol *dso__find_symbol(struct dso *dso, u64 addr);
struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name);
diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile
index c8622497ef23..c7bcddbd486d 100644
--- a/tools/power/cpupower/Makefile
+++ b/tools/power/cpupower/Makefile
@@ -51,7 +51,7 @@ DESTDIR ?=
# Package-related definitions. Distributions can modify the version
# and _should_ modify the PACKAGE_BUGREPORT definition
-VERSION= $(shell ./utils/version-gen.sh)
+VERSION:= $(shell ./utils/version-gen.sh)
LIB_MAJ= 0.0.1
LIB_MIN= 0
diff --git a/tools/power/cpupower/debug/i386/intel_gsic.c b/tools/power/cpupower/debug/i386/intel_gsic.c
index e5e926f46d6b..befd837f07f8 100644
--- a/tools/power/cpupower/debug/i386/intel_gsic.c
+++ b/tools/power/cpupower/debug/i386/intel_gsic.c
@@ -71,7 +71,7 @@ int main (void)
printf("\tsmi_cmd=0x?? smi_port=0x?? smi_sig=1\n");
printf("\nUnfortunately, you have to know what exactly are "
"smi_cmd and smi_port, and this\nis system "
- "dependant.\n");
+ "dependent.\n");
}
return 1;
}
diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile
index 2b6551269e43..f3e3c94ab9bd 100644
--- a/tools/power/x86/turbostat/Makefile
+++ b/tools/power/x86/turbostat/Makefile
@@ -12,11 +12,12 @@ turbostat : turbostat.c
override CFLAGS += -O2 -Wall -I../../../include
override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"'
override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"'
+override CFLAGS += -D_FILE_OFFSET_BITS=64
override CFLAGS += -D_FORTIFY_SOURCE=2
%: %.c
@mkdir -p $(BUILD_OUTPUT)
- $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap
+ $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap -lrt
.PHONY : clean
clean :
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index a6db83a88e85..f6b7e85b121c 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -335,7 +335,7 @@ that they count at TSC rate, which is true on all processors tested to date.
.SH REFERENCES
Volume 3B: System Programming Guide"
-http://www.intel.com/products/processor/manuals/
+https://www.intel.com/products/processor/manuals/
.SH FILES
.ta
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 33b370865d16..f3a1746f7f45 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -79,6 +79,7 @@ unsigned long long gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int gfx_cur_mhz;
+unsigned int gfx_act_mhz;
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;
double rapl_power_units, rapl_time_units;
@@ -210,13 +211,14 @@ struct pkg_data {
unsigned long long pkg_both_core_gfxe_c0;
long long gfx_rc6_ms;
unsigned int gfx_mhz;
+ unsigned int gfx_act_mhz;
unsigned int package_id;
- unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */
- unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */
- unsigned int energy_cores; /* MSR_PP0_ENERGY_STATUS */
- unsigned int energy_gfx; /* MSR_PP1_ENERGY_STATUS */
- unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
- unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
+ unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */
+ unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */
+ unsigned long long energy_cores; /* MSR_PP0_ENERGY_STATUS */
+ unsigned long long energy_gfx; /* MSR_PP1_ENERGY_STATUS */
+ unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */
+ unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */
unsigned int pkg_temp_c;
unsigned long long counter[MAX_ADDED_COUNTERS];
} *package_even, *package_odd;
@@ -259,6 +261,113 @@ struct msr_counter {
#define SYSFS_PERCPU (1 << 1)
};
+/*
+ * The accumulated sum of MSR is defined as a monotonic
+ * increasing MSR, it will be accumulated periodically,
+ * despite its register's bit width.
+ */
+enum {
+ IDX_PKG_ENERGY,
+ IDX_DRAM_ENERGY,
+ IDX_PP0_ENERGY,
+ IDX_PP1_ENERGY,
+ IDX_PKG_PERF,
+ IDX_DRAM_PERF,
+ IDX_COUNT,
+};
+
+int get_msr_sum(int cpu, off_t offset, unsigned long long *msr);
+
+struct msr_sum_array {
+ /* get_msr_sum() = sum + (get_msr() - last) */
+ struct {
+ /*The accumulated MSR value is updated by the timer*/
+ unsigned long long sum;
+ /*The MSR footprint recorded in last timer*/
+ unsigned long long last;
+ } entries[IDX_COUNT];
+};
+
+/* The percpu MSR sum array.*/
+struct msr_sum_array *per_cpu_msr_sum;
+
+int idx_to_offset(int idx)
+{
+ int offset;
+
+ switch (idx) {
+ case IDX_PKG_ENERGY:
+ offset = MSR_PKG_ENERGY_STATUS;
+ break;
+ case IDX_DRAM_ENERGY:
+ offset = MSR_DRAM_ENERGY_STATUS;
+ break;
+ case IDX_PP0_ENERGY:
+ offset = MSR_PP0_ENERGY_STATUS;
+ break;
+ case IDX_PP1_ENERGY:
+ offset = MSR_PP1_ENERGY_STATUS;
+ break;
+ case IDX_PKG_PERF:
+ offset = MSR_PKG_PERF_STATUS;
+ break;
+ case IDX_DRAM_PERF:
+ offset = MSR_DRAM_PERF_STATUS;
+ break;
+ default:
+ offset = -1;
+ }
+ return offset;
+}
+
+int offset_to_idx(int offset)
+{
+ int idx;
+
+ switch (offset) {
+ case MSR_PKG_ENERGY_STATUS:
+ idx = IDX_PKG_ENERGY;
+ break;
+ case MSR_DRAM_ENERGY_STATUS:
+ idx = IDX_DRAM_ENERGY;
+ break;
+ case MSR_PP0_ENERGY_STATUS:
+ idx = IDX_PP0_ENERGY;
+ break;
+ case MSR_PP1_ENERGY_STATUS:
+ idx = IDX_PP1_ENERGY;
+ break;
+ case MSR_PKG_PERF_STATUS:
+ idx = IDX_PKG_PERF;
+ break;
+ case MSR_DRAM_PERF_STATUS:
+ idx = IDX_DRAM_PERF;
+ break;
+ default:
+ idx = -1;
+ }
+ return idx;
+}
+
+int idx_valid(int idx)
+{
+ switch (idx) {
+ case IDX_PKG_ENERGY:
+ return do_rapl & RAPL_PKG;
+ case IDX_DRAM_ENERGY:
+ return do_rapl & RAPL_DRAM;
+ case IDX_PP0_ENERGY:
+ return do_rapl & RAPL_CORES_ENERGY_STATUS;
+ case IDX_PP1_ENERGY:
+ return do_rapl & RAPL_GFX;
+ case IDX_PKG_PERF:
+ return do_rapl & RAPL_PKG_PERF_STATUS;
+ case IDX_DRAM_PERF:
+ return do_rapl & RAPL_DRAM_PERF_STATUS;
+ default:
+ return 0;
+ }
+}
struct sys_counters {
unsigned int added_thread_counters;
unsigned int added_core_counters;
@@ -451,6 +560,7 @@ struct msr_counter bic[] = {
{ 0x0, "APIC" },
{ 0x0, "X2APIC" },
{ 0x0, "Die" },
+ { 0x0, "GFXAMHz" },
};
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
@@ -505,6 +615,7 @@ struct msr_counter bic[] = {
#define BIC_APIC (1ULL << 48)
#define BIC_X2APIC (1ULL << 49)
#define BIC_Die (1ULL << 50)
+#define BIC_GFXACTMHz (1ULL << 51)
#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
@@ -724,6 +835,9 @@ void print_header(char *delim)
if (DO_BIC(BIC_GFXMHz))
outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_GFXACTMHz))
+ outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : ""));
+
if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : ""));
if (DO_BIC(BIC_Any_c0))
@@ -858,13 +972,13 @@ int dump_counters(struct thread_data *t, struct core_data *c,
outp += sprintf(outp, "pc10: %016llX\n", p->pc10);
outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi);
outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi);
- outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg);
- outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores);
- outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx);
- outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram);
- outp += sprintf(outp, "Throttle PKG: %0X\n",
+ outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg);
+ outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
+ outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
+ outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
+ outp += sprintf(outp, "Throttle PKG: %0llX\n",
p->rapl_pkg_perf_status);
- outp += sprintf(outp, "Throttle RAM: %0X\n",
+ outp += sprintf(outp, "Throttle RAM: %0llX\n",
p->rapl_dram_perf_status);
outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
@@ -1062,14 +1176,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
}
}
- /*
- * If measurement interval exceeds minimum RAPL Joule Counter range,
- * indicate that results are suspect by printing "**" in fraction place.
- */
- if (interval_float < rapl_joule_counter_range)
- fmt8 = "%s%.2f";
- else
- fmt8 = "%6.0f**";
+ fmt8 = "%s%.2f";
if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
@@ -1098,6 +1205,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (DO_BIC(BIC_GFXMHz))
outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz);
+ /* GFXACTMHz */
+ if (DO_BIC(BIC_GFXACTMHz))
+ outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz);
+
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
if (DO_BIC(BIC_Totl_c0))
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
@@ -1210,11 +1321,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_
}
#define DELTA_WRAP32(new, old) \
- if (new > old) { \
- old = new - old; \
- } else { \
- old = 0x100000000 + new - old; \
- }
+ old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32);
int
delta_package(struct pkg_data *new, struct pkg_data *old)
@@ -1253,13 +1360,14 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
old->gfx_mhz = new->gfx_mhz;
+ old->gfx_act_mhz = new->gfx_act_mhz;
- DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
- DELTA_WRAP32(new->energy_cores, old->energy_cores);
- DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
- DELTA_WRAP32(new->energy_dram, old->energy_dram);
- DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status);
- DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status);
+ old->energy_pkg = new->energy_pkg - old->energy_pkg;
+ old->energy_cores = new->energy_cores - old->energy_cores;
+ old->energy_gfx = new->energy_gfx - old->energy_gfx;
+ old->energy_dram = new->energy_dram - old->energy_dram;
+ old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status;
+ old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW)
@@ -1469,6 +1577,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->gfx_rc6_ms = 0;
p->gfx_mhz = 0;
+ p->gfx_act_mhz = 0;
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next)
t->counter[i] = 0;
@@ -1564,6 +1673,7 @@ int sum_counters(struct thread_data *t, struct core_data *c,
average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
average.packages.gfx_mhz = p->gfx_mhz;
+ average.packages.gfx_act_mhz = p->gfx_act_mhz;
average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
@@ -1784,7 +1894,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
int i;
if (cpu_migrate(cpu)) {
- fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu);
return -1;
}
@@ -1966,39 +2076,39 @@ retry:
p->sys_lpi = cpuidle_cur_sys_lpi_us;
if (do_rapl & RAPL_PKG) {
- if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr))
+ if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr))
return -13;
- p->energy_pkg = msr & 0xFFFFFFFF;
+ p->energy_pkg = msr;
}
if (do_rapl & RAPL_CORES_ENERGY_STATUS) {
- if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr))
+ if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr))
return -14;
- p->energy_cores = msr & 0xFFFFFFFF;
+ p->energy_cores = msr;
}
if (do_rapl & RAPL_DRAM) {
- if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
+ if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr))
return -15;
- p->energy_dram = msr & 0xFFFFFFFF;
+ p->energy_dram = msr;
}
if (do_rapl & RAPL_GFX) {
- if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr))
+ if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr))
return -16;
- p->energy_gfx = msr & 0xFFFFFFFF;
+ p->energy_gfx = msr;
}
if (do_rapl & RAPL_PKG_PERF_STATUS) {
- if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr))
+ if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr))
return -16;
- p->rapl_pkg_perf_status = msr & 0xFFFFFFFF;
+ p->rapl_pkg_perf_status = msr;
}
if (do_rapl & RAPL_DRAM_PERF_STATUS) {
- if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr))
+ if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr))
return -16;
- p->rapl_dram_perf_status = msr & 0xFFFFFFFF;
+ p->rapl_dram_perf_status = msr;
}
if (do_rapl & RAPL_AMD_F17H) {
- if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr))
+ if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr))
return -13;
- p->energy_pkg = msr & 0xFFFFFFFF;
+ p->energy_pkg = msr;
}
if (DO_BIC(BIC_PkgTmp)) {
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
@@ -2012,6 +2122,9 @@ retry:
if (DO_BIC(BIC_GFXMHz))
p->gfx_mhz = gfx_cur_mhz;
+ if (DO_BIC(BIC_GFXACTMHz))
+ p->gfx_act_mhz = gfx_act_mhz;
+
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (get_mp(cpu, mp, &p->counter[i]))
return -10;
@@ -2173,6 +2286,7 @@ int has_turbo_ratio_group_limits(int family, int model)
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_SKYLAKE_X:
case INTEL_FAM6_ATOM_GOLDMONT_D:
+ case INTEL_FAM6_ATOM_TREMONT_D:
return 1;
}
return 0;
@@ -2650,7 +2764,12 @@ int get_thread_siblings(struct cpu_topology *thiscpu)
sprintf(path,
"/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
- filep = fopen_or_die(path, "r");
+ filep = fopen(path, "r");
+
+ if (!filep) {
+ warnx("%s: open failed", path);
+ return -1;
+ }
do {
offset -= BITMASK_SIZE;
if (fscanf(filep, "%lx%c", &map, &character) != 2)
@@ -2763,18 +2882,25 @@ void re_initialize(void)
{
free_all_buffers();
setup_all_buffers();
- printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
+ fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus);
}
void set_max_cpu_num(void)
{
FILE *filep;
+ int base_cpu;
unsigned long dummy;
+ char pathname[64];
+
+ base_cpu = sched_getcpu();
+ if (base_cpu < 0)
+ err(1, "cannot find calling cpu ID");
+ sprintf(pathname,
+ "/sys/devices/system/cpu/cpu%d/topology/thread_siblings",
+ base_cpu);
+ filep = fopen_or_die(pathname, "r");
topo.max_cpu_num = 0;
- filep = fopen_or_die(
- "/sys/devices/system/cpu/cpu0/topology/thread_siblings",
- "r");
while (fscanf(filep, "%lx,", &dummy) == 1)
topo.max_cpu_num += BITMASK_SIZE;
fclose(filep);
@@ -2916,6 +3042,33 @@ int snapshot_gfx_mhz(void)
}
/*
+ * snapshot_gfx_cur_mhz()
+ *
+ * record snapshot of
+ * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz
+ *
+ * return 1 if config change requires a restart, else return 0
+ */
+int snapshot_gfx_act_mhz(void)
+{
+ static FILE *fp;
+ int retval;
+
+ if (fp == NULL)
+ fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r");
+ else {
+ rewind(fp);
+ fflush(fp);
+ }
+
+ retval = fscanf(fp, "%d", &gfx_act_mhz);
+ if (retval != 1)
+ err(1, "GFX ACT MHz");
+
+ return 0;
+}
+
+/*
* snapshot_cpu_lpi()
*
* record snapshot of
@@ -2980,6 +3133,9 @@ int snapshot_proc_sysfs_files(void)
if (DO_BIC(BIC_GFXMHz))
snapshot_gfx_mhz();
+ if (DO_BIC(BIC_GFXACTMHz))
+ snapshot_gfx_act_mhz();
+
if (DO_BIC(BIC_CPU_LPI))
snapshot_cpu_lpi_us();
@@ -3057,6 +3213,111 @@ void do_sleep(void)
}
}
+int get_msr_sum(int cpu, off_t offset, unsigned long long *msr)
+{
+ int ret, idx;
+ unsigned long long msr_cur, msr_last;
+
+ if (!per_cpu_msr_sum)
+ return 1;
+
+ idx = offset_to_idx(offset);
+ if (idx < 0)
+ return idx;
+ /* get_msr_sum() = sum + (get_msr() - last) */
+ ret = get_msr(cpu, offset, &msr_cur);
+ if (ret)
+ return ret;
+ msr_last = per_cpu_msr_sum[cpu].entries[idx].last;
+ DELTA_WRAP32(msr_cur, msr_last);
+ *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum;
+
+ return 0;
+}
+
+timer_t timerid;
+
+/* Timer callback, update the sum of MSRs periodically. */
+static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ int i, ret;
+ int cpu = t->cpu_id;
+
+ for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
+ unsigned long long msr_cur, msr_last;
+ int offset;
+
+ if (!idx_valid(i))
+ continue;
+ offset = idx_to_offset(i);
+ if (offset < 0)
+ continue;
+ ret = get_msr(cpu, offset, &msr_cur);
+ if (ret) {
+ fprintf(outf, "Can not update msr(0x%x)\n", offset);
+ continue;
+ }
+
+ msr_last = per_cpu_msr_sum[cpu].entries[i].last;
+ per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff;
+
+ DELTA_WRAP32(msr_cur, msr_last);
+ per_cpu_msr_sum[cpu].entries[i].sum += msr_last;
+ }
+ return 0;
+}
+
+static void
+msr_record_handler(union sigval v)
+{
+ for_all_cpus(update_msr_sum, EVEN_COUNTERS);
+}
+
+void msr_sum_record(void)
+{
+ struct itimerspec its;
+ struct sigevent sev;
+
+ per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array));
+ if (!per_cpu_msr_sum) {
+ fprintf(outf, "Can not allocate memory for long time MSR.\n");
+ return;
+ }
+ /*
+ * Signal handler might be restricted, so use thread notifier instead.
+ */
+ memset(&sev, 0, sizeof(struct sigevent));
+ sev.sigev_notify = SIGEV_THREAD;
+ sev.sigev_notify_function = msr_record_handler;
+
+ sev.sigev_value.sival_ptr = &timerid;
+ if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) {
+ fprintf(outf, "Can not create timer.\n");
+ goto release_msr;
+ }
+
+ its.it_value.tv_sec = 0;
+ its.it_value.tv_nsec = 1;
+ /*
+ * A wraparound time has been calculated early.
+ * Some sources state that the peak power for a
+ * microprocessor is usually 1.5 times the TDP rating,
+ * use 2 * TDP for safety.
+ */
+ its.it_interval.tv_sec = rapl_joule_counter_range / 2;
+ its.it_interval.tv_nsec = 0;
+
+ if (timer_settime(timerid, 0, &its, NULL) == -1) {
+ fprintf(outf, "Can not set timer.\n");
+ goto release_timer;
+ }
+ return;
+
+ release_timer:
+ timer_delete(timerid);
+ release_msr:
+ free(per_cpu_msr_sum);
+}
void turbostat_loop()
{
@@ -3075,7 +3336,7 @@ restart:
if (retval < -1) {
exit(retval);
} else if (retval == -1) {
- if (restarted > 1) {
+ if (restarted > 10) {
exit(retval);
}
re_initialize();
@@ -3279,6 +3540,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
case INTEL_FAM6_ATOM_TREMONT: /* EHL */
+ case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
pkg_cstate_limits = glm_pkg_cstate_limits;
break;
default:
@@ -3361,6 +3623,17 @@ int is_ehl(unsigned int family, unsigned int model)
}
return 0;
}
+int is_jvl(unsigned int family, unsigned int model)
+{
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_ATOM_TREMONT_D:
+ return 1;
+ }
+ return 0;
+}
int has_turbo_ratio_limit(unsigned int family, unsigned int model)
{
@@ -3475,6 +3748,20 @@ int has_config_tdp(unsigned int family, unsigned int model)
}
static void
+remove_underbar(char *s)
+{
+ char *to = s;
+
+ while (*s) {
+ if (*s != '_')
+ *to++ = *s;
+ s++;
+ }
+
+ *to = 0;
+}
+
+static void
dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
{
if (!do_nhm_platform_info)
@@ -3530,9 +3817,6 @@ dump_sysfs_cstate_config(void)
int state;
char *sp;
- if (!DO_BIC(BIC_sysfs))
- return;
-
if (access("/sys/devices/system/cpu/cpuidle", R_OK)) {
fprintf(outf, "cpuidle not loaded\n");
return;
@@ -3559,6 +3843,8 @@ dump_sysfs_cstate_config(void)
*sp = '\0';
fclose(input);
+ remove_underbar(name_buf);
+
sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
base_cpu, state);
input = fopen(path, "r");
@@ -3645,7 +3931,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return 0;
if (cpu_migrate(cpu)) {
- fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu);
return -1;
}
@@ -3689,7 +3975,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return 0;
if (cpu_migrate(cpu)) {
- fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu);
return -1;
}
@@ -3777,7 +4063,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
return 0;
if (cpu_migrate(cpu)) {
- fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu);
return -1;
}
@@ -3881,13 +4167,8 @@ double get_tdp_intel(unsigned int model)
double get_tdp_amd(unsigned int family)
{
- switch (family) {
- case 0x17:
- case 0x18:
- default:
- /* This is the max stock TDP of HEDT/Server Fam17h chips */
- return 250.0;
- }
+ /* This is the max stock TDP of HEDT/Server Fam17h+ chips */
+ return 280.0;
}
/*
@@ -3959,6 +4240,14 @@ void rapl_probe_intel(unsigned int family, unsigned int model)
BIC_PRESENT(BIC_GFXWatt);
}
break;
+ case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
+ do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ BIC_PRESENT(BIC_PKG__);
+ if (rapl_joules)
+ BIC_PRESENT(BIC_Pkg_J);
+ else
+ BIC_PRESENT(BIC_PkgWatt);
+ break;
case INTEL_FAM6_SKYLAKE_L: /* SKL */
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
@@ -4069,27 +4358,20 @@ void rapl_probe_amd(unsigned int family, unsigned int model)
if (max_extended_level >= 0x80000007) {
__cpuid(0x80000007, eax, ebx, ecx, edx);
- /* RAPL (Fam 17h) */
+ /* RAPL (Fam 17h+) */
has_rapl = edx & (1 << 14);
}
- if (!has_rapl)
+ if (!has_rapl || family < 0x17)
return;
- switch (family) {
- case 0x17: /* Zen, Zen+ */
- case 0x18: /* Hygon Dhyana */
- do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
- if (rapl_joules) {
- BIC_PRESENT(BIC_Pkg_J);
- BIC_PRESENT(BIC_Cor_J);
- } else {
- BIC_PRESENT(BIC_PkgWatt);
- BIC_PRESENT(BIC_CorWatt);
- }
- break;
- default:
- return;
+ do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY;
+ if (rapl_joules) {
+ BIC_PRESENT(BIC_Pkg_J);
+ BIC_PRESENT(BIC_Cor_J);
+ } else {
+ BIC_PRESENT(BIC_PkgWatt);
+ BIC_PRESENT(BIC_CorWatt);
}
if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr))
@@ -4162,7 +4444,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
return 0;
if (cpu_migrate(cpu)) {
- fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu);
return -1;
}
@@ -4234,7 +4516,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
cpu = t->cpu_id;
if (cpu_migrate(cpu)) {
- fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu);
return -1;
}
@@ -4361,6 +4643,7 @@ int has_snb_msrs(unsigned int family, unsigned int model)
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
case INTEL_FAM6_ATOM_TREMONT: /* EHL */
+ case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
return 1;
}
return 0;
@@ -4507,12 +4790,33 @@ double discover_bclk(unsigned int family, unsigned int model)
* below this value, including the Digital Thermal Sensor (DTS),
* Package Thermal Management Sensor (PTM), and thermal event thresholds.
*/
-int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+int read_tcc_activation_temp()
{
unsigned long long msr;
- unsigned int target_c_local;
- int cpu;
+ unsigned int tcc, target_c, offset_c;
+ /* Temperature Target MSR is Nehalem and newer only */
+ if (!do_nhm_platform_info)
+ return 0;
+
+ if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
+ return 0;
+
+ target_c = (msr >> 16) & 0xFF;
+
+ offset_c = (msr >> 24) & 0xF;
+
+ tcc = target_c - offset_c;
+
+ if (!quiet)
+ fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
+ base_cpu, msr, tcc, target_c, offset_c);
+
+ return tcc;
+}
+
+int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
/* tcc_activation_temp is used only for dts or ptm */
if (!(do_dts || do_ptm))
return 0;
@@ -4521,43 +4825,18 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
return 0;
- cpu = t->cpu_id;
- if (cpu_migrate(cpu)) {
- fprintf(outf, "Could not migrate to CPU %d\n", cpu);
- return -1;
- }
-
if (tcc_activation_temp_override != 0) {
tcc_activation_temp = tcc_activation_temp_override;
- fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
- cpu, tcc_activation_temp);
+ fprintf(outf, "Using cmdline TCC Target (%d C)\n", tcc_activation_temp);
return 0;
}
- /* Temperature Target MSR is Nehalem and newer only */
- if (!do_nhm_platform_info)
- goto guess;
-
- if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
- goto guess;
-
- target_c_local = (msr >> 16) & 0xFF;
-
- if (!quiet)
- fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
- cpu, msr, target_c_local);
-
- if (!target_c_local)
- goto guess;
-
- tcc_activation_temp = target_c_local;
-
- return 0;
+ tcc_activation_temp = read_tcc_activation_temp();
+ if (tcc_activation_temp)
+ return 0;
-guess:
tcc_activation_temp = TJMAX_DEFAULT;
- fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
- cpu, tcc_activation_temp);
+ fprintf(outf, "Guessing tjMax %d C, Please use -T to specify\n", tcc_activation_temp);
return 0;
}
@@ -4685,19 +4964,46 @@ unsigned int intel_model_duplicates(unsigned int model)
case INTEL_FAM6_ICELAKE_NNPI:
case INTEL_FAM6_TIGERLAKE_L:
case INTEL_FAM6_TIGERLAKE:
+ case INTEL_FAM6_ROCKETLAKE:
+ case INTEL_FAM6_LAKEFIELD:
+ case INTEL_FAM6_ALDERLAKE:
return INTEL_FAM6_CANNONLAKE_L;
- case INTEL_FAM6_ATOM_TREMONT_D:
- return INTEL_FAM6_ATOM_GOLDMONT_D;
-
case INTEL_FAM6_ATOM_TREMONT_L:
return INTEL_FAM6_ATOM_TREMONT;
case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_SAPPHIRERAPIDS_X:
return INTEL_FAM6_SKYLAKE_X;
}
return model;
}
+
+void print_dev_latency(void)
+{
+ char *path = "/dev/cpu_dma_latency";
+ int fd;
+ int value;
+ int retval;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ warn("fopen %s\n", path);
+ return;
+ }
+
+ retval = read(fd, (void *)&value, sizeof(int));
+ if (retval != sizeof(int)) {
+ warn("read %s\n", path);
+ close(fd);
+ return;
+ }
+ fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n",
+ value, value == 2000000000 ? "default" : "constrained");
+
+ close(fd);
+}
+
void process_cpuid()
{
unsigned int eax, ebx, ecx, edx;
@@ -4916,6 +5222,14 @@ void process_cpuid()
BIC_PRESENT(BIC_Mod_c6);
use_c1_residency_msr = 1;
}
+ if (is_jvl(family, model)) {
+ BIC_NOT_PRESENT(BIC_CPU_c3);
+ BIC_NOT_PRESENT(BIC_CPU_c7);
+ BIC_NOT_PRESENT(BIC_Pkgpc2);
+ BIC_NOT_PRESENT(BIC_Pkgpc3);
+ BIC_NOT_PRESENT(BIC_Pkgpc6);
+ BIC_NOT_PRESENT(BIC_Pkgpc7);
+ }
if (is_dnv(family, model)) {
BIC_PRESENT(BIC_CPU_c1);
BIC_NOT_PRESENT(BIC_CPU_c3);
@@ -4935,9 +5249,12 @@ void process_cpuid()
BIC_NOT_PRESENT(BIC_Pkgpc7);
}
if (has_c8910_msrs(family, model)) {
- BIC_PRESENT(BIC_Pkgpc8);
- BIC_PRESENT(BIC_Pkgpc9);
- BIC_PRESENT(BIC_Pkgpc10);
+ if (pkg_cstate_limit >= PCL__8)
+ BIC_PRESENT(BIC_Pkgpc8);
+ if (pkg_cstate_limit >= PCL__9)
+ BIC_PRESENT(BIC_Pkgpc9);
+ if (pkg_cstate_limit >= PCL_10)
+ BIC_PRESENT(BIC_Pkgpc10);
}
do_irtl_hsw = has_c8910_msrs(family, model);
if (has_skl_msrs(family, model)) {
@@ -4967,6 +5284,8 @@ void process_cpuid()
dump_cstate_pstate_config_info(family, model);
if (!quiet)
+ print_dev_latency();
+ if (!quiet)
dump_sysfs_cstate_config();
if (!quiet)
dump_sysfs_pstate_config();
@@ -4980,6 +5299,9 @@ void process_cpuid()
if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK))
BIC_PRESENT(BIC_GFXMHz);
+ if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK))
+ BIC_PRESENT(BIC_GFXACTMHz);
+
if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK))
BIC_PRESENT(BIC_CPU_LPI);
else
@@ -5390,7 +5712,7 @@ int get_and_dump_counters(void)
}
void print_version() {
- fprintf(outf, "turbostat version 20.03.20"
+ fprintf(outf, "turbostat version 20.09.30"
" - Len Brown <lenb@kernel.org>\n");
}
@@ -5597,6 +5919,8 @@ void probe_sysfs(void)
*sp = '%';
*(sp + 1) = '\0';
+ remove_underbar(name_buf);
+
fclose(input);
sprintf(path, "cpuidle/state%d/time", state);
@@ -5624,6 +5948,8 @@ void probe_sysfs(void)
*sp = '\0';
fclose(input);
+ remove_underbar(name_buf);
+
sprintf(path, "cpuidle/state%d/usage", state);
if (is_deferred_skip(name_buf))
@@ -5868,6 +6194,7 @@ int main(int argc, char **argv)
return 0;
}
+ msr_sum_record();
/*
* if any params left, it must be a command to fork
*/
diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
index 3fe1eed900d4..ff6c6661f075 100644
--- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
+++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c
@@ -622,6 +622,57 @@ void cmdline(int argc, char **argv)
}
}
+/*
+ * Open a file, and exit on failure
+ */
+FILE *fopen_or_die(const char *path, const char *mode)
+{
+ FILE *filep = fopen(path, "r");
+
+ if (!filep)
+ err(1, "%s: open failed", path);
+ return filep;
+}
+
+void err_on_hypervisor(void)
+{
+ FILE *cpuinfo;
+ char *flags, *hypervisor;
+ char *buffer;
+
+ /* On VMs /proc/cpuinfo contains a "flags" entry for hypervisor */
+ cpuinfo = fopen_or_die("/proc/cpuinfo", "ro");
+
+ buffer = malloc(4096);
+ if (!buffer) {
+ fclose(cpuinfo);
+ err(-ENOMEM, "buffer malloc fail");
+ }
+
+ if (!fread(buffer, 1024, 1, cpuinfo)) {
+ fclose(cpuinfo);
+ free(buffer);
+ err(1, "Reading /proc/cpuinfo failed");
+ }
+
+ flags = strstr(buffer, "flags");
+ rewind(cpuinfo);
+ fseek(cpuinfo, flags - buffer, SEEK_SET);
+ if (!fgets(buffer, 4096, cpuinfo)) {
+ fclose(cpuinfo);
+ free(buffer);
+ err(1, "Reading /proc/cpuinfo failed");
+ }
+ fclose(cpuinfo);
+
+ hypervisor = strstr(buffer, "hypervisor");
+
+ free(buffer);
+
+ if (hypervisor)
+ err(-1,
+ "not supported on this virtual machine");
+}
int get_msr(int cpu, int offset, unsigned long long *msr)
{
@@ -635,8 +686,10 @@ int get_msr(int cpu, int offset, unsigned long long *msr)
err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
retval = pread(fd, msr, sizeof(*msr), offset);
- if (retval != sizeof(*msr))
+ if (retval != sizeof(*msr)) {
+ err_on_hypervisor();
err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset);
+ }
if (debug > 1)
fprintf(stderr, "get_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, *msr);
@@ -1086,18 +1139,6 @@ int update_cpu_msrs(int cpu)
return 0;
}
-/*
- * Open a file, and exit on failure
- */
-FILE *fopen_or_die(const char *path, const char *mode)
-{
- FILE *filep = fopen(path, "r");
-
- if (!filep)
- err(1, "%s: open failed", path);
- return filep;
-}
-
unsigned int get_pkg_num(int cpu)
{
FILE *fp;
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 8019e3dd4c32..84a1af2581f5 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -66,7 +66,6 @@ def isolate_kunit_output(kernel_output):
def raw_output(kernel_output):
for line in kernel_output:
print(line)
- yield line
DIVIDER = '=' * 60
@@ -242,7 +241,7 @@ def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite:
return None
test_suite.name = name
expected_test_case_num = parse_subtest_plan(lines)
- if not expected_test_case_num:
+ if expected_test_case_num is None:
return None
while expected_test_case_num > 0:
test_case = parse_test_case(lines)
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 99c3c5671ea4..0b60855fb819 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -179,7 +179,7 @@ class KUnitParserTest(unittest.TestCase):
print_mock = mock.patch('builtins.print').start()
result = kunit_parser.parse_run_tests(
kunit_parser.isolate_kunit_output(file.readlines()))
- print_mock.assert_any_call(StrContains("no kunit output detected"))
+ print_mock.assert_any_call(StrContains('no tests run!'))
print_mock.stop()
file.close()
@@ -198,39 +198,57 @@ class KUnitParserTest(unittest.TestCase):
'test_data/test_config_printk_time.log')
with open(prefix_log) as file:
result = kunit_parser.parse_run_tests(file.readlines())
- self.assertEqual('kunit-resource-test', result.suites[0].name)
+ self.assertEqual(
+ kunit_parser.TestStatus.SUCCESS,
+ result.status)
+ self.assertEqual('kunit-resource-test', result.suites[0].name)
def test_ignores_multiple_prefixes(self):
prefix_log = get_absolute_path(
'test_data/test_multiple_prefixes.log')
with open(prefix_log) as file:
result = kunit_parser.parse_run_tests(file.readlines())
- self.assertEqual('kunit-resource-test', result.suites[0].name)
+ self.assertEqual(
+ kunit_parser.TestStatus.SUCCESS,
+ result.status)
+ self.assertEqual('kunit-resource-test', result.suites[0].name)
def test_prefix_mixed_kernel_output(self):
mixed_prefix_log = get_absolute_path(
'test_data/test_interrupted_tap_output.log')
with open(mixed_prefix_log) as file:
result = kunit_parser.parse_run_tests(file.readlines())
- self.assertEqual('kunit-resource-test', result.suites[0].name)
+ self.assertEqual(
+ kunit_parser.TestStatus.SUCCESS,
+ result.status)
+ self.assertEqual('kunit-resource-test', result.suites[0].name)
def test_prefix_poundsign(self):
pound_log = get_absolute_path('test_data/test_pound_sign.log')
with open(pound_log) as file:
result = kunit_parser.parse_run_tests(file.readlines())
- self.assertEqual('kunit-resource-test', result.suites[0].name)
+ self.assertEqual(
+ kunit_parser.TestStatus.SUCCESS,
+ result.status)
+ self.assertEqual('kunit-resource-test', result.suites[0].name)
def test_kernel_panic_end(self):
panic_log = get_absolute_path('test_data/test_kernel_panic_interrupt.log')
with open(panic_log) as file:
result = kunit_parser.parse_run_tests(file.readlines())
- self.assertEqual('kunit-resource-test', result.suites[0].name)
+ self.assertEqual(
+ kunit_parser.TestStatus.TEST_CRASHED,
+ result.status)
+ self.assertEqual('kunit-resource-test', result.suites[0].name)
def test_pound_no_prefix(self):
pound_log = get_absolute_path('test_data/test_pound_no_prefix.log')
with open(pound_log) as file:
result = kunit_parser.parse_run_tests(file.readlines())
- self.assertEqual('kunit-resource-test', result.suites[0].name)
+ self.assertEqual(
+ kunit_parser.TestStatus.SUCCESS,
+ result.status)
+ self.assertEqual('kunit-resource-test', result.suites[0].name)
class KUnitJsonTest(unittest.TestCase):
diff --git a/tools/testing/kunit/test_data/test_config_printk_time.log b/tools/testing/kunit/test_data/test_config_printk_time.log
index c02ca773946d..6bdb57f76eac 100644
--- a/tools/testing/kunit/test_data/test_config_printk_time.log
+++ b/tools/testing/kunit/test_data/test_config_printk_time.log
@@ -1,6 +1,7 @@
[ 0.060000] printk: console [mc-1] enabled
[ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
[ 0.060000] TAP version 14
+[ 0.060000] 1..3
[ 0.060000] # Subtest: kunit-resource-test
[ 0.060000] 1..5
[ 0.060000] ok 1 - kunit_resource_test_init_resources
@@ -28,4 +29,4 @@
[ 0.060000] Stack:
[ 0.060000] 602086f8 601bc260 705c0000 705c0000
[ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab
-[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file
+[ 0.060000] 6005fcec 601bc260 705c0000 3000000010
diff --git a/tools/testing/kunit/test_data/test_interrupted_tap_output.log b/tools/testing/kunit/test_data/test_interrupted_tap_output.log
index 5c73fb3a1c6f..1fb677728abe 100644
--- a/tools/testing/kunit/test_data/test_interrupted_tap_output.log
+++ b/tools/testing/kunit/test_data/test_interrupted_tap_output.log
@@ -1,6 +1,7 @@
[ 0.060000] printk: console [mc-1] enabled
[ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
[ 0.060000] TAP version 14
+[ 0.060000] 1..3
[ 0.060000] # Subtest: kunit-resource-test
[ 0.060000] 1..5
[ 0.060000] ok 1 - kunit_resource_test_init_resources
@@ -34,4 +35,4 @@
[ 0.060000] Stack:
[ 0.060000] 602086f8 601bc260 705c0000 705c0000
[ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab
-[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file
+[ 0.060000] 6005fcec 601bc260 705c0000 3000000010
diff --git a/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log
index c045eee75f27..a014ffe9725e 100644
--- a/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log
+++ b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log
@@ -1,6 +1,7 @@
[ 0.060000] printk: console [mc-1] enabled
[ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
[ 0.060000] TAP version 14
+[ 0.060000] 1..3
[ 0.060000] # Subtest: kunit-resource-test
[ 0.060000] 1..5
[ 0.060000] ok 1 - kunit_resource_test_init_resources
@@ -22,4 +23,4 @@
[ 0.060000] Stack:
[ 0.060000] 602086f8 601bc260 705c0000 705c0000
[ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab
-[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file
+[ 0.060000] 6005fcec 601bc260 705c0000 3000000010
diff --git a/tools/testing/kunit/test_data/test_multiple_prefixes.log b/tools/testing/kunit/test_data/test_multiple_prefixes.log
index bc48407dcc36..0ad78481a0b4 100644
--- a/tools/testing/kunit/test_data/test_multiple_prefixes.log
+++ b/tools/testing/kunit/test_data/test_multiple_prefixes.log
@@ -1,6 +1,7 @@
[ 0.060000][ T1] printk: console [mc-1] enabled
[ 0.060000][ T1] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
[ 0.060000][ T1] TAP version 14
+[ 0.060000][ T1] 1..3
[ 0.060000][ T1] # Subtest: kunit-resource-test
[ 0.060000][ T1] 1..5
[ 0.060000][ T1] ok 1 - kunit_resource_test_init_resources
@@ -28,4 +29,4 @@
[ 0.060000][ T1] Stack:
[ 0.060000][ T1] 602086f8 601bc260 705c0000 705c0000
[ 0.060000][ T1] 602086f8 6005fcec 705c0000 6002c6ab
-[ 0.060000][ T1] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file
+[ 0.060000][ T1] 6005fcec 601bc260 705c0000 3000000010
diff --git a/tools/testing/kunit/test_data/test_pound_no_prefix.log b/tools/testing/kunit/test_data/test_pound_no_prefix.log
index 2ceb360be7d5..dc4cf09a96d0 100644
--- a/tools/testing/kunit/test_data/test_pound_no_prefix.log
+++ b/tools/testing/kunit/test_data/test_pound_no_prefix.log
@@ -1,6 +1,7 @@
printk: console [mc-1] enabled
random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
TAP version 14
+ 1..3
# Subtest: kunit-resource-test
1..5
ok 1 - kunit_resource_test_init_resources
@@ -30,4 +31,4 @@
Stack:
602086f8 601bc260 705c0000 705c0000
602086f8 6005fcec 705c0000 6002c6ab
- 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file
+ 6005fcec 601bc260 705c0000 3000000010
diff --git a/tools/testing/kunit/test_data/test_pound_sign.log b/tools/testing/kunit/test_data/test_pound_sign.log
index 28ffa5ba03bf..3f358e3a7ba0 100644
--- a/tools/testing/kunit/test_data/test_pound_sign.log
+++ b/tools/testing/kunit/test_data/test_pound_sign.log
@@ -1,6 +1,7 @@
[ 0.060000] printk: console [mc-1] enabled
[ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
[ 0.060000] TAP version 14
+[ 0.060000] 1..3
[ 0.060000] # Subtest: kunit-resource-test
[ 0.060000] 1..5
[ 0.060000] ok 1 - kunit_resource_test_init_resources
diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
index 242635d79035..c9fa141ebdcc 100644
--- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c
+++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
@@ -417,6 +417,9 @@ int main(int argc, char *argv[])
/* Register SIGSEGV handler */
mte_register_signal(SIGSEGV, mte_default_handler);
+ /* Set test plan */
+ ksft_set_plan(20);
+
/* Buffer by byte tests */
evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR),
"Check buffer correctness by byte with sync err mode and mmap memory\n");
diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c
index 97bebdecd29e..43bd94f853ba 100644
--- a/tools/testing/selftests/arm64/mte/check_child_memory.c
+++ b/tools/testing/selftests/arm64/mte/check_child_memory.c
@@ -163,6 +163,9 @@ int main(int argc, char *argv[])
mte_register_signal(SIGSEGV, mte_default_handler);
mte_register_signal(SIGBUS, mte_default_handler);
+ /* Set test plan */
+ ksft_set_plan(12);
+
evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
"Check child anonymous memory with private mapping, precise mode and mmap memory\n");
evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c
index bc41ae630c86..3b23c4d61d38 100644
--- a/tools/testing/selftests/arm64/mte/check_ksm_options.c
+++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c
@@ -140,6 +140,10 @@ int main(int argc, char *argv[])
/* Register signal handlers */
mte_register_signal(SIGBUS, mte_default_handler);
mte_register_signal(SIGSEGV, mte_default_handler);
+
+ /* Set test plan */
+ ksft_set_plan(4);
+
/* Enable KSM */
mte_ksm_setup();
diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c
index 33b13b86199b..a04b12c21ac9 100644
--- a/tools/testing/selftests/arm64/mte/check_mmap_options.c
+++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c
@@ -205,7 +205,11 @@ int main(int argc, char *argv[])
mte_register_signal(SIGBUS, mte_default_handler);
mte_register_signal(SIGSEGV, mte_default_handler);
+ /* Set test plan */
+ ksft_set_plan(22);
+
mte_enable_pstate_tco();
+
evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
"Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n");
evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
index 94d245a0ed56..deaef1f61076 100644
--- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -170,6 +170,9 @@ int main(int argc, char *argv[])
/* Register SIGSEGV handler */
mte_register_signal(SIGSEGV, mte_default_handler);
+ /* Set test plan */
+ ksft_set_plan(4);
+
evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR),
"Check an included tag value with sync mode\n");
evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR),
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 594e98e76880..4bfa80f2a8c3 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -92,9 +92,13 @@ int main(int argc, char *argv[])
err = mte_default_setup();
if (err)
return err;
+
/* Register signal handlers */
mte_register_signal(SIGSEGV, mte_default_handler);
+ /* Set test plan */
+ ksft_set_plan(4);
+
evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
"Check memory access from kernel in sync mode, private mapping and mmap memory\n");
evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c
new file mode 100644
index 000000000000..14a31109dd0e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_init.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */
+
+#include <test_progs.h>
+#include "test_map_init.skel.h"
+
+#define TEST_VALUE 0x1234
+#define FILL_VALUE 0xdeadbeef
+
+static int nr_cpus;
+static int duration;
+
+typedef unsigned long long map_key_t;
+typedef unsigned long long map_value_t;
+typedef struct {
+ map_value_t v; /* padding */
+} __bpf_percpu_val_align pcpu_map_value_t;
+
+
+static int map_populate(int map_fd, int num)
+{
+ pcpu_map_value_t value[nr_cpus];
+ int i, err;
+ map_key_t key;
+
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(value, i) = FILL_VALUE;
+
+ for (key = 1; key <= num; key++) {
+ err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct test_map_init *setup(enum bpf_map_type map_type, int map_sz,
+ int *map_fd, int populate)
+{
+ struct test_map_init *skel;
+ int err;
+
+ skel = test_map_init__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return NULL;
+
+ err = bpf_map__set_type(skel->maps.hashmap1, map_type);
+ if (!ASSERT_OK(err, "bpf_map__set_type"))
+ goto error;
+
+ err = bpf_map__set_max_entries(skel->maps.hashmap1, map_sz);
+ if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+ goto error;
+
+ err = test_map_init__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto error;
+
+ *map_fd = bpf_map__fd(skel->maps.hashmap1);
+ if (CHECK(*map_fd < 0, "bpf_map__fd", "failed\n"))
+ goto error;
+
+ err = map_populate(*map_fd, populate);
+ if (!ASSERT_OK(err, "map_populate"))
+ goto error_map;
+
+ return skel;
+
+error_map:
+ close(*map_fd);
+error:
+ test_map_init__destroy(skel);
+ return NULL;
+}
+
+/* executes bpf program that updates map with key, value */
+static int prog_run_insert_elem(struct test_map_init *skel, map_key_t key,
+ map_value_t value)
+{
+ struct test_map_init__bss *bss;
+
+ bss = skel->bss;
+
+ bss->inKey = key;
+ bss->inValue = value;
+ bss->inPid = getpid();
+
+ if (!ASSERT_OK(test_map_init__attach(skel), "skel_attach"))
+ return -1;
+
+ /* Let tracepoint trigger */
+ syscall(__NR_getpgid);
+
+ test_map_init__detach(skel);
+
+ return 0;
+}
+
+static int check_values_one_cpu(pcpu_map_value_t *value, map_value_t expected)
+{
+ int i, nzCnt = 0;
+ map_value_t val;
+
+ for (i = 0; i < nr_cpus; i++) {
+ val = bpf_percpu(value, i);
+ if (val) {
+ if (CHECK(val != expected, "map value",
+ "unexpected for cpu %d: 0x%llx\n", i, val))
+ return -1;
+ nzCnt++;
+ }
+ }
+
+ if (CHECK(nzCnt != 1, "map value", "set for %d CPUs instead of 1!\n",
+ nzCnt))
+ return -1;
+
+ return 0;
+}
+
+/* Add key=1 elem with values set for all CPUs
+ * Delete elem key=1
+ * Run bpf prog that inserts new key=1 elem with value=0x1234
+ * (bpf prog can only set value for current CPU)
+ * Lookup Key=1 and check value is as expected for all CPUs:
+ * value set by bpf prog for one CPU, 0 for all others
+ */
+static void test_pcpu_map_init(void)
+{
+ pcpu_map_value_t value[nr_cpus];
+ struct test_map_init *skel;
+ int map_fd, err;
+ map_key_t key;
+
+ /* max 1 elem in map so insertion is forced to reuse freed entry */
+ skel = setup(BPF_MAP_TYPE_PERCPU_HASH, 1, &map_fd, 1);
+ if (!ASSERT_OK_PTR(skel, "prog_setup"))
+ return;
+
+ /* delete element so the entry can be re-used*/
+ key = 1;
+ err = bpf_map_delete_elem(map_fd, &key);
+ if (!ASSERT_OK(err, "bpf_map_delete_elem"))
+ goto cleanup;
+
+ /* run bpf prog that inserts new elem, re-using the slot just freed */
+ err = prog_run_insert_elem(skel, key, TEST_VALUE);
+ if (!ASSERT_OK(err, "prog_run_insert_elem"))
+ goto cleanup;
+
+ /* check that key=1 was re-created by bpf prog */
+ err = bpf_map_lookup_elem(map_fd, &key, value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ /* and has expected values */
+ check_values_one_cpu(value, TEST_VALUE);
+
+cleanup:
+ test_map_init__destroy(skel);
+}
+
+/* Add key=1 and key=2 elems with values set for all CPUs
+ * Run bpf prog that inserts new key=3 elem
+ * (only for current cpu; other cpus should have initial value = 0)
+ * Lookup Key=1 and check value is as expected for all CPUs
+ */
+static void test_pcpu_lru_map_init(void)
+{
+ pcpu_map_value_t value[nr_cpus];
+ struct test_map_init *skel;
+ int map_fd, err;
+ map_key_t key;
+
+ /* Set up LRU map with 2 elements, values filled for all CPUs.
+ * With these 2 elements, the LRU map is full
+ */
+ skel = setup(BPF_MAP_TYPE_LRU_PERCPU_HASH, 2, &map_fd, 2);
+ if (!ASSERT_OK_PTR(skel, "prog_setup"))
+ return;
+
+ /* run bpf prog that inserts new key=3 element, re-using LRU slot */
+ key = 3;
+ err = prog_run_insert_elem(skel, key, TEST_VALUE);
+ if (!ASSERT_OK(err, "prog_run_insert_elem"))
+ goto cleanup;
+
+ /* check that key=3 replaced one of earlier elements */
+ err = bpf_map_lookup_elem(map_fd, &key, value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ /* and has expected values */
+ check_values_one_cpu(value, TEST_VALUE);
+
+cleanup:
+ test_map_init__destroy(skel);
+}
+
+void test_map_init(void)
+{
+ nr_cpus = bpf_num_possible_cpus();
+ if (nr_cpus <= 1) {
+ printf("%s:SKIP: >1 cpu needed for this test\n", __func__);
+ test__skip();
+ return;
+ }
+
+ if (test__start_subtest("pcpu_map_init"))
+ test_pcpu_map_init();
+ if (test__start_subtest("pcpu_lru_map_init"))
+ test_pcpu_lru_map_init();
+}
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 00578311a423..30982a7e4d0f 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -243,7 +243,10 @@ static ino_t get_inode_from_kernfs(struct kernfs_node* node)
}
}
-int pids_cgrp_id = 1;
+extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
+enum cgroup_subsys_id___local {
+ pids_cgrp_id___local = 123, /* value doesn't matter */
+};
static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
struct task_struct* task,
@@ -253,7 +256,9 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
- if (ENABLE_CGROUP_V1_RESOLVER) {
+ if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
+ int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
+ pids_cgrp_id___local);
#ifdef UNROLL
#pragma unroll
#endif
@@ -262,7 +267,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
BPF_CORE_READ(task, cgroups, subsys[i]);
if (subsys != NULL) {
int subsys_id = BPF_CORE_READ(subsys, ss, id);
- if (subsys_id == pids_cgrp_id) {
+ if (subsys_id == cgrp_id) {
proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
break;
diff --git a/tools/testing/selftests/bpf/progs/test_map_init.c b/tools/testing/selftests/bpf/progs/test_map_init.c
new file mode 100644
index 000000000000..c89d28ead673
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_init.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u64 inKey = 0;
+__u64 inValue = 0;
+__u32 inPid = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 2);
+ __type(key, __u64);
+ __type(value, __u64);
+} hashmap1 SEC(".maps");
+
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int sysenter_getpgid(const void *ctx)
+{
+ /* Just do it for once, when called from our own test prog. This
+ * ensures the map value is only updated for a single CPU.
+ */
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (cur_pid == inPid)
+ bpf_map_update_elem(&hashmap1, &inKey, &inValue, BPF_NOEXIST);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
index 55bd387ce7ec..52d3f0364bda 100644
--- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -145,7 +145,7 @@ TEST(clone3_cap_checkpoint_restore)
test_clone3_supported();
EXPECT_EQ(getuid(), 0)
- XFAIL(return, "Skipping all tests as non-root\n");
+ SKIP(return, "Skipping all tests as non-root");
memset(&set_tid, 0, sizeof(set_tid));
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index c99b98b0d461..575b391ddc78 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -44,7 +44,7 @@ TEST(close_range)
fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
ASSERT_GE(fd, 0) {
if (errno == ENOENT)
- XFAIL(return, "Skipping test since /dev/null does not exist");
+ SKIP(return, "Skipping test since /dev/null does not exist");
}
open_fds[i] = fd;
@@ -52,7 +52,7 @@ TEST(close_range)
EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) {
if (errno == ENOSYS)
- XFAIL(return, "close_range() syscall not supported");
+ SKIP(return, "close_range() syscall not supported");
}
EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
@@ -108,7 +108,7 @@ TEST(close_range_unshare)
fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
ASSERT_GE(fd, 0) {
if (errno == ENOENT)
- XFAIL(return, "Skipping test since /dev/null does not exist");
+ SKIP(return, "Skipping test since /dev/null does not exist");
}
open_fds[i] = fd;
@@ -197,7 +197,7 @@ TEST(close_range_unshare_capped)
fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
ASSERT_GE(fd, 0) {
if (errno == ENOENT)
- XFAIL(return, "Skipping test since /dev/null does not exist");
+ SKIP(return, "Skipping test since /dev/null does not exist");
}
open_fds[i] = fd;
diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
new file mode 100755
index 000000000000..be0c1b5ee6b8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -0,0 +1,436 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the nexthop offload API. It makes use of netdevsim
+# which registers a listener to the nexthop notification chain.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ nexthop_single_add_test
+ nexthop_single_add_err_test
+ nexthop_group_add_test
+ nexthop_group_add_err_test
+ nexthop_group_replace_test
+ nexthop_group_replace_err_test
+ nexthop_single_replace_test
+ nexthop_single_replace_err_test
+ nexthop_single_in_group_replace_test
+ nexthop_single_in_group_replace_err_test
+ nexthop_single_in_group_delete_test
+ nexthop_single_in_group_delete_err_test
+ nexthop_replay_test
+ nexthop_replay_err_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+nexthop_check()
+{
+ local nharg="$1"; shift
+ local expected="$1"; shift
+
+ out=$($IP nexthop show ${nharg} | sed -e 's/ *$//')
+ if [[ "$out" != "$expected" ]]; then
+ return 1
+ fi
+
+ return 0
+}
+
+nexthop_resource_check()
+{
+ local expected_occ=$1; shift
+
+ occ=$($DEVLINK -jp resource show $DEVLINK_DEV \
+ | jq '.[][][] | select(.name=="nexthops") | .["occ"]')
+
+ if [ $expected_occ -ne $occ ]; then
+ return 1
+ fi
+
+ return 0
+}
+
+nexthop_resource_set()
+{
+ local size=$1; shift
+
+ $DEVLINK resource set $DEVLINK_DEV path nexthops size $size
+ $DEVLINK dev reload $DEVLINK_DEV
+}
+
+nexthop_single_add_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry"
+
+ nexthop_resource_check 1
+ check_err $? "Wrong nexthop occupancy"
+
+ $IP nexthop del id 1
+ nexthop_resource_check 0
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ log_test "Single nexthop add and delete"
+}
+
+nexthop_single_add_err_test()
+{
+ RET=0
+
+ nexthop_resource_set 1
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1 &> /dev/null
+ check_fail $? "Nexthop addition succeeded when should fail"
+
+ nexthop_resource_check 1
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop add failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
+nexthop_group_add_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ $IP nexthop add id 10 group 1/2
+ nexthop_check "id 10" "id 10 group 1/2 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_resource_check 4
+ check_err $? "Wrong nexthop occupancy"
+
+ $IP nexthop del id 10
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ $IP nexthop add id 10 group 1,20/2,39
+ nexthop_check "id 10" "id 10 group 1,20/2,39 trap"
+ check_err $? "Unexpected weighted nexthop group entry"
+
+ nexthop_resource_check 61
+ check_err $? "Wrong weighted nexthop occupancy"
+
+ $IP nexthop del id 10
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ log_test "Nexthop group add and delete"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_group_add_err_test()
+{
+ RET=0
+
+ nexthop_resource_set 2
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ $IP nexthop add id 10 group 1/2 &> /dev/null
+ check_fail $? "Nexthop group addition succeeded when should fail"
+
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Nexthop group add failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
+nexthop_group_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2
+
+ $IP nexthop replace id 10 group 1/2/3
+ nexthop_check "id 10" "id 10 group 1/2/3 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Nexthop group replace"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_group_replace_err_test()
+{
+ RET=0
+
+ nexthop_resource_set 5
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2
+
+ $IP nexthop replace id 10 group 1/2/3 &> /dev/null
+ check_fail $? "Nexthop group replacement succeeded when should fail"
+
+ nexthop_check "id 10" "id 10 group 1/2 trap"
+ check_err $? "Unexpected nexthop group entry after failure"
+
+ nexthop_resource_check 5
+ check_err $? "Wrong nexthop occupancy after failure"
+
+ log_test "Nexthop group replace failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
+nexthop_single_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+
+ $IP nexthop replace id 1 via 192.0.2.3 dev dummy1
+ nexthop_check "id 1" "id 1 via 192.0.2.3 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry"
+
+ nexthop_resource_check 1
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_replace_err_test()
+{
+ RET=0
+
+ # This is supposed to cause the replace to fail because the new nexthop
+ # is programmed before deleting the replaced one.
+ nexthop_resource_set 1
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+
+ $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 &> /dev/null
+ check_fail $? "Nexthop replace succeeded when should fail"
+
+ nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry after failure"
+
+ nexthop_resource_check 1
+ check_err $? "Wrong nexthop occupancy after failure"
+
+ log_test "Single nexthop replace failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
+nexthop_single_in_group_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2
+
+ $IP nexthop replace id 1 via 192.0.2.4 dev dummy1
+ check_err $? "Failed to replace nexthop when should not"
+
+ nexthop_check "id 10" "id 10 group 1/2 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_resource_check 4
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace while in group"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_group_replace_err_test()
+{
+ RET=0
+
+ nexthop_resource_set 5
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2
+
+ $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null
+ check_fail $? "Nexthop replacement succeeded when should fail"
+
+ nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry after failure"
+
+ nexthop_check "id 10" "id 10 group 1/2 trap"
+ check_err $? "Unexpected nexthop group entry after failure"
+
+ nexthop_resource_check 4
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace while in group failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
+nexthop_single_in_group_delete_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2
+
+ $IP nexthop del id 1
+ nexthop_check "id 10" "id 10 group 2 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop delete while in group"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_group_delete_err_test()
+{
+ RET=0
+
+ # First, nexthop 1 will be deleted, which will reduce the occupancy to
+ # 5. Afterwards, a replace notification will be sent for nexthop group
+ # 10 with only two nexthops. Since the new group is allocated before
+ # the old is deleted, the replacement will fail as it will result in an
+ # occupancy of 7.
+ nexthop_resource_set 6
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2/3
+
+ $IP nexthop del id 1
+
+ nexthop_resource_check 5
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop delete while in group failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
+nexthop_replay_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2
+
+ $DEVLINK dev reload $DEVLINK_DEV
+ check_err $? "Failed to reload when should not"
+
+ nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry after reload"
+
+ nexthop_check "id 2" "id 2 via 192.0.2.3 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry after reload"
+
+ nexthop_check "id 10" "id 10 group 1/2 trap"
+ check_err $? "Unexpected nexthop group entry after reload"
+
+ nexthop_resource_check 4
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Nexthop replay"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_replay_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2
+
+ # Reduce size of nexthop resource so that reload will fail.
+ $DEVLINK resource set $DEVLINK_DEV path nexthops size 3
+ $DEVLINK dev reload $DEVLINK_DEV &> /dev/null
+ check_fail $? "Reload succeeded when should fail"
+
+ $DEVLINK resource set $DEVLINK_DEV path nexthops size 9999
+ $DEVLINK dev reload $DEVLINK_DEV
+ check_err $? "Failed to reload when should not"
+
+ log_test "Nexthop replay failure"
+
+ $IP nexthop flush &> /dev/null
+}
+
+setup_prepare()
+{
+ local netdev
+
+ modprobe netdevsim &> /dev/null
+
+ echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+ while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+ set -e
+
+ ip netns add testns1
+ devlink dev reload $DEVLINK_DEV netns testns1
+
+ IP="ip -netns testns1"
+ DEVLINK="devlink -N testns1"
+
+ $IP link add name dummy1 up type dummy
+ $IP address add 192.0.2.1/24 dev dummy1
+
+ set +e
+}
+
+cleanup()
+{
+ pre_cleanup
+ ip netns del testns1
+ echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+ modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 1d27f52c61e6..477cbb042f5b 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -74,7 +74,7 @@ static int __do_binderfs_test(struct __test_metadata *_metadata)
ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
EXPECT_EQ(ret, 0) {
if (errno == ENODEV)
- XFAIL(goto out, "binderfs missing");
+ SKIP(goto out, "binderfs missing");
TH_LOG("%s - Failed to mount binderfs", strerror(errno));
goto rmdir;
}
@@ -475,10 +475,10 @@ TEST(binderfs_stress)
TEST(binderfs_test_privileged)
{
if (geteuid() != 0)
- XFAIL(return, "Tests are not run as root. Skipping privileged tests");
+ SKIP(return, "Tests are not run as root. Skipping privileged tests");
if (__do_binderfs_test(_metadata))
- XFAIL(return, "The Android binderfs filesystem is not available");
+ SKIP(return, "The Android binderfs filesystem is not available");
}
TEST(binderfs_test_unprivileged)
@@ -511,7 +511,7 @@ TEST(binderfs_test_unprivileged)
ret = wait_for_pid(pid);
if (ret) {
if (ret == 2)
- XFAIL(return, "The Android binderfs filesystem is not available");
+ SKIP(return, "The Android binderfs filesystem is not available");
ASSERT_EQ(ret, 0) {
TH_LOG("wait_for_pid() failed");
}
diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
index d979ff14775a..8f82f99f7748 100644
--- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
+++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
@@ -3282,4 +3282,99 @@ TEST(epoll60)
close(ctx.epfd);
}
+struct epoll61_ctx {
+ int epfd;
+ int evfd;
+};
+
+static void *epoll61_write_eventfd(void *ctx_)
+{
+ struct epoll61_ctx *ctx = ctx_;
+ int64_t l = 1;
+
+ usleep(10950);
+ write(ctx->evfd, &l, sizeof(l));
+ return NULL;
+}
+
+static void *epoll61_epoll_with_timeout(void *ctx_)
+{
+ struct epoll61_ctx *ctx = ctx_;
+ struct epoll_event events[1];
+ int n;
+
+ n = epoll_wait(ctx->epfd, events, 1, 11);
+ /*
+ * If epoll returned the eventfd, write on the eventfd to wake up the
+ * blocking poller.
+ */
+ if (n == 1) {
+ int64_t l = 1;
+
+ write(ctx->evfd, &l, sizeof(l));
+ }
+ return NULL;
+}
+
+static void *epoll61_blocking_epoll(void *ctx_)
+{
+ struct epoll61_ctx *ctx = ctx_;
+ struct epoll_event events[1];
+
+ epoll_wait(ctx->epfd, events, 1, -1);
+ return NULL;
+}
+
+TEST(epoll61)
+{
+ struct epoll61_ctx ctx;
+ struct epoll_event ev;
+ int i, r;
+
+ ctx.epfd = epoll_create1(0);
+ ASSERT_GE(ctx.epfd, 0);
+ ctx.evfd = eventfd(0, EFD_NONBLOCK);
+ ASSERT_GE(ctx.evfd, 0);
+
+ ev.events = EPOLLIN | EPOLLET | EPOLLERR | EPOLLHUP;
+ ev.data.ptr = NULL;
+ r = epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd, &ev);
+ ASSERT_EQ(r, 0);
+
+ /*
+ * We are testing a race. Repeat the test case 1000 times to make it
+ * more likely to fail in case of a bug.
+ */
+ for (i = 0; i < 1000; i++) {
+ pthread_t threads[3];
+ int n;
+
+ /*
+ * Start 3 threads:
+ * Thread 1 sleeps for 10.9ms and writes to the evenfd.
+ * Thread 2 calls epoll with a timeout of 11ms.
+ * Thread 3 calls epoll with a timeout of -1.
+ *
+ * The eventfd write by Thread 1 should either wakeup Thread 2
+ * or Thread 3. If it wakes up Thread 2, Thread 2 writes on the
+ * eventfd to wake up Thread 3.
+ *
+ * If no events are missed, all three threads should eventually
+ * be joinable.
+ */
+ ASSERT_EQ(pthread_create(&threads[0], NULL,
+ epoll61_write_eventfd, &ctx), 0);
+ ASSERT_EQ(pthread_create(&threads[1], NULL,
+ epoll61_epoll_with_timeout, &ctx), 0);
+ ASSERT_EQ(pthread_create(&threads[2], NULL,
+ epoll61_blocking_epoll, &ctx), 0);
+
+ for (n = 0; n < ARRAY_SIZE(threads); ++n)
+ ASSERT_EQ(pthread_join(threads[n], NULL), 0);
+ }
+
+ close(ctx.epfd);
+ close(ctx.evfd);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
index 3bcd4c3624ee..b4da41d126d5 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
@@ -6,7 +6,7 @@
echo 0 > events/enable
echo > dynamic_events
-PLACE=kernel_clone
+PLACE=$FUNCTION_FORK
echo "p:myevent1 $PLACE" >> dynamic_events
echo "r:myevent2 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
index 438961971b7e..3a0e2885fff5 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
@@ -6,7 +6,7 @@
echo 0 > events/enable
echo > dynamic_events
-PLACE=kernel_clone
+PLACE=$FUNCTION_FORK
setup_events() {
echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
index a8603bd23e0d..d3e138e8377f 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
@@ -6,7 +6,7 @@
echo 0 > events/enable
echo > dynamic_events
-PLACE=kernel_clone
+PLACE=$FUNCTION_FORK
setup_events() {
echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc
index acb17ce543d2..80541964b927 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc
@@ -39,7 +39,7 @@ do_test() {
disable_tracing
echo do_execve* > set_ftrace_filter
- echo *do_fork >> set_ftrace_filter
+ echo $FUNCTION_FORK >> set_ftrace_filter
echo $PID > set_ftrace_notrace_pid
echo function > current_tracer
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
index 9f0a9687c773..2f7211254529 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -39,7 +39,7 @@ do_test() {
disable_tracing
echo do_execve* > set_ftrace_filter
- echo *do_fork >> set_ftrace_filter
+ echo $FUNCTION_FORK >> set_ftrace_filter
echo $PID > set_ftrace_pid
echo function > current_tracer
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
index 98305d76bd04..191d116b7883 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
@@ -4,9 +4,9 @@
# requires: set_ftrace_filter
# flags: instance
-echo kernel_clone:stacktrace >> set_ftrace_filter
+echo $FUNCTION_FORK:stacktrace >> set_ftrace_filter
-grep -q "kernel_clone:stacktrace:unlimited" set_ftrace_filter
+grep -q "$FUNCTION_FORK:stacktrace:unlimited" set_ftrace_filter
(echo "forked"; sleep 1)
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index c5dec55b7d95..a6fac927ee82 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -133,6 +133,13 @@ yield() {
ping $LOCALHOST -c 1 || sleep .001 || usleep 1 || sleep 1
}
+# The fork function in the kernel was renamed from "_do_fork" to
+# "kernel_fork". As older tests should still work with older kernels
+# as well as newer kernels, check which version of fork is used on this
+# kernel so that the tests can use the fork function for the running kernel.
+FUNCTION_FORK=`(if grep '\bkernel_clone\b' /proc/kallsyms > /dev/null; then
+ echo kernel_clone; else echo '_do_fork'; fi)`
+
# Since probe event command may include backslash, explicitly use printf "%s"
# to NOT interpret it.
ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
index 9737cd0578a7..2428a3ed78c9 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
@@ -3,7 +3,7 @@
# description: Kprobe dynamic event - adding and removing
# requires: kprobe_events
-echo p:myevent kernel_clone > kprobe_events
+echo p:myevent $FUNCTION_FORK > kprobe_events
grep myevent kprobe_events
test -d events/kprobes/myevent
echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
index f9a40af76888..010a8b1d6c1d 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
@@ -3,7 +3,7 @@
# description: Kprobe dynamic event - busy event check
# requires: kprobe_events
-echo p:myevent kernel_clone > kprobe_events
+echo p:myevent $FUNCTION_FORK > kprobe_events
test -d events/kprobes/myevent
echo 1 > events/kprobes/myevent/enable
echo > kprobe_events && exit_fail # this must fail
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
index eb543d3cfe5f..a96a1dc7014f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
@@ -3,13 +3,13 @@
# description: Kprobe dynamic event with arguments
# requires: kprobe_events
-echo 'p:testprobe kernel_clone $stack $stack0 +0($stack)' > kprobe_events
+echo "p:testprobe $FUNCTION_FORK \$stack \$stack0 +0(\$stack)" > kprobe_events
grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)'
test -d events/kprobes/testprobe
echo 1 > events/kprobes/testprobe/enable
( echo "forked")
-grep testprobe trace | grep 'kernel_clone' | \
+grep testprobe trace | grep "$FUNCTION_FORK" | \
grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$'
echo 0 > events/kprobes/testprobe/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
index 4e5b63be51c9..a053ee2e7d77 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
@@ -5,7 +5,7 @@
grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old
-echo 'p:testprobe kernel_clone comm=$comm ' > kprobe_events
+echo "p:testprobe $FUNCTION_FORK comm=\$comm " > kprobe_events
grep testprobe kprobe_events | grep -q 'comm=$comm'
test -d events/kprobes/testprobe
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index a1d70588ab21..84285a6f60b0 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -30,13 +30,13 @@ esac
: "Test get argument (1)"
echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
-echo "p:test kernel_clone" >> kprobe_events
+echo "p:test $FUNCTION_FORK" >> kprobe_events
grep -qe "testprobe.* arg1=\"test\"" trace
echo 0 > events/kprobes/testprobe/enable
: "Test get argument (2)"
echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
-echo "p:test kernel_clone" >> kprobe_events
+echo "p:test $FUNCTION_FORK" >> kprobe_events
grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
index bd25dd0ba0d0..717130ed4feb 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
@@ -14,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then
fi
: "Test get basic types symbol argument"
-echo "p:testprobe_u kernel_clone arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
-echo "p:testprobe_s kernel_clone arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
+echo "p:testprobe_u $FUNCTION_FORK arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
+echo "p:testprobe_s $FUNCTION_FORK arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
if grep -q "x8/16/32/64" README; then
- echo "p:testprobe_x kernel_clone arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
+ echo "p:testprobe_x $FUNCTION_FORK arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
fi
-echo "p:testprobe_bf kernel_clone arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
+echo "p:testprobe_bf $FUNCTION_FORK arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
echo 1 > events/kprobes/enable
(echo "forked")
echo 0 > events/kprobes/enable
@@ -27,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace
grep "testprobe_bf:.* arg1=.*" trace
: "Test get string symbol argument"
-echo "p:testprobe_str kernel_clone arg1=@linux_proc_banner:string" > kprobe_events
+echo "p:testprobe_str $FUNCTION_FORK arg1=@linux_proc_banner:string" > kprobe_events
echo 1 > events/kprobes/enable
(echo "forked")
echo 0 > events/kprobes/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
index 91fcce1c241c..25b7708eb559 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
@@ -4,7 +4,7 @@
# requires: kprobe_events "x8/16/32/64":README
gen_event() { # Bitsize
- echo "p:testprobe kernel_clone \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
+ echo "p:testprobe $FUNCTION_FORK \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
}
check_types() { # s-type u-type x-type bf-type width
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
index a30a9c07290d..d25d01a19778 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
@@ -9,12 +9,16 @@ grep -A10 "fetcharg:" README | grep -q '\[u\]<offset>' || exit_unsupported
:;: "user-memory access syntax and ustring working on user memory";:
echo 'p:myevent do_sys_open path=+0($arg2):ustring path2=+u0($arg2):string' \
> kprobe_events
+echo 'p:myevent2 do_sys_openat2 path=+0($arg2):ustring path2=+u0($arg2):string' \
+ >> kprobe_events
grep myevent kprobe_events | \
grep -q 'path=+0($arg2):ustring path2=+u0($arg2):string'
echo 1 > events/kprobes/myevent/enable
+echo 1 > events/kprobes/myevent2/enable
echo > /dev/null
echo 0 > events/kprobes/myevent/enable
+echo 0 > events/kprobes/myevent2/enable
grep myevent trace | grep -q 'path="/dev/null" path2="/dev/null"'
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
index 0d179094191f..5556292601a4 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -5,29 +5,29 @@
# prepare
echo nop > current_tracer
-echo kernel_clone > set_ftrace_filter
-echo 'p:testprobe kernel_clone' > kprobe_events
+echo $FUNCTION_FORK > set_ftrace_filter
+echo "p:testprobe $FUNCTION_FORK" > kprobe_events
# kprobe on / ftrace off
echo 1 > events/kprobes/testprobe/enable
echo > trace
( echo "forked")
grep testprobe trace
-! grep 'kernel_clone <-' trace
+! grep "$FUNCTION_FORK <-" trace
# kprobe on / ftrace on
echo function > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-grep 'kernel_clone <-' trace
+grep "$FUNCTION_FORK <-" trace
# kprobe off / ftrace on
echo 0 > events/kprobes/testprobe/enable
echo > trace
( echo "forked")
! grep testprobe trace
-grep 'kernel_clone <-' trace
+grep "$FUNCTION_FORK <-" trace
# kprobe on / ftrace on
echo 1 > events/kprobes/testprobe/enable
@@ -35,11 +35,11 @@ echo function > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-grep 'kernel_clone <-' trace
+grep "$FUNCTION_FORK <-" trace
# kprobe on / ftrace off
echo nop > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-! grep 'kernel_clone <-' trace
+! grep "$FUNCTION_FORK <-" trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
index 45d90b6c763d..f0d5b7777ed7 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
@@ -4,7 +4,7 @@
# requires: kprobe_events "Create/append/":README
# Choose 2 symbols for target
-SYM1=kernel_clone
+SYM1=$FUNCTION_FORK
SYM2=do_exit
EVENT_NAME=kprobes/testevent
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index 1b5550ef8a9b..fa928b431555 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -86,15 +86,15 @@ esac
# multiprobe errors
if grep -q "Create/append/" README && grep -q "imm-value" README; then
-echo 'p:kprobes/testevent kernel_clone' > kprobe_events
+echo "p:kprobes/testevent $FUNCTION_FORK" > kprobe_events
check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE
# Explicitly use printf "%s" to not interpret \1
-printf "%s" 'p:kprobes/testevent kernel_clone abcd=\1' > kprobe_events
-check_error 'p:kprobes/testevent kernel_clone ^bcd=\1' # DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent kernel_clone ^abcd=\1:u8' # DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent kernel_clone ^abcd=\"foo"' # DIFF_ARG_TYPE
-check_error '^p:kprobes/testevent kernel_clone abcd=\1' # SAME_PROBE
+printf "%s" "p:kprobes/testevent $FUNCTION_FORK abcd=\\1" > kprobe_events
+check_error "p:kprobes/testevent $FUNCTION_FORK ^bcd=\\1" # DIFF_ARG_TYPE
+check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\1:u8" # DIFF_ARG_TYPE
+check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\\"foo\"" # DIFF_ARG_TYPE
+check_error "^p:kprobes/testevent $FUNCTION_FORK abcd=\\1" # SAME_PROBE
fi
# %return suffix errors
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
index 7ae492c204a4..197cc2afd404 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
@@ -4,14 +4,14 @@
# requires: kprobe_events
# Add new kretprobe event
-echo 'r:testprobe2 kernel_clone $retval' > kprobe_events
+echo "r:testprobe2 $FUNCTION_FORK \$retval" > kprobe_events
grep testprobe2 kprobe_events | grep -q 'arg1=\$retval'
test -d events/kprobes/testprobe2
echo 1 > events/kprobes/testprobe2/enable
( echo "forked")
-cat trace | grep testprobe2 | grep -q '<- kernel_clone'
+cat trace | grep testprobe2 | grep -q "<- $FUNCTION_FORK"
echo 0 > events/kprobes/testprobe2/enable
echo '-:testprobe2' >> kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
index c4093fc1a773..98166fa3eb91 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
@@ -4,7 +4,7 @@
# requires: kprobe_events
! grep -q 'myevent' kprobe_profile
-echo p:myevent kernel_clone > kprobe_events
+echo "p:myevent $FUNCTION_FORK" > kprobe_events
grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile
echo 1 > events/kprobes/myevent/enable
( echo "forked" )
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index f19804df244c..edce85420d19 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -126,7 +126,7 @@
snprintf(_metadata->results->reason, \
sizeof(_metadata->results->reason), fmt, ##__VA_ARGS__); \
if (TH_LOG_ENABLED) { \
- fprintf(TH_LOG_STREAM, "# SKIP %s\n", \
+ fprintf(TH_LOG_STREAM, "# SKIP %s\n", \
_metadata->results->reason); \
} \
_metadata->passed = 1; \
@@ -432,7 +432,7 @@
*/
/**
- * ASSERT_EQ(expected, seen)
+ * ASSERT_EQ()
*
* @expected: expected value
* @seen: measured value
@@ -443,7 +443,7 @@
__EXPECT(expected, #expected, seen, #seen, ==, 1)
/**
- * ASSERT_NE(expected, seen)
+ * ASSERT_NE()
*
* @expected: expected value
* @seen: measured value
@@ -454,7 +454,7 @@
__EXPECT(expected, #expected, seen, #seen, !=, 1)
/**
- * ASSERT_LT(expected, seen)
+ * ASSERT_LT()
*
* @expected: expected value
* @seen: measured value
@@ -465,7 +465,7 @@
__EXPECT(expected, #expected, seen, #seen, <, 1)
/**
- * ASSERT_LE(expected, seen)
+ * ASSERT_LE()
*
* @expected: expected value
* @seen: measured value
@@ -476,7 +476,7 @@
__EXPECT(expected, #expected, seen, #seen, <=, 1)
/**
- * ASSERT_GT(expected, seen)
+ * ASSERT_GT()
*
* @expected: expected value
* @seen: measured value
@@ -487,7 +487,7 @@
__EXPECT(expected, #expected, seen, #seen, >, 1)
/**
- * ASSERT_GE(expected, seen)
+ * ASSERT_GE()
*
* @expected: expected value
* @seen: measured value
@@ -498,7 +498,7 @@
__EXPECT(expected, #expected, seen, #seen, >=, 1)
/**
- * ASSERT_NULL(seen)
+ * ASSERT_NULL()
*
* @seen: measured value
*
@@ -508,7 +508,7 @@
__EXPECT(NULL, "NULL", seen, #seen, ==, 1)
/**
- * ASSERT_TRUE(seen)
+ * ASSERT_TRUE()
*
* @seen: measured value
*
@@ -518,7 +518,7 @@
__EXPECT(0, "0", seen, #seen, !=, 1)
/**
- * ASSERT_FALSE(seen)
+ * ASSERT_FALSE()
*
* @seen: measured value
*
@@ -528,7 +528,7 @@
__EXPECT(0, "0", seen, #seen, ==, 1)
/**
- * ASSERT_STREQ(expected, seen)
+ * ASSERT_STREQ()
*
* @expected: expected value
* @seen: measured value
@@ -539,7 +539,7 @@
__EXPECT_STR(expected, seen, ==, 1)
/**
- * ASSERT_STRNE(expected, seen)
+ * ASSERT_STRNE()
*
* @expected: expected value
* @seen: measured value
@@ -550,7 +550,7 @@
__EXPECT_STR(expected, seen, !=, 1)
/**
- * EXPECT_EQ(expected, seen)
+ * EXPECT_EQ()
*
* @expected: expected value
* @seen: measured value
@@ -561,7 +561,7 @@
__EXPECT(expected, #expected, seen, #seen, ==, 0)
/**
- * EXPECT_NE(expected, seen)
+ * EXPECT_NE()
*
* @expected: expected value
* @seen: measured value
@@ -572,7 +572,7 @@
__EXPECT(expected, #expected, seen, #seen, !=, 0)
/**
- * EXPECT_LT(expected, seen)
+ * EXPECT_LT()
*
* @expected: expected value
* @seen: measured value
@@ -583,7 +583,7 @@
__EXPECT(expected, #expected, seen, #seen, <, 0)
/**
- * EXPECT_LE(expected, seen)
+ * EXPECT_LE()
*
* @expected: expected value
* @seen: measured value
@@ -594,7 +594,7 @@
__EXPECT(expected, #expected, seen, #seen, <=, 0)
/**
- * EXPECT_GT(expected, seen)
+ * EXPECT_GT()
*
* @expected: expected value
* @seen: measured value
@@ -605,7 +605,7 @@
__EXPECT(expected, #expected, seen, #seen, >, 0)
/**
- * EXPECT_GE(expected, seen)
+ * EXPECT_GE()
*
* @expected: expected value
* @seen: measured value
@@ -616,7 +616,7 @@
__EXPECT(expected, #expected, seen, #seen, >=, 0)
/**
- * EXPECT_NULL(seen)
+ * EXPECT_NULL()
*
* @seen: measured value
*
@@ -626,7 +626,7 @@
__EXPECT(NULL, "NULL", seen, #seen, ==, 0)
/**
- * EXPECT_TRUE(seen)
+ * EXPECT_TRUE()
*
* @seen: measured value
*
@@ -636,7 +636,7 @@
__EXPECT(0, "0", seen, #seen, !=, 0)
/**
- * EXPECT_FALSE(seen)
+ * EXPECT_FALSE()
*
* @seen: measured value
*
@@ -646,7 +646,7 @@
__EXPECT(0, "0", seen, #seen, ==, 0)
/**
- * EXPECT_STREQ(expected, seen)
+ * EXPECT_STREQ()
*
* @expected: expected value
* @seen: measured value
@@ -657,7 +657,7 @@
__EXPECT_STR(expected, seen, ==, 0)
/**
- * EXPECT_STRNE(expected, seen)
+ * EXPECT_STRNE()
*
* @expected: expected value
* @seen: measured value
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 307ceaadbbb9..7a2c242b7152 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,10 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
+/aarch64/get-reg-list
+/aarch64/get-reg-list-sve
/s390x/memop
/s390x/resets
/s390x/sync_regs_test
/x86_64/cr4_cpuid_sync_test
/x86_64/debug_regs
/x86_64/evmcs_test
+/x86_64/kvm_pv_test
/x86_64/hyperv_cpuid
/x86_64/mmio_warning_test
/x86_64/platform_info_test
@@ -15,6 +18,7 @@
/x86_64/vmx_preemption_timer_test
/x86_64/svm_vmcall_test
/x86_64/sync_regs_test
+/x86_64/vmx_apic_access_test
/x86_64/vmx_close_while_nested_test
/x86_64/vmx_dirty_log_test
/x86_64/vmx_set_nested_state_test
@@ -23,6 +27,7 @@
/clear_dirty_log_test
/demand_paging_test
/dirty_log_test
+/dirty_log_perf_test
/kvm_create_max_vcpus
/set_memory_region_test
/steal_time
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 7ebe71fbca53..3d14ef77755e 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -34,13 +34,14 @@ ifeq ($(ARCH),s390)
endif
LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
-LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c
+LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c
TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
+TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
@@ -49,6 +50,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
@@ -57,14 +59,15 @@ TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test
-TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
+TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
TEST_GEN_PROGS_x86_64 += set_memory_region_test
TEST_GEN_PROGS_x86_64 += steal_time
-TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
+TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
+TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
@@ -110,14 +113,21 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
include ../lib.mk
STATIC_LIBS := $(OUTPUT)/libkvm.a
-LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
-EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.*
+LIBKVM_C := $(filter %.c,$(LIBKVM))
+LIBKVM_S := $(filter %.S,$(LIBKVM))
+LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
+LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
+EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.*
+
+x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
+$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
-$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
+$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
+$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS)
$(AR) crs $@ $^
x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
new file mode 100644
index 000000000000..efba76682b4b
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+#define REG_LIST_SVE
+#include "get-reg-list.c"
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
new file mode 100644
index 000000000000..33218a395d9f
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -0,0 +1,841 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * When attempting to migrate from a host with an older kernel to a host
+ * with a newer kernel we allow the newer kernel on the destination to
+ * list new registers with get-reg-list. We assume they'll be unused, at
+ * least until the guest reboots, and so they're relatively harmless.
+ * However, if the destination host with the newer kernel is missing
+ * registers which the source host with the older kernel has, then that's
+ * a regression in get-reg-list. This test checks for that regression by
+ * checking the current list against a blessed list. We should never have
+ * missing registers, but if new ones appear then they can probably be
+ * added to the blessed list. A completely new blessed list can be created
+ * by running the test with the --list command line argument.
+ *
+ * Note, the blessed list should be created from the oldest possible
+ * kernel. We can't go older than v4.15, though, because that's the first
+ * release to expose the ID system registers in KVM_GET_REG_LIST, see
+ * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features
+ * from guests"). Also, one must use the --core-reg-fixup command line
+ * option when running on an older kernel that doesn't include df205b5c6328
+ * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST")
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+#ifdef REG_LIST_SVE
+#define reg_list_sve() (true)
+#else
+#define reg_list_sve() (false)
+#endif
+
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+
+#define for_each_reg(i) \
+ for ((i) = 0; (i) < reg_list->n; ++(i))
+
+#define for_each_missing_reg(i) \
+ for ((i) = 0; (i) < blessed_n; ++(i)) \
+ if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i]))
+
+#define for_each_new_reg(i) \
+ for ((i) = 0; (i) < reg_list->n; ++(i)) \
+ if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+
+
+static struct kvm_reg_list *reg_list;
+
+static __u64 base_regs[], vregs[], sve_regs[], rejects_set[];
+static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n;
+static __u64 *blessed_reg, blessed_n;
+
+static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
+{
+ int i;
+
+ for (i = 0; i < nr_regs; ++i)
+ if (reg == regs[i])
+ return true;
+ return false;
+}
+
+static const char *str_with_index(const char *template, __u64 index)
+{
+ char *str, *p;
+ int n;
+
+ str = strdup(template);
+ p = strstr(str, "##");
+ n = sprintf(p, "%lld", index);
+ strcat(p + n, strstr(template, "##") + 2);
+
+ return (const char *)str;
+}
+
+#define CORE_REGS_XX_NR_WORDS 2
+#define CORE_SPSR_XX_NR_WORDS 2
+#define CORE_FPREGS_XX_NR_WORDS 4
+
+static const char *core_id_to_str(__u64 id)
+{
+ __u64 core_off = id & ~REG_MASK, idx;
+
+ /*
+ * core_off is the offset into struct kvm_regs
+ */
+ switch (core_off) {
+ case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
+ KVM_REG_ARM_CORE_REG(regs.regs[30]):
+ idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
+ TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx);
+ return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx);
+ case KVM_REG_ARM_CORE_REG(regs.sp):
+ return "KVM_REG_ARM_CORE_REG(regs.sp)";
+ case KVM_REG_ARM_CORE_REG(regs.pc):
+ return "KVM_REG_ARM_CORE_REG(regs.pc)";
+ case KVM_REG_ARM_CORE_REG(regs.pstate):
+ return "KVM_REG_ARM_CORE_REG(regs.pstate)";
+ case KVM_REG_ARM_CORE_REG(sp_el1):
+ return "KVM_REG_ARM_CORE_REG(sp_el1)";
+ case KVM_REG_ARM_CORE_REG(elr_el1):
+ return "KVM_REG_ARM_CORE_REG(elr_el1)";
+ case KVM_REG_ARM_CORE_REG(spsr[0]) ...
+ KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
+ idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
+ TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx);
+ return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx);
+ case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+ KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+ idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
+ TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx);
+ return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx);
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+ return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+ return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
+ }
+
+ TEST_FAIL("Unknown core reg id: 0x%llx", id);
+ return NULL;
+}
+
+static const char *sve_id_to_str(__u64 id)
+{
+ __u64 sve_off, n, i;
+
+ if (id == KVM_REG_ARM64_SVE_VLS)
+ return "KVM_REG_ARM64_SVE_VLS";
+
+ sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
+ i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
+
+ TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id);
+
+ switch (sve_off) {
+ case KVM_REG_ARM64_SVE_ZREG_BASE ...
+ KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
+ n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
+ TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
+ "Unexpected bits set in SVE ZREG id: 0x%llx", id);
+ return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n);
+ case KVM_REG_ARM64_SVE_PREG_BASE ...
+ KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
+ n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
+ TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
+ "Unexpected bits set in SVE PREG id: 0x%llx", id);
+ return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n);
+ case KVM_REG_ARM64_SVE_FFR_BASE:
+ TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
+ "Unexpected bits set in SVE FFR id: 0x%llx", id);
+ return "KVM_REG_ARM64_SVE_FFR(0)";
+ }
+
+ return NULL;
+}
+
+static void print_reg(__u64 id)
+{
+ unsigned op0, op1, crn, crm, op2;
+ const char *reg_size = NULL;
+
+ TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
+ "KVM_REG_ARM64 missing in reg id: 0x%llx", id);
+
+ switch (id & KVM_REG_SIZE_MASK) {
+ case KVM_REG_SIZE_U8:
+ reg_size = "KVM_REG_SIZE_U8";
+ break;
+ case KVM_REG_SIZE_U16:
+ reg_size = "KVM_REG_SIZE_U16";
+ break;
+ case KVM_REG_SIZE_U32:
+ reg_size = "KVM_REG_SIZE_U32";
+ break;
+ case KVM_REG_SIZE_U64:
+ reg_size = "KVM_REG_SIZE_U64";
+ break;
+ case KVM_REG_SIZE_U128:
+ reg_size = "KVM_REG_SIZE_U128";
+ break;
+ case KVM_REG_SIZE_U256:
+ reg_size = "KVM_REG_SIZE_U256";
+ break;
+ case KVM_REG_SIZE_U512:
+ reg_size = "KVM_REG_SIZE_U512";
+ break;
+ case KVM_REG_SIZE_U1024:
+ reg_size = "KVM_REG_SIZE_U1024";
+ break;
+ case KVM_REG_SIZE_U2048:
+ reg_size = "KVM_REG_SIZE_U2048";
+ break;
+ default:
+ TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx",
+ (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+ }
+
+ switch (id & KVM_REG_ARM_COPROC_MASK) {
+ case KVM_REG_ARM_CORE:
+ printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id));
+ break;
+ case KVM_REG_ARM_DEMUX:
+ TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
+ "Unexpected bits set in DEMUX reg id: 0x%llx", id);
+ printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
+ reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
+ break;
+ case KVM_REG_ARM64_SYSREG:
+ op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT;
+ op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT;
+ crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT;
+ crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
+ op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
+ TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
+ "Unexpected bits set in SYSREG reg id: 0x%llx", id);
+ printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
+ break;
+ case KVM_REG_ARM_FW:
+ TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
+ "Unexpected bits set in FW reg id: 0x%llx", id);
+ printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
+ break;
+ case KVM_REG_ARM64_SVE:
+ if (reg_list_sve())
+ printf("\t%s,\n", sve_id_to_str(id));
+ else
+ TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id);
+ break;
+ default:
+ TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx",
+ (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
+ }
+}
+
+/*
+ * Older kernels listed each 32-bit word of CORE registers separately.
+ * For 64 and 128-bit registers we need to ignore the extra words. We
+ * also need to fixup the sizes, because the older kernels stated all
+ * registers were 64-bit, even when they weren't.
+ */
+static void core_reg_fixup(void)
+{
+ struct kvm_reg_list *tmp;
+ __u64 id, core_off;
+ int i;
+
+ tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64));
+
+ for (i = 0; i < reg_list->n; ++i) {
+ id = reg_list->reg[i];
+
+ if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) {
+ tmp->reg[tmp->n++] = id;
+ continue;
+ }
+
+ core_off = id & ~REG_MASK;
+
+ switch (core_off) {
+ case 0x52: case 0xd2: case 0xd6:
+ /*
+ * These offsets are pointing at padding.
+ * We need to ignore them too.
+ */
+ continue;
+ case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+ KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+ if (core_off & 3)
+ continue;
+ id &= ~KVM_REG_SIZE_MASK;
+ id |= KVM_REG_SIZE_U128;
+ tmp->reg[tmp->n++] = id;
+ continue;
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+ id &= ~KVM_REG_SIZE_MASK;
+ id |= KVM_REG_SIZE_U32;
+ tmp->reg[tmp->n++] = id;
+ continue;
+ default:
+ if (core_off & 1)
+ continue;
+ tmp->reg[tmp->n++] = id;
+ break;
+ }
+ }
+
+ free(reg_list);
+ reg_list = tmp;
+}
+
+static void prepare_vcpu_init(struct kvm_vcpu_init *init)
+{
+ if (reg_list_sve())
+ init->features[0] |= 1 << KVM_ARM_VCPU_SVE;
+}
+
+static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ int feature;
+
+ if (reg_list_sve()) {
+ feature = KVM_ARM_VCPU_SVE;
+ vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature);
+ }
+}
+
+static void check_supported(void)
+{
+ if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) {
+ fprintf(stderr, "SVE not available, skipping tests\n");
+ exit(KSFT_SKIP);
+ }
+}
+
+int main(int ac, char **av)
+{
+ struct kvm_vcpu_init init = { .target = -1, };
+ int new_regs = 0, missing_regs = 0, i;
+ int failed_get = 0, failed_set = 0, failed_reject = 0;
+ bool print_list = false, fixup_core_regs = false;
+ struct kvm_vm *vm;
+ __u64 *vec_regs;
+
+ check_supported();
+
+ for (i = 1; i < ac; ++i) {
+ if (strcmp(av[i], "--core-reg-fixup") == 0)
+ fixup_core_regs = true;
+ else if (strcmp(av[i], "--list") == 0)
+ print_list = true;
+ else
+ fprintf(stderr, "Ignoring unknown option: %s\n", av[i]);
+ }
+
+ vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ prepare_vcpu_init(&init);
+ aarch64_vcpu_add_default(vm, 0, &init, NULL);
+ finalize_vcpu(vm, 0);
+
+ reg_list = vcpu_get_reg_list(vm, 0);
+
+ if (fixup_core_regs)
+ core_reg_fixup();
+
+ if (print_list) {
+ putchar('\n');
+ for_each_reg(i)
+ print_reg(reg_list->reg[i]);
+ putchar('\n');
+ return 0;
+ }
+
+ /*
+ * We only test that we can get the register and then write back the
+ * same value. Some registers may allow other values to be written
+ * back, but others only allow some bits to be changed, and at least
+ * for ID registers set will fail if the value does not exactly match
+ * what was returned by get. If registers that allow other values to
+ * be written need to have the other values tested, then we should
+ * create a new set of tests for those in a new independent test
+ * executable.
+ */
+ for_each_reg(i) {
+ uint8_t addr[2048 / 8];
+ struct kvm_one_reg reg = {
+ .id = reg_list->reg[i],
+ .addr = (__u64)&addr,
+ };
+ int ret;
+
+ ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, &reg);
+ if (ret) {
+ puts("Failed to get ");
+ print_reg(reg.id);
+ putchar('\n');
+ ++failed_get;
+ }
+
+ /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */
+ if (find_reg(rejects_set, rejects_set_n, reg.id)) {
+ ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+ if (ret != -1 || errno != EPERM) {
+ printf("Failed to reject (ret=%d, errno=%d) ", ret, errno);
+ print_reg(reg.id);
+ putchar('\n');
+ ++failed_reject;
+ }
+ continue;
+ }
+
+ ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, &reg);
+ if (ret) {
+ puts("Failed to set ");
+ print_reg(reg.id);
+ putchar('\n');
+ ++failed_set;
+ }
+ }
+
+ if (reg_list_sve()) {
+ blessed_n = base_regs_n + sve_regs_n;
+ vec_regs = sve_regs;
+ } else {
+ blessed_n = base_regs_n + vregs_n;
+ vec_regs = vregs;
+ }
+
+ blessed_reg = calloc(blessed_n, sizeof(__u64));
+ for (i = 0; i < base_regs_n; ++i)
+ blessed_reg[i] = base_regs[i];
+ for (i = 0; i < blessed_n - base_regs_n; ++i)
+ blessed_reg[base_regs_n + i] = vec_regs[i];
+
+ for_each_new_reg(i)
+ ++new_regs;
+
+ for_each_missing_reg(i)
+ ++missing_regs;
+
+ if (new_regs || missing_regs) {
+ printf("Number blessed registers: %5lld\n", blessed_n);
+ printf("Number registers: %5lld\n", reg_list->n);
+ }
+
+ if (new_regs) {
+ printf("\nThere are %d new registers.\n"
+ "Consider adding them to the blessed reg "
+ "list with the following lines:\n\n", new_regs);
+ for_each_new_reg(i)
+ print_reg(reg_list->reg[i]);
+ putchar('\n');
+ }
+
+ if (missing_regs) {
+ printf("\nThere are %d missing registers.\n"
+ "The following lines are missing registers:\n\n", missing_regs);
+ for_each_missing_reg(i)
+ print_reg(blessed_reg[i]);
+ putchar('\n');
+ }
+
+ TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
+ "There are %d missing registers; "
+ "%d registers failed get; %d registers failed set; %d registers failed reject",
+ missing_regs, failed_get, failed_set, failed_reject);
+
+ return 0;
+}
+
+/*
+ * The current blessed list was primed with the output of kernel version
+ * v4.15 with --core-reg-fixup and then later updated with new registers.
+ */
+static __u64 base_regs[] = {
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
+ KVM_REG_ARM_FW_REG(0),
+ KVM_REG_ARM_FW_REG(1),
+ KVM_REG_ARM_FW_REG(2),
+ ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 0, 2),
+ ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */
+ ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */
+ ARM64_SYS_REG(3, 1, 0, 0, 7), /* AIDR_EL1 */
+ ARM64_SYS_REG(3, 3, 0, 0, 1), /* CTR_EL0 */
+ ARM64_SYS_REG(2, 0, 0, 0, 4),
+ ARM64_SYS_REG(2, 0, 0, 0, 5),
+ ARM64_SYS_REG(2, 0, 0, 0, 6),
+ ARM64_SYS_REG(2, 0, 0, 0, 7),
+ ARM64_SYS_REG(2, 0, 0, 1, 4),
+ ARM64_SYS_REG(2, 0, 0, 1, 5),
+ ARM64_SYS_REG(2, 0, 0, 1, 6),
+ ARM64_SYS_REG(2, 0, 0, 1, 7),
+ ARM64_SYS_REG(2, 0, 0, 2, 0), /* MDCCINT_EL1 */
+ ARM64_SYS_REG(2, 0, 0, 2, 2), /* MDSCR_EL1 */
+ ARM64_SYS_REG(2, 0, 0, 2, 4),
+ ARM64_SYS_REG(2, 0, 0, 2, 5),
+ ARM64_SYS_REG(2, 0, 0, 2, 6),
+ ARM64_SYS_REG(2, 0, 0, 2, 7),
+ ARM64_SYS_REG(2, 0, 0, 3, 4),
+ ARM64_SYS_REG(2, 0, 0, 3, 5),
+ ARM64_SYS_REG(2, 0, 0, 3, 6),
+ ARM64_SYS_REG(2, 0, 0, 3, 7),
+ ARM64_SYS_REG(2, 0, 0, 4, 4),
+ ARM64_SYS_REG(2, 0, 0, 4, 5),
+ ARM64_SYS_REG(2, 0, 0, 4, 6),
+ ARM64_SYS_REG(2, 0, 0, 4, 7),
+ ARM64_SYS_REG(2, 0, 0, 5, 4),
+ ARM64_SYS_REG(2, 0, 0, 5, 5),
+ ARM64_SYS_REG(2, 0, 0, 5, 6),
+ ARM64_SYS_REG(2, 0, 0, 5, 7),
+ ARM64_SYS_REG(2, 0, 0, 6, 4),
+ ARM64_SYS_REG(2, 0, 0, 6, 5),
+ ARM64_SYS_REG(2, 0, 0, 6, 6),
+ ARM64_SYS_REG(2, 0, 0, 6, 7),
+ ARM64_SYS_REG(2, 0, 0, 7, 4),
+ ARM64_SYS_REG(2, 0, 0, 7, 5),
+ ARM64_SYS_REG(2, 0, 0, 7, 6),
+ ARM64_SYS_REG(2, 0, 0, 7, 7),
+ ARM64_SYS_REG(2, 0, 0, 8, 4),
+ ARM64_SYS_REG(2, 0, 0, 8, 5),
+ ARM64_SYS_REG(2, 0, 0, 8, 6),
+ ARM64_SYS_REG(2, 0, 0, 8, 7),
+ ARM64_SYS_REG(2, 0, 0, 9, 4),
+ ARM64_SYS_REG(2, 0, 0, 9, 5),
+ ARM64_SYS_REG(2, 0, 0, 9, 6),
+ ARM64_SYS_REG(2, 0, 0, 9, 7),
+ ARM64_SYS_REG(2, 0, 0, 10, 4),
+ ARM64_SYS_REG(2, 0, 0, 10, 5),
+ ARM64_SYS_REG(2, 0, 0, 10, 6),
+ ARM64_SYS_REG(2, 0, 0, 10, 7),
+ ARM64_SYS_REG(2, 0, 0, 11, 4),
+ ARM64_SYS_REG(2, 0, 0, 11, 5),
+ ARM64_SYS_REG(2, 0, 0, 11, 6),
+ ARM64_SYS_REG(2, 0, 0, 11, 7),
+ ARM64_SYS_REG(2, 0, 0, 12, 4),
+ ARM64_SYS_REG(2, 0, 0, 12, 5),
+ ARM64_SYS_REG(2, 0, 0, 12, 6),
+ ARM64_SYS_REG(2, 0, 0, 12, 7),
+ ARM64_SYS_REG(2, 0, 0, 13, 4),
+ ARM64_SYS_REG(2, 0, 0, 13, 5),
+ ARM64_SYS_REG(2, 0, 0, 13, 6),
+ ARM64_SYS_REG(2, 0, 0, 13, 7),
+ ARM64_SYS_REG(2, 0, 0, 14, 4),
+ ARM64_SYS_REG(2, 0, 0, 14, 5),
+ ARM64_SYS_REG(2, 0, 0, 14, 6),
+ ARM64_SYS_REG(2, 0, 0, 14, 7),
+ ARM64_SYS_REG(2, 0, 0, 15, 4),
+ ARM64_SYS_REG(2, 0, 0, 15, 5),
+ ARM64_SYS_REG(2, 0, 0, 15, 6),
+ ARM64_SYS_REG(2, 0, 0, 15, 7),
+ ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */
+ ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 1), /* ID_PFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 2), /* ID_DFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 3), /* ID_AFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 4), /* ID_MMFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 5), /* ID_MMFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 6), /* ID_MMFR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 7), /* ID_MMFR3_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 0), /* ID_ISAR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 1), /* ID_ISAR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 2), /* ID_ISAR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 3), /* ID_ISAR3_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 4), /* ID_ISAR4_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 5), /* ID_ISAR5_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 6), /* ID_MMFR4_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 7), /* ID_ISAR6_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 0), /* MVFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 1), /* MVFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 2), /* MVFR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 3),
+ ARM64_SYS_REG(3, 0, 0, 3, 4), /* ID_PFR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 5), /* ID_DFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 6), /* ID_MMFR5_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 7),
+ ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 4, 2),
+ ARM64_SYS_REG(3, 0, 0, 4, 3),
+ ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 4, 5),
+ ARM64_SYS_REG(3, 0, 0, 4, 6),
+ ARM64_SYS_REG(3, 0, 0, 4, 7),
+ ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 5, 1), /* ID_AA64DFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 5, 2),
+ ARM64_SYS_REG(3, 0, 0, 5, 3),
+ ARM64_SYS_REG(3, 0, 0, 5, 4), /* ID_AA64AFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 5, 5), /* ID_AA64AFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 5, 6),
+ ARM64_SYS_REG(3, 0, 0, 5, 7),
+ ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 6, 2),
+ ARM64_SYS_REG(3, 0, 0, 6, 3),
+ ARM64_SYS_REG(3, 0, 0, 6, 4),
+ ARM64_SYS_REG(3, 0, 0, 6, 5),
+ ARM64_SYS_REG(3, 0, 0, 6, 6),
+ ARM64_SYS_REG(3, 0, 0, 6, 7),
+ ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3),
+ ARM64_SYS_REG(3, 0, 0, 7, 4),
+ ARM64_SYS_REG(3, 0, 0, 7, 5),
+ ARM64_SYS_REG(3, 0, 0, 7, 6),
+ ARM64_SYS_REG(3, 0, 0, 7, 7),
+ ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */
+ ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */
+ ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */
+ ARM64_SYS_REG(3, 0, 5, 1, 0), /* AFSR0_EL1 */
+ ARM64_SYS_REG(3, 0, 5, 1, 1), /* AFSR1_EL1 */
+ ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */
+ ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */
+ ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */
+ ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */
+ ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */
+ ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */
+ ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */
+ ARM64_SYS_REG(3, 0, 13, 0, 1), /* CONTEXTIDR_EL1 */
+ ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */
+ ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */
+ ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */
+ ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 12, 3), /* PMOVSCLR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 12, 4), /* PMSWINC_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 12, 5), /* PMSELR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */
+ ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
+ ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 8, 0),
+ ARM64_SYS_REG(3, 3, 14, 8, 1),
+ ARM64_SYS_REG(3, 3, 14, 8, 2),
+ ARM64_SYS_REG(3, 3, 14, 8, 3),
+ ARM64_SYS_REG(3, 3, 14, 8, 4),
+ ARM64_SYS_REG(3, 3, 14, 8, 5),
+ ARM64_SYS_REG(3, 3, 14, 8, 6),
+ ARM64_SYS_REG(3, 3, 14, 8, 7),
+ ARM64_SYS_REG(3, 3, 14, 9, 0),
+ ARM64_SYS_REG(3, 3, 14, 9, 1),
+ ARM64_SYS_REG(3, 3, 14, 9, 2),
+ ARM64_SYS_REG(3, 3, 14, 9, 3),
+ ARM64_SYS_REG(3, 3, 14, 9, 4),
+ ARM64_SYS_REG(3, 3, 14, 9, 5),
+ ARM64_SYS_REG(3, 3, 14, 9, 6),
+ ARM64_SYS_REG(3, 3, 14, 9, 7),
+ ARM64_SYS_REG(3, 3, 14, 10, 0),
+ ARM64_SYS_REG(3, 3, 14, 10, 1),
+ ARM64_SYS_REG(3, 3, 14, 10, 2),
+ ARM64_SYS_REG(3, 3, 14, 10, 3),
+ ARM64_SYS_REG(3, 3, 14, 10, 4),
+ ARM64_SYS_REG(3, 3, 14, 10, 5),
+ ARM64_SYS_REG(3, 3, 14, 10, 6),
+ ARM64_SYS_REG(3, 3, 14, 10, 7),
+ ARM64_SYS_REG(3, 3, 14, 11, 0),
+ ARM64_SYS_REG(3, 3, 14, 11, 1),
+ ARM64_SYS_REG(3, 3, 14, 11, 2),
+ ARM64_SYS_REG(3, 3, 14, 11, 3),
+ ARM64_SYS_REG(3, 3, 14, 11, 4),
+ ARM64_SYS_REG(3, 3, 14, 11, 5),
+ ARM64_SYS_REG(3, 3, 14, 11, 6),
+ ARM64_SYS_REG(3, 3, 14, 12, 0),
+ ARM64_SYS_REG(3, 3, 14, 12, 1),
+ ARM64_SYS_REG(3, 3, 14, 12, 2),
+ ARM64_SYS_REG(3, 3, 14, 12, 3),
+ ARM64_SYS_REG(3, 3, 14, 12, 4),
+ ARM64_SYS_REG(3, 3, 14, 12, 5),
+ ARM64_SYS_REG(3, 3, 14, 12, 6),
+ ARM64_SYS_REG(3, 3, 14, 12, 7),
+ ARM64_SYS_REG(3, 3, 14, 13, 0),
+ ARM64_SYS_REG(3, 3, 14, 13, 1),
+ ARM64_SYS_REG(3, 3, 14, 13, 2),
+ ARM64_SYS_REG(3, 3, 14, 13, 3),
+ ARM64_SYS_REG(3, 3, 14, 13, 4),
+ ARM64_SYS_REG(3, 3, 14, 13, 5),
+ ARM64_SYS_REG(3, 3, 14, 13, 6),
+ ARM64_SYS_REG(3, 3, 14, 13, 7),
+ ARM64_SYS_REG(3, 3, 14, 14, 0),
+ ARM64_SYS_REG(3, 3, 14, 14, 1),
+ ARM64_SYS_REG(3, 3, 14, 14, 2),
+ ARM64_SYS_REG(3, 3, 14, 14, 3),
+ ARM64_SYS_REG(3, 3, 14, 14, 4),
+ ARM64_SYS_REG(3, 3, 14, 14, 5),
+ ARM64_SYS_REG(3, 3, 14, 14, 6),
+ ARM64_SYS_REG(3, 3, 14, 14, 7),
+ ARM64_SYS_REG(3, 3, 14, 15, 0),
+ ARM64_SYS_REG(3, 3, 14, 15, 1),
+ ARM64_SYS_REG(3, 3, 14, 15, 2),
+ ARM64_SYS_REG(3, 3, 14, 15, 3),
+ ARM64_SYS_REG(3, 3, 14, 15, 4),
+ ARM64_SYS_REG(3, 3, 14, 15, 5),
+ ARM64_SYS_REG(3, 3, 14, 15, 6),
+ ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */
+ ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
+ ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
+ ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */
+ KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0,
+ KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1,
+ KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2,
+};
+static __u64 base_regs_n = ARRAY_SIZE(base_regs);
+
+static __u64 vregs[] = {
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
+};
+static __u64 vregs_n = ARRAY_SIZE(vregs);
+
+static __u64 sve_regs[] = {
+ KVM_REG_ARM64_SVE_VLS,
+ KVM_REG_ARM64_SVE_ZREG(0, 0),
+ KVM_REG_ARM64_SVE_ZREG(1, 0),
+ KVM_REG_ARM64_SVE_ZREG(2, 0),
+ KVM_REG_ARM64_SVE_ZREG(3, 0),
+ KVM_REG_ARM64_SVE_ZREG(4, 0),
+ KVM_REG_ARM64_SVE_ZREG(5, 0),
+ KVM_REG_ARM64_SVE_ZREG(6, 0),
+ KVM_REG_ARM64_SVE_ZREG(7, 0),
+ KVM_REG_ARM64_SVE_ZREG(8, 0),
+ KVM_REG_ARM64_SVE_ZREG(9, 0),
+ KVM_REG_ARM64_SVE_ZREG(10, 0),
+ KVM_REG_ARM64_SVE_ZREG(11, 0),
+ KVM_REG_ARM64_SVE_ZREG(12, 0),
+ KVM_REG_ARM64_SVE_ZREG(13, 0),
+ KVM_REG_ARM64_SVE_ZREG(14, 0),
+ KVM_REG_ARM64_SVE_ZREG(15, 0),
+ KVM_REG_ARM64_SVE_ZREG(16, 0),
+ KVM_REG_ARM64_SVE_ZREG(17, 0),
+ KVM_REG_ARM64_SVE_ZREG(18, 0),
+ KVM_REG_ARM64_SVE_ZREG(19, 0),
+ KVM_REG_ARM64_SVE_ZREG(20, 0),
+ KVM_REG_ARM64_SVE_ZREG(21, 0),
+ KVM_REG_ARM64_SVE_ZREG(22, 0),
+ KVM_REG_ARM64_SVE_ZREG(23, 0),
+ KVM_REG_ARM64_SVE_ZREG(24, 0),
+ KVM_REG_ARM64_SVE_ZREG(25, 0),
+ KVM_REG_ARM64_SVE_ZREG(26, 0),
+ KVM_REG_ARM64_SVE_ZREG(27, 0),
+ KVM_REG_ARM64_SVE_ZREG(28, 0),
+ KVM_REG_ARM64_SVE_ZREG(29, 0),
+ KVM_REG_ARM64_SVE_ZREG(30, 0),
+ KVM_REG_ARM64_SVE_ZREG(31, 0),
+ KVM_REG_ARM64_SVE_PREG(0, 0),
+ KVM_REG_ARM64_SVE_PREG(1, 0),
+ KVM_REG_ARM64_SVE_PREG(2, 0),
+ KVM_REG_ARM64_SVE_PREG(3, 0),
+ KVM_REG_ARM64_SVE_PREG(4, 0),
+ KVM_REG_ARM64_SVE_PREG(5, 0),
+ KVM_REG_ARM64_SVE_PREG(6, 0),
+ KVM_REG_ARM64_SVE_PREG(7, 0),
+ KVM_REG_ARM64_SVE_PREG(8, 0),
+ KVM_REG_ARM64_SVE_PREG(9, 0),
+ KVM_REG_ARM64_SVE_PREG(10, 0),
+ KVM_REG_ARM64_SVE_PREG(11, 0),
+ KVM_REG_ARM64_SVE_PREG(12, 0),
+ KVM_REG_ARM64_SVE_PREG(13, 0),
+ KVM_REG_ARM64_SVE_PREG(14, 0),
+ KVM_REG_ARM64_SVE_PREG(15, 0),
+ KVM_REG_ARM64_SVE_FFR(0),
+ ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */
+};
+static __u64 sve_regs_n = ARRAY_SIZE(sve_regs);
+
+static __u64 rejects_set[] = {
+#ifdef REG_LIST_SVE
+ KVM_REG_ARM64_SVE_VLS,
+#endif
+};
+static __u64 rejects_set_n = ARRAY_SIZE(rejects_set);
diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c
deleted file mode 100644
index 11672ec6f74e..000000000000
--- a/tools/testing/selftests/kvm/clear_dirty_log_test.c
+++ /dev/null
@@ -1,6 +0,0 @@
-#define USE_CLEAR_DIRTY_LOG
-#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
-#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1)
-#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
- KVM_DIRTY_LOG_INITIALLY_SET)
-#include "dirty_log_test.c"
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 360cd3ea4cd6..3d96a7bfaff3 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -21,20 +21,12 @@
#include <linux/bitops.h>
#include <linux/userfaultfd.h>
-#include "test_util.h"
-#include "kvm_util.h"
+#include "perf_test_util.h"
#include "processor.h"
+#include "test_util.h"
#ifdef __NR_userfaultfd
-/* The memory slot index demand page */
-#define TEST_MEM_SLOT_INDEX 1
-
-/* Default guest test virtual memory offset */
-#define DEFAULT_GUEST_TEST_MEM 0xc0000000
-
-#define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */
-
#ifdef PRINT_PER_PAGE_UPDATES
#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
#else
@@ -47,77 +39,17 @@
#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
#endif
-#define MAX_VCPUS 512
-
-/*
- * Guest/Host shared variables. Ensure addr_gva2hva() and/or
- * sync_global_to/from_guest() are used when accessing from
- * the host. READ/WRITE_ONCE() should also be used with anything
- * that may change.
- */
-static uint64_t host_page_size;
-static uint64_t guest_page_size;
-
static char *guest_data_prototype;
-/*
- * Guest physical memory offset of the testing memory slot.
- * This will be set to the topmost valid physical address minus
- * the test memory size.
- */
-static uint64_t guest_test_phys_mem;
-
-/*
- * Guest virtual memory offset of the testing memory slot.
- * Must not conflict with identity mapped test code.
- */
-static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
-
-struct vcpu_args {
- uint64_t gva;
- uint64_t pages;
-
- /* Only used by the host userspace part of the vCPU thread */
- int vcpu_id;
- struct kvm_vm *vm;
-};
-
-static struct vcpu_args vcpu_args[MAX_VCPUS];
-
-/*
- * Continuously write to the first 8 bytes of each page in the demand paging
- * memory region.
- */
-static void guest_code(uint32_t vcpu_id)
-{
- uint64_t gva;
- uint64_t pages;
- int i;
-
- /* Make sure vCPU args data structure is not corrupt. */
- GUEST_ASSERT(vcpu_args[vcpu_id].vcpu_id == vcpu_id);
-
- gva = vcpu_args[vcpu_id].gva;
- pages = vcpu_args[vcpu_id].pages;
-
- for (i = 0; i < pages; i++) {
- uint64_t addr = gva + (i * guest_page_size);
-
- addr &= ~(host_page_size - 1);
- *(uint64_t *)addr = 0x0123456789ABCDEF;
- }
-
- GUEST_SYNC(1);
-}
-
static void *vcpu_worker(void *data)
{
int ret;
- struct vcpu_args *args = (struct vcpu_args *)data;
- struct kvm_vm *vm = args->vm;
- int vcpu_id = args->vcpu_id;
+ struct vcpu_args *vcpu_args = (struct vcpu_args *)data;
+ int vcpu_id = vcpu_args->vcpu_id;
+ struct kvm_vm *vm = perf_test_args.vm;
struct kvm_run *run;
- struct timespec start, end, ts_diff;
+ struct timespec start;
+ struct timespec ts_diff;
vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
run = vcpu_state(vm, vcpu_id);
@@ -133,52 +65,18 @@ static void *vcpu_worker(void *data)
exit_reason_str(run->exit_reason));
}
- clock_gettime(CLOCK_MONOTONIC, &end);
- ts_diff = timespec_sub(end, start);
+ ts_diff = timespec_diff_now(start);
PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
ts_diff.tv_sec, ts_diff.tv_nsec);
return NULL;
}
-#define PAGE_SHIFT_4K 12
-#define PTES_PER_4K_PT 512
-
-static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
- uint64_t vcpu_memory_bytes)
-{
- struct kvm_vm *vm;
- uint64_t pages = DEFAULT_GUEST_PHY_PAGES;
-
- /* Account for a few pages per-vCPU for stacks */
- pages += DEFAULT_STACK_PGS * vcpus;
-
- /*
- * Reserve twice the ammount of memory needed to map the test region and
- * the page table / stacks region, at 4k, for page tables. Do the
- * calculation with 4K page size: the smallest of all archs. (e.g., 64K
- * page size guest will need even less memory for page tables).
- */
- pages += (2 * pages) / PTES_PER_4K_PT;
- pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) /
- PTES_PER_4K_PT;
- pages = vm_adjust_num_guest_pages(mode, pages);
-
- pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
-
- vm = _vm_create(mode, pages, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-#ifdef __x86_64__
- vm_create_irqchip(vm);
-#endif
- return vm;
-}
-
static int handle_uffd_page_request(int uffd, uint64_t addr)
{
pid_t tid;
struct timespec start;
- struct timespec end;
+ struct timespec ts_diff;
struct uffdio_copy copy;
int r;
@@ -186,7 +84,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
copy.src = (uint64_t)guest_data_prototype;
copy.dst = addr;
- copy.len = host_page_size;
+ copy.len = perf_test_args.host_page_size;
copy.mode = 0;
clock_gettime(CLOCK_MONOTONIC, &start);
@@ -198,12 +96,12 @@ static int handle_uffd_page_request(int uffd, uint64_t addr)
return r;
}
- clock_gettime(CLOCK_MONOTONIC, &end);
+ ts_diff = timespec_diff_now(start);
PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
- timespec_to_ns(timespec_sub(end, start)));
+ timespec_to_ns(ts_diff));
PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
- host_page_size, addr, tid);
+ perf_test_args.host_page_size, addr, tid);
return 0;
}
@@ -223,7 +121,8 @@ static void *uffd_handler_thread_fn(void *arg)
int pipefd = uffd_args->pipefd;
useconds_t delay = uffd_args->delay;
int64_t pages = 0;
- struct timespec start, end, ts_diff;
+ struct timespec start;
+ struct timespec ts_diff;
clock_gettime(CLOCK_MONOTONIC, &start);
while (!quit_uffd_thread) {
@@ -292,8 +191,7 @@ static void *uffd_handler_thread_fn(void *arg)
pages++;
}
- clock_gettime(CLOCK_MONOTONIC, &end);
- ts_diff = timespec_sub(end, start);
+ ts_diff = timespec_diff_now(start);
PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
pages, ts_diff.tv_sec, ts_diff.tv_nsec,
pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
@@ -351,99 +249,54 @@ static int setup_demand_paging(struct kvm_vm *vm,
}
static void run_test(enum vm_guest_mode mode, bool use_uffd,
- useconds_t uffd_delay, int vcpus,
- uint64_t vcpu_memory_bytes)
+ useconds_t uffd_delay)
{
pthread_t *vcpu_threads;
pthread_t *uffd_handler_threads = NULL;
struct uffd_handler_args *uffd_args = NULL;
- struct timespec start, end, ts_diff;
+ struct timespec start;
+ struct timespec ts_diff;
int *pipefds = NULL;
struct kvm_vm *vm;
- uint64_t guest_num_pages;
int vcpu_id;
int r;
- vm = create_vm(mode, vcpus, vcpu_memory_bytes);
-
- guest_page_size = vm_get_page_size(vm);
-
- TEST_ASSERT(vcpu_memory_bytes % guest_page_size == 0,
- "Guest memory size is not guest page size aligned.");
-
- guest_num_pages = (vcpus * vcpu_memory_bytes) / guest_page_size;
- guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
-
- /*
- * If there should be more memory in the guest test region than there
- * can be pages in the guest, it will definitely cause problems.
- */
- TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
- "Requested more guest memory than address space allows.\n"
- " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
- guest_num_pages, vm_get_max_gfn(vm), vcpus,
- vcpu_memory_bytes);
-
- host_page_size = getpagesize();
- TEST_ASSERT(vcpu_memory_bytes % host_page_size == 0,
- "Guest memory size is not host page size aligned.");
+ vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
- guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
- guest_page_size;
- guest_test_phys_mem &= ~(host_page_size - 1);
+ perf_test_args.wr_fract = 1;
-#ifdef __s390x__
- /* Align to 1M (segment size) */
- guest_test_phys_mem &= ~((1 << 20) - 1);
-#endif
-
- pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
-
- /* Add an extra memory slot for testing demand paging */
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- guest_test_phys_mem,
- TEST_MEM_SLOT_INDEX,
- guest_num_pages, 0);
-
- /* Do mapping for the demand paging memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
-
- ucall_init(vm, NULL);
-
- guest_data_prototype = malloc(host_page_size);
+ guest_data_prototype = malloc(perf_test_args.host_page_size);
TEST_ASSERT(guest_data_prototype,
"Failed to allocate buffer for guest data pattern");
- memset(guest_data_prototype, 0xAB, host_page_size);
+ memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size);
- vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads));
+ vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
TEST_ASSERT(vcpu_threads, "Memory allocation failed");
+ add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
+
if (use_uffd) {
uffd_handler_threads =
- malloc(vcpus * sizeof(*uffd_handler_threads));
+ malloc(nr_vcpus * sizeof(*uffd_handler_threads));
TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
- uffd_args = malloc(vcpus * sizeof(*uffd_args));
+ uffd_args = malloc(nr_vcpus * sizeof(*uffd_args));
TEST_ASSERT(uffd_args, "Memory allocation failed");
- pipefds = malloc(sizeof(int) * vcpus * 2);
+ pipefds = malloc(sizeof(int) * nr_vcpus * 2);
TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
- }
-
- for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
- vm_paddr_t vcpu_gpa;
- void *vcpu_hva;
- vm_vcpu_add_default(vm, vcpu_id, guest_code);
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+ vm_paddr_t vcpu_gpa;
+ void *vcpu_hva;
- vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
- PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
- vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
+ vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size);
+ PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+ vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size);
- /* Cache the HVA pointer of the region */
- vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
+ /* Cache the HVA pointer of the region */
+ vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
- if (use_uffd) {
/*
* Set up user fault fd to handle demand paging
* requests.
@@ -456,53 +309,41 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
&uffd_handler_threads[vcpu_id],
pipefds[vcpu_id * 2],
uffd_delay, &uffd_args[vcpu_id],
- vcpu_hva, vcpu_memory_bytes);
+ vcpu_hva, guest_percpu_mem_size);
if (r < 0)
exit(-r);
}
-
-#ifdef __x86_64__
- vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid());
-#endif
-
- vcpu_args[vcpu_id].vm = vm;
- vcpu_args[vcpu_id].vcpu_id = vcpu_id;
- vcpu_args[vcpu_id].gva = guest_test_virt_mem +
- (vcpu_id * vcpu_memory_bytes);
- vcpu_args[vcpu_id].pages = vcpu_memory_bytes / guest_page_size;
}
/* Export the shared variables to the guest */
- sync_global_to_guest(vm, host_page_size);
- sync_global_to_guest(vm, guest_page_size);
- sync_global_to_guest(vm, vcpu_args);
+ sync_global_to_guest(vm, perf_test_args);
pr_info("Finished creating vCPUs and starting uffd threads\n");
clock_gettime(CLOCK_MONOTONIC, &start);
- for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
- &vcpu_args[vcpu_id]);
+ &perf_test_args.vcpu_args[vcpu_id]);
}
pr_info("Started all vCPUs\n");
/* Wait for the vcpu threads to quit */
- for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
pthread_join(vcpu_threads[vcpu_id], NULL);
PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
}
- pr_info("All vCPU threads joined\n");
+ ts_diff = timespec_diff_now(start);
- clock_gettime(CLOCK_MONOTONIC, &end);
+ pr_info("All vCPU threads joined\n");
if (use_uffd) {
char c;
/* Tell the user fault fd handler threads to quit */
- for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
r = write(pipefds[vcpu_id * 2 + 1], &c, 1);
TEST_ASSERT(r == 1, "Unable to write to pipefd");
@@ -510,11 +351,11 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd,
}
}
- ts_diff = timespec_sub(end, start);
pr_info("Total guest execution time: %ld.%.9lds\n",
ts_diff.tv_sec, ts_diff.tv_nsec);
pr_info("Overall demand paging rate: %f pgs/sec\n",
- guest_num_pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
+ perf_test_args.vcpu_args[0].pages * nr_vcpus /
+ ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
ucall_uninit(vm);
kvm_vm_free(vm);
@@ -568,9 +409,8 @@ static void help(char *name)
int main(int argc, char *argv[])
{
+ int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
bool mode_selected = false;
- uint64_t vcpu_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE;
- int vcpus = 1;
unsigned int mode;
int opt, i;
bool use_uffd = false;
@@ -619,15 +459,12 @@ int main(int argc, char *argv[])
"A negative UFFD delay is not supported.");
break;
case 'b':
- vcpu_memory_bytes = parse_size(optarg);
+ guest_percpu_mem_size = parse_size(optarg);
break;
case 'v':
- vcpus = atoi(optarg);
- TEST_ASSERT(vcpus > 0,
- "Must have a positive number of vCPUs");
- TEST_ASSERT(vcpus <= MAX_VCPUS,
- "This test does not currently support\n"
- "more than %d vCPUs.", MAX_VCPUS);
+ nr_vcpus = atoi(optarg);
+ TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
+ "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
break;
case 'h':
default:
@@ -642,7 +479,7 @@ int main(int argc, char *argv[])
TEST_ASSERT(guest_modes[i].supported,
"Guest mode ID %d (%s) not supported.",
i, vm_guest_mode_string(i));
- run_test(i, use_uffd, uffd_delay, vcpus, vcpu_memory_bytes);
+ run_test(i, use_uffd, uffd_delay);
}
return 0;
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
new file mode 100644
index 000000000000..85c9b8f73142
--- /dev/null
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -0,0 +1,376 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging performance test
+ *
+ * Based on dirty_log_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2020, Google, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "kvm_util.h"
+#include "perf_test_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
+#define TEST_HOST_LOOP_N 2UL
+
+/* Host variables */
+static bool host_quit;
+static uint64_t iteration;
+static uint64_t vcpu_last_completed_iteration[MAX_VCPUS];
+
+static void *vcpu_worker(void *data)
+{
+ int ret;
+ struct kvm_vm *vm = perf_test_args.vm;
+ uint64_t pages_count = 0;
+ struct kvm_run *run;
+ struct timespec start;
+ struct timespec ts_diff;
+ struct timespec total = (struct timespec){0};
+ struct timespec avg;
+ struct vcpu_args *vcpu_args = (struct vcpu_args *)data;
+ int vcpu_id = vcpu_args->vcpu_id;
+
+ vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+ run = vcpu_state(vm, vcpu_id);
+
+ while (!READ_ONCE(host_quit)) {
+ uint64_t current_iteration = READ_ONCE(iteration);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ ret = _vcpu_run(vm, vcpu_id);
+ ts_diff = timespec_diff_now(start);
+
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+ TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC,
+ "Invalid guest sync status: exit_reason=%s\n",
+ exit_reason_str(run->exit_reason));
+
+ pr_debug("Got sync event from vCPU %d\n", vcpu_id);
+ vcpu_last_completed_iteration[vcpu_id] = current_iteration;
+ pr_debug("vCPU %d updated last completed iteration to %lu\n",
+ vcpu_id, vcpu_last_completed_iteration[vcpu_id]);
+
+ if (current_iteration) {
+ pages_count += vcpu_args->pages;
+ total = timespec_add(total, ts_diff);
+ pr_debug("vCPU %d iteration %lu dirty memory time: %ld.%.9lds\n",
+ vcpu_id, current_iteration, ts_diff.tv_sec,
+ ts_diff.tv_nsec);
+ } else {
+ pr_debug("vCPU %d iteration %lu populate memory time: %ld.%.9lds\n",
+ vcpu_id, current_iteration, ts_diff.tv_sec,
+ ts_diff.tv_nsec);
+ }
+
+ while (current_iteration == READ_ONCE(iteration) &&
+ !READ_ONCE(host_quit)) {}
+ }
+
+ avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]);
+ pr_debug("\nvCPU %d dirtied 0x%lx pages over %lu iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+ vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id],
+ total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+
+ return NULL;
+}
+
+#ifdef USE_CLEAR_DIRTY_LOG
+static u64 dirty_log_manual_caps;
+#endif
+
+static void run_test(enum vm_guest_mode mode, unsigned long iterations,
+ uint64_t phys_offset, int wr_fract)
+{
+ pthread_t *vcpu_threads;
+ struct kvm_vm *vm;
+ unsigned long *bmap;
+ uint64_t guest_num_pages;
+ uint64_t host_num_pages;
+ int vcpu_id;
+ struct timespec start;
+ struct timespec ts_diff;
+ struct timespec get_dirty_log_total = (struct timespec){0};
+ struct timespec vcpu_dirty_total = (struct timespec){0};
+ struct timespec avg;
+#ifdef USE_CLEAR_DIRTY_LOG
+ struct kvm_enable_cap cap = {};
+ struct timespec clear_dirty_log_total = (struct timespec){0};
+#endif
+
+ vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size);
+
+ perf_test_args.wr_fract = wr_fract;
+
+ guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
+ guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+ host_num_pages = vm_num_host_pages(mode, guest_num_pages);
+ bmap = bitmap_alloc(host_num_pages);
+
+#ifdef USE_CLEAR_DIRTY_LOG
+ cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
+ cap.args[0] = dirty_log_manual_caps;
+ vm_enable_cap(vm, &cap);
+#endif
+
+ vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
+ TEST_ASSERT(vcpu_threads, "Memory allocation failed");
+
+ add_vcpus(vm, nr_vcpus, guest_percpu_mem_size);
+
+ sync_global_to_guest(vm, perf_test_args);
+
+ /* Start the iterations */
+ iteration = 0;
+ host_quit = false;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+ pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
+ &perf_test_args.vcpu_args[vcpu_id]);
+ }
+
+ /* Allow the vCPU to populate memory */
+ pr_debug("Starting iteration %lu - Populating\n", iteration);
+ while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration)
+ pr_debug("Waiting for vcpu_last_completed_iteration == %lu\n",
+ iteration);
+
+ ts_diff = timespec_diff_now(start);
+ pr_info("Populate memory time: %ld.%.9lds\n",
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ /* Enable dirty logging */
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX,
+ KVM_MEM_LOG_DIRTY_PAGES);
+ ts_diff = timespec_diff_now(start);
+ pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ while (iteration < iterations) {
+ /*
+ * Incrementing the iteration number will start the vCPUs
+ * dirtying memory again.
+ */
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ iteration++;
+
+ pr_debug("Starting iteration %lu\n", iteration);
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+ while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration)
+ pr_debug("Waiting for vCPU %d vcpu_last_completed_iteration == %lu\n",
+ vcpu_id, iteration);
+ }
+
+ ts_diff = timespec_diff_now(start);
+ vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff);
+ pr_info("Iteration %lu dirty memory time: %ld.%.9lds\n",
+ iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
+
+ ts_diff = timespec_diff_now(start);
+ get_dirty_log_total = timespec_add(get_dirty_log_total,
+ ts_diff);
+ pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n",
+ iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+
+#ifdef USE_CLEAR_DIRTY_LOG
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
+ host_num_pages);
+
+ ts_diff = timespec_diff_now(start);
+ clear_dirty_log_total = timespec_add(clear_dirty_log_total,
+ ts_diff);
+ pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n",
+ iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+#endif
+ }
+
+ /* Tell the vcpu thread to quit */
+ host_quit = true;
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++)
+ pthread_join(vcpu_threads[vcpu_id], NULL);
+
+ /* Disable dirty logging */
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0);
+ ts_diff = timespec_diff_now(start);
+ pr_info("Disabling dirty logging time: %ld.%.9lds\n",
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ avg = timespec_div(get_dirty_log_total, iterations);
+ pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+ iterations, get_dirty_log_total.tv_sec,
+ get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+
+#ifdef USE_CLEAR_DIRTY_LOG
+ avg = timespec_div(clear_dirty_log_total, iterations);
+ pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+ iterations, clear_dirty_log_total.tv_sec,
+ clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+#endif
+
+ free(bmap);
+ free(vcpu_threads);
+ ucall_uninit(vm);
+ kvm_vm_free(vm);
+}
+
+struct guest_mode {
+ bool supported;
+ bool enabled;
+};
+static struct guest_mode guest_modes[NUM_VM_MODES];
+
+#define guest_mode_init(mode, supported, enabled) ({ \
+ guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
+})
+
+static void help(char *name)
+{
+ int i;
+
+ puts("");
+ printf("usage: %s [-h] [-i iterations] [-p offset] "
+ "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name);
+ puts("");
+ printf(" -i: specify iteration counts (default: %"PRIu64")\n",
+ TEST_HOST_LOOP_N);
+ printf(" -p: specify guest physical test memory offset\n"
+ " Warning: a low offset can conflict with the loaded test code.\n");
+ printf(" -m: specify the guest mode ID to test "
+ "(default: test all supported modes)\n"
+ " This option may be used multiple times.\n"
+ " Guest mode IDs:\n");
+ for (i = 0; i < NUM_VM_MODES; ++i) {
+ printf(" %d: %s%s\n", i, vm_guest_mode_string(i),
+ guest_modes[i].supported ? " (supported)" : "");
+ }
+ printf(" -b: specify the size of the memory region which should be\n"
+ " dirtied by each vCPU. e.g. 10M or 3G.\n"
+ " (default: 1G)\n");
+ printf(" -f: specify the fraction of pages which should be written to\n"
+ " as opposed to simply read, in the form\n"
+ " 1/<fraction of pages to write>.\n"
+ " (default: 1 i.e. all pages are written to.)\n");
+ printf(" -v: specify the number of vCPUs to run.\n");
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned long iterations = TEST_HOST_LOOP_N;
+ bool mode_selected = false;
+ uint64_t phys_offset = 0;
+ unsigned int mode;
+ int opt, i;
+ int wr_fract = 1;
+
+#ifdef USE_CLEAR_DIRTY_LOG
+ dirty_log_manual_caps =
+ kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+ if (!dirty_log_manual_caps) {
+ print_skip("KVM_CLEAR_DIRTY_LOG not available");
+ exit(KSFT_SKIP);
+ }
+ dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+ KVM_DIRTY_LOG_INITIALLY_SET);
+#endif
+
+#ifdef __x86_64__
+ guest_mode_init(VM_MODE_PXXV48_4K, true, true);
+#endif
+#ifdef __aarch64__
+ guest_mode_init(VM_MODE_P40V48_4K, true, true);
+ guest_mode_init(VM_MODE_P40V48_64K, true, true);
+
+ {
+ unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+
+ if (limit >= 52)
+ guest_mode_init(VM_MODE_P52V48_64K, true, true);
+ if (limit >= 48) {
+ guest_mode_init(VM_MODE_P48V48_4K, true, true);
+ guest_mode_init(VM_MODE_P48V48_64K, true, true);
+ }
+ }
+#endif
+#ifdef __s390x__
+ guest_mode_init(VM_MODE_P40V48_4K, true, true);
+#endif
+
+ while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) {
+ switch (opt) {
+ case 'i':
+ iterations = strtol(optarg, NULL, 10);
+ break;
+ case 'p':
+ phys_offset = strtoull(optarg, NULL, 0);
+ break;
+ case 'm':
+ if (!mode_selected) {
+ for (i = 0; i < NUM_VM_MODES; ++i)
+ guest_modes[i].enabled = false;
+ mode_selected = true;
+ }
+ mode = strtoul(optarg, NULL, 10);
+ TEST_ASSERT(mode < NUM_VM_MODES,
+ "Guest mode ID %d too big", mode);
+ guest_modes[mode].enabled = true;
+ break;
+ case 'b':
+ guest_percpu_mem_size = parse_size(optarg);
+ break;
+ case 'f':
+ wr_fract = atoi(optarg);
+ TEST_ASSERT(wr_fract >= 1,
+ "Write fraction cannot be less than one");
+ break;
+ case 'v':
+ nr_vcpus = atoi(optarg);
+ TEST_ASSERT(nr_vcpus > 0,
+ "Must have a positive number of vCPUs");
+ TEST_ASSERT(nr_vcpus <= MAX_VCPUS,
+ "This test does not currently support\n"
+ "more than %d vCPUs.", MAX_VCPUS);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ TEST_ASSERT(iterations >= 2, "The test should have at least two iterations");
+
+ pr_info("Test iterations: %"PRIu64"\n", iterations);
+
+ for (i = 0; i < NUM_VM_MODES; ++i) {
+ if (!guest_modes[i].enabled)
+ continue;
+ TEST_ASSERT(guest_modes[i].supported,
+ "Guest mode ID %d (%s) not supported.",
+ i, vm_guest_mode_string(i));
+ run_test(i, iterations, phys_offset, wr_fract);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 752ec158ac59..54da9cc20db4 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -128,6 +128,78 @@ static uint64_t host_dirty_count;
static uint64_t host_clear_count;
static uint64_t host_track_next_count;
+enum log_mode_t {
+ /* Only use KVM_GET_DIRTY_LOG for logging */
+ LOG_MODE_DIRTY_LOG = 0,
+
+ /* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
+ LOG_MODE_CLEAR_LOG = 1,
+
+ LOG_MODE_NUM,
+
+ /* Run all supported modes */
+ LOG_MODE_ALL = LOG_MODE_NUM,
+};
+
+/* Mode of logging to test. Default is to run all supported modes */
+static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
+/* Logging mode for current run */
+static enum log_mode_t host_log_mode;
+
+static bool clear_log_supported(void)
+{
+ return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+}
+
+static void clear_log_create_vm_done(struct kvm_vm *vm)
+{
+ struct kvm_enable_cap cap = {};
+ u64 manual_caps;
+
+ manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+ TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!");
+ manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+ KVM_DIRTY_LOG_INITIALLY_SET);
+ cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
+ cap.args[0] = manual_caps;
+ vm_enable_cap(vm, &cap);
+}
+
+static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
+ void *bitmap, uint32_t num_pages)
+{
+ kvm_vm_get_dirty_log(vm, slot, bitmap);
+}
+
+static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot,
+ void *bitmap, uint32_t num_pages)
+{
+ kvm_vm_get_dirty_log(vm, slot, bitmap);
+ kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages);
+}
+
+struct log_mode {
+ const char *name;
+ /* Return true if this mode is supported, otherwise false */
+ bool (*supported)(void);
+ /* Hook when the vm creation is done (before vcpu creation) */
+ void (*create_vm_done)(struct kvm_vm *vm);
+ /* Hook to collect the dirty pages into the bitmap provided */
+ void (*collect_dirty_pages) (struct kvm_vm *vm, int slot,
+ void *bitmap, uint32_t num_pages);
+} log_modes[LOG_MODE_NUM] = {
+ {
+ .name = "dirty-log",
+ .collect_dirty_pages = dirty_log_collect_dirty_pages,
+ },
+ {
+ .name = "clear-log",
+ .supported = clear_log_supported,
+ .create_vm_done = clear_log_create_vm_done,
+ .collect_dirty_pages = clear_log_collect_dirty_pages,
+ },
+};
+
/*
* We use this bitmap to track some pages that should have its dirty
* bit set in the _next_ iteration. For example, if we detected the
@@ -137,6 +209,44 @@ static uint64_t host_track_next_count;
*/
static unsigned long *host_bmap_track;
+static void log_modes_dump(void)
+{
+ int i;
+
+ printf("all");
+ for (i = 0; i < LOG_MODE_NUM; i++)
+ printf(", %s", log_modes[i].name);
+ printf("\n");
+}
+
+static bool log_mode_supported(void)
+{
+ struct log_mode *mode = &log_modes[host_log_mode];
+
+ if (mode->supported)
+ return mode->supported();
+
+ return true;
+}
+
+static void log_mode_create_vm_done(struct kvm_vm *vm)
+{
+ struct log_mode *mode = &log_modes[host_log_mode];
+
+ if (mode->create_vm_done)
+ mode->create_vm_done(vm);
+}
+
+static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot,
+ void *bitmap, uint32_t num_pages)
+{
+ struct log_mode *mode = &log_modes[host_log_mode];
+
+ TEST_ASSERT(mode->collect_dirty_pages != NULL,
+ "collect_dirty_pages() is required for any log mode!");
+ mode->collect_dirty_pages(vm, slot, bitmap, num_pages);
+}
+
static void generate_random_array(uint64_t *guest_array, uint64_t size)
{
uint64_t i;
@@ -195,7 +305,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
page);
}
- if (test_bit_le(page, bmap)) {
+ if (test_and_clear_bit_le(page, bmap)) {
host_dirty_count++;
/*
* If the bit is set, the value written onto
@@ -252,11 +362,12 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
- vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+ vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
#ifdef __x86_64__
vm_create_irqchip(vm);
#endif
+ log_mode_create_vm_done(vm);
vm_vcpu_add_default(vm, vcpuid, guest_code);
return vm;
}
@@ -264,10 +375,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
#define DIRTY_MEM_BITS 30 /* 1G */
#define PAGE_SHIFT_4K 12
-#ifdef USE_CLEAR_DIRTY_LOG
-static u64 dirty_log_manual_caps;
-#endif
-
static void run_test(enum vm_guest_mode mode, unsigned long iterations,
unsigned long interval, uint64_t phys_offset)
{
@@ -275,6 +382,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
struct kvm_vm *vm;
unsigned long *bmap;
+ if (!log_mode_supported()) {
+ print_skip("Log mode '%s' not supported",
+ log_modes[host_log_mode].name);
+ return;
+ }
+
/*
* We reserve page table for 2 times of extra dirty mem which
* will definitely cover the original (1G+) test range. Here
@@ -317,14 +430,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
bmap = bitmap_alloc(host_num_pages);
host_bmap_track = bitmap_alloc(host_num_pages);
-#ifdef USE_CLEAR_DIRTY_LOG
- struct kvm_enable_cap cap = {};
-
- cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
- cap.args[0] = dirty_log_manual_caps;
- vm_enable_cap(vm, &cap);
-#endif
-
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
guest_test_phys_mem,
@@ -362,11 +467,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
while (iteration < iterations) {
/* Give the vcpu thread some time to dirty some pages */
usleep(interval * 1000);
- kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
-#ifdef USE_CLEAR_DIRTY_LOG
- kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
- host_num_pages);
-#endif
+ log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX,
+ bmap, host_num_pages);
vm_dirty_log_verify(mode, bmap);
iteration++;
sync_global_to_guest(vm, iteration);
@@ -410,6 +512,9 @@ static void help(char *name)
TEST_HOST_LOOP_INTERVAL);
printf(" -p: specify guest physical test memory offset\n"
" Warning: a low offset can conflict with the loaded test code.\n");
+ printf(" -M: specify the host logging mode "
+ "(default: run all log modes). Supported modes: \n\t");
+ log_modes_dump();
printf(" -m: specify the guest mode ID to test "
"(default: test all supported modes)\n"
" This option may be used multiple times.\n"
@@ -429,18 +534,7 @@ int main(int argc, char *argv[])
bool mode_selected = false;
uint64_t phys_offset = 0;
unsigned int mode;
- int opt, i;
-
-#ifdef USE_CLEAR_DIRTY_LOG
- dirty_log_manual_caps =
- kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
- if (!dirty_log_manual_caps) {
- print_skip("KVM_CLEAR_DIRTY_LOG not available");
- exit(KSFT_SKIP);
- }
- dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
- KVM_DIRTY_LOG_INITIALLY_SET);
-#endif
+ int opt, i, j;
#ifdef __x86_64__
guest_mode_init(VM_MODE_PXXV48_4K, true, true);
@@ -464,7 +558,7 @@ int main(int argc, char *argv[])
guest_mode_init(VM_MODE_P40V48_4K, true, true);
#endif
- while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) {
+ while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) {
switch (opt) {
case 'i':
iterations = strtol(optarg, NULL, 10);
@@ -486,6 +580,26 @@ int main(int argc, char *argv[])
"Guest mode ID %d too big", mode);
guest_modes[mode].enabled = true;
break;
+ case 'M':
+ if (!strcmp(optarg, "all")) {
+ host_log_mode_option = LOG_MODE_ALL;
+ break;
+ }
+ for (i = 0; i < LOG_MODE_NUM; i++) {
+ if (!strcmp(optarg, log_modes[i].name)) {
+ pr_info("Setting log mode to: '%s'\n",
+ optarg);
+ host_log_mode_option = i;
+ break;
+ }
+ }
+ if (i == LOG_MODE_NUM) {
+ printf("Log mode '%s' invalid. Please choose "
+ "from: ", optarg);
+ log_modes_dump();
+ exit(1);
+ }
+ break;
case 'h':
default:
help(argv[0]);
@@ -507,7 +621,18 @@ int main(int argc, char *argv[])
TEST_ASSERT(guest_modes[i].supported,
"Guest mode ID %d (%s) not supported.",
i, vm_guest_mode_string(i));
- run_test(i, iterations, interval, phys_offset);
+ if (host_log_mode_option == LOG_MODE_ALL) {
+ /* Run each log mode */
+ for (j = 0; j < LOG_MODE_NUM; j++) {
+ pr_info("Testing Log Mode '%s'\n",
+ log_modes[j].name);
+ host_log_mode = j;
+ run_test(i, iterations, interval, phys_offset);
+ }
+ } else {
+ host_log_mode = host_log_mode_option;
+ run_test(i, iterations, interval, phys_offset);
+ }
}
return 0;
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 919e161dd289..7d29aa786959 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -63,9 +63,11 @@ enum vm_mem_backing_src_type {
int kvm_check_cap(long cap);
int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
+int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_enable_cap *cap);
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
void kvm_vm_free(struct kvm_vm *vmp);
void kvm_vm_restart(struct kvm_vm *vmp, int perm);
void kvm_vm_release(struct kvm_vm *vmp);
@@ -149,6 +151,7 @@ void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_guest_debug *debug);
void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_mp_state *mp_state);
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
@@ -294,6 +297,8 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
memcpy(&(g), _p, sizeof(g)); \
})
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid);
+
/* Common ucalls */
enum {
UCALL_NONE,
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
new file mode 100644
index 000000000000..2618052057b1
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tools/testing/selftests/kvm/include/perf_test_util.h
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H
+#define SELFTEST_KVM_PERF_TEST_UTIL_H
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define MAX_VCPUS 512
+
+#define PAGE_SHIFT_4K 12
+#define PTES_PER_4K_PT 512
+
+#define TEST_MEM_SLOT_INDEX 1
+
+/* Default guest test virtual memory offset */
+#define DEFAULT_GUEST_TEST_MEM 0xc0000000
+
+#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */
+
+/*
+ * Guest physical memory offset of the testing memory slot.
+ * This will be set to the topmost valid physical address minus
+ * the test memory size.
+ */
+static uint64_t guest_test_phys_mem;
+
+/*
+ * Guest virtual memory offset of the testing memory slot.
+ * Must not conflict with identity mapped test code.
+ */
+static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
+/* Number of VCPUs for the test */
+static int nr_vcpus = 1;
+
+struct vcpu_args {
+ uint64_t gva;
+ uint64_t pages;
+
+ /* Only used by the host userspace part of the vCPU thread */
+ int vcpu_id;
+};
+
+struct perf_test_args {
+ struct kvm_vm *vm;
+ uint64_t host_page_size;
+ uint64_t guest_page_size;
+ int wr_fract;
+
+ struct vcpu_args vcpu_args[MAX_VCPUS];
+};
+
+static struct perf_test_args perf_test_args;
+
+/*
+ * Continuously write to the first 8 bytes of each page in the
+ * specified region.
+ */
+static void guest_code(uint32_t vcpu_id)
+{
+ struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+ uint64_t gva;
+ uint64_t pages;
+ int i;
+
+ /* Make sure vCPU args data structure is not corrupt. */
+ GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id);
+
+ gva = vcpu_args->gva;
+ pages = vcpu_args->pages;
+
+ while (true) {
+ for (i = 0; i < pages; i++) {
+ uint64_t addr = gva + (i * perf_test_args.guest_page_size);
+
+ if (i % perf_test_args.wr_fract == 0)
+ *(uint64_t *)addr = 0x0123456789ABCDEF;
+ else
+ READ_ONCE(*(uint64_t *)addr);
+ }
+
+ GUEST_SYNC(1);
+ }
+}
+
+static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
+ uint64_t vcpu_memory_bytes)
+{
+ struct kvm_vm *vm;
+ uint64_t pages = DEFAULT_GUEST_PHY_PAGES;
+ uint64_t guest_num_pages;
+
+ /* Account for a few pages per-vCPU for stacks */
+ pages += DEFAULT_STACK_PGS * vcpus;
+
+ /*
+ * Reserve twice the ammount of memory needed to map the test region and
+ * the page table / stacks region, at 4k, for page tables. Do the
+ * calculation with 4K page size: the smallest of all archs. (e.g., 64K
+ * page size guest will need even less memory for page tables).
+ */
+ pages += (2 * pages) / PTES_PER_4K_PT;
+ pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) /
+ PTES_PER_4K_PT;
+ pages = vm_adjust_num_guest_pages(mode, pages);
+
+ pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
+ vm = vm_create(mode, pages, O_RDWR);
+ kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+#ifdef __x86_64__
+ vm_create_irqchip(vm);
+#endif
+
+ perf_test_args.vm = vm;
+ perf_test_args.guest_page_size = vm_get_page_size(vm);
+ perf_test_args.host_page_size = getpagesize();
+
+ TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0,
+ "Guest memory size is not guest page size aligned.");
+
+ guest_num_pages = (vcpus * vcpu_memory_bytes) /
+ perf_test_args.guest_page_size;
+ guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+
+ /*
+ * If there should be more memory in the guest test region than there
+ * can be pages in the guest, it will definitely cause problems.
+ */
+ TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
+ "Requested more guest memory than address space allows.\n"
+ " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
+ guest_num_pages, vm_get_max_gfn(vm), vcpus,
+ vcpu_memory_bytes);
+
+ TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0,
+ "Guest memory size is not host page size aligned.");
+
+ guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
+ perf_test_args.guest_page_size;
+ guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1);
+
+#ifdef __s390x__
+ /* Align to 1M (segment size) */
+ guest_test_phys_mem &= ~((1 << 20) - 1);
+#endif
+
+ pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
+
+ /* Add an extra memory slot for testing */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ guest_test_phys_mem,
+ TEST_MEM_SLOT_INDEX,
+ guest_num_pages, 0);
+
+ /* Do mapping for the demand paging memory slot */
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+
+ ucall_init(vm, NULL);
+
+ return vm;
+}
+
+static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes)
+{
+ vm_paddr_t vcpu_gpa;
+ struct vcpu_args *vcpu_args;
+ int vcpu_id;
+
+ for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+ vcpu_args = &perf_test_args.vcpu_args[vcpu_id];
+
+ vm_vcpu_add_default(vm, vcpu_id, guest_code);
+
+#ifdef __x86_64__
+ vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid());
+#endif
+
+ vcpu_args->vcpu_id = vcpu_id;
+ vcpu_args->gva = guest_test_virt_mem +
+ (vcpu_id * vcpu_memory_bytes);
+ vcpu_args->pages = vcpu_memory_bytes /
+ perf_test_args.guest_page_size;
+
+ vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
+ pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+ vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
+ }
+}
+
+#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 5eb01bf51b86..ffffa560436b 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -64,5 +64,7 @@ int64_t timespec_to_ns(struct timespec ts);
struct timespec timespec_add_ns(struct timespec ts, int64_t ns);
struct timespec timespec_add(struct timespec ts1, struct timespec ts2);
struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
+struct timespec timespec_diff_now(struct timespec start);
+struct timespec timespec_div(struct timespec ts, int divisor);
#endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 82b7fe16a824..8e61340b3911 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -36,6 +36,8 @@
#define X86_CR4_SMAP (1ul << 21)
#define X86_CR4_PKE (1ul << 22)
+#define UNEXPECTED_VECTOR_PORT 0xfff0u
+
/* General Registers in 64-Bit Mode */
struct gpr64_regs {
u64 rax;
@@ -59,7 +61,7 @@ struct gpr64_regs {
struct desc64 {
uint16_t limit0;
uint16_t base0;
- unsigned base1:8, s:1, type:4, dpl:2, p:1;
+ unsigned base1:8, type:4, s:1, dpl:2, p:1;
unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
uint32_t base3;
uint32_t zero1;
@@ -239,6 +241,11 @@ static inline struct desc_ptr get_idt(void)
return idt;
}
+static inline void outl(uint16_t port, uint32_t value)
+{
+ __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
+}
+
#define SET_XMM(__var, __xmm) \
asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
@@ -338,6 +345,35 @@ uint32_t kvm_get_cpuid_max_basic(void);
uint32_t kvm_get_cpuid_max_extended(void);
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
+struct ex_regs {
+ uint64_t rax, rcx, rdx, rbx;
+ uint64_t rbp, rsi, rdi;
+ uint64_t r8, r9, r10, r11;
+ uint64_t r12, r13, r14, r15;
+ uint64_t vector;
+ uint64_t error_code;
+ uint64_t rip;
+ uint64_t cs;
+ uint64_t rflags;
+};
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid);
+void vm_handle_exception(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *));
+
+/*
+ * set_cpuid() - overwrites a matching cpuid entry with the provided value.
+ * matches based on ent->function && ent->index. returns true
+ * if a match was found and successfully overwritten.
+ * @cpuid: the kvm cpuid list to modify.
+ * @ent: cpuid entry to insert
+ */
+bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent);
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+ uint64_t a3);
+
/*
* Basic CPU control in CR0
*/
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 54d624dd6c10..e78d7e26ba61 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -573,6 +573,10 @@ struct vmx_pages {
void *eptp_hva;
uint64_t eptp_gpa;
void *eptp;
+
+ void *apic_access_hva;
+ uint64_t apic_access_gpa;
+ void *apic_access;
};
union vmx_basic {
@@ -615,5 +619,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t memslot, uint32_t eptp_memslot);
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot);
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint32_t eptp_memslot);
#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 2afa6618b396..d6c32c328e9a 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -350,3 +350,7 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
va_end(ap);
}
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+{
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index c8e0ec20d3bf..2f37b90ee1a9 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -94,6 +94,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
struct kvm_run *run = vcpu_state(vm, vcpu_id);
struct ucall ucall = {};
+ if (uc)
+ memset(uc, 0, sizeof(*uc));
+
if (run->exit_reason == KVM_EXIT_MMIO &&
run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
vm_vaddr_t gva;
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 74776ee228f2..126c6727a6b0 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -14,6 +14,7 @@
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <unistd.h>
#include <linux/kernel.h>
#define KVM_UTIL_PGS_PER_HUGEPG 512
@@ -85,6 +86,34 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
return ret;
}
+/* VCPU Enable Capability
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpu_id - VCPU
+ * cap - Capability
+ *
+ * Output Args: None
+ *
+ * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
+ *
+ * Enables a capability (KVM_CAP_*) on the VCPU.
+ */
+int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_enable_cap *cap)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpu_id);
+ int r;
+
+ TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id);
+
+ r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap);
+ TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n"
+ " rc: %i, errno: %i", r, errno);
+
+ return r;
+}
+
static void vm_open(struct kvm_vm *vm, int perm)
{
vm->kvm_fd = open(KVM_DEV_PATH, perm);
@@ -151,7 +180,7 @@ _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params)
* descriptor to control the created VM is created with the permissions
* given by perm (e.g. O_RDWR).
*/
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
{
struct kvm_vm *vm;
@@ -242,11 +271,6 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
return vm;
}
-struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
-{
- return _vm_create(mode, phy_pages, perm);
-}
-
/*
* VM Restart
*
@@ -664,13 +688,21 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
/* As needed perform madvise */
if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
- ret = madvise(region->host_mem, npages * vm->page_size,
- src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
- TEST_ASSERT(ret == 0, "madvise failed,\n"
- " addr: %p\n"
- " length: 0x%lx\n"
- " src_type: %x",
- region->host_mem, npages * vm->page_size, src_type);
+ struct stat statbuf;
+
+ ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf);
+ TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
+ "stat /sys/kernel/mm/transparent_hugepage");
+
+ TEST_ASSERT(ret == 0 || src_type != VM_MEM_SRC_ANONYMOUS_THP,
+ "VM_MEM_SRC_ANONYMOUS_THP requires THP to be configured in the host kernel");
+
+ if (ret == 0) {
+ ret = madvise(region->host_mem, npages * vm->page_size,
+ src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
+ TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %x",
+ region->host_mem, npages * vm->page_size, src_type);
+ }
}
region->unused_phy_pages = sparsebit_alloc();
@@ -1195,6 +1227,9 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
do {
rc = ioctl(vcpu->fd, KVM_RUN, NULL);
} while (rc == -1 && errno == EINTR);
+
+ assert_on_unhandled_exception(vm, vcpuid);
+
return rc;
}
@@ -1252,6 +1287,35 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
}
/*
+ * VM VCPU Get Reg List
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ *
+ * Output Args:
+ * None
+ *
+ * Return:
+ * A pointer to an allocated struct kvm_reg_list
+ *
+ * Get the list of guest registers which are supported for
+ * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls
+ */
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
+ int ret;
+
+ ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, &reg_list_n);
+ TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
+ reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
+ reg_list->n = reg_list_n.n;
+ vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list);
+ return reg_list;
+}
+
+/*
* VM VCPU Regs Get
*
* Input Args:
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index 2ef446520748..f07d383d03a1 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -50,6 +50,8 @@ struct kvm_vm {
vm_paddr_t pgd;
vm_vaddr_t gdt;
vm_vaddr_t tss;
+ vm_vaddr_t idt;
+ vm_vaddr_t handlers;
};
struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index a88c5d665725..7349bb2e1a24 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -241,3 +241,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr);
}
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+{
+}
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
index fd589dc9bfab..9d3b0f15249a 100644
--- a/tools/testing/selftests/kvm/lib/s390x/ucall.c
+++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c
@@ -38,6 +38,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
struct kvm_run *run = vcpu_state(vm, vcpu_id);
struct ucall ucall = {};
+ if (uc)
+ memset(uc, 0, sizeof(*uc));
+
if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
run->s390_sieic.icptcode == 4 &&
(run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 689e97c27ee2..8e04c0b1608e 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -4,10 +4,13 @@
*
* Copyright (C) 2020, Google LLC.
*/
-#include <stdlib.h>
+
+#include <assert.h>
#include <ctype.h>
#include <limits.h>
-#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+
#include "test_util.h"
/*
@@ -81,6 +84,21 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2)
return timespec_add_ns((struct timespec){0}, ns1 - ns2);
}
+struct timespec timespec_diff_now(struct timespec start)
+{
+ struct timespec end;
+
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ return timespec_sub(end, start);
+}
+
+struct timespec timespec_div(struct timespec ts, int divisor)
+{
+ int64_t ns = timespec_to_ns(ts) / divisor;
+
+ return timespec_add_ns((struct timespec){0}, ns);
+}
+
void print_skip(const char *fmt, ...)
{
va_list ap;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
new file mode 100644
index 000000000000..aaf7bc7d2ce1
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
@@ -0,0 +1,81 @@
+handle_exception:
+ push %r15
+ push %r14
+ push %r13
+ push %r12
+ push %r11
+ push %r10
+ push %r9
+ push %r8
+
+ push %rdi
+ push %rsi
+ push %rbp
+ push %rbx
+ push %rdx
+ push %rcx
+ push %rax
+ mov %rsp, %rdi
+
+ call route_exception
+
+ pop %rax
+ pop %rcx
+ pop %rdx
+ pop %rbx
+ pop %rbp
+ pop %rsi
+ pop %rdi
+ pop %r8
+ pop %r9
+ pop %r10
+ pop %r11
+ pop %r12
+ pop %r13
+ pop %r14
+ pop %r15
+
+ /* Discard vector and error code. */
+ add $16, %rsp
+ iretq
+
+/*
+ * Build the handle_exception wrappers which push the vector/error code on the
+ * stack and an array of pointers to those wrappers.
+ */
+.pushsection .rodata
+.globl idt_handlers
+idt_handlers:
+.popsection
+
+.macro HANDLERS has_error from to
+ vector = \from
+ .rept \to - \from + 1
+ .align 8
+
+ /* Fetch current address and append it to idt_handlers. */
+ current_handler = .
+.pushsection .rodata
+.quad current_handler
+.popsection
+
+ .if ! \has_error
+ pushq $0
+ .endif
+ pushq $vector
+ jmp handle_exception
+ vector = vector + 1
+ .endr
+.endm
+
+.global idt_handler_code
+idt_handler_code:
+ HANDLERS has_error=0 from=0 to=7
+ HANDLERS has_error=1 from=8 to=8
+ HANDLERS has_error=0 from=9 to=9
+ HANDLERS has_error=1 from=10 to=14
+ HANDLERS has_error=0 from=15 to=16
+ HANDLERS has_error=1 from=17 to=17
+ HANDLERS has_error=0 from=18 to=255
+
+.section .note.GNU-stack, "", %progbits
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index f6eb34eaa0d2..d10c5c05bdf0 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -12,9 +12,18 @@
#include "../kvm_util_internal.h"
#include "processor.h"
+#ifndef NUM_INTERRUPTS
+#define NUM_INTERRUPTS 256
+#endif
+
+#define DEFAULT_CODE_SELECTOR 0x8
+#define DEFAULT_DATA_SELECTOR 0x10
+
/* Minimum physical address used for virtual translation tables. */
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+vm_vaddr_t exception_handlers;
+
/* Virtual translation table structure declarations */
struct pageMapL4Entry {
uint64_t present:1;
@@ -392,11 +401,12 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
desc->limit0 = segp->limit & 0xFFFF;
desc->base0 = segp->base & 0xFFFF;
desc->base1 = segp->base >> 16;
- desc->s = segp->s;
desc->type = segp->type;
+ desc->s = segp->s;
desc->dpl = segp->dpl;
desc->p = segp->present;
desc->limit1 = segp->limit >> 16;
+ desc->avl = segp->avl;
desc->l = segp->l;
desc->db = segp->db;
desc->g = segp->g;
@@ -556,9 +566,9 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
kvm_seg_set_unusable(&sregs.ldt);
- kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
- kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
- kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
+ kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
+ kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
+ kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
break;
@@ -1118,3 +1128,131 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
*va_bits = (entry->eax >> 8) & 0xff;
}
}
+
+struct idt_entry {
+ uint16_t offset0;
+ uint16_t selector;
+ uint16_t ist : 3;
+ uint16_t : 5;
+ uint16_t type : 4;
+ uint16_t : 1;
+ uint16_t dpl : 2;
+ uint16_t p : 1;
+ uint16_t offset1;
+ uint32_t offset2; uint32_t reserved;
+};
+
+static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
+ int dpl, unsigned short selector)
+{
+ struct idt_entry *base =
+ (struct idt_entry *)addr_gva2hva(vm, vm->idt);
+ struct idt_entry *e = &base[vector];
+
+ memset(e, 0, sizeof(*e));
+ e->offset0 = addr;
+ e->selector = selector;
+ e->ist = 0;
+ e->type = 14;
+ e->dpl = dpl;
+ e->p = 1;
+ e->offset1 = addr >> 16;
+ e->offset2 = addr >> 32;
+}
+
+void kvm_exit_unexpected_vector(uint32_t value)
+{
+ outl(UNEXPECTED_VECTOR_PORT, value);
+}
+
+void route_exception(struct ex_regs *regs)
+{
+ typedef void(*handler)(struct ex_regs *);
+ handler *handlers = (handler *)exception_handlers;
+
+ if (handlers && handlers[regs->vector]) {
+ handlers[regs->vector](regs);
+ return;
+ }
+
+ kvm_exit_unexpected_vector(regs->vector);
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ extern void *idt_handlers;
+ int i;
+
+ vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0);
+ vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0);
+ /* Handlers have the same address in both address spaces.*/
+ for (i = 0; i < NUM_INTERRUPTS; i++)
+ set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
+ DEFAULT_CODE_SELECTOR);
+}
+
+void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct kvm_sregs sregs;
+
+ vcpu_sregs_get(vm, vcpuid, &sregs);
+ sregs.idt.base = vm->idt;
+ sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
+ sregs.gdt.base = vm->gdt;
+ sregs.gdt.limit = getpagesize() - 1;
+ kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
+ vcpu_sregs_set(vm, vcpuid, &sregs);
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_handle_exception(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
+{
+ vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
+
+ handlers[vector] = (vm_vaddr_t)handler;
+}
+
+void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO
+ && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT
+ && vcpu_state(vm, vcpuid)->io.size == 4) {
+ /* Grab pointer to io data */
+ uint32_t *data = (void *)vcpu_state(vm, vcpuid)
+ + vcpu_state(vm, vcpuid)->io.data_offset;
+
+ TEST_ASSERT(false,
+ "Unexpected vectored event in guest (vector:0x%x)",
+ *data);
+ }
+}
+
+bool set_cpuid(struct kvm_cpuid2 *cpuid,
+ struct kvm_cpuid_entry2 *ent)
+{
+ int i;
+
+ for (i = 0; i < cpuid->nent; i++) {
+ struct kvm_cpuid_entry2 *cur = &cpuid->entries[i];
+
+ if (cur->function != ent->function || cur->index != ent->index)
+ continue;
+
+ memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2));
+ return true;
+ }
+
+ return false;
+}
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+ uint64_t a3)
+{
+ uint64_t r;
+
+ asm volatile("vmcall"
+ : "=a"(r)
+ : "b"(a0), "c"(a1), "d"(a2), "S"(a3));
+ return r;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
index da4d89ad5419..a3489973e290 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
@@ -40,6 +40,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
struct kvm_run *run = vcpu_state(vm, vcpu_id);
struct ucall ucall = {};
+ if (uc)
+ memset(uc, 0, sizeof(*uc));
+
if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
struct kvm_regs regs;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index f1e00d43eea2..2448b30e8efa 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -542,3 +542,12 @@ void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
}
+
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint32_t eptp_memslot)
+{
+ vmx->apic_access = (void *)vm_vaddr_alloc(vm, getpagesize(),
+ 0x10000, 0, 0);
+ vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
+ vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
new file mode 100644
index 000000000000..b10a27485bad
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+extern unsigned char rdmsr_start;
+extern unsigned char rdmsr_end;
+
+static u64 do_rdmsr(u32 idx)
+{
+ u32 lo, hi;
+
+ asm volatile("rdmsr_start: rdmsr;"
+ "rdmsr_end:"
+ : "=a"(lo), "=c"(hi)
+ : "c"(idx));
+
+ return (((u64) hi) << 32) | lo;
+}
+
+extern unsigned char wrmsr_start;
+extern unsigned char wrmsr_end;
+
+static void do_wrmsr(u32 idx, u64 val)
+{
+ u32 lo, hi;
+
+ lo = val;
+ hi = val >> 32;
+
+ asm volatile("wrmsr_start: wrmsr;"
+ "wrmsr_end:"
+ : : "a"(lo), "c"(idx), "d"(hi));
+}
+
+static int nr_gp;
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ unsigned char *rip = (unsigned char *)regs->rip;
+ bool r, w;
+
+ r = rip == &rdmsr_start;
+ w = rip == &wrmsr_start;
+ GUEST_ASSERT(r || w);
+
+ nr_gp++;
+
+ if (r)
+ regs->rip = (uint64_t)&rdmsr_end;
+ else
+ regs->rip = (uint64_t)&wrmsr_end;
+}
+
+struct msr_data {
+ uint32_t idx;
+ const char *name;
+};
+
+#define TEST_MSR(msr) { .idx = msr, .name = #msr }
+#define UCALL_PR_MSR 0xdeadbeef
+#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
+
+/*
+ * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
+ * written, as the KVM_CPUID_FEATURES leaf is cleared.
+ */
+static struct msr_data msrs_to_test[] = {
+ TEST_MSR(MSR_KVM_SYSTEM_TIME),
+ TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
+ TEST_MSR(MSR_KVM_WALL_CLOCK),
+ TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
+ TEST_MSR(MSR_KVM_ASYNC_PF_EN),
+ TEST_MSR(MSR_KVM_STEAL_TIME),
+ TEST_MSR(MSR_KVM_PV_EOI_EN),
+ TEST_MSR(MSR_KVM_POLL_CONTROL),
+ TEST_MSR(MSR_KVM_ASYNC_PF_INT),
+ TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
+};
+
+static void test_msr(struct msr_data *msr)
+{
+ PR_MSR(msr);
+ do_rdmsr(msr->idx);
+ GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
+
+ nr_gp = 0;
+ do_wrmsr(msr->idx, 0);
+ GUEST_ASSERT(READ_ONCE(nr_gp) == 1);
+ nr_gp = 0;
+}
+
+struct hcall_data {
+ uint64_t nr;
+ const char *name;
+};
+
+#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
+#define UCALL_PR_HCALL 0xdeadc0de
+#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
+
+/*
+ * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
+ * features have been cleared in KVM_CPUID_FEATURES.
+ */
+static struct hcall_data hcalls_to_test[] = {
+ TEST_HCALL(KVM_HC_KICK_CPU),
+ TEST_HCALL(KVM_HC_SEND_IPI),
+ TEST_HCALL(KVM_HC_SCHED_YIELD),
+};
+
+static void test_hcall(struct hcall_data *hc)
+{
+ uint64_t r;
+
+ PR_HCALL(hc);
+ r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
+ GUEST_ASSERT(r == -KVM_ENOSYS);
+}
+
+static void guest_main(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) {
+ test_msr(&msrs_to_test[i]);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) {
+ test_hcall(&hcalls_to_test[i]);
+ }
+
+ GUEST_DONE();
+}
+
+static void clear_kvm_cpuid_features(struct kvm_cpuid2 *cpuid)
+{
+ struct kvm_cpuid_entry2 ent = {0};
+
+ ent.function = KVM_CPUID_FEATURES;
+ TEST_ASSERT(set_cpuid(cpuid, &ent),
+ "failed to clear KVM_CPUID_FEATURES leaf");
+}
+
+static void pr_msr(struct ucall *uc)
+{
+ struct msr_data *msr = (struct msr_data *)uc->args[0];
+
+ pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx);
+}
+
+static void pr_hcall(struct ucall *uc)
+{
+ struct hcall_data *hc = (struct hcall_data *)uc->args[0];
+
+ pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
+}
+
+static void handle_abort(struct ucall *uc)
+{
+ TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0],
+ __FILE__, uc->args[1]);
+}
+
+#define VCPU_ID 0
+
+static void enter_guest(struct kvm_vm *vm)
+{
+ struct kvm_run *run;
+ struct ucall uc;
+ int r;
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ while (true) {
+ r = _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_PR_MSR:
+ pr_msr(&uc);
+ break;
+ case UCALL_PR_HCALL:
+ pr_hcall(&uc);
+ break;
+ case UCALL_ABORT:
+ handle_abort(&uc);
+ return;
+ case UCALL_DONE:
+ return;
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_enable_cap cap = {0};
+ struct kvm_cpuid2 *best;
+ struct kvm_vm *vm;
+
+ if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) {
+ pr_info("will skip kvm paravirt restriction tests.\n");
+ return 0;
+ }
+
+ vm = vm_create_default(VCPU_ID, 0, guest_main);
+
+ cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID;
+ cap.args[0] = 1;
+ vcpu_enable_cap(vm, VCPU_ID, &cap);
+
+ best = kvm_get_supported_cpuid();
+ clear_kvm_cpuid_features(best);
+ vcpu_set_cpuid(vm, VCPU_ID, best);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+ vm_handle_exception(vm, GP_VECTOR, guest_gp_handler);
+
+ enter_guest(vm);
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
new file mode 100644
index 000000000000..1f65342d6cb7
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_apic_access_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * The first subtest simply checks to see that an L2 guest can be
+ * launched with a valid APIC-access address that is backed by a
+ * page of L1 physical memory.
+ *
+ * The second subtest sets the APIC-access address to a (valid) L1
+ * physical address that is not backed by memory. KVM can't handle
+ * this situation, so resuming L2 should result in a KVM exit for
+ * internal error (emulation). This is not an architectural
+ * requirement. It is just a shortcoming of KVM. The internal error
+ * is unfortunate, but it's better than what used to happen!
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define VCPU_ID 0
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+ /* Exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+ control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+ vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa);
+
+ /* Try to launch L2 with the memory-backed APIC-access address. */
+ GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ vmwrite(APIC_ACCESS_ADDR, high_gpa);
+
+ /* Try to resume L2 with the unbacked APIC-access address. */
+ GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned long apic_access_addr = ~0ul;
+ unsigned int paddr_width;
+ unsigned int vaddr_width;
+ vm_vaddr_t vmx_pages_gva;
+ unsigned long high_gpa;
+ struct vmx_pages *vmx;
+ bool done = false;
+
+ nested_vmx_check_supported();
+
+ vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ kvm_get_cpu_address_width(&paddr_width, &vaddr_width);
+ high_gpa = (1ul << paddr_width) - getpagesize();
+ if ((unsigned long)DEFAULT_GUEST_PHY_PAGES * getpagesize() > high_gpa) {
+ print_skip("No unbacked physical page available");
+ exit(KSFT_SKIP);
+ }
+
+ vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ prepare_virtualize_apic_accesses(vmx, vm, 0);
+ vcpu_args_set(vm, VCPU_ID, 2, vmx_pages_gva, high_gpa);
+
+ while (!done) {
+ volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ vcpu_run(vm, VCPU_ID);
+ if (apic_access_addr == high_gpa) {
+ TEST_ASSERT(run->exit_reason ==
+ KVM_EXIT_INTERNAL_ERROR,
+ "Got exit reason other than KVM_EXIT_INTERNAL_ERROR: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ TEST_ASSERT(run->internal.suberror ==
+ KVM_INTERNAL_ERROR_EMULATION,
+ "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n",
+ run->internal.suberror);
+ break;
+ }
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ apic_access_addr = uc.args[1];
+ break;
+ case UCALL_DONE:
+ done = true;
+ break;
+ default:
+ TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd);
+ }
+ }
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 30848ca36555..a5ce26d548e4 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -136,7 +136,7 @@ endif
ifeq ($(OVERRIDE_TARGETS),)
LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h
$(OUTPUT)/%:%.c $(LOCAL_HDRS)
- $(LINK.c) $^ $(LDLIBS) -o $@
+ $(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@
$(OUTPUT)/%.o:%.S
$(COMPILE.S) $^ -o $@
diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore
index 0bc64a6d4c18..17f2d8415162 100644
--- a/tools/testing/selftests/mount/.gitignore
+++ b/tools/testing/selftests/mount/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
unprivileged-remount-test
+nosymfollow-test
diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
index 026890744215..2d9454841644 100644
--- a/tools/testing/selftests/mount/Makefile
+++ b/tools/testing/selftests/mount/Makefile
@@ -3,7 +3,7 @@
CFLAGS = -Wall \
-O2
-TEST_PROGS := run_tests.sh
-TEST_GEN_FILES := unprivileged-remount-test
+TEST_PROGS := run_unprivileged_remount.sh run_nosymfollow.sh
+TEST_GEN_FILES := unprivileged-remount-test nosymfollow-test
include ../lib.mk
diff --git a/tools/testing/selftests/mount/nosymfollow-test.c b/tools/testing/selftests/mount/nosymfollow-test.c
new file mode 100644
index 000000000000..650d6d80a1d2
--- /dev/null
+++ b/tools/testing/selftests/mount/nosymfollow-test.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+
+#ifndef MS_NOSYMFOLLOW
+# define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */
+#endif
+
+#ifndef ST_NOSYMFOLLOW
+# define ST_NOSYMFOLLOW 0x2000 /* Do not follow symlinks */
+#endif
+
+#define DATA "/tmp/data"
+#define LINK "/tmp/symlink"
+#define TMP "/tmp"
+
+static void die(char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ exit(EXIT_FAILURE);
+}
+
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt,
+ va_list ap)
+{
+ ssize_t written;
+ char buf[4096];
+ int buf_len;
+ int fd;
+
+ buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
+ if (buf_len < 0)
+ die("vsnprintf failed: %s\n", strerror(errno));
+
+ if (buf_len >= sizeof(buf))
+ die("vsnprintf output truncated\n");
+
+ fd = open(filename, O_WRONLY);
+ if (fd < 0) {
+ if ((errno == ENOENT) && enoent_ok)
+ return;
+ die("open of %s failed: %s\n", filename, strerror(errno));
+ }
+
+ written = write(fd, buf, buf_len);
+ if (written != buf_len) {
+ if (written >= 0) {
+ die("short write to %s\n", filename);
+ } else {
+ die("write to %s failed: %s\n",
+ filename, strerror(errno));
+ }
+ }
+
+ if (close(fd) != 0)
+ die("close of %s failed: %s\n", filename, strerror(errno));
+}
+
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(true, filename, fmt, ap);
+ va_end(ap);
+}
+
+static void write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(false, filename, fmt, ap);
+ va_end(ap);
+}
+
+static void create_and_enter_ns(void)
+{
+ uid_t uid = getuid();
+ gid_t gid = getgid();
+
+ if (unshare(CLONE_NEWUSER) != 0)
+ die("unshare(CLONE_NEWUSER) failed: %s\n", strerror(errno));
+
+ maybe_write_file("/proc/self/setgroups", "deny");
+ write_file("/proc/self/uid_map", "0 %d 1", uid);
+ write_file("/proc/self/gid_map", "0 %d 1", gid);
+
+ if (setgid(0) != 0)
+ die("setgid(0) failed %s\n", strerror(errno));
+ if (setuid(0) != 0)
+ die("setuid(0) failed %s\n", strerror(errno));
+
+ if (unshare(CLONE_NEWNS) != 0)
+ die("unshare(CLONE_NEWNS) failed: %s\n", strerror(errno));
+}
+
+static void setup_symlink(void)
+{
+ int data, err;
+
+ data = creat(DATA, O_RDWR);
+ if (data < 0)
+ die("creat failed: %s\n", strerror(errno));
+
+ err = symlink(DATA, LINK);
+ if (err < 0)
+ die("symlink failed: %s\n", strerror(errno));
+
+ if (close(data) != 0)
+ die("close of %s failed: %s\n", DATA, strerror(errno));
+}
+
+static void test_link_traversal(bool nosymfollow)
+{
+ int link;
+
+ link = open(LINK, 0, O_RDWR);
+ if (nosymfollow) {
+ if ((link != -1 || errno != ELOOP)) {
+ die("link traversal unexpected result: %d, %s\n",
+ link, strerror(errno));
+ }
+ } else {
+ if (link < 0)
+ die("link traversal failed: %s\n", strerror(errno));
+
+ if (close(link) != 0)
+ die("close of link failed: %s\n", strerror(errno));
+ }
+}
+
+static void test_readlink(void)
+{
+ char buf[4096];
+ ssize_t ret;
+
+ bzero(buf, sizeof(buf));
+
+ ret = readlink(LINK, buf, sizeof(buf));
+ if (ret < 0)
+ die("readlink failed: %s\n", strerror(errno));
+ if (strcmp(buf, DATA) != 0)
+ die("readlink strcmp failed: '%s' '%s'\n", buf, DATA);
+}
+
+static void test_realpath(void)
+{
+ char *path = realpath(LINK, NULL);
+
+ if (!path)
+ die("realpath failed: %s\n", strerror(errno));
+ if (strcmp(path, DATA) != 0)
+ die("realpath strcmp failed\n");
+
+ free(path);
+}
+
+static void test_statfs(bool nosymfollow)
+{
+ struct statfs buf;
+ int ret;
+
+ ret = statfs(TMP, &buf);
+ if (ret)
+ die("statfs failed: %s\n", strerror(errno));
+
+ if (nosymfollow) {
+ if ((buf.f_flags & ST_NOSYMFOLLOW) == 0)
+ die("ST_NOSYMFOLLOW not set on %s\n", TMP);
+ } else {
+ if ((buf.f_flags & ST_NOSYMFOLLOW) != 0)
+ die("ST_NOSYMFOLLOW set on %s\n", TMP);
+ }
+}
+
+static void run_tests(bool nosymfollow)
+{
+ test_link_traversal(nosymfollow);
+ test_readlink();
+ test_realpath();
+ test_statfs(nosymfollow);
+}
+
+int main(int argc, char **argv)
+{
+ create_and_enter_ns();
+
+ if (mount("testing", TMP, "ramfs", 0, NULL) != 0)
+ die("mount failed: %s\n", strerror(errno));
+
+ setup_symlink();
+ run_tests(false);
+
+ if (mount("testing", TMP, "ramfs", MS_REMOUNT|MS_NOSYMFOLLOW, NULL) != 0)
+ die("remount failed: %s\n", strerror(errno));
+
+ run_tests(true);
+
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/mount/run_nosymfollow.sh b/tools/testing/selftests/mount/run_nosymfollow.sh
new file mode 100755
index 000000000000..5fbbf03043a2
--- /dev/null
+++ b/tools/testing/selftests/mount/run_nosymfollow.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+./nosymfollow-test
diff --git a/tools/testing/selftests/mount/run_tests.sh b/tools/testing/selftests/mount/run_unprivileged_remount.sh
index 4ab8f507dcba..4ab8f507dcba 100755
--- a/tools/testing/selftests/mount/run_tests.sh
+++ b/tools/testing/selftests/mount/run_unprivileged_remount.sh
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index ef352477cac6..fa5fa425d148 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -21,6 +21,7 @@ TEST_PROGS += rxtimestamp.sh
TEST_PROGS += devlink_port_split.py
TEST_PROGS += drop_monitor_tests.sh
TEST_PROGS += vrf_route_leaking.sh
+TEST_PROGS += bareudp.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
new file mode 100755
index 000000000000..f366cadbc5e8
--- /dev/null
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -0,0 +1,546 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Test various bareudp tunnel configurations.
+#
+# The bareudp module allows to tunnel network protocols like IP or MPLS over
+# UDP, without adding any intermediate header. This scripts tests several
+# configurations of bareudp (using IPv4 or IPv6 as underlay and transporting
+# IPv4, IPv6 or MPLS packets on the overlay).
+#
+# Network topology:
+#
+# * A chain of 4 network namespaces, connected with veth pairs. Each veth
+# is assigned an IPv4 and an IPv6 address. A host-route allows a veth to
+# join its peer.
+#
+# * NS0 and NS3 are at the extremities of the chain. They have additional
+# IPv4 and IPv6 addresses on their loopback device. Routes are added in NS0
+# and NS3, so that they can communicate using these overlay IP addresses.
+# For IPv4 and IPv6 reachability tests, the route simply sets the peer's
+# veth address as gateway. For MPLS reachability tests, an MPLS header is
+# also pushed before the IP header.
+#
+# * NS1 and NS2 are the intermediate namespaces. They use a bareudp device to
+# encapsulate the traffic into UDP.
+#
+# +-----------------------------------------------------------------------+
+# | NS0 |
+# | |
+# | lo: |
+# | * IPv4 address: 192.0.2.100/32 |
+# | * IPv6 address: 2001:db8::100/128 |
+# | * IPv6 address: 2001:db8::200/128 |
+# | * IPv4 route: 192.0.2.103/32 reachable via 192.0.2.11 |
+# | * IPv6 route: 2001:db8::103/128 reachable via 2001:db8::11 |
+# | * IPv6 route: 2001:db8::203/128 reachable via 2001:db8::11 |
+# | (encapsulated with MPLS label 203) |
+# | |
+# | veth01: |
+# | ^ * IPv4 address: 192.0.2.10, peer 192.0.2.11/32 |
+# | | * IPv6 address: 2001:db8::10, peer 2001:db8::11/128 |
+# | | |
+# +---+-------------------------------------------------------------------+
+# |
+# | Traffic type: IP or MPLS (depending on test)
+# |
+# +---+-------------------------------------------------------------------+
+# | | NS1 |
+# | | |
+# | v |
+# | veth10: |
+# | * IPv4 address: 192.0.2.11, peer 192.0.2.10/32 |
+# | * IPv6 address: 2001:db8::11, peer 2001:db8::10/128 |
+# | |
+# | bareudp_ns1: |
+# | * Encapsulate IP or MPLS packets received on veth10 into UDP |
+# | and send the resulting packets through veth12. |
+# | * Decapsulate bareudp packets (either IP or MPLS, over UDP) |
+# | received on veth12 and send the inner packets through veth10. |
+# | |
+# | veth12: |
+# | ^ * IPv4 address: 192.0.2.21, peer 192.0.2.22/32 |
+# | | * IPv6 address: 2001:db8::21, peer 2001:db8::22/128 |
+# | | |
+# +---+-------------------------------------------------------------------+
+# |
+# | Traffic type: IP or MPLS (depending on test), over UDP
+# |
+# +---+-------------------------------------------------------------------+
+# | | NS2 |
+# | | |
+# | v |
+# | veth21: |
+# | * IPv4 address: 192.0.2.22, peer 192.0.2.21/32 |
+# | * IPv6 address: 2001:db8::22, peer 2001:db8::21/128 |
+# | |
+# | bareudp_ns2: |
+# | * Decapsulate bareudp packets (either IP or MPLS, over UDP) |
+# | received on veth21 and send the inner packets through veth23. |
+# | * Encapsulate IP or MPLS packets received on veth23 into UDP |
+# | and send the resulting packets through veth21. |
+# | |
+# | veth23: |
+# | ^ * IPv4 address: 192.0.2.32, peer 192.0.2.33/32 |
+# | | * IPv6 address: 2001:db8::32, peer 2001:db8::33/128 |
+# | | |
+# +---+-------------------------------------------------------------------+
+# |
+# | Traffic type: IP or MPLS (depending on test)
+# |
+# +---+-------------------------------------------------------------------+
+# | | NS3 |
+# | v |
+# | veth32: |
+# | * IPv4 address: 192.0.2.33, peer 192.0.2.32/32 |
+# | * IPv6 address: 2001:db8::33, peer 2001:db8::32/128 |
+# | |
+# | lo: |
+# | * IPv4 address: 192.0.2.103/32 |
+# | * IPv6 address: 2001:db8::103/128 |
+# | * IPv6 address: 2001:db8::203/128 |
+# | * IPv4 route: 192.0.2.100/32 reachable via 192.0.2.32 |
+# | * IPv6 route: 2001:db8::100/128 reachable via 2001:db8::32 |
+# | * IPv6 route: 2001:db8::200/128 reachable via 2001:db8::32 |
+# | (encapsulated with MPLS label 200) |
+# | |
+# +-----------------------------------------------------------------------+
+
+ERR=4 # Return 4 by default, which is the SKIP code for kselftest
+PING6="ping"
+PAUSE_ON_FAIL="no"
+
+readonly NS0=$(mktemp -u ns0-XXXXXXXX)
+readonly NS1=$(mktemp -u ns1-XXXXXXXX)
+readonly NS2=$(mktemp -u ns2-XXXXXXXX)
+readonly NS3=$(mktemp -u ns3-XXXXXXXX)
+
+# Exit the script after having removed the network namespaces it created
+#
+# Parameters:
+#
+# * The list of network namespaces to delete before exiting.
+#
+exit_cleanup()
+{
+ for ns in "$@"; do
+ ip netns delete "${ns}" 2>/dev/null || true
+ done
+
+ if [ "${ERR}" -eq 4 ]; then
+ echo "Error: Setting up the testing environment failed." >&2
+ fi
+
+ exit "${ERR}"
+}
+
+# Create the four network namespaces used by the script (NS0, NS1, NS2 and NS3)
+#
+# New namespaces are cleaned up manually in case of error, to ensure that only
+# namespaces created by this script are deleted.
+create_namespaces()
+{
+ ip netns add "${NS0}" || exit_cleanup
+ ip netns add "${NS1}" || exit_cleanup "${NS0}"
+ ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}"
+ ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}"
+}
+
+# The trap function handler
+#
+exit_cleanup_all()
+{
+ exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}"
+}
+
+# Configure a network interface using a host route
+#
+# Parameters
+#
+# * $1: the netns the network interface resides in,
+# * $2: the network interface name,
+# * $3: the local IPv4 address to assign to this interface,
+# * $4: the IPv4 address of the remote network interface,
+# * $5: the local IPv6 address to assign to this interface,
+# * $6: the IPv6 address of the remote network interface.
+#
+iface_config()
+{
+ local NS="${1}"; readonly NS
+ local DEV="${2}"; readonly DEV
+ local LOCAL_IP4="${3}"; readonly LOCAL_IP4
+ local PEER_IP4="${4}"; readonly PEER_IP4
+ local LOCAL_IP6="${5}"; readonly LOCAL_IP6
+ local PEER_IP6="${6}"; readonly PEER_IP6
+
+ ip -netns "${NS}" link set dev "${DEV}" up
+ ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP4}" peer "${PEER_IP4}"
+ ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP6}" peer "${PEER_IP6}" nodad
+}
+
+# Create base networking topology:
+#
+# * set up the loopback device in all network namespaces (NS0..NS3),
+# * set up a veth pair to connect each netns in sequence (NS0 with NS1,
+# NS1 with NS2, etc.),
+# * add and IPv4 and an IPv6 address on each veth interface,
+# * prepare the ingress qdiscs in the intermediate namespaces.
+#
+setup_underlay()
+{
+ for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do
+ ip -netns "${ns}" link set dev lo up
+ done;
+
+ ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}"
+ ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}"
+ ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}"
+ iface_config "${NS0}" veth01 192.0.2.10 192.0.2.11/32 2001:db8::10 2001:db8::11/128
+ iface_config "${NS1}" veth10 192.0.2.11 192.0.2.10/32 2001:db8::11 2001:db8::10/128
+ iface_config "${NS1}" veth12 192.0.2.21 192.0.2.22/32 2001:db8::21 2001:db8::22/128
+ iface_config "${NS2}" veth21 192.0.2.22 192.0.2.21/32 2001:db8::22 2001:db8::21/128
+ iface_config "${NS2}" veth23 192.0.2.32 192.0.2.33/32 2001:db8::32 2001:db8::33/128
+ iface_config "${NS3}" veth32 192.0.2.33 192.0.2.32/32 2001:db8::33 2001:db8::32/128
+
+ tc -netns "${NS1}" qdisc add dev veth10 ingress
+ tc -netns "${NS2}" qdisc add dev veth23 ingress
+}
+
+# Set up the IPv4, IPv6 and MPLS overlays.
+#
+# Configuration is similar for all protocols:
+#
+# * add an overlay IP address on the loopback interface of each edge
+# namespace,
+# * route these IP addresses via the intermediate namespaces (for the MPLS
+# tests, this is also where MPLS encapsulation is done),
+# * add routes for these IP addresses (or MPLS labels) in the intermediate
+# namespaces.
+#
+# The bareudp encapsulation isn't configured in setup_overlay_*(). That will be
+# done just before running the reachability tests.
+
+setup_overlay_ipv4()
+{
+ # Add the overlay IP addresses and route them through the veth devices
+ ip -netns "${NS0}" address add 192.0.2.100/32 dev lo
+ ip -netns "${NS3}" address add 192.0.2.103/32 dev lo
+ ip -netns "${NS0}" route add 192.0.2.103/32 src 192.0.2.100 via 192.0.2.11
+ ip -netns "${NS3}" route add 192.0.2.100/32 src 192.0.2.103 via 192.0.2.32
+
+ # Route the overlay addresses in the intermediate namespaces
+ # (used after bareudp decapsulation)
+ ip netns exec "${NS1}" sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1
+ ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10
+ ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33
+
+ # The intermediate namespaces don't have routes for the reverse path,
+ # as it will be handled by tc. So we need to ensure that rp_filter is
+ # not going to block the traffic.
+ ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0
+ ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0
+}
+
+setup_overlay_ipv6()
+{
+ # Add the overlay IP addresses and route them through the veth devices
+ ip -netns "${NS0}" address add 2001:db8::100/128 dev lo
+ ip -netns "${NS3}" address add 2001:db8::103/128 dev lo
+ ip -netns "${NS0}" route add 2001:db8::103/128 src 2001:db8::100 via 2001:db8::11
+ ip -netns "${NS3}" route add 2001:db8::100/128 src 2001:db8::103 via 2001:db8::32
+
+ # Route the overlay addresses in the intermediate namespaces
+ # (used after bareudp decapsulation)
+ ip netns exec "${NS1}" sysctl -qw net.ipv6.conf.all.forwarding=1
+ ip netns exec "${NS2}" sysctl -qw net.ipv6.conf.all.forwarding=1
+ ip -netns "${NS1}" route add 2001:db8::100/128 via 2001:db8::10
+ ip -netns "${NS2}" route add 2001:db8::103/128 via 2001:db8::33
+}
+
+setup_overlay_mpls()
+{
+ # Add specific overlay IP addresses, routed over MPLS
+ ip -netns "${NS0}" address add 2001:db8::200/128 dev lo
+ ip -netns "${NS3}" address add 2001:db8::203/128 dev lo
+ ip -netns "${NS0}" route add 2001:db8::203/128 src 2001:db8::200 encap mpls 203 via 2001:db8::11
+ ip -netns "${NS3}" route add 2001:db8::200/128 src 2001:db8::203 encap mpls 200 via 2001:db8::32
+
+ # Route the MPLS packets in the intermediate namespaces
+ # (used after bareudp decapsulation)
+ ip netns exec "${NS1}" sysctl -qw net.mpls.platform_labels=256
+ ip netns exec "${NS2}" sysctl -qw net.mpls.platform_labels=256
+ ip -netns "${NS1}" -family mpls route add 200 via inet6 2001:db8::10
+ ip -netns "${NS2}" -family mpls route add 203 via inet6 2001:db8::33
+}
+
+# Run "ping" from NS0 and print the result
+#
+# Parameters:
+#
+# * $1: the variant of ping to use (normally either "ping" or "ping6"),
+# * $2: the IP address to ping,
+# * $3: a human readable description of the purpose of the test.
+#
+# If the test fails and PAUSE_ON_FAIL is active, the user is given the
+# possibility to continue with the next test or to quit immediately.
+#
+ping_test_one()
+{
+ local PING="$1"; readonly PING
+ local IP="$2"; readonly IP
+ local MSG="$3"; readonly MSG
+ local RET
+
+ printf "TEST: %-60s " "${MSG}"
+
+ set +e
+ ip netns exec "${NS0}" "${PING}" -w 5 -c 1 "${IP}" > /dev/null 2>&1
+ RET=$?
+ set -e
+
+ if [ "${RET}" -eq 0 ]; then
+ printf "[ OK ]\n"
+ else
+ ERR=1
+ printf "[FAIL]\n"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ printf "\nHit enter to continue, 'q' to quit\n"
+ read a
+ if [ "$a" = "q" ]; then
+ exit 1
+ fi
+ fi
+ fi
+}
+
+# Run reachability tests
+#
+# Parameters:
+#
+# * $1: human readable string describing the underlay protocol.
+#
+# $IPV4, $IPV6, $MPLS_UC and $MULTIPROTO are inherited from the calling
+# function.
+#
+ping_test()
+{
+ local UNDERLAY="$1"; readonly UNDERLAY
+ local MODE
+ local MSG
+
+ if [ "${MULTIPROTO}" = "multiproto" ]; then
+ MODE=" (multiproto mode)"
+ else
+ MODE=""
+ fi
+
+ if [ $IPV4 ]; then
+ ping_test_one "ping" "192.0.2.103" "IPv4 packets over ${UNDERLAY}${MODE}"
+ fi
+ if [ $IPV6 ]; then
+ ping_test_one "${PING6}" "2001:db8::103" "IPv6 packets over ${UNDERLAY}${MODE}"
+ fi
+ if [ $MPLS_UC ]; then
+ ping_test_one "${PING6}" "2001:db8::203" "Unicast MPLS packets over ${UNDERLAY}${MODE}"
+ fi
+}
+
+# Set up a bareudp overlay and run reachability tests over IPv4 and IPv6
+#
+# Parameters:
+#
+# * $1: the packet type (protocol) to be handled by bareudp,
+# * $2: a flag to activate or deactivate bareudp's "multiproto" mode.
+#
+test_overlay()
+{
+ local ETHERTYPE="$1"; readonly ETHERTYPE
+ local MULTIPROTO="$2"; readonly MULTIPROTO
+ local IPV4
+ local IPV6
+ local MPLS_UC
+
+ case "${ETHERTYPE}" in
+ "ipv4")
+ IPV4="ipv4"
+ if [ "${MULTIPROTO}" = "multiproto" ]; then
+ IPV6="ipv6"
+ else
+ IPV6=""
+ fi
+ MPLS_UC=""
+ ;;
+ "ipv6")
+ IPV6="ipv6"
+ IPV4=""
+ MPLS_UC=""
+ ;;
+ "mpls_uc")
+ MPLS_UC="mpls_uc"
+ IPV4=""
+ IPV6=""
+ ;;
+ *)
+ exit 1
+ ;;
+ esac
+ readonly IPV4
+ readonly IPV6
+ readonly MPLS_UC
+
+ # Create the bareudp devices in the intermediate namespaces
+ ip -netns "${NS1}" link add name bareudp_ns1 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}"
+ ip -netns "${NS2}" link add name bareudp_ns2 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}"
+
+ # IPv4 over UDPv4
+ if [ $IPV4 ]; then
+ # Encapsulation instructions for bareudp over IPv4
+ tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4 \
+ flower dst_ip 192.0.2.103/32 \
+ action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \
+ action mirred egress redirect dev bareudp_ns1
+ tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4 \
+ flower dst_ip 192.0.2.100/32 \
+ action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \
+ action mirred egress redirect dev bareudp_ns2
+ fi
+
+ # IPv6 over UDPv4
+ if [ $IPV6 ]; then
+ # Encapsulation instructions for bareudp over IPv4
+ tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6 \
+ flower dst_ip 2001:db8::103/128 \
+ action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \
+ action mirred egress redirect dev bareudp_ns1
+ tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6 \
+ flower dst_ip 2001:db8::100/128 \
+ action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \
+ action mirred egress redirect dev bareudp_ns2
+ fi
+
+ # MPLS (unicast) over UDPv4
+ if [ $MPLS_UC ]; then
+ ip netns exec "${NS1}" sysctl -qw net.mpls.conf.bareudp_ns1.input=1
+ ip netns exec "${NS2}" sysctl -qw net.mpls.conf.bareudp_ns2.input=1
+
+ # Encapsulation instructions for bareudp over IPv4
+ tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc \
+ flower mpls_label 203 \
+ action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \
+ action mirred egress redirect dev bareudp_ns1
+ tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc \
+ flower mpls_label 200 \
+ action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \
+ action mirred egress redirect dev bareudp_ns2
+ fi
+
+ # Test IPv4 underlay
+ ping_test "UDPv4"
+
+ # Cleanup bareudp encapsulation instructions, as they were specific to
+ # the IPv4 underlay, before setting up and testing the IPv6 underlay
+ tc -netns "${NS1}" filter delete dev veth10 ingress
+ tc -netns "${NS2}" filter delete dev veth23 ingress
+
+ # IPv4 over UDPv6
+ if [ $IPV4 ]; then
+ # New encapsulation instructions for bareudp over IPv6
+ tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4 \
+ flower dst_ip 192.0.2.103/32 \
+ action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \
+ action mirred egress redirect dev bareudp_ns1
+ tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4 \
+ flower dst_ip 192.0.2.100/32 \
+ action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \
+ action mirred egress redirect dev bareudp_ns2
+ fi
+
+ # IPv6 over UDPv6
+ if [ $IPV6 ]; then
+ # New encapsulation instructions for bareudp over IPv6
+ tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6 \
+ flower dst_ip 2001:db8::103/128 \
+ action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \
+ action mirred egress redirect dev bareudp_ns1
+ tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6 \
+ flower dst_ip 2001:db8::100/128 \
+ action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \
+ action mirred egress redirect dev bareudp_ns2
+ fi
+
+ # MPLS (unicast) over UDPv6
+ if [ $MPLS_UC ]; then
+ # New encapsulation instructions for bareudp over IPv6
+ tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc \
+ flower mpls_label 203 \
+ action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \
+ action mirred egress redirect dev bareudp_ns1
+ tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc \
+ flower mpls_label 200 \
+ action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \
+ action mirred egress redirect dev bareudp_ns2
+ fi
+
+ # Test IPv6 underlay
+ ping_test "UDPv6"
+
+ tc -netns "${NS1}" filter delete dev veth10 ingress
+ tc -netns "${NS2}" filter delete dev veth23 ingress
+ ip -netns "${NS1}" link delete bareudp_ns1
+ ip -netns "${NS2}" link delete bareudp_ns2
+}
+
+check_features()
+{
+ ip link help 2>&1 | grep -q bareudp
+ if [ $? -ne 0 ]; then
+ echo "Missing bareudp support in iproute2" >&2
+ exit_cleanup
+ fi
+
+ # Use ping6 on systems where ping doesn't handle IPv6
+ ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING6="ping6"
+}
+
+usage()
+{
+ echo "Usage: $0 [-p]"
+ exit 1
+}
+
+while getopts :p o
+do
+ case $o in
+ p) PAUSE_ON_FAIL="yes";;
+ *) usage;;
+ esac
+done
+
+check_features
+
+# Create namespaces before setting up the exit trap.
+# Otherwise, exit_cleanup_all() could delete namespaces that were not created
+# by this script.
+create_namespaces
+
+set -e
+trap exit_cleanup_all EXIT
+
+setup_underlay
+setup_overlay_ipv4
+setup_overlay_ipv6
+setup_overlay_mpls
+
+test_overlay ipv4 nomultiproto
+test_overlay ipv6 nomultiproto
+test_overlay ipv4 multiproto
+test_overlay mpls_uc nomultiproto
+
+if [ "${ERR}" -eq 1 ]; then
+ echo "Some tests failed." >&2
+else
+ ERR=0
+fi
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 4d5df8e1eee7..614d5477365a 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -34,3 +34,10 @@ CONFIG_TRACEPOINTS=y
CONFIG_NET_DROP_MONITOR=m
CONFIG_NETDEVSIM=m
CONFIG_NET_FOU=m
+CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_BAREUDP=m
diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 88d2472ba151..675eff45b037 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,11 +1,37 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="reportleave_test"
+ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
+ v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
+ v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \
+ v3exc_timeout_test v3star_ex_auto_add_test"
NUM_NETIFS=4
CHECK_TC="yes"
TEST_GROUP="239.10.10.10"
TEST_GROUP_MAC="01:00:5e:0a:0a:0a"
+
+ALL_GROUP="224.0.0.1"
+ALL_MAC="01:00:5e:00:00:01"
+
+# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.1,192.0.2.2,192.0.2.3
+MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:03"
+# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.10,192.0.2.11,192.0.2.12
+MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
+# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.20,192.0.2.30
+MZPKT_IS_INC3="22:00:5f:b4:00:00:00:01:01:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
+# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12
+MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
+# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.20,192.0.2.30
+MZPKT_ALLOW2="22:00:5b:b4:00:00:00:01:05:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
+# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21
+MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15"
+# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.20,192.0.2.30
+MZPKT_IS_EXC2="22:00:5e:b4:00:00:00:01:02:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
+# IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30
+MZPKT_TO_EXC="22:00:9a:b1:00:00:00:01:04:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e"
+# IGMPv3 block report: grp 239.10.10.10 block 192.0.2.1,192.0.2.20,192.0.2.30
+MZPKT_BLOCK="22:00:98:b1:00:00:00:01:06:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e"
+
source lib.sh
h1_create()
@@ -79,38 +105,7 @@ cleanup()
vrf_cleanup
}
-# return 0 if the packet wasn't seen on host2_if or 1 if it was
-mcast_packet_test()
-{
- local mac=$1
- local ip=$2
- local host1_if=$3
- local host2_if=$4
- local seen=0
-
- # Add an ACL on `host2_if` which will tell us whether the packet
- # was received by it or not.
- tc qdisc add dev $host2_if ingress
- tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
- flower dst_mac $mac action drop
-
- $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t udp "dp=4096,sp=2048" -q
- sleep 1
-
- tc -j -s filter show dev $host2_if ingress \
- | jq -e ".[] | select(.options.handle == 101) \
- | select(.options.actions[0].stats.packets == 1)" &> /dev/null
- if [[ $? -eq 0 ]]; then
- seen=1
- fi
-
- tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
- tc qdisc del dev $host2_if ingress
-
- return $seen
-}
-
-reportleave_test()
+v2reportleave_test()
{
RET=0
ip address add dev $h2 $TEST_GROUP/32 autojoin
@@ -118,12 +113,12 @@ reportleave_test()
sleep 5
bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
- check_err $? "Report didn't create mdb entry for $TEST_GROUP"
+ check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP"
- mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
+ mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
check_fail $? "Traffic to $TEST_GROUP wasn't forwarded"
- log_test "IGMP report $TEST_GROUP"
+ log_test "IGMPv2 report $TEST_GROUP"
RET=0
bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
@@ -136,10 +131,424 @@ reportleave_test()
bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
check_fail $? "Leave didn't delete mdb entry for $TEST_GROUP"
- mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
+ mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
check_err $? "Traffic to $TEST_GROUP was forwarded without mdb entry"
- log_test "IGMP leave $TEST_GROUP"
+ log_test "IGMPv2 leave $TEST_GROUP"
+}
+
+v3include_prepare()
+{
+ local host1_if=$1
+ local mac=$2
+ local group=$3
+ local X=("192.0.2.1" "192.0.2.2" "192.0.2.3")
+
+ ip link set dev br0 type bridge mcast_igmp_version 3
+ check_err $? "Could not change bridge IGMP version to 3"
+
+ $MZ $host1_if -b $mac -c 1 -B $group -t ip "proto=2,p=$MZPKT_IS_INC" -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null
+ check_err $? "Missing *,G entry with source list"
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"include\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+ brmcast_check_sg_entries "is_include" "${X[@]}"
+}
+
+v3exclude_prepare()
+{
+ local host1_if=$1
+ local mac=$2
+ local group=$3
+ local pkt=$4
+ local X=("192.0.2.1" "192.0.2.2")
+ local Y=("192.0.2.20" "192.0.2.21")
+
+ v3include_prepare $host1_if $mac $group
+
+ $MZ $host1_if -c 1 -b $mac -B $group -t ip "proto=2,p=$MZPKT_IS_EXC" -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+
+ brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"192.0.2.3\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 192.0.2.3 entry still exists"
+}
+
+v3cleanup()
+{
+ local port=$1
+ local group=$2
+
+ bridge mdb del dev br0 port $port grp $group
+ ip link set dev br0 type bridge mcast_igmp_version 2
+}
+
+v3include_test()
+{
+ RET=0
+ local X=("192.0.2.1" "192.0.2.2" "192.0.2.3")
+
+ v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 "192.0.2.100"
+
+ log_test "IGMPv3 report $TEST_GROUP is_include"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_allow_test()
+{
+ RET=0
+ local X=("192.0.2.10" "192.0.2.11" "192.0.2.12")
+
+ v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW" -q
+ sleep 1
+ brmcast_check_sg_entries "allow" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 "192.0.2.100"
+
+ log_test "IGMPv3 report $TEST_GROUP include -> allow"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_is_include_test()
+{
+ RET=0
+ local X=("192.0.2.10" "192.0.2.11" "192.0.2.12")
+
+ v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC2" -q
+ sleep 1
+ brmcast_check_sg_entries "is_include" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 "192.0.2.100"
+
+ log_test "IGMPv3 report $TEST_GROUP include -> is_include"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_is_exclude_test()
+{
+ RET=0
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "IGMPv3 report $TEST_GROUP include -> is_exclude"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_to_exclude_test()
+{
+ RET=0
+ local X=("192.0.2.1")
+ local Y=("192.0.2.20" "192.0.2.30")
+
+ v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ ip link set dev br0 type bridge mcast_last_member_interval 500
+ check_err $? "Could not change mcast_last_member_interval to 5s"
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+
+ brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"192.0.2.2\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists"
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"192.0.2.21\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 192.0.2.21 entry still exists"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "IGMPv3 report $TEST_GROUP include -> to_exclude"
+
+ ip link set dev br0 type bridge mcast_last_member_interval 100
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_allow_test()
+{
+ RET=0
+ local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30")
+ local Y=("192.0.2.21")
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q
+ sleep 1
+ brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "IGMPv3 report $TEST_GROUP exclude -> allow"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_is_include_test()
+{
+ RET=0
+ local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30")
+ local Y=("192.0.2.21")
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC3" -q
+ sleep 1
+ brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "IGMPv3 report $TEST_GROUP exclude -> is_include"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_is_exclude_test()
+{
+ RET=0
+ local X=("192.0.2.30")
+ local Y=("192.0.2.20")
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC2" -q
+ sleep 1
+ brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "IGMPv3 report $TEST_GROUP exclude -> is_exclude"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_to_exclude_test()
+{
+ RET=0
+ local X=("192.0.2.1" "192.0.2.30")
+ local Y=("192.0.2.20")
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ ip link set dev br0 type bridge mcast_last_member_interval 500
+ check_err $? "Could not change mcast_last_member_interval to 5s"
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q
+ sleep 1
+ brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "IGMPv3 report $TEST_GROUP exclude -> to_exclude"
+
+ ip link set dev br0 type bridge mcast_last_member_interval 100
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_block_test()
+{
+ RET=0
+ local X=("192.0.2.2" "192.0.2.3")
+
+ v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q
+ # make sure the lowered timers have expired (by default 2 seconds)
+ sleep 3
+ brmcast_check_sg_entries "block" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"192.0.2.1\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 "192.0.2.100"
+
+ log_test "IGMPv3 report $TEST_GROUP include -> block"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_block_test()
+{
+ RET=0
+ local X=("192.0.2.1" "192.0.2.2" "192.0.2.30")
+ local Y=("192.0.2.20" "192.0.2.21")
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ ip link set dev br0 type bridge mcast_last_member_interval 500
+ check_err $? "Could not change mcast_last_member_interval to 5s"
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q
+ sleep 1
+ brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "IGMPv3 report $TEST_GROUP exclude -> block"
+
+ ip link set dev br0 type bridge mcast_last_member_interval 100
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_timeout_test()
+{
+ RET=0
+ local X=("192.0.2.20" "192.0.2.30")
+
+ # GMI should be 3 seconds
+ ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+ ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q
+ sleep 3
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"include\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"192.0.2.1\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists"
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"192.0.2.2\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists"
+
+ brmcast_check_sg_entries "allow" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 192.0.2.100
+
+ log_test "IGMPv3 group $TEST_GROUP exclude timeout"
+
+ ip link set dev br0 type bridge mcast_query_interval 12500 \
+ mcast_query_response_interval 1000
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3star_ex_auto_add_test()
+{
+ RET=0
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ $MZ $h2 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC" -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \
+ .port == \"$swp1\")" &>/dev/null
+ check_err $? "S,G entry for *,G port doesn't exist"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \
+ .port == \"$swp1\" and \
+ .flags[] == \"added_by_star_ex\")" &>/dev/null
+ check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag"
+
+ brmcast_check_sg_fwding 1 192.0.2.3
+
+ log_test "IGMPv3 S,G port entry automatic add to a *,G port"
+
+ v3cleanup $swp1 $TEST_GROUP
+ v3cleanup $swp2 $TEST_GROUP
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
new file mode 100755
index 000000000000..ffdcfa87ca2b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -0,0 +1,558 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
+ mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
+ mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
+ mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test"
+NUM_NETIFS=4
+CHECK_TC="yes"
+TEST_GROUP="ff02::cc"
+TEST_GROUP_MAC="33:33:00:00:00:cc"
+
+# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::1,2001:db8:1::2,2001:db8:1::3
+MZPKT_IS_INC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\
+00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:\
+00:05:02:00:00:00:00:8f:00:8e:d9:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:\
+00:00:00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:\
+00:00:00:00:00:00:02:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12
+MZPKT_IS_INC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\
+00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:\
+05:02:00:00:00:00:8f:00:8e:ac:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:\
+00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:\
+00:00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
+# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::20,2001:db8:1::30
+MZPKT_IS_INC3="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:bc:5a:00:00:00:01:01:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12
+MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:\
+00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:\
+02:00:00:00:00:8f:00:8a:ac:00:00:00:01:05:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:\
+00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:00:\
+00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
+# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::20,2001:db8:1::30
+MZPKT_ALLOW2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:b8:5a:00:00:00:01:05:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::1,2001:db8:1::2,2001:db8:1::20,2001:db8:1::21
+MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:5f:d0:00:00:00:01:02:00:00:04:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:21"
+# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::20,2001:db8:1::30
+MZPKT_IS_EXC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:bb:5a:00:00:00:01:02:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 to_ex report: grp ff02::cc to_exclude 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30
+MZPKT_TO_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:8b:8e:00:00:00:01:04:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 block report: grp ff02::cc block 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30
+MZPKT_BLOCK="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:00:\
+00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:00:\
+00:00:8f:00:89:8e:00:00:00:01:06:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:01:\
+0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:\
+0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add dev br0 type bridge mcast_snooping 1 mcast_query_response_interval 100 \
+ mcast_mld_version 2 mcast_startup_query_interval 300 \
+ mcast_querier 1
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+
+ # make sure a query has been generated
+ sleep 5
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+mldv2include_prepare()
+{
+ local host1_if=$1
+ local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3")
+
+ ip link set dev br0 type bridge mcast_mld_version 2
+ check_err $? "Could not change bridge MLD version to 2"
+
+ $MZ $host1_if $MZPKT_IS_INC -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null
+ check_err $? "Missing *,G entry with source list"
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"include\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+ brmcast_check_sg_entries "is_include" "${X[@]}"
+}
+
+mldv2exclude_prepare()
+{
+ local host1_if=$1
+ local mac=$2
+ local group=$3
+ local pkt=$4
+ local X=("2001:db8:1::1" "2001:db8:1::2")
+ local Y=("2001:db8:1::20" "2001:db8:1::21")
+
+ mldv2include_prepare $h1
+
+ $MZ $host1_if -c 1 $MZPKT_IS_EXC -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+
+ brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"2001:db8:1::3\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 2001:db8:1::3 entry still exists"
+}
+
+mldv2cleanup()
+{
+ local port=$1
+
+ bridge mdb del dev br0 port $port grp $TEST_GROUP
+ ip link set dev br0 type bridge mcast_mld_version 1
+}
+
+mldv2include_test()
+{
+ RET=0
+ local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3")
+
+ mldv2include_prepare $h1
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 "2001:db8:1::100"
+
+ log_test "MLDv2 report $TEST_GROUP is_include"
+
+ mldv2cleanup $swp1
+}
+
+mldv2inc_allow_test()
+{
+ RET=0
+ local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12")
+
+ mldv2include_prepare $h1
+
+ $MZ $h1 -c 1 $MZPKT_ALLOW -q
+ sleep 1
+ brmcast_check_sg_entries "allow" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 "2001:db8:1::100"
+
+ log_test "MLDv2 report $TEST_GROUP include -> allow"
+
+ mldv2cleanup $swp1
+}
+
+mldv2inc_is_include_test()
+{
+ RET=0
+ local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12")
+
+ mldv2include_prepare $h1
+
+ $MZ $h1 -c 1 $MZPKT_IS_INC2 -q
+ sleep 1
+ brmcast_check_sg_entries "is_include" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 "2001:db8:1::100"
+
+ log_test "MLDv2 report $TEST_GROUP include -> is_include"
+
+ mldv2cleanup $swp1
+}
+
+mldv2inc_is_exclude_test()
+{
+ RET=0
+
+ mldv2exclude_prepare $h1
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "MLDv2 report $TEST_GROUP include -> is_exclude"
+
+ mldv2cleanup $swp1
+}
+
+mldv2inc_to_exclude_test()
+{
+ RET=0
+ local X=("2001:db8:1::1")
+ local Y=("2001:db8:1::20" "2001:db8:1::30")
+
+ mldv2include_prepare $h1
+
+ ip link set dev br0 type bridge mcast_last_member_interval 500
+ check_err $? "Could not change mcast_last_member_interval to 5s"
+
+ $MZ $h1 -c 1 $MZPKT_TO_EXC -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+
+ brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"2001:db8:1::2\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists"
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"2001:db8:1::21\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 2001:db8:1::21 entry still exists"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "MLDv2 report $TEST_GROUP include -> to_exclude"
+
+ ip link set dev br0 type bridge mcast_last_member_interval 100
+
+ mldv2cleanup $swp1
+}
+
+mldv2exc_allow_test()
+{
+ RET=0
+ local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30")
+ local Y=("2001:db8:1::21")
+
+ mldv2exclude_prepare $h1
+
+ $MZ $h1 -c 1 $MZPKT_ALLOW2 -q
+ sleep 1
+ brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "MLDv2 report $TEST_GROUP exclude -> allow"
+
+ mldv2cleanup $swp1
+}
+
+mldv2exc_is_include_test()
+{
+ RET=0
+ local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30")
+ local Y=("2001:db8:1::21")
+
+ mldv2exclude_prepare $h1
+
+ $MZ $h1 -c 1 $MZPKT_IS_INC3 -q
+ sleep 1
+ brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "MLDv2 report $TEST_GROUP exclude -> is_include"
+
+ mldv2cleanup $swp1
+}
+
+mldv2exc_is_exclude_test()
+{
+ RET=0
+ local X=("2001:db8:1::30")
+ local Y=("2001:db8:1::20")
+
+ mldv2exclude_prepare $h1
+
+ $MZ $h1 -c 1 $MZPKT_IS_EXC2 -q
+ sleep 1
+ brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "MLDv2 report $TEST_GROUP exclude -> is_exclude"
+
+ mldv2cleanup $swp1
+}
+
+mldv2exc_to_exclude_test()
+{
+ RET=0
+ local X=("2001:db8:1::1" "2001:db8:1::30")
+ local Y=("2001:db8:1::20")
+
+ mldv2exclude_prepare $h1
+
+ ip link set dev br0 type bridge mcast_last_member_interval 500
+ check_err $? "Could not change mcast_last_member_interval to 5s"
+
+ $MZ $h1 -c 1 $MZPKT_TO_EXC -q
+ sleep 1
+ brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "MLDv2 report $TEST_GROUP exclude -> to_exclude"
+
+ ip link set dev br0 type bridge mcast_last_member_interval 100
+
+ mldv2cleanup $swp1
+}
+
+mldv2inc_block_test()
+{
+ RET=0
+ local X=("2001:db8:1::2" "2001:db8:1::3")
+
+ mldv2include_prepare $h1
+
+ $MZ $h1 -c 1 $MZPKT_BLOCK -q
+ # make sure the lowered timers have expired (by default 2 seconds)
+ sleep 3
+ brmcast_check_sg_entries "block" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"2001:db8:1::1\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 2001:db8:1::100
+
+ log_test "MLDv2 report $TEST_GROUP include -> block"
+
+ mldv2cleanup $swp1
+}
+
+mldv2exc_block_test()
+{
+ RET=0
+ local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::30")
+ local Y=("2001:db8:1::20" "2001:db8:1::21")
+
+ mldv2exclude_prepare $h1
+
+ ip link set dev br0 type bridge mcast_last_member_interval 500
+ check_err $? "Could not change mcast_last_member_interval to 5s"
+
+ $MZ $h1 -c 1 $MZPKT_BLOCK -q
+ sleep 1
+ brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "MLDv2 report $TEST_GROUP exclude -> block"
+
+ ip link set dev br0 type bridge mcast_last_member_interval 100
+
+ mldv2cleanup $swp1
+}
+
+mldv2exc_timeout_test()
+{
+ RET=0
+ local X=("2001:db8:1::20" "2001:db8:1::30")
+
+ # GMI should be 3 seconds
+ ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100
+
+ mldv2exclude_prepare $h1
+ ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500
+ $MZ $h1 -c 1 $MZPKT_ALLOW2 -q
+ sleep 3
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"include\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"2001:db8:1::1\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists"
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"2001:db8:1::2\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists"
+
+ brmcast_check_sg_entries "allow" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 2001:db8:1::100
+
+ log_test "MLDv2 group $TEST_GROUP exclude timeout"
+
+ ip link set dev br0 type bridge mcast_query_interval 12500 \
+ mcast_query_response_interval 1000
+
+ mldv2cleanup $swp1
+}
+
+mldv2star_ex_auto_add_test()
+{
+ RET=0
+
+ mldv2exclude_prepare $h1
+
+ $MZ $h2 -c 1 $MZPKT_IS_INC -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \
+ .port == \"$swp1\")" &>/dev/null
+ check_err $? "S,G entry for *,G port doesn't exist"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \
+ .port == \"$swp1\" and \
+ .flags[] == \"added_by_star_ex\")" &>/dev/null
+ check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag"
+
+ brmcast_check_sg_fwding 1 2001:db8:1::3
+
+ log_test "MLDv2 S,G port entry automatic add to a *,G port"
+
+ mldv2cleanup $swp1
+ mldv2cleanup $swp2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 927f9ba49e08..98ea37d26c44 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1270,3 +1270,110 @@ tcpdump_show()
{
tcpdump -e -n -r $capfile 2>&1
}
+
+# return 0 if the packet wasn't seen on host2_if or 1 if it was
+mcast_packet_test()
+{
+ local mac=$1
+ local src_ip=$2
+ local ip=$3
+ local host1_if=$4
+ local host2_if=$5
+ local seen=0
+ local tc_proto="ip"
+ local mz_v6arg=""
+
+ # basic check to see if we were passed an IPv4 address, if not assume IPv6
+ if [[ ! $ip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
+ tc_proto="ipv6"
+ mz_v6arg="-6"
+ fi
+
+ # Add an ACL on `host2_if` which will tell us whether the packet
+ # was received by it or not.
+ tc qdisc add dev $host2_if ingress
+ tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \
+ flower ip_proto udp dst_mac $mac action drop
+
+ $MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
+ sleep 1
+
+ tc -j -s filter show dev $host2_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ if [[ $? -eq 0 ]]; then
+ seen=1
+ fi
+
+ tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower
+ tc qdisc del dev $host2_if ingress
+
+ return $seen
+}
+
+brmcast_check_sg_entries()
+{
+ local report=$1; shift
+ local slist=("$@")
+ local sarg=""
+
+ for src in "${slist[@]}"; do
+ sarg="${sarg} and .source_list[].address == \"$src\""
+ done
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
+ check_err $? "Wrong *,G entry source list after $report report"
+
+ for sgent in "${slist[@]}"; do
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
+ check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
+ done
+}
+
+brmcast_check_sg_fwding()
+{
+ local should_fwd=$1; shift
+ local sources=("$@")
+
+ for src in "${sources[@]}"; do
+ local retval=0
+
+ mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
+ retval=$?
+ if [ $should_fwd -eq 1 ]; then
+ check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
+ else
+ check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
+ fi
+ done
+}
+
+brmcast_check_sg_state()
+{
+ local is_blocked=$1; shift
+ local sources=("$@")
+ local should_fail=1
+
+ if [ $is_blocked -eq 1 ]; then
+ should_fail=0
+ fi
+
+ for src in "${sources[@]}"; do
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .source_list != null) |
+ .source_list[] |
+ select(.address == \"$src\") |
+ select(.timer == \"0.00\")" &>/dev/null
+ check_err_fail $should_fail $? "Entry $src has zero timer"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
+ .flags[] == \"blocked\")" &>/dev/null
+ check_err_fail $should_fail $? "Entry $src has blocked flag"
+ done
+}
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 741a1c4f4ae8..0faaccd21447 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -5,3 +5,13 @@ CONFIG_INET_DIAG=m
CONFIG_INET_MPTCP_DIAG=m
CONFIG_VETH=y
CONFIG_NET_SCH_NETEM=m
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_NETLINK=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_COUNTER=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NF_TABLES_IPV6=y
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 08f53d86dedc..0d93b243695f 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -13,6 +13,24 @@ capture=0
TEST_COUNT=0
+# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
+# (ip6 && (ip6[74] & 0xf0) == 0x30)'"
+CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
+ 48 0 0 0,
+ 84 0 0 240,
+ 21 0 3 64,
+ 48 0 0 54,
+ 84 0 0 240,
+ 21 6 7 48,
+ 48 0 0 0,
+ 84 0 0 240,
+ 21 0 4 96,
+ 48 0 0 74,
+ 84 0 0 240,
+ 21 0 1 48,
+ 6 0 0 65535,
+ 6 0 0 0"
+
init()
{
capout=$(mktemp)
@@ -82,6 +100,26 @@ reset_with_cookies()
done
}
+reset_with_add_addr_timeout()
+{
+ local ip="${1:-4}"
+ local tables
+
+ tables="iptables"
+ if [ $ip -eq 6 ]; then
+ tables="ip6tables"
+ fi
+
+ reset
+
+ ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
+ ip netns exec $ns2 $tables -A OUTPUT -p tcp \
+ -m tcp --tcp-option 30 \
+ -m bpf --bytecode \
+ "$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \
+ -j DROP
+}
+
for arg in "$@"; do
if [ "$arg" = "-c" ]; then
capture=1
@@ -94,6 +132,17 @@ if [ $? -ne 0 ];then
exit $ksft_skip
fi
+iptables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run all tests without iptables tool"
+ exit $ksft_skip
+fi
+
+ip6tables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run all tests without ip6tables tool"
+ exit $ksft_skip
+fi
check_transfer()
{
@@ -135,6 +184,7 @@ do_transfer()
connect_addr="$5"
rm_nr_ns1="$6"
rm_nr_ns2="$7"
+ speed="$8"
port=$((10000+$TEST_COUNT))
TEST_COUNT=$((TEST_COUNT+1))
@@ -159,7 +209,7 @@ do_transfer()
sleep 1
fi
- if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then
+ if [ $speed = "fast" ]; then
mptcp_connect="./mptcp_connect -j"
else
mptcp_connect="./mptcp_connect -r"
@@ -250,26 +300,13 @@ run_tests()
listener_ns="$1"
connector_ns="$2"
connect_addr="$3"
+ rm_nr_ns1="${4:-0}"
+ rm_nr_ns2="${5:-0}"
+ speed="${6:-fast}"
lret=0
- do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0
- lret=$?
- if [ $lret -ne 0 ]; then
- ret=$lret
- return
- fi
-}
-
-run_remove_tests()
-{
- listener_ns="$1"
- connector_ns="$2"
- connect_addr="$3"
- rm_nr_ns1="$4"
- rm_nr_ns2="$5"
- lret=0
-
- do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2}
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \
+ ${rm_nr_ns1} ${rm_nr_ns2} ${speed}
lret=$?
if [ $lret -ne 0 ]; then
ret=$lret
@@ -491,12 +528,21 @@ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows and signal" 3 3 3
chk_add_nr 1 1
+# add_addr timeout
+reset_with_add_addr_timeout
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+run_tests $ns1 $ns2 10.0.1.1 0 0 slow
+chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1
+chk_add_nr 4 0
+
# single subflow, remove
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 1
ip netns exec $ns2 ./pm_nl_ctl limits 0 1
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 0 1
+run_tests $ns1 $ns2 10.0.1.1 0 1 slow
chk_join_nr "remove single subflow" 1 1 1
chk_rm_nr 1 1
@@ -506,7 +552,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
ip netns exec $ns2 ./pm_nl_ctl limits 0 2
ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 0 2
+run_tests $ns1 $ns2 10.0.1.1 0 2 slow
chk_join_nr "remove multiple subflows" 2 2 2
chk_rm_nr 2 2
@@ -515,7 +561,7 @@ reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 1
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-run_remove_tests $ns1 $ns2 10.0.1.1 1 0
+run_tests $ns1 $ns2 10.0.1.1 1 0 slow
chk_join_nr "remove single address" 1 1 1
chk_add_nr 1 1
chk_rm_nr 0 0
@@ -526,7 +572,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2
ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 2
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 1 1
+run_tests $ns1 $ns2 10.0.1.1 1 1 slow
chk_join_nr "remove subflow and signal" 2 2 2
chk_add_nr 1 1
chk_rm_nr 1 1
@@ -538,7 +584,7 @@ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
ip netns exec $ns2 ./pm_nl_ctl limits 1 3
ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
-run_remove_tests $ns1 $ns2 10.0.1.1 1 2
+run_tests $ns1 $ns2 10.0.1.1 1 2 slow
chk_join_nr "remove subflows and signal" 3 3 3
chk_add_nr 1 1
chk_rm_nr 2 2
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 6bbf69a28e12..464e31eabc73 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -355,7 +355,7 @@ setup_fou_or_gue() {
encap="${3}"
if [ "${outer}" = "4" ]; then
- modprobe fou || return 2
+ modprobe fou || return $ksft_skip
a_addr="${prefix4}.${a_r1}.1"
b_addr="${prefix4}.${b_r1}.1"
if [ "${inner}" = "4" ]; then
@@ -366,7 +366,7 @@ setup_fou_or_gue() {
ipproto="41"
fi
else
- modprobe fou6 || return 2
+ modprobe fou6 || return $ksft_skip
a_addr="${prefix6}:${a_r1}::1"
b_addr="${prefix6}:${b_r1}::1"
if [ "${inner}" = "4" ]; then
@@ -380,8 +380,8 @@ setup_fou_or_gue() {
fi
fi
- run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
- run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2
+ run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return $ksft_skip
+ run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return $ksft_skip
run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555
@@ -455,7 +455,7 @@ setup_ipvX_over_ipvY() {
fi
fi
- run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2
+ run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return $ksft_skip
run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode}
run_cmd ${ns_a} ip link set ip_a up
@@ -713,7 +713,7 @@ setup_routing() {
}
setup_bridge() {
- run_cmd ${ns_a} ip link add br0 type bridge || return 2
+ run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip
run_cmd ${ns_a} ip link set br0 up
run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
@@ -765,7 +765,7 @@ setup_ovs_vxlan6() {
}
setup_ovs_bridge() {
- run_cmd ovs-vsctl add-br ovs_br0 || return 2
+ run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip
run_cmd ip link set ovs_br0 up
run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
@@ -887,7 +887,7 @@ check_pmtu_value() {
test_pmtu_ipvX() {
family=${1}
- setup namespaces routing || return 2
+ setup namespaces routing || return $ksft_skip
trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
"${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
"${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
@@ -985,11 +985,11 @@ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
ll_mtu=4000
if [ ${outer_family} -eq 4 ]; then
- setup namespaces routing ${type}4 || return 2
+ setup namespaces routing ${type}4 || return $ksft_skip
# IPv4 header UDP header VXLAN/GENEVE header Ethernet header
exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
else
- setup namespaces routing ${type}6 || return 2
+ setup namespaces routing ${type}6 || return $ksft_skip
# IPv6 header UDP header VXLAN/GENEVE header Ethernet header
exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
fi
@@ -1060,11 +1060,11 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
ll_mtu=4000
if [ ${outer_family} -eq 4 ]; then
- setup namespaces routing bridge bridged_${type}4 || return 2
+ setup namespaces routing bridge bridged_${type}4 || return $ksft_skip
# IPv4 header UDP header VXLAN/GENEVE header Ethernet header
exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
else
- setup namespaces routing bridge bridged_${type}6 || return 2
+ setup namespaces routing bridge bridged_${type}6 || return $ksft_skip
# IPv6 header UDP header VXLAN/GENEVE header Ethernet header
exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
fi
@@ -1144,11 +1144,11 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() {
ll_mtu=4000
if [ ${outer_family} -eq 4 ]; then
- setup namespaces routing ovs_bridge ovs_${type}4 || return 2
+ setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip
# IPv4 header UDP header VXLAN/GENEVE header Ethernet header
exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
else
- setup namespaces routing ovs_bridge ovs_${type}6 || return 2
+ setup namespaces routing ovs_bridge ovs_${type}6 || return $ksft_skip
# IPv6 header UDP header VXLAN/GENEVE header Ethernet header
exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
fi
@@ -1230,7 +1230,7 @@ test_pmtu_ipvX_over_fouY_or_gueY() {
encap=${3}
ll_mtu=4000
- setup namespaces routing ${encap}${outer_family}${inner_family} || return 2
+ setup namespaces routing ${encap}${outer_family}${inner_family} || return $ksft_skip
trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \
"${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
"${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B
@@ -1309,7 +1309,7 @@ test_pmtu_ipvX_over_ipvY_exception() {
outer=${2}
ll_mtu=4000
- setup namespaces routing ip${inner}ip${outer} || return 2
+ setup namespaces routing ip${inner}ip${outer} || return $ksft_skip
trace "${ns_a}" ip_a "${ns_b}" ip_b \
"${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
@@ -1363,7 +1363,7 @@ test_pmtu_ipv6_ipv6_exception() {
}
test_pmtu_vti4_exception() {
- setup namespaces veth vti4 xfrm4 || return 2
+ setup namespaces veth vti4 xfrm4 || return $ksft_skip
trace "${ns_a}" veth_a "${ns_b}" veth_b \
"${ns_a}" vti4_a "${ns_b}" vti4_b
@@ -1393,7 +1393,7 @@ test_pmtu_vti4_exception() {
}
test_pmtu_vti6_exception() {
- setup namespaces veth vti6 xfrm6 || return 2
+ setup namespaces veth vti6 xfrm6 || return $ksft_skip
trace "${ns_a}" veth_a "${ns_b}" veth_b \
"${ns_a}" vti6_a "${ns_b}" vti6_b
fail=0
@@ -1423,7 +1423,7 @@ test_pmtu_vti6_exception() {
}
test_pmtu_vti4_default_mtu() {
- setup namespaces veth vti4 || return 2
+ setup namespaces veth vti4 || return $ksft_skip
# Check that MTU of vti device is MTU of veth minus IPv4 header length
veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
@@ -1435,7 +1435,7 @@ test_pmtu_vti4_default_mtu() {
}
test_pmtu_vti6_default_mtu() {
- setup namespaces veth vti6 || return 2
+ setup namespaces veth vti6 || return $ksft_skip
# Check that MTU of vti device is MTU of veth minus IPv6 header length
veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
@@ -1447,10 +1447,10 @@ test_pmtu_vti6_default_mtu() {
}
test_pmtu_vti4_link_add_mtu() {
- setup namespaces || return 2
+ setup namespaces || return $ksft_skip
run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
- [ $? -ne 0 ] && err " vti not supported" && return 2
+ [ $? -ne 0 ] && err " vti not supported" && return $ksft_skip
run_cmd ${ns_a} ip link del vti4_a
fail=0
@@ -1485,10 +1485,10 @@ test_pmtu_vti4_link_add_mtu() {
}
test_pmtu_vti6_link_add_mtu() {
- setup namespaces || return 2
+ setup namespaces || return $ksft_skip
run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
- [ $? -ne 0 ] && err " vti6 not supported" && return 2
+ [ $? -ne 0 ] && err " vti6 not supported" && return $ksft_skip
run_cmd ${ns_a} ip link del vti6_a
fail=0
@@ -1523,10 +1523,10 @@ test_pmtu_vti6_link_add_mtu() {
}
test_pmtu_vti6_link_change_mtu() {
- setup namespaces || return 2
+ setup namespaces || return $ksft_skip
run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy
- [ $? -ne 0 ] && err " dummy not supported" && return 2
+ [ $? -ne 0 ] && err " dummy not supported" && return $ksft_skip
run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
run_cmd ${ns_a} ip link set dummy0 up
run_cmd ${ns_a} ip link set dummy1 up
@@ -1579,10 +1579,10 @@ test_cleanup_vxlanX_exception() {
encap="vxlan"
ll_mtu=4000
- check_command taskset || return 2
+ check_command taskset || return $ksft_skip
cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)
- setup namespaces routing ${encap}${outer} || return 2
+ setup namespaces routing ${encap}${outer} || return $ksft_skip
trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \
"${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
"${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B
@@ -1644,7 +1644,7 @@ run_test() {
fi
err_flush
exit 1
- elif [ $ret -eq 2 ]; then
+ elif [ $ret -eq $ksft_skip ]; then
printf "TEST: %-60s [SKIP]\n" "${tdesc}"
err_flush
fi
@@ -1652,7 +1652,19 @@ run_test() {
return $ret
)
ret=$?
- [ $ret -ne 0 ] && exitcode=1
+ case $ret in
+ 0)
+ all_skipped=false
+ [ $exitcode=$ksft_skip ] && exitcode=0
+ ;;
+ $ksft_skip)
+ [ $all_skipped = true ] && exitcode=$ksft_skip
+ ;;
+ *)
+ all_skipped=false
+ exitcode=1
+ ;;
+ esac
return $ret
}
@@ -1667,7 +1679,7 @@ run_test_nh() {
}
test_list_flush_ipv4_exception() {
- setup namespaces routing || return 2
+ setup namespaces routing || return $ksft_skip
trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
"${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
"${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
@@ -1721,7 +1733,7 @@ test_list_flush_ipv4_exception() {
}
test_list_flush_ipv6_exception() {
- setup namespaces routing || return 2
+ setup namespaces routing || return $ksft_skip
trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
"${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
"${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
@@ -1786,6 +1798,7 @@ usage() {
#
exitcode=0
desc=0
+all_skipped=true
while getopts :ptv o
do
@@ -1840,7 +1853,7 @@ for t in ${tests}; do
if [ $run_this -eq 1 ]; then
run_test "${name}" "${desc}"
# if test was skipped no need to retry with nexthop objects
- [ $? -eq 2 ] && rerun_nh=0
+ [ $? -eq $ksft_skip ] && rerun_nh=0
if [ "${rerun_nh}" = "1" ]; then
run_test_nh "${name}" "${desc}"
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 2c522f7a0aec..db4521335722 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -56,12 +56,15 @@
#define RING_NUM_FRAMES 20
+static uint32_t cfg_max_num_members;
+
/* Open a socket in a given fanout mode.
* @return -1 if mode is bad, a valid socket otherwise */
static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
{
struct sockaddr_ll addr = {0};
- int fd, val;
+ struct fanout_args args;
+ int fd, val, err;
fd = socket(PF_PACKET, SOCK_RAW, 0);
if (fd < 0) {
@@ -83,8 +86,18 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
exit(1);
}
- val = (((int) typeflags) << 16) | group_id;
- if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
+ if (cfg_max_num_members) {
+ args.id = group_id;
+ args.type_flags = typeflags;
+ args.max_num_members = cfg_max_num_members;
+ err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args,
+ sizeof(args));
+ } else {
+ val = (((int) typeflags) << 16) | group_id;
+ err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val,
+ sizeof(val));
+ }
+ if (err) {
if (close(fd)) {
perror("close packet");
exit(1);
@@ -286,6 +299,56 @@ static void test_control_group(void)
}
}
+/* Test illegal max_num_members values */
+static void test_control_group_max_num_members(void)
+{
+ int fds[3];
+
+ fprintf(stderr, "test: control multiple sockets, max_num_members\n");
+
+ /* expected failure on greater than PACKET_FANOUT_MAX */
+ cfg_max_num_members = (1 << 16) + 1;
+ if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) {
+ fprintf(stderr, "ERROR: max_num_members > PACKET_FANOUT_MAX\n");
+ exit(1);
+ }
+
+ cfg_max_num_members = 256;
+ fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+ if (fds[0] == -1) {
+ fprintf(stderr, "ERROR: failed open\n");
+ exit(1);
+ }
+
+ /* expected failure on joining group with different max_num_members */
+ cfg_max_num_members = 257;
+ if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) {
+ fprintf(stderr, "ERROR: set different max_num_members\n");
+ exit(1);
+ }
+
+ /* success on joining group with same max_num_members */
+ cfg_max_num_members = 256;
+ fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+ if (fds[1] == -1) {
+ fprintf(stderr, "ERROR: failed to join group\n");
+ exit(1);
+ }
+
+ /* success on joining group with max_num_members unspecified */
+ cfg_max_num_members = 0;
+ fds[2] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+ if (fds[2] == -1) {
+ fprintf(stderr, "ERROR: failed to join group\n");
+ exit(1);
+ }
+
+ if (close(fds[2]) || close(fds[1]) || close(fds[0])) {
+ fprintf(stderr, "ERROR: closing sockets\n");
+ exit(1);
+ }
+}
+
/* Test creating a unique fanout group ids */
static void test_unique_fanout_group_ids(void)
{
@@ -426,8 +489,11 @@ int main(int argc, char **argv)
test_control_single();
test_control_group();
+ test_control_group_max_num_members();
test_unique_fanout_group_ids();
+ /* PACKET_FANOUT_MAX */
+ cfg_max_num_members = 1 << 16;
/* find a set of ports that do not collide onto the same socket */
ret = test_datapath(PACKET_FANOUT_HASH, port_off,
expect_hash[0], expect_hash[1]);
diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c
index f4bb4fef0f39..21091be70688 100644
--- a/tools/testing/selftests/net/timestamping.c
+++ b/tools/testing/selftests/net/timestamping.c
@@ -59,7 +59,8 @@ static void usage(const char *error)
" SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n"
" SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n"
" SIOCGSTAMP - check last socket time stamp\n"
- " SIOCGSTAMPNS - more accurate socket time stamp\n");
+ " SIOCGSTAMPNS - more accurate socket time stamp\n"
+ " PTPV2 - use PTPv2 messages\n");
exit(1);
}
@@ -115,13 +116,28 @@ static const unsigned char sync[] = {
0x00, 0x00, 0x00, 0x00
};
-static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
+static const unsigned char sync_v2[] = {
+ 0x00, 0x02, 0x00, 0x2C,
+ 0x00, 0x00, 0x02, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0xFF,
+ 0xFE, 0x00, 0x00, 0x00,
+ 0x00, 0x01, 0x00, 0x01,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+};
+
+static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len, int ptpv2)
{
+ size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync);
+ const void *sync_p = ptpv2 ? sync_v2 : sync;
struct timeval now;
int res;
- res = sendto(sock, sync, sizeof(sync), 0,
- addr, addr_len);
+ res = sendto(sock, sync_p, sync_len, 0, addr, addr_len);
gettimeofday(&now, 0);
if (res < 0)
printf("%s: %s\n", "send", strerror(errno));
@@ -134,9 +150,11 @@ static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
static void printpacket(struct msghdr *msg, int res,
char *data,
int sock, int recvmsg_flags,
- int siocgstamp, int siocgstampns)
+ int siocgstamp, int siocgstampns, int ptpv2)
{
struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name;
+ size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync);
+ const void *sync_p = ptpv2 ? sync_v2 : sync;
struct cmsghdr *cmsg;
struct timeval tv;
struct timespec ts;
@@ -210,10 +228,9 @@ static void printpacket(struct msghdr *msg, int res,
"probably SO_EE_ORIGIN_TIMESTAMPING"
#endif
);
- if (res < sizeof(sync))
+ if (res < sync_len)
printf(" => truncated data?!");
- else if (!memcmp(sync, data + res - sizeof(sync),
- sizeof(sync)))
+ else if (!memcmp(sync_p, data + res - sync_len, sync_len))
printf(" => GOT OUR DATA BACK (HURRAY!)");
break;
}
@@ -257,7 +274,7 @@ static void printpacket(struct msghdr *msg, int res,
}
static void recvpacket(int sock, int recvmsg_flags,
- int siocgstamp, int siocgstampns)
+ int siocgstamp, int siocgstampns, int ptpv2)
{
char data[256];
struct msghdr msg;
@@ -288,7 +305,7 @@ static void recvpacket(int sock, int recvmsg_flags,
} else {
printpacket(&msg, res, data,
sock, recvmsg_flags,
- siocgstamp, siocgstampns);
+ siocgstamp, siocgstampns, ptpv2);
}
}
@@ -300,6 +317,7 @@ int main(int argc, char **argv)
int siocgstamp = 0;
int siocgstampns = 0;
int ip_multicast_loop = 0;
+ int ptpv2 = 0;
char *interface;
int i;
int enabled = 1;
@@ -335,6 +353,8 @@ int main(int argc, char **argv)
siocgstampns = 1;
else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP"))
ip_multicast_loop = 1;
+ else if (!strcasecmp(argv[i], "PTPV2"))
+ ptpv2 = 1;
else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE"))
so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE"))
@@ -369,6 +389,7 @@ int main(int argc, char **argv)
HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
hwconfig.rx_filter =
(so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
+ ptpv2 ? HWTSTAMP_FILTER_PTP_V2_L4_SYNC :
HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE;
hwconfig_requested = hwconfig;
if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) {
@@ -496,16 +517,16 @@ int main(int argc, char **argv)
printf("has error\n");
recvpacket(sock, 0,
siocgstamp,
- siocgstampns);
+ siocgstampns, ptpv2);
recvpacket(sock, MSG_ERRQUEUE,
siocgstamp,
- siocgstampns);
+ siocgstampns, ptpv2);
}
} else {
/* write one packet */
sendpacket(sock,
(struct sockaddr *)&addr,
- sizeof(addr));
+ sizeof(addr), ptpv2);
next.tv_sec += 5;
continue;
}
diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config
index bb11de90c0c9..f6f2965e17af 100644
--- a/tools/testing/selftests/pidfd/config
+++ b/tools/testing/selftests/pidfd/config
@@ -4,3 +4,4 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_NET_NS=y
CONFIG_CGROUPS=y
+CONFIG_CHECKPOINT_RESTORE=y
diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
index 7758c98be015..0930e2411dfb 100644
--- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
@@ -204,7 +204,10 @@ TEST_F(child, fetch_fd)
fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
ASSERT_GE(fd, 0);
- EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd));
+ ret = sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd);
+ if (ret < 0 && errno == ENOSYS)
+ SKIP(return, "kcmp() syscall not supported");
+ EXPECT_EQ(ret, 0);
ret = fcntl(fd, F_GETFD);
ASSERT_GE(ret, 0);
diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c
index b9fe75fc3e51..8a59438ccc78 100644
--- a/tools/testing/selftests/pidfd/pidfd_open_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
@@ -6,7 +6,6 @@
#include <inttypes.h>
#include <limits.h>
#include <linux/types.h>
-#include <linux/wait.h>
#include <sched.h>
#include <signal.h>
#include <stdbool.h>
diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c
index 4b115444dfe9..610811275357 100644
--- a/tools/testing/selftests/pidfd/pidfd_poll_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c
@@ -3,7 +3,6 @@
#define _GNU_SOURCE
#include <errno.h>
#include <linux/types.h>
-#include <linux/wait.h>
#include <poll.h>
#include <signal.h>
#include <stdbool.h>
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
index 1f085b922c6e..6e2f2cd400ca 100644
--- a/tools/testing/selftests/pidfd/pidfd_setns_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -16,7 +16,6 @@
#include <unistd.h>
#include <sys/socket.h>
#include <sys/stat.h>
-#include <linux/kcmp.h>
#include "pidfd.h"
#include "../clone3/clone3_selftests.h"
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index c585aaa2acd8..529eb700ac26 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -330,7 +330,7 @@ static int test_pidfd_send_signal_recycled_pid_fail(void)
ksft_exit_fail_msg("%s test: Failed to recycle pid %d\n",
test_name, PID_RECYCLE);
case PIDFD_SKIP:
- ksft_print_msg("%s test: Skipping test\n", test_name);
+ ksft_test_result_skip("%s test: Skipping test\n", test_name);
ret = 0;
break;
case PIDFD_XFAIL:
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 2a0503bc7e49..cb53a8b777e6 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -266,8 +266,12 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
}
rc = 0;
- /* offset = 0 no alignment fault, so skip */
- for (offset = 1; offset < 16; offset++) {
+ /*
+ * offset = 0 is aligned but tests the workaround for the P9N
+ * DD2.1 vector CI load issue (see 5080332c2c89 "powerpc/64s:
+ * Add workaround for P9 vector CI load issue")
+ */
+ for (offset = 0; offset < 16; offset++) {
width = 16; /* vsx == 16 bytes */
r = 0;
diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c
index 471e2aa28077..fb4fe9188806 100644
--- a/tools/testing/selftests/proc/proc-loadavg-001.c
+++ b/tools/testing/selftests/proc/proc-loadavg-001.c
@@ -14,7 +14,6 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* Test that /proc/loadavg correctly reports last pid in pid namespace. */
-#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <sys/types.h>
diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c
index 9f6d000c0245..8511dcfe67c7 100644
--- a/tools/testing/selftests/proc/proc-self-syscall.c
+++ b/tools/testing/selftests/proc/proc-self-syscall.c
@@ -13,7 +13,6 @@
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
-#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c
index 30e2b7849089..e7ceabed7f51 100644
--- a/tools/testing/selftests/proc/proc-uptime-002.c
+++ b/tools/testing/selftests/proc/proc-uptime-002.c
@@ -15,7 +15,6 @@
*/
// Test that values in /proc/uptime increment monotonically
// while shifting across CPUs.
-#define _GNU_SOURCE
#undef NDEBUG
#include <assert.h>
#include <unistd.h>
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index bb543bf69d69..361235ad574b 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -100,7 +100,7 @@
],
"cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
"expExitCode": "0",
- "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+ "verifyCmd": "$TC -br filter show dev $DEV2 ingress",
"matchPattern": "filter protocol ip pref 1 flower.*handle",
"matchCount": "1",
"teardown": [
@@ -119,7 +119,7 @@
],
"cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
"expExitCode": "0",
- "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+ "verifyCmd": "$TC -br filter show dev $DEV2 ingress",
"matchPattern": " dst_mac e4:11:22:11:4a:51",
"matchCount": "0",
"teardown": [
diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile
index b4fd9a934654..3a5936cc10ab 100644
--- a/tools/testing/selftests/timens/Makefile
+++ b/tools/testing/selftests/timens/Makefile
@@ -1,4 +1,4 @@
-TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec
+TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex
TEST_GEN_PROGS_EXTENDED := gettime_perf
CFLAGS := -Wall -Werror -pthread
diff --git a/tools/testing/selftests/timens/futex.c b/tools/testing/selftests/timens/futex.c
new file mode 100644
index 000000000000..6b2b9264e851
--- /dev/null
+++ b/tools/testing/selftests/timens/futex.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <linux/unistd.h>
+#include <linux/futex.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "log.h"
+#include "timens.h"
+
+#define NSEC_PER_SEC 1000000000ULL
+
+static int run_test(int clockid)
+{
+ int futex_op = FUTEX_WAIT_BITSET;
+ struct timespec timeout, end;
+ int val = 0;
+
+ if (clockid == CLOCK_REALTIME)
+ futex_op |= FUTEX_CLOCK_REALTIME;
+
+ clock_gettime(clockid, &timeout);
+ timeout.tv_nsec += NSEC_PER_SEC / 10; // 100ms
+ if (timeout.tv_nsec > NSEC_PER_SEC) {
+ timeout.tv_sec++;
+ timeout.tv_nsec -= NSEC_PER_SEC;
+ }
+
+ if (syscall(__NR_futex, &val, futex_op, 0,
+ &timeout, 0, FUTEX_BITSET_MATCH_ANY) >= 0) {
+ ksft_test_result_fail("futex didn't return ETIMEDOUT\n");
+ return 1;
+ }
+
+ if (errno != ETIMEDOUT) {
+ ksft_test_result_fail("futex didn't return ETIMEDOUT: %s\n",
+ strerror(errno));
+ return 1;
+ }
+
+ clock_gettime(clockid, &end);
+
+ if (end.tv_sec < timeout.tv_sec ||
+ (end.tv_sec == timeout.tv_sec && end.tv_nsec < timeout.tv_nsec)) {
+ ksft_test_result_fail("futex slept less than 100ms\n");
+ return 1;
+ }
+
+
+ ksft_test_result_pass("futex with the %d clockid\n", clockid);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int status, len, fd;
+ char buf[4096];
+ pid_t pid;
+ struct timespec mtime_now;
+
+ nscheck();
+
+ ksft_set_plan(2);
+
+ clock_gettime(CLOCK_MONOTONIC, &mtime_now);
+
+ if (unshare_timens())
+ return 1;
+
+ len = snprintf(buf, sizeof(buf), "%d %d 0",
+ CLOCK_MONOTONIC, 70 * 24 * 3600);
+ fd = open("/proc/self/timens_offsets", O_WRONLY);
+ if (fd < 0)
+ return pr_perror("/proc/self/timens_offsets");
+
+ if (write(fd, buf, len) != len)
+ return pr_perror("/proc/self/timens_offsets");
+
+ close(fd);
+
+ pid = fork();
+ if (pid < 0)
+ return pr_perror("Unable to fork");
+ if (pid == 0) {
+ int ret = 0;
+
+ ret |= run_test(CLOCK_REALTIME);
+ ret |= run_test(CLOCK_MONOTONIC);
+ if (ret)
+ ksft_exit_fail();
+ ksft_exit_pass();
+ return 0;
+ }
+
+ if (waitpid(pid, &status, 0) != pid)
+ return pr_perror("Unable to wait the child process");
+
+ if (WIFEXITED(status))
+ return WEXITSTATUS(status);
+
+ return 1;
+}
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index d77f4829f1e0..74c69b75f6f5 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -316,6 +316,14 @@ pp sleep 3
n2 ping -W 1 -c 1 192.168.241.1
n1 wg set wg0 peer "$pub2" persistent-keepalive 0
+# Test that sk_bound_dev_if works
+n1 ping -I wg0 -c 1 -W 1 192.168.241.2
+# What about when the mark changes and the packet must be rerouted?
+n1 iptables -t mangle -I OUTPUT -j MARK --set-xmark 1
+n1 ping -c 1 -W 1 192.168.241.2 # First the boring case
+n1 ping -I wg0 -c 1 -W 1 192.168.241.2 # Then the sk_bound_dev_if case
+n1 iptables -t mangle -D OUTPUT -j MARK --set-xmark 1
+
# Test that onion routing works, even when it loops
n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5
ip1 addr add 192.168.242.1/24 dev wg0
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index d531de13c95b..4eecb432a66c 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -18,10 +18,12 @@ CONFIG_NF_NAT=y
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_NAT=y
CONFIG_NETFILTER_XT_MATCH_LENGTH=y
+CONFIG_NETFILTER_XT_MARK=y
CONFIG_NF_CONNTRACK_IPV4=y
CONFIG_NF_NAT_IPV4=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_MANGLE=y
CONFIG_IP_NF_NAT=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y