diff options
Diffstat (limited to 'tools')
148 files changed, 6398 insertions, 703 deletions
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index ba85bb23f060..1c17c3a24411 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -159,6 +159,21 @@ struct kvm_sync_regs { struct kvm_arch_memory_slot { }; +/* + * PMU filter structure. Describe a range of events with a particular + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. + */ +struct kvm_pmu_event_filter { + __u16 base_event; + __u16 nevents; + +#define KVM_PMU_EVENT_ALLOW 0 +#define KVM_PMU_EVENT_DENY 1 + + __u8 action; + __u8 pad[3]; +}; + /* for KVM_GET/SET_VCPU_EVENTS */ struct kvm_vcpu_events { struct { @@ -242,6 +257,15 @@ struct kvm_vcpu_events { #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2 + +/* + * Only two states can be presented by the host kernel: + * - NOT_REQUIRED: the guest doesn't need to do anything + * - NOT_AVAIL: the guest isn't mitigated (it can still use SSBS if available) + * + * All the other values are deprecated. The host still accepts all + * values (they are ABI), but will narrow them to the above two. + */ #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2) #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0 #define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1 @@ -329,6 +353,7 @@ struct kvm_vcpu_events { #define KVM_ARM_VCPU_PMU_V3_CTRL 0 #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 diff --git a/tools/arch/s390/include/uapi/asm/sie.h b/tools/arch/s390/include/uapi/asm/sie.h index 6ca1e68d7103..ede318653c87 100644 --- a/tools/arch/s390/include/uapi/asm/sie.h +++ b/tools/arch/s390/include/uapi/asm/sie.h @@ -29,7 +29,7 @@ { 0x13, "SIGP conditional emergency signal" }, \ { 0x15, "SIGP sense running" }, \ { 0x16, "SIGP set multithreading"}, \ - { 0x17, "SIGP store additional status ait address"} + { 0x17, "SIGP store additional status at address"} #define icpt_prog_codes \ { 0x0001, "Prog Operation" }, \ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 2901d5df4366..dad350d42ecf 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -96,7 +96,7 @@ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -/* free ( 3*32+17) */ +#define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ @@ -236,6 +236,7 @@ #define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ #define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ #define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_SEV_ES ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ #define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ @@ -288,6 +289,7 @@ #define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ #define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ #define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ +#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ @@ -353,6 +355,7 @@ #define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ #define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ #define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ +#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ @@ -368,6 +371,7 @@ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ +#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 4ea8584682f9..5861d34f9771 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -56,6 +56,12 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif +#ifdef CONFIG_IOMMU_SUPPORT +# define DISABLE_ENQCMD 0 +#else +# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -75,7 +81,8 @@ #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 #define DISABLED_MASK15 0 -#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP) +#define DISABLED_MASK16 (DISABLE_PKU|DISABLE_OSPKE|DISABLE_LA57|DISABLE_UMIP| \ + DISABLE_ENQCMD) #define DISABLED_MASK17 0 #define DISABLED_MASK18 0 #define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 19) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 2859ee4f39a8..972a34d93505 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -257,6 +257,9 @@ #define MSR_IA32_LASTINTFROMIP 0x000001dd #define MSR_IA32_LASTINTTOIP 0x000001de +#define MSR_IA32_PASID 0x00000d93 +#define MSR_IA32_PASID_VALID BIT_ULL(31) + /* DEBUGCTLMSR bits (others vary by model): */ #define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ #define DEBUGCTLMSR_BTF_SHIFT 1 @@ -464,11 +467,15 @@ #define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1) #define MSR_AMD64_IBSCTL 0xc001103a #define MSR_AMD64_IBSBRTARGET 0xc001103b +#define MSR_AMD64_ICIBSEXTDCTL 0xc001103c #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 +#define MSR_AMD64_SEV_ES_ENABLED_BIT 1 #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) +#define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT) #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f @@ -857,11 +864,14 @@ #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a #define MSR_CORE_PERF_FIXED_CTR2 0x0000030b +#define MSR_CORE_PERF_FIXED_CTR3 0x0000030c #define MSR_CORE_PERF_FIXED_CTR_CTRL 0x0000038d #define MSR_CORE_PERF_GLOBAL_STATUS 0x0000038e #define MSR_CORE_PERF_GLOBAL_CTRL 0x0000038f #define MSR_CORE_PERF_GLOBAL_OVF_CTRL 0x00000390 +#define MSR_PERF_METRICS 0x00000329 + /* PERF_GLOBAL_OVF_CTL bits */ #define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT 55 #define MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI (1ULL << MSR_CORE_PERF_GLOBAL_OVF_CTRL_TRACE_TOPA_PMI_BIT) diff --git a/tools/arch/x86/include/asm/required-features.h b/tools/arch/x86/include/asm/required-features.h index 6847d85400a8..3ff0d48469f2 100644 --- a/tools/arch/x86/include/asm/required-features.h +++ b/tools/arch/x86/include/asm/required-features.h @@ -54,7 +54,7 @@ #endif #ifdef CONFIG_X86_64 -#ifdef CONFIG_PARAVIRT +#ifdef CONFIG_PARAVIRT_XXL /* Paravirtualized systems may not have PSE or PGE available */ #define NEED_PSE 0 #define NEED_PGE 0 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 0780f97c1850..89e5f3d1bba8 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -192,6 +192,26 @@ struct kvm_msr_list { __u32 indices[0]; }; +/* Maximum size of any access bitmap in bytes */ +#define KVM_MSR_FILTER_MAX_BITMAP_SIZE 0x600 + +/* for KVM_X86_SET_MSR_FILTER */ +struct kvm_msr_filter_range { +#define KVM_MSR_FILTER_READ (1 << 0) +#define KVM_MSR_FILTER_WRITE (1 << 1) + __u32 flags; + __u32 nmsrs; /* number of msrs in bitmap */ + __u32 base; /* MSR index the bitmap starts at */ + __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */ +}; + +#define KVM_MSR_FILTER_MAX_RANGES 16 +struct kvm_msr_filter { +#define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0) +#define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0) + __u32 flags; + struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES]; +}; struct kvm_cpuid_entry { __u32 function; diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 2e8a30f06c74..f1d8307454e0 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -29,6 +29,7 @@ #define SVM_EXIT_WRITE_DR6 0x036 #define SVM_EXIT_WRITE_DR7 0x037 #define SVM_EXIT_EXCP_BASE 0x040 +#define SVM_EXIT_LAST_EXCP 0x05f #define SVM_EXIT_INTR 0x060 #define SVM_EXIT_NMI 0x061 #define SVM_EXIT_SMI 0x062 @@ -76,10 +77,21 @@ #define SVM_EXIT_MWAIT_COND 0x08c #define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_RDPRU 0x08e +#define SVM_EXIT_INVPCID 0x0a2 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 +/* SEV-ES software-defined VMGEXIT events */ +#define SVM_VMGEXIT_MMIO_READ 0x80000001 +#define SVM_VMGEXIT_MMIO_WRITE 0x80000002 +#define SVM_VMGEXIT_NMI_COMPLETE 0x80000003 +#define SVM_VMGEXIT_AP_HLT_LOOP 0x80000004 +#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005 +#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0 +#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1 +#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff + #define SVM_EXIT_ERR -1 #define SVM_EXIT_REASONS \ @@ -171,6 +183,7 @@ { SVM_EXIT_MONITOR, "monitor" }, \ { SVM_EXIT_MWAIT, "mwait" }, \ { SVM_EXIT_XSETBV, "xsetbv" }, \ + { SVM_EXIT_INVPCID, "invpcid" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index a43a6f10b564..359960a8f1de 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -843,9 +843,14 @@ static int handle_perms(void) else p_err("missing %s%s%s%s%s%s%s%srequired for full feature probing; run as root or use 'unprivileged'", capability_msg(bpf_caps, 0), +#ifdef CAP_BPF capability_msg(bpf_caps, 1), capability_msg(bpf_caps, 2), - capability_msg(bpf_caps, 3)); + capability_msg(bpf_caps, 3) +#else + "", "", "", "", "", "" +#endif /* CAP_BPF */ + ); goto exit_free; } diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index d942c1e3372c..acdb2c245f0a 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -940,7 +940,7 @@ static int parse_attach_detach_args(int argc, char **argv, int *progfd, } if (*attach_type == BPF_FLOW_DISSECTOR) { - *mapfd = -1; + *mapfd = 0; return 0; } diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c index 4e3512f700c0..ce5b65e07ab1 100644 --- a/tools/bpf/bpftool/skeleton/profiler.bpf.c +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c @@ -70,7 +70,7 @@ int BPF_PROG(fentry_XXX) static inline void fexit_update_maps(u32 id, struct bpf_perf_event_value *after) { - struct bpf_perf_event_value *before, diff, *accum; + struct bpf_perf_event_value *before, diff; before = bpf_map_lookup_elem(&fentry_readings, &id); /* only account samples with a valid fentry_reading */ @@ -95,7 +95,7 @@ int BPF_PROG(fexit_XXX) { struct bpf_perf_event_value readings[MAX_NUM_MATRICS]; u32 cpu = bpf_get_smp_processor_id(); - u32 i, one = 1, zero = 0; + u32 i, zero = 0; int err; u64 *count; diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index a04e81321c66..464873883396 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -185,7 +185,6 @@ int main(int argc, char *argv[]) main_test_libperl(); main_test_hello(); main_test_libelf(); - main_test_libelf_mmap(); main_test_get_current_dir_name(); main_test_gettid(); main_test_glibc(); diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index b9d4322e1e65..95c072b70d0e 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -27,18 +27,6 @@ #define __pure __attribute__((pure)) #endif #define noinline __attribute__((noinline)) -#ifdef __has_attribute -#if __has_attribute(disable_tail_calls) -#define __no_tail_call __attribute__((disable_tail_calls)) -#endif -#endif -#ifndef __no_tail_call -#if GCC_VERSION > 40201 -#define __no_tail_call __attribute__((optimize("no-optimize-sibling-calls"))) -#else -#define __no_tail_call -#endif -#endif #ifndef __packed #define __packed __attribute__((packed)) #endif diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 2b3f7353e891..d22a974372c0 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -47,9 +47,6 @@ #ifndef noinline #define noinline #endif -#ifndef __no_tail_call -#define __no_tail_call -#endif /* Are two types/vars the same type (ignoring qualifiers)? */ #ifndef __same_type diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h index ab82c793c897..577f51436cf9 100644 --- a/tools/include/linux/objtool.h +++ b/tools/include/linux/objtool.h @@ -60,7 +60,7 @@ struct unwind_hint { * For more information, see tools/objtool/Documentation/stack-validation.txt. */ #define STACK_FRAME_NON_STANDARD(func) \ - static void __used __section(.discard.func_stack_frame_non_standard) \ + static void __used __section(".discard.func_stack_frame_non_standard") \ *__func_stack_frame_non_standard_##func = func #else /* __ASSEMBLY__ */ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index f2b5d72a46c2..2056318988f7 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -857,9 +857,11 @@ __SYSCALL(__NR_openat2, sys_openat2) __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) #define __NR_faccessat2 439 __SYSCALL(__NR_faccessat2, sys_faccessat2) +#define __NR_process_madvise 440 +__SYSCALL(__NR_process_madvise, sys_process_madvise) #undef __NR_syscalls -#define __NR_syscalls 440 +#define __NR_syscalls 441 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index 00546062e023..fa1f3d62f9a6 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -619,6 +619,12 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_PERF_REVISION 54 +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * timeline syncobj through drm_i915_gem_execbuffer_ext_timeline_fences. See + * I915_EXEC_USE_EXTENSIONS. + */ +#define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -1046,6 +1052,38 @@ struct drm_i915_gem_exec_fence { __u32 flags; }; +/** + * See drm_i915_gem_execbuffer_ext_timeline_fences. + */ +#define DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES 0 + +/** + * This structure describes an array of drm_syncobj and associated points for + * timeline variants of drm_syncobj. It is invalid to append this structure to + * the execbuf if I915_EXEC_FENCE_ARRAY is set. + */ +struct drm_i915_gem_execbuffer_ext_timeline_fences { + struct i915_user_extension base; + + /** + * Number of element in the handles_ptr & value_ptr arrays. + */ + __u64 fence_count; + + /** + * Pointer to an array of struct drm_i915_gem_exec_fence of length + * fence_count. + */ + __u64 handles_ptr; + + /** + * Pointer to an array of u64 values of length fence_count. Values + * must be 0 for a binary drm_syncobj. A Value of 0 for a timeline + * drm_syncobj is invalid as it turns a drm_syncobj into a binary one. + */ + __u64 values_ptr; +}; + struct drm_i915_gem_execbuffer2 { /** * List of gem_exec_object2 structs @@ -1062,8 +1100,14 @@ struct drm_i915_gem_execbuffer2 { __u32 num_cliprects; /** * This is a struct drm_clip_rect *cliprects if I915_EXEC_FENCE_ARRAY - * is not set. If I915_EXEC_FENCE_ARRAY is set, then this is a - * struct drm_i915_gem_exec_fence *fences. + * & I915_EXEC_USE_EXTENSIONS are not set. + * + * If I915_EXEC_FENCE_ARRAY is set, then this is a pointer to an array + * of struct drm_i915_gem_exec_fence and num_cliprects is the length + * of the array. + * + * If I915_EXEC_USE_EXTENSIONS is set, then this is a pointer to a + * single struct i915_user_extension and num_cliprects is 0. */ __u64 cliprects_ptr; #define I915_EXEC_RING_MASK (0x3f) @@ -1181,7 +1225,16 @@ struct drm_i915_gem_execbuffer2 { */ #define I915_EXEC_FENCE_SUBMIT (1 << 20) -#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_FENCE_SUBMIT << 1)) +/* + * Setting I915_EXEC_USE_EXTENSIONS implies that + * drm_i915_gem_execbuffer2.cliprects_ptr is treated as a pointer to an linked + * list of i915_user_extension. Each i915_user_extension node is the base of a + * larger structure. The list of supported structures are listed in the + * drm_i915_gem_execbuffer_ext enum. + */ +#define I915_EXEC_USE_EXTENSIONS (1 << 21) + +#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_USE_EXTENSIONS << 1)) #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff) #define i915_execbuffer2_set_context_id(eb2, context) \ diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 7875709ccfeb..e5de60336938 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -45,7 +45,6 @@ struct fscrypt_policy_v1 { __u8 flags; __u8 master_key_descriptor[FSCRYPT_KEY_DESCRIPTOR_SIZE]; }; -#define fscrypt_policy fscrypt_policy_v1 /* * Process-subscribed "logon" key description prefix and payload format. @@ -156,9 +155,9 @@ struct fscrypt_get_key_status_arg { __u32 __out_reserved[13]; }; -#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy) +#define FS_IOC_SET_ENCRYPTION_POLICY _IOR('f', 19, struct fscrypt_policy_v1) #define FS_IOC_GET_ENCRYPTION_PWSALT _IOW('f', 20, __u8[16]) -#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy) +#define FS_IOC_GET_ENCRYPTION_POLICY _IOW('f', 21, struct fscrypt_policy_v1) #define FS_IOC_GET_ENCRYPTION_POLICY_EX _IOWR('f', 22, __u8[9]) /* size + version */ #define FS_IOC_ADD_ENCRYPTION_KEY _IOWR('f', 23, struct fscrypt_add_key_arg) #define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg) @@ -170,6 +169,7 @@ struct fscrypt_get_key_status_arg { /* old names; don't add anything new here! */ #ifndef __KERNEL__ +#define fscrypt_policy fscrypt_policy_v1 #define FS_KEY_DESCRIPTOR_SIZE FSCRYPT_KEY_DESCRIPTOR_SIZE #define FS_POLICY_FLAGS_PAD_4 FSCRYPT_POLICY_FLAGS_PAD_4 #define FS_POLICY_FLAGS_PAD_8 FSCRYPT_POLICY_FLAGS_PAD_8 diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 7d8eced6f459..ca41220b40b8 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -248,6 +248,8 @@ struct kvm_hyperv_exit { #define KVM_EXIT_IOAPIC_EOI 26 #define KVM_EXIT_HYPERV 27 #define KVM_EXIT_ARM_NISV 28 +#define KVM_EXIT_X86_RDMSR 29 +#define KVM_EXIT_X86_WRMSR 30 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -413,6 +415,17 @@ struct kvm_run { __u64 esr_iss; __u64 fault_ipa; } arm_nisv; + /* KVM_EXIT_X86_RDMSR / KVM_EXIT_X86_WRMSR */ + struct { + __u8 error; /* user -> kernel */ + __u8 pad[7]; +#define KVM_MSR_EXIT_REASON_INVAL (1 << 0) +#define KVM_MSR_EXIT_REASON_UNKNOWN (1 << 1) +#define KVM_MSR_EXIT_REASON_FILTER (1 << 2) + __u32 reason; /* kernel -> user */ + __u32 index; /* kernel -> user */ + __u64 data; /* kernel <-> user */ + } msr; /* Fix the size of the union. */ char padding[256]; }; @@ -1037,6 +1050,9 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SMALLER_MAXPHYADDR 185 #define KVM_CAP_S390_DIAG318 186 #define KVM_CAP_STEAL_TIME 187 +#define KVM_CAP_X86_USER_SPACE_MSR 188 +#define KVM_CAP_X86_MSR_FILTER 189 +#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #ifdef KVM_CAP_IRQ_ROUTING @@ -1538,6 +1554,9 @@ struct kvm_pv_cmd { /* Available with KVM_CAP_S390_PROTECTED */ #define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd) +/* Available with KVM_CAP_X86_MSR_FILTER */ +#define KVM_X86_SET_MSR_FILTER _IOW(KVMIO, 0xc6, struct kvm_msr_filter) + /* Secure Encrypted Virtualization command */ enum sev_cmd_id { /* Guest initialization commands */ diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h index 923cc162609c..f55bc680b5b0 100644 --- a/tools/include/uapi/linux/mman.h +++ b/tools/include/uapi/linux/mman.h @@ -27,6 +27,7 @@ #define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT #define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK +#define MAP_HUGE_16KB HUGETLB_FLAG_ENCODE_16KB #define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB #define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB #define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index 96a0240f23fe..dd8306ea336c 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -16,6 +16,7 @@ #define MS_REMOUNT 32 /* Alter flags of a mounted FS */ #define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ #define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ #define MS_NOATIME 1024 /* Do not update access times. */ #define MS_NODIRATIME 2048 /* Do not update directory access times */ #define MS_BIND 4096 diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 3e5dcdd48a49..b95d3c485d27 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1196,7 +1196,7 @@ union perf_mem_data_src { #define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ /* 1 free */ -#define PERF_MEM_SNOOPX_SHIFT 38 +#define PERF_MEM_SNOOPX_SHIFT 38 /* locked instruction */ #define PERF_MEM_LOCK_NA 0x01 /* not available */ diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 07b4f8131e36..7f0827705c9a 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -233,6 +233,15 @@ struct prctl_mm_map { #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) +/* MTE tag check fault modes */ +# define PR_MTE_TCF_SHIFT 1 +# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) +/* MTE tag inclusion mask */ +# define PR_MTE_TAG_SHIFT 3 +# define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h index 75232185324a..c998860d7bbc 100644 --- a/tools/include/uapi/linux/vhost.h +++ b/tools/include/uapi/linux/vhost.h @@ -146,4 +146,8 @@ /* Set event fd for config interrupt*/ #define VHOST_VDPA_SET_CONFIG_CALL _IOW(VHOST_VIRTIO, 0x77, int) + +/* Get the valid iova range */ +#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \ + struct vhost_vdpa_iova_range) #endif diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index d9b385fe808c..10a4c4cd13cf 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -15,6 +15,9 @@ static inline size_t hash_bits(size_t h, int bits) { /* shuffle bits and return requested number of upper bits */ + if (bits == 0) + return 0; + #if (__SIZEOF_SIZE_T__ == __SIZEOF_LONG_LONG__) /* LP64 case */ return (h * 11400714819323198485llu) >> (__SIZEOF_LONG_LONG__ * 8 - bits); @@ -174,17 +177,17 @@ bool hashmap__find(const struct hashmap *map, const void *key, void **value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - map->buckets ? map->buckets[bkt] : NULL; }); \ + for (cur = map->buckets \ + ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + : NULL; \ cur; \ cur = cur->next) \ if (map->equal_fn(cur->key, (_key), map->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\ - map->cap_bits); \ - cur = map->buckets ? map->buckets[bkt] : NULL; }); \ + for (cur = map->buckets \ + ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ if (map->equal_fn(cur->key, (_key), map->ctx)) diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index e3c98c007825..9bc537d0b92d 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -891,13 +891,16 @@ int xsk_umem__delete(struct xsk_umem *umem) void xsk_socket__delete(struct xsk_socket *xsk) { size_t desc_sz = sizeof(struct xdp_desc); - struct xsk_ctx *ctx = xsk->ctx; struct xdp_mmap_offsets off; + struct xsk_umem *umem; + struct xsk_ctx *ctx; int err; if (!xsk) return; + ctx = xsk->ctx; + umem = ctx->umem; if (ctx->prog_fd != -1) { xsk_delete_bpf_maps(xsk); close(ctx->prog_fd); @@ -917,11 +920,11 @@ void xsk_socket__delete(struct xsk_socket *xsk) xsk_put_ctx(ctx); - ctx->umem->refcount--; + umem->refcount--; /* Do not close an fd that also has an associated umem connected * to it. */ - if (xsk->fd != ctx->umem->fd) + if (xsk->fd != umem->fd) close(xsk->fd); free(xsk); } diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 6890fc4b063a..ce8516e4de34 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -749,6 +749,7 @@ else PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + PERL_EMBED_CCOPTS := $(filter-out -flto=auto -ffat-lto-objects, $(PERL_EMBED_CCOPTS)) PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 347809649ba2..379819244b91 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -361,12 +361,13 @@ 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd 439 common faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise # -# x32-specific system call numbers start at 512 to avoid cache impact -# for native 64-bit operation. The __x32_compat_sys stubs are created -# on-the-fly for compat_sys_*() compatibility system calls if X86_X32 -# is defined. +# Due to a historical design error, certain syscalls are numbered differently +# in x32 as compared to native x86_64. These syscalls have numbers 512-547. +# Do not add new syscalls to this range. Numbers 548 and above are available +# for non-x32 use. # 512 x32 rt_sigaction compat_sys_rt_sigaction 513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn @@ -404,3 +405,5 @@ 545 x32 execveat compat_sys_execveat 546 x32 preadv2 compat_sys_preadv64v2 547 x32 pwritev2 compat_sys_pwritev64v2 +# This is the end of the legacy x32 range. Numbers 548 and above are +# not special and are not to be used for x32-specific syscalls. diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 44a75f234db1..de80534473af 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -4639,9 +4639,9 @@ do_concat: err = 0; if (lists[0]) { - struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", - "event selector. use 'perf list' to list available events", - parse_events_option); + struct option o = { + .value = &trace->evlist, + }; err = parse_events_option(&o, lists[0], 0); } out: @@ -4655,9 +4655,12 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u { struct trace *trace = opt->value; - if (!list_empty(&trace->evlist->core.entries)) - return parse_cgroups(opt, str, unset); - + if (!list_empty(&trace->evlist->core.entries)) { + struct option o = { + .value = &trace->evlist, + }; + return parse_cgroups(&o, str, unset); + } trace->cgroup = evlist__findnew_cgroup(trace->evlist, str); return 0; diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json index de3193552277..00f4fcffa815 100644 --- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json @@ -329,7 +329,7 @@ }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", - "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "MetricExpr": "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time", "MetricGroup": "Memory_BW;SoC", "MetricName": "DRAM_BW_Use" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json index f31794d3b926..0dd8b13b5cfb 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json @@ -323,7 +323,7 @@ }, { "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]", - "MetricExpr": "( 64 * ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) / 1000000000 ) / duration_time", + "MetricExpr": "( ( ( uncore_imc@cas_count_read@ + uncore_imc@cas_count_write@ ) * 1048576 ) / 1000000000 ) / duration_time", "MetricGroup": "Memory_BW;SoC", "MetricName": "DRAM_BW_Use" }, diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 2491d167bf76..83638097c3bc 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -95,7 +95,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) return strcmp((const char *) symbol, funcs[idx]); } -__no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread) +noinline int test_dwarf_unwind__thread(struct thread *thread) { struct perf_sample sample; unsigned long cnt = 0; @@ -126,7 +126,7 @@ __no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread) static int global_unwind_retval = -INT_MAX; -__no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2) +noinline int test_dwarf_unwind__compare(void *p1, void *p2) { /* Any possible value should be 'thread' */ struct thread *thread = *(struct thread **)p1; @@ -145,7 +145,7 @@ __no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2) return p1 - p2; } -__no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread) +noinline int test_dwarf_unwind__krava_3(struct thread *thread) { struct thread *array[2] = {thread, thread}; void *fp = &bsearch; @@ -164,12 +164,12 @@ __no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread) return global_unwind_retval; } -__no_tail_call noinline int test_dwarf_unwind__krava_2(struct thread *thread) +noinline int test_dwarf_unwind__krava_2(struct thread *thread) { return test_dwarf_unwind__krava_3(thread); } -__no_tail_call noinline int test_dwarf_unwind__krava_1(struct thread *thread) +noinline int test_dwarf_unwind__krava_1(struct thread *thread) { return test_dwarf_unwind__krava_2(thread); } diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index a07626f07208..b0e1880cf992 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -2963,7 +2963,7 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events, struct popup_action actions[MAX_OPTIONS]; int nr_options = 0; int key = -1; - char buf[64]; + char buf[128]; int delay_secs = hbt ? hbt->refresh : 0; #define HIST_BROWSER_HELP_COMMON \ diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 8763772f1095..6b410c3d52dc 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -102,6 +102,8 @@ int build_id__sprintf(const struct build_id *build_id, char *bf) const u8 *raw = build_id->data; size_t i; + bf[0] = 0x0; + for (i = 0; i < build_id->size; ++i) { sprintf(bid, "%02x", *raw); ++raw; diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c index a405dad068f5..3c20b126d60d 100644 --- a/tools/perf/util/hashmap.c +++ b/tools/perf/util/hashmap.c @@ -15,6 +15,9 @@ /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 +/* prevent accidental re-addition of reallocarray() */ +#pragma GCC poison reallocarray + /* start with 4 buckets */ #define HASHMAP_MIN_CAP_BITS 2 diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index e0af36b0e5d8..d9b385fe808c 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -25,6 +25,18 @@ static inline size_t hash_bits(size_t h, int bits) #endif } +/* generic C-string hashing function */ +static inline size_t str_hash(const char *s) +{ + size_t h = 0; + + while (*s) { + h = h * 31 + *s; + s++; + } + return h; +} + typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx); typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx); diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 7d4194ffc5b0..15385ea00190 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -786,11 +786,20 @@ static int machine__process_ksymbol_unregister(struct machine *machine, union perf_event *event, struct perf_sample *sample __maybe_unused) { + struct symbol *sym; struct map *map; map = maps__find(&machine->kmaps, event->ksymbol.addr); - if (map) + if (!map) + return 0; + + if (map != machine->vmlinux_map) maps__remove(&machine->kmaps, map); + else { + sym = dso__find_symbol(map->dso, map->map_ip(map, map->start)); + if (sym) + dso__delete_symbol(map->dso, sym); + } return 0; } diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 7cbd024e3e63..c83c2c6564e0 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -1592,7 +1592,6 @@ static void _free_command_line(wchar_t **command_line, int num) static int python_start_script(const char *script, int argc, const char **argv) { struct tables *tables = &tables_global; - PyMODINIT_FUNC (*initfunc)(void); #if PY_MAJOR_VERSION < 3 const char **command_line; #else @@ -1607,20 +1606,18 @@ static int python_start_script(const char *script, int argc, const char **argv) FILE *fp; #if PY_MAJOR_VERSION < 3 - initfunc = initperf_trace_context; command_line = malloc((argc + 1) * sizeof(const char *)); command_line[0] = script; for (i = 1; i < argc + 1; i++) command_line[i] = argv[i - 1]; + PyImport_AppendInittab(name, initperf_trace_context); #else - initfunc = PyInit_perf_trace_context; command_line = malloc((argc + 1) * sizeof(wchar_t *)); command_line[0] = Py_DecodeLocale(script, NULL); for (i = 1; i < argc + 1; i++) command_line[i] = Py_DecodeLocale(argv[i - 1], NULL); + PyImport_AppendInittab(name, PyInit_perf_trace_context); #endif - - PyImport_AppendInittab(name, initfunc); Py_Initialize(); #if PY_MAJOR_VERSION < 3 diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 7a5f03764702..098080287c68 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -595,6 +595,7 @@ static void perf_event__mmap2_swap(union perf_event *event, event->mmap2.maj = bswap_32(event->mmap2.maj); event->mmap2.min = bswap_32(event->mmap2.min); event->mmap2.ino = bswap_64(event->mmap2.ino); + event->mmap2.ino_generation = bswap_64(event->mmap2.ino_generation); if (sample_id_all) { void *data = &event->mmap2.filename; @@ -710,6 +711,18 @@ static void perf_event__namespaces_swap(union perf_event *event, swap_sample_id_all(event, &event->namespaces.link_info[i]); } +static void perf_event__cgroup_swap(union perf_event *event, bool sample_id_all) +{ + event->cgroup.id = bswap_64(event->cgroup.id); + + if (sample_id_all) { + void *data = &event->cgroup.path; + + data += PERF_ALIGN(strlen(data) + 1, sizeof(u64)); + swap_sample_id_all(event, data); + } +} + static u8 revbyte(u8 b) { int rev = (b >> 4) | ((b & 0xf) << 4); @@ -952,6 +965,7 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_SWITCH] = perf_event__switch_swap, [PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap, [PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap, + [PERF_RECORD_CGROUP] = perf_event__cgroup_swap, [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 6138866665df..0d14abdf3d72 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -515,6 +515,13 @@ void dso__insert_symbol(struct dso *dso, struct symbol *sym) } } +void dso__delete_symbol(struct dso *dso, struct symbol *sym) +{ + rb_erase_cached(&sym->rb_node, &dso->symbols); + symbol__delete(sym); + dso__reset_find_symbol_cache(dso); +} + struct symbol *dso__find_symbol(struct dso *dso, u64 addr) { if (dso->last_find_result.addr != addr || dso->last_find_result.symbol == NULL) { diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index f4801c488def..954d6a049ee2 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -131,6 +131,8 @@ int dso__load_kallsyms(struct dso *dso, const char *filename, struct map *map); void dso__insert_symbol(struct dso *dso, struct symbol *sym); +void dso__delete_symbol(struct dso *dso, + struct symbol *sym); struct symbol *dso__find_symbol(struct dso *dso, u64 addr); struct symbol *dso__find_symbol_by_name(struct dso *dso, const char *name); diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index c8622497ef23..c7bcddbd486d 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -51,7 +51,7 @@ DESTDIR ?= # Package-related definitions. Distributions can modify the version # and _should_ modify the PACKAGE_BUGREPORT definition -VERSION= $(shell ./utils/version-gen.sh) +VERSION:= $(shell ./utils/version-gen.sh) LIB_MAJ= 0.0.1 LIB_MIN= 0 diff --git a/tools/power/cpupower/debug/i386/intel_gsic.c b/tools/power/cpupower/debug/i386/intel_gsic.c index e5e926f46d6b..befd837f07f8 100644 --- a/tools/power/cpupower/debug/i386/intel_gsic.c +++ b/tools/power/cpupower/debug/i386/intel_gsic.c @@ -71,7 +71,7 @@ int main (void) printf("\tsmi_cmd=0x?? smi_port=0x?? smi_sig=1\n"); printf("\nUnfortunately, you have to know what exactly are " "smi_cmd and smi_port, and this\nis system " - "dependant.\n"); + "dependent.\n"); } return 1; } diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index 2b6551269e43..f3e3c94ab9bd 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -12,11 +12,12 @@ turbostat : turbostat.c override CFLAGS += -O2 -Wall -I../../../include override CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' override CFLAGS += -DINTEL_FAMILY_HEADER='"../../../../arch/x86/include/asm/intel-family.h"' +override CFLAGS += -D_FILE_OFFSET_BITS=64 override CFLAGS += -D_FORTIFY_SOURCE=2 %: %.c @mkdir -p $(BUILD_OUTPUT) - $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap + $(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ $(LDFLAGS) -lcap -lrt .PHONY : clean clean : diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index a6db83a88e85..f6b7e85b121c 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -335,7 +335,7 @@ that they count at TSC rate, which is true on all processors tested to date. .SH REFERENCES Volume 3B: System Programming Guide" -http://www.intel.com/products/processor/manuals/ +https://www.intel.com/products/processor/manuals/ .SH FILES .ta diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 33b370865d16..f3a1746f7f45 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -79,6 +79,7 @@ unsigned long long gfx_cur_rc6_ms; unsigned long long cpuidle_cur_cpu_lpi_us; unsigned long long cpuidle_cur_sys_lpi_us; unsigned int gfx_cur_mhz; +unsigned int gfx_act_mhz; unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; double rapl_power_units, rapl_time_units; @@ -210,13 +211,14 @@ struct pkg_data { unsigned long long pkg_both_core_gfxe_c0; long long gfx_rc6_ms; unsigned int gfx_mhz; + unsigned int gfx_act_mhz; unsigned int package_id; - unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ - unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ - unsigned int energy_cores; /* MSR_PP0_ENERGY_STATUS */ - unsigned int energy_gfx; /* MSR_PP1_ENERGY_STATUS */ - unsigned int rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ - unsigned int rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ + unsigned long long energy_pkg; /* MSR_PKG_ENERGY_STATUS */ + unsigned long long energy_dram; /* MSR_DRAM_ENERGY_STATUS */ + unsigned long long energy_cores; /* MSR_PP0_ENERGY_STATUS */ + unsigned long long energy_gfx; /* MSR_PP1_ENERGY_STATUS */ + unsigned long long rapl_pkg_perf_status; /* MSR_PKG_PERF_STATUS */ + unsigned long long rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ unsigned int pkg_temp_c; unsigned long long counter[MAX_ADDED_COUNTERS]; } *package_even, *package_odd; @@ -259,6 +261,113 @@ struct msr_counter { #define SYSFS_PERCPU (1 << 1) }; +/* + * The accumulated sum of MSR is defined as a monotonic + * increasing MSR, it will be accumulated periodically, + * despite its register's bit width. + */ +enum { + IDX_PKG_ENERGY, + IDX_DRAM_ENERGY, + IDX_PP0_ENERGY, + IDX_PP1_ENERGY, + IDX_PKG_PERF, + IDX_DRAM_PERF, + IDX_COUNT, +}; + +int get_msr_sum(int cpu, off_t offset, unsigned long long *msr); + +struct msr_sum_array { + /* get_msr_sum() = sum + (get_msr() - last) */ + struct { + /*The accumulated MSR value is updated by the timer*/ + unsigned long long sum; + /*The MSR footprint recorded in last timer*/ + unsigned long long last; + } entries[IDX_COUNT]; +}; + +/* The percpu MSR sum array.*/ +struct msr_sum_array *per_cpu_msr_sum; + +int idx_to_offset(int idx) +{ + int offset; + + switch (idx) { + case IDX_PKG_ENERGY: + offset = MSR_PKG_ENERGY_STATUS; + break; + case IDX_DRAM_ENERGY: + offset = MSR_DRAM_ENERGY_STATUS; + break; + case IDX_PP0_ENERGY: + offset = MSR_PP0_ENERGY_STATUS; + break; + case IDX_PP1_ENERGY: + offset = MSR_PP1_ENERGY_STATUS; + break; + case IDX_PKG_PERF: + offset = MSR_PKG_PERF_STATUS; + break; + case IDX_DRAM_PERF: + offset = MSR_DRAM_PERF_STATUS; + break; + default: + offset = -1; + } + return offset; +} + +int offset_to_idx(int offset) +{ + int idx; + + switch (offset) { + case MSR_PKG_ENERGY_STATUS: + idx = IDX_PKG_ENERGY; + break; + case MSR_DRAM_ENERGY_STATUS: + idx = IDX_DRAM_ENERGY; + break; + case MSR_PP0_ENERGY_STATUS: + idx = IDX_PP0_ENERGY; + break; + case MSR_PP1_ENERGY_STATUS: + idx = IDX_PP1_ENERGY; + break; + case MSR_PKG_PERF_STATUS: + idx = IDX_PKG_PERF; + break; + case MSR_DRAM_PERF_STATUS: + idx = IDX_DRAM_PERF; + break; + default: + idx = -1; + } + return idx; +} + +int idx_valid(int idx) +{ + switch (idx) { + case IDX_PKG_ENERGY: + return do_rapl & RAPL_PKG; + case IDX_DRAM_ENERGY: + return do_rapl & RAPL_DRAM; + case IDX_PP0_ENERGY: + return do_rapl & RAPL_CORES_ENERGY_STATUS; + case IDX_PP1_ENERGY: + return do_rapl & RAPL_GFX; + case IDX_PKG_PERF: + return do_rapl & RAPL_PKG_PERF_STATUS; + case IDX_DRAM_PERF: + return do_rapl & RAPL_DRAM_PERF_STATUS; + default: + return 0; + } +} struct sys_counters { unsigned int added_thread_counters; unsigned int added_core_counters; @@ -451,6 +560,7 @@ struct msr_counter bic[] = { { 0x0, "APIC" }, { 0x0, "X2APIC" }, { 0x0, "Die" }, + { 0x0, "GFXAMHz" }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -505,6 +615,7 @@ struct msr_counter bic[] = { #define BIC_APIC (1ULL << 48) #define BIC_X2APIC (1ULL << 49) #define BIC_Die (1ULL << 50) +#define BIC_GFXACTMHz (1ULL << 51) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -724,6 +835,9 @@ void print_header(char *delim) if (DO_BIC(BIC_GFXMHz)) outp += sprintf(outp, "%sGFXMHz", (printed++ ? delim : "")); + if (DO_BIC(BIC_GFXACTMHz)) + outp += sprintf(outp, "%sGFXAMHz", (printed++ ? delim : "")); + if (DO_BIC(BIC_Totl_c0)) outp += sprintf(outp, "%sTotl%%C0", (printed++ ? delim : "")); if (DO_BIC(BIC_Any_c0)) @@ -858,13 +972,13 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "pc10: %016llX\n", p->pc10); outp += sprintf(outp, "cpu_lpi: %016llX\n", p->cpu_lpi); outp += sprintf(outp, "sys_lpi: %016llX\n", p->sys_lpi); - outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg); - outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores); - outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx); - outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram); - outp += sprintf(outp, "Throttle PKG: %0X\n", + outp += sprintf(outp, "Joules PKG: %0llX\n", p->energy_pkg); + outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores); + outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx); + outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram); + outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status); - outp += sprintf(outp, "Throttle RAM: %0X\n", + outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status); outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); @@ -1062,14 +1176,7 @@ int format_counters(struct thread_data *t, struct core_data *c, } } - /* - * If measurement interval exceeds minimum RAPL Joule Counter range, - * indicate that results are suspect by printing "**" in fraction place. - */ - if (interval_float < rapl_joule_counter_range) - fmt8 = "%s%.2f"; - else - fmt8 = "%6.0f**"; + fmt8 = "%s%.2f"; if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY)) outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float); @@ -1098,6 +1205,10 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_GFXMHz)) outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_mhz); + /* GFXACTMHz */ + if (DO_BIC(BIC_GFXACTMHz)) + outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), p->gfx_act_mhz); + /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ if (DO_BIC(BIC_Totl_c0)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc); @@ -1210,11 +1321,7 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ } #define DELTA_WRAP32(new, old) \ - if (new > old) { \ - old = new - old; \ - } else { \ - old = 0x100000000 + new - old; \ - } + old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32); int delta_package(struct pkg_data *new, struct pkg_data *old) @@ -1253,13 +1360,14 @@ delta_package(struct pkg_data *new, struct pkg_data *old) old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms; old->gfx_mhz = new->gfx_mhz; + old->gfx_act_mhz = new->gfx_act_mhz; - DELTA_WRAP32(new->energy_pkg, old->energy_pkg); - DELTA_WRAP32(new->energy_cores, old->energy_cores); - DELTA_WRAP32(new->energy_gfx, old->energy_gfx); - DELTA_WRAP32(new->energy_dram, old->energy_dram); - DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status); - DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status); + old->energy_pkg = new->energy_pkg - old->energy_pkg; + old->energy_cores = new->energy_cores - old->energy_cores; + old->energy_gfx = new->energy_gfx - old->energy_gfx; + old->energy_dram = new->energy_dram - old->energy_dram; + old->rapl_pkg_perf_status = new->rapl_pkg_perf_status - old->rapl_pkg_perf_status; + old->rapl_dram_perf_status = new->rapl_dram_perf_status - old->rapl_dram_perf_status; for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -1469,6 +1577,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->gfx_rc6_ms = 0; p->gfx_mhz = 0; + p->gfx_act_mhz = 0; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) t->counter[i] = 0; @@ -1564,6 +1673,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.packages.gfx_rc6_ms = p->gfx_rc6_ms; average.packages.gfx_mhz = p->gfx_mhz; + average.packages.gfx_act_mhz = p->gfx_act_mhz; average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c); @@ -1784,7 +1894,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) int i; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "get_counters: Could not migrate to CPU %d\n", cpu); return -1; } @@ -1966,39 +2076,39 @@ retry: p->sys_lpi = cpuidle_cur_sys_lpi_us; if (do_rapl & RAPL_PKG) { - if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PKG_ENERGY_STATUS, &msr)) return -13; - p->energy_pkg = msr & 0xFFFFFFFF; + p->energy_pkg = msr; } if (do_rapl & RAPL_CORES_ENERGY_STATUS) { - if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PP0_ENERGY_STATUS, &msr)) return -14; - p->energy_cores = msr & 0xFFFFFFFF; + p->energy_cores = msr; } if (do_rapl & RAPL_DRAM) { - if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) return -15; - p->energy_dram = msr & 0xFFFFFFFF; + p->energy_dram = msr; } if (do_rapl & RAPL_GFX) { - if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PP1_ENERGY_STATUS, &msr)) return -16; - p->energy_gfx = msr & 0xFFFFFFFF; + p->energy_gfx = msr; } if (do_rapl & RAPL_PKG_PERF_STATUS) { - if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_PKG_PERF_STATUS, &msr)) return -16; - p->rapl_pkg_perf_status = msr & 0xFFFFFFFF; + p->rapl_pkg_perf_status = msr; } if (do_rapl & RAPL_DRAM_PERF_STATUS) { - if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr)) + if (get_msr_sum(cpu, MSR_DRAM_PERF_STATUS, &msr)) return -16; - p->rapl_dram_perf_status = msr & 0xFFFFFFFF; + p->rapl_dram_perf_status = msr; } if (do_rapl & RAPL_AMD_F17H) { - if (get_msr(cpu, MSR_PKG_ENERGY_STAT, &msr)) + if (get_msr_sum(cpu, MSR_PKG_ENERGY_STAT, &msr)) return -13; - p->energy_pkg = msr & 0xFFFFFFFF; + p->energy_pkg = msr; } if (DO_BIC(BIC_PkgTmp)) { if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) @@ -2012,6 +2122,9 @@ retry: if (DO_BIC(BIC_GFXMHz)) p->gfx_mhz = gfx_cur_mhz; + if (DO_BIC(BIC_GFXACTMHz)) + p->gfx_act_mhz = gfx_act_mhz; + for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) { if (get_mp(cpu, mp, &p->counter[i])) return -10; @@ -2173,6 +2286,7 @@ int has_turbo_ratio_group_limits(int family, int model) case INTEL_FAM6_ATOM_GOLDMONT: case INTEL_FAM6_SKYLAKE_X: case INTEL_FAM6_ATOM_GOLDMONT_D: + case INTEL_FAM6_ATOM_TREMONT_D: return 1; } return 0; @@ -2650,7 +2764,12 @@ int get_thread_siblings(struct cpu_topology *thiscpu) sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu); - filep = fopen_or_die(path, "r"); + filep = fopen(path, "r"); + + if (!filep) { + warnx("%s: open failed", path); + return -1; + } do { offset -= BITMASK_SIZE; if (fscanf(filep, "%lx%c", &map, &character) != 2) @@ -2763,18 +2882,25 @@ void re_initialize(void) { free_all_buffers(); setup_all_buffers(); - printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); + fprintf(outf, "turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); } void set_max_cpu_num(void) { FILE *filep; + int base_cpu; unsigned long dummy; + char pathname[64]; + + base_cpu = sched_getcpu(); + if (base_cpu < 0) + err(1, "cannot find calling cpu ID"); + sprintf(pathname, + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", + base_cpu); + filep = fopen_or_die(pathname, "r"); topo.max_cpu_num = 0; - filep = fopen_or_die( - "/sys/devices/system/cpu/cpu0/topology/thread_siblings", - "r"); while (fscanf(filep, "%lx,", &dummy) == 1) topo.max_cpu_num += BITMASK_SIZE; fclose(filep); @@ -2916,6 +3042,33 @@ int snapshot_gfx_mhz(void) } /* + * snapshot_gfx_cur_mhz() + * + * record snapshot of + * /sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz + * + * return 1 if config change requires a restart, else return 0 + */ +int snapshot_gfx_act_mhz(void) +{ + static FILE *fp; + int retval; + + if (fp == NULL) + fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", "r"); + else { + rewind(fp); + fflush(fp); + } + + retval = fscanf(fp, "%d", &gfx_act_mhz); + if (retval != 1) + err(1, "GFX ACT MHz"); + + return 0; +} + +/* * snapshot_cpu_lpi() * * record snapshot of @@ -2980,6 +3133,9 @@ int snapshot_proc_sysfs_files(void) if (DO_BIC(BIC_GFXMHz)) snapshot_gfx_mhz(); + if (DO_BIC(BIC_GFXACTMHz)) + snapshot_gfx_act_mhz(); + if (DO_BIC(BIC_CPU_LPI)) snapshot_cpu_lpi_us(); @@ -3057,6 +3213,111 @@ void do_sleep(void) } } +int get_msr_sum(int cpu, off_t offset, unsigned long long *msr) +{ + int ret, idx; + unsigned long long msr_cur, msr_last; + + if (!per_cpu_msr_sum) + return 1; + + idx = offset_to_idx(offset); + if (idx < 0) + return idx; + /* get_msr_sum() = sum + (get_msr() - last) */ + ret = get_msr(cpu, offset, &msr_cur); + if (ret) + return ret; + msr_last = per_cpu_msr_sum[cpu].entries[idx].last; + DELTA_WRAP32(msr_cur, msr_last); + *msr = msr_last + per_cpu_msr_sum[cpu].entries[idx].sum; + + return 0; +} + +timer_t timerid; + +/* Timer callback, update the sum of MSRs periodically. */ +static int update_msr_sum(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ + int i, ret; + int cpu = t->cpu_id; + + for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) { + unsigned long long msr_cur, msr_last; + int offset; + + if (!idx_valid(i)) + continue; + offset = idx_to_offset(i); + if (offset < 0) + continue; + ret = get_msr(cpu, offset, &msr_cur); + if (ret) { + fprintf(outf, "Can not update msr(0x%x)\n", offset); + continue; + } + + msr_last = per_cpu_msr_sum[cpu].entries[i].last; + per_cpu_msr_sum[cpu].entries[i].last = msr_cur & 0xffffffff; + + DELTA_WRAP32(msr_cur, msr_last); + per_cpu_msr_sum[cpu].entries[i].sum += msr_last; + } + return 0; +} + +static void +msr_record_handler(union sigval v) +{ + for_all_cpus(update_msr_sum, EVEN_COUNTERS); +} + +void msr_sum_record(void) +{ + struct itimerspec its; + struct sigevent sev; + + per_cpu_msr_sum = calloc(topo.max_cpu_num + 1, sizeof(struct msr_sum_array)); + if (!per_cpu_msr_sum) { + fprintf(outf, "Can not allocate memory for long time MSR.\n"); + return; + } + /* + * Signal handler might be restricted, so use thread notifier instead. + */ + memset(&sev, 0, sizeof(struct sigevent)); + sev.sigev_notify = SIGEV_THREAD; + sev.sigev_notify_function = msr_record_handler; + + sev.sigev_value.sival_ptr = &timerid; + if (timer_create(CLOCK_REALTIME, &sev, &timerid) == -1) { + fprintf(outf, "Can not create timer.\n"); + goto release_msr; + } + + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 1; + /* + * A wraparound time has been calculated early. + * Some sources state that the peak power for a + * microprocessor is usually 1.5 times the TDP rating, + * use 2 * TDP for safety. + */ + its.it_interval.tv_sec = rapl_joule_counter_range / 2; + its.it_interval.tv_nsec = 0; + + if (timer_settime(timerid, 0, &its, NULL) == -1) { + fprintf(outf, "Can not set timer.\n"); + goto release_timer; + } + return; + + release_timer: + timer_delete(timerid); + release_msr: + free(per_cpu_msr_sum); +} void turbostat_loop() { @@ -3075,7 +3336,7 @@ restart: if (retval < -1) { exit(retval); } else if (retval == -1) { - if (restarted > 1) { + if (restarted > 10) { exit(retval); } re_initialize(); @@ -3279,6 +3540,7 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ case INTEL_FAM6_ATOM_TREMONT: /* EHL */ + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ pkg_cstate_limits = glm_pkg_cstate_limits; break; default: @@ -3361,6 +3623,17 @@ int is_ehl(unsigned int family, unsigned int model) } return 0; } +int is_jvl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case INTEL_FAM6_ATOM_TREMONT_D: + return 1; + } + return 0; +} int has_turbo_ratio_limit(unsigned int family, unsigned int model) { @@ -3475,6 +3748,20 @@ int has_config_tdp(unsigned int family, unsigned int model) } static void +remove_underbar(char *s) +{ + char *to = s; + + while (*s) { + if (*s != '_') + *to++ = *s; + s++; + } + + *to = 0; +} + +static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model) { if (!do_nhm_platform_info) @@ -3530,9 +3817,6 @@ dump_sysfs_cstate_config(void) int state; char *sp; - if (!DO_BIC(BIC_sysfs)) - return; - if (access("/sys/devices/system/cpu/cpuidle", R_OK)) { fprintf(outf, "cpuidle not loaded\n"); return; @@ -3559,6 +3843,8 @@ dump_sysfs_cstate_config(void) *sp = '\0'; fclose(input); + remove_underbar(name_buf); + sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state); input = fopen(path, "r"); @@ -3645,7 +3931,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_epb: Could not migrate to CPU %d\n", cpu); return -1; } @@ -3689,7 +3975,7 @@ int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p) return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_hwp: Could not migrate to CPU %d\n", cpu); return -1; } @@ -3777,7 +4063,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_perf_limit: Could not migrate to CPU %d\n", cpu); return -1; } @@ -3881,13 +4167,8 @@ double get_tdp_intel(unsigned int model) double get_tdp_amd(unsigned int family) { - switch (family) { - case 0x17: - case 0x18: - default: - /* This is the max stock TDP of HEDT/Server Fam17h chips */ - return 250.0; - } + /* This is the max stock TDP of HEDT/Server Fam17h+ chips */ + return 280.0; } /* @@ -3959,6 +4240,14 @@ void rapl_probe_intel(unsigned int family, unsigned int model) BIC_PRESENT(BIC_GFXWatt); } break; + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ + do_rapl = RAPL_PKG | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + BIC_PRESENT(BIC_PKG__); + if (rapl_joules) + BIC_PRESENT(BIC_Pkg_J); + else + BIC_PRESENT(BIC_PkgWatt); + break; case INTEL_FAM6_SKYLAKE_L: /* SKL */ case INTEL_FAM6_CANNONLAKE_L: /* CNL */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO; @@ -4069,27 +4358,20 @@ void rapl_probe_amd(unsigned int family, unsigned int model) if (max_extended_level >= 0x80000007) { __cpuid(0x80000007, eax, ebx, ecx, edx); - /* RAPL (Fam 17h) */ + /* RAPL (Fam 17h+) */ has_rapl = edx & (1 << 14); } - if (!has_rapl) + if (!has_rapl || family < 0x17) return; - switch (family) { - case 0x17: /* Zen, Zen+ */ - case 0x18: /* Hygon Dhyana */ - do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; - if (rapl_joules) { - BIC_PRESENT(BIC_Pkg_J); - BIC_PRESENT(BIC_Cor_J); - } else { - BIC_PRESENT(BIC_PkgWatt); - BIC_PRESENT(BIC_CorWatt); - } - break; - default: - return; + do_rapl = RAPL_AMD_F17H | RAPL_PER_CORE_ENERGY; + if (rapl_joules) { + BIC_PRESENT(BIC_Pkg_J); + BIC_PRESENT(BIC_Cor_J); + } else { + BIC_PRESENT(BIC_PkgWatt); + BIC_PRESENT(BIC_CorWatt); } if (get_msr(base_cpu, MSR_RAPL_PWR_UNIT, &msr)) @@ -4162,7 +4444,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p return 0; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_thermal: Could not migrate to CPU %d\n", cpu); return -1; } @@ -4234,7 +4516,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) cpu = t->cpu_id; if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); + fprintf(outf, "print_rapl: Could not migrate to CPU %d\n", cpu); return -1; } @@ -4361,6 +4643,7 @@ int has_snb_msrs(unsigned int family, unsigned int model) case INTEL_FAM6_ATOM_GOLDMONT_PLUS: case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */ case INTEL_FAM6_ATOM_TREMONT: /* EHL */ + case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */ return 1; } return 0; @@ -4507,12 +4790,33 @@ double discover_bclk(unsigned int family, unsigned int model) * below this value, including the Digital Thermal Sensor (DTS), * Package Thermal Management Sensor (PTM), and thermal event thresholds. */ -int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) +int read_tcc_activation_temp() { unsigned long long msr; - unsigned int target_c_local; - int cpu; + unsigned int tcc, target_c, offset_c; + /* Temperature Target MSR is Nehalem and newer only */ + if (!do_nhm_platform_info) + return 0; + + if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) + return 0; + + target_c = (msr >> 16) & 0xFF; + + offset_c = (msr >> 24) & 0xF; + + tcc = target_c - offset_c; + + if (!quiet) + fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n", + base_cpu, msr, tcc, target_c, offset_c); + + return tcc; +} + +int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ /* tcc_activation_temp is used only for dts or ptm */ if (!(do_dts || do_ptm)) return 0; @@ -4521,43 +4825,18 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; - cpu = t->cpu_id; - if (cpu_migrate(cpu)) { - fprintf(outf, "Could not migrate to CPU %d\n", cpu); - return -1; - } - if (tcc_activation_temp_override != 0) { tcc_activation_temp = tcc_activation_temp_override; - fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", - cpu, tcc_activation_temp); + fprintf(outf, "Using cmdline TCC Target (%d C)\n", tcc_activation_temp); return 0; } - /* Temperature Target MSR is Nehalem and newer only */ - if (!do_nhm_platform_info) - goto guess; - - if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) - goto guess; - - target_c_local = (msr >> 16) & 0xFF; - - if (!quiet) - fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", - cpu, msr, target_c_local); - - if (!target_c_local) - goto guess; - - tcc_activation_temp = target_c_local; - - return 0; + tcc_activation_temp = read_tcc_activation_temp(); + if (tcc_activation_temp) + return 0; -guess: tcc_activation_temp = TJMAX_DEFAULT; - fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", - cpu, tcc_activation_temp); + fprintf(outf, "Guessing tjMax %d C, Please use -T to specify\n", tcc_activation_temp); return 0; } @@ -4685,19 +4964,46 @@ unsigned int intel_model_duplicates(unsigned int model) case INTEL_FAM6_ICELAKE_NNPI: case INTEL_FAM6_TIGERLAKE_L: case INTEL_FAM6_TIGERLAKE: + case INTEL_FAM6_ROCKETLAKE: + case INTEL_FAM6_LAKEFIELD: + case INTEL_FAM6_ALDERLAKE: return INTEL_FAM6_CANNONLAKE_L; - case INTEL_FAM6_ATOM_TREMONT_D: - return INTEL_FAM6_ATOM_GOLDMONT_D; - case INTEL_FAM6_ATOM_TREMONT_L: return INTEL_FAM6_ATOM_TREMONT; case INTEL_FAM6_ICELAKE_X: + case INTEL_FAM6_SAPPHIRERAPIDS_X: return INTEL_FAM6_SKYLAKE_X; } return model; } + +void print_dev_latency(void) +{ + char *path = "/dev/cpu_dma_latency"; + int fd; + int value; + int retval; + + fd = open(path, O_RDONLY); + if (fd < 0) { + warn("fopen %s\n", path); + return; + } + + retval = read(fd, (void *)&value, sizeof(int)); + if (retval != sizeof(int)) { + warn("read %s\n", path); + close(fd); + return; + } + fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", + value, value == 2000000000 ? "default" : "constrained"); + + close(fd); +} + void process_cpuid() { unsigned int eax, ebx, ecx, edx; @@ -4916,6 +5222,14 @@ void process_cpuid() BIC_PRESENT(BIC_Mod_c6); use_c1_residency_msr = 1; } + if (is_jvl(family, model)) { + BIC_NOT_PRESENT(BIC_CPU_c3); + BIC_NOT_PRESENT(BIC_CPU_c7); + BIC_NOT_PRESENT(BIC_Pkgpc2); + BIC_NOT_PRESENT(BIC_Pkgpc3); + BIC_NOT_PRESENT(BIC_Pkgpc6); + BIC_NOT_PRESENT(BIC_Pkgpc7); + } if (is_dnv(family, model)) { BIC_PRESENT(BIC_CPU_c1); BIC_NOT_PRESENT(BIC_CPU_c3); @@ -4935,9 +5249,12 @@ void process_cpuid() BIC_NOT_PRESENT(BIC_Pkgpc7); } if (has_c8910_msrs(family, model)) { - BIC_PRESENT(BIC_Pkgpc8); - BIC_PRESENT(BIC_Pkgpc9); - BIC_PRESENT(BIC_Pkgpc10); + if (pkg_cstate_limit >= PCL__8) + BIC_PRESENT(BIC_Pkgpc8); + if (pkg_cstate_limit >= PCL__9) + BIC_PRESENT(BIC_Pkgpc9); + if (pkg_cstate_limit >= PCL_10) + BIC_PRESENT(BIC_Pkgpc10); } do_irtl_hsw = has_c8910_msrs(family, model); if (has_skl_msrs(family, model)) { @@ -4967,6 +5284,8 @@ void process_cpuid() dump_cstate_pstate_config_info(family, model); if (!quiet) + print_dev_latency(); + if (!quiet) dump_sysfs_cstate_config(); if (!quiet) dump_sysfs_pstate_config(); @@ -4980,6 +5299,9 @@ void process_cpuid() if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK)) BIC_PRESENT(BIC_GFXMHz); + if (!access("/sys/class/graphics/fb0/device/drm/card0/gt_act_freq_mhz", R_OK)) + BIC_PRESENT(BIC_GFXACTMHz); + if (!access("/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us", R_OK)) BIC_PRESENT(BIC_CPU_LPI); else @@ -5390,7 +5712,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 20.03.20" + fprintf(outf, "turbostat version 20.09.30" " - Len Brown <lenb@kernel.org>\n"); } @@ -5597,6 +5919,8 @@ void probe_sysfs(void) *sp = '%'; *(sp + 1) = '\0'; + remove_underbar(name_buf); + fclose(input); sprintf(path, "cpuidle/state%d/time", state); @@ -5624,6 +5948,8 @@ void probe_sysfs(void) *sp = '\0'; fclose(input); + remove_underbar(name_buf); + sprintf(path, "cpuidle/state%d/usage", state); if (is_deferred_skip(name_buf)) @@ -5868,6 +6194,7 @@ int main(int argc, char **argv) return 0; } + msr_sum_record(); /* * if any params left, it must be a command to fork */ diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c index 3fe1eed900d4..ff6c6661f075 100644 --- a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -622,6 +622,57 @@ void cmdline(int argc, char **argv) } } +/* + * Open a file, and exit on failure + */ +FILE *fopen_or_die(const char *path, const char *mode) +{ + FILE *filep = fopen(path, "r"); + + if (!filep) + err(1, "%s: open failed", path); + return filep; +} + +void err_on_hypervisor(void) +{ + FILE *cpuinfo; + char *flags, *hypervisor; + char *buffer; + + /* On VMs /proc/cpuinfo contains a "flags" entry for hypervisor */ + cpuinfo = fopen_or_die("/proc/cpuinfo", "ro"); + + buffer = malloc(4096); + if (!buffer) { + fclose(cpuinfo); + err(-ENOMEM, "buffer malloc fail"); + } + + if (!fread(buffer, 1024, 1, cpuinfo)) { + fclose(cpuinfo); + free(buffer); + err(1, "Reading /proc/cpuinfo failed"); + } + + flags = strstr(buffer, "flags"); + rewind(cpuinfo); + fseek(cpuinfo, flags - buffer, SEEK_SET); + if (!fgets(buffer, 4096, cpuinfo)) { + fclose(cpuinfo); + free(buffer); + err(1, "Reading /proc/cpuinfo failed"); + } + fclose(cpuinfo); + + hypervisor = strstr(buffer, "hypervisor"); + + free(buffer); + + if (hypervisor) + err(-1, + "not supported on this virtual machine"); +} int get_msr(int cpu, int offset, unsigned long long *msr) { @@ -635,8 +686,10 @@ int get_msr(int cpu, int offset, unsigned long long *msr) err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname); retval = pread(fd, msr, sizeof(*msr), offset); - if (retval != sizeof(*msr)) + if (retval != sizeof(*msr)) { + err_on_hypervisor(); err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset); + } if (debug > 1) fprintf(stderr, "get_msr(cpu%d, 0x%X, 0x%llX)\n", cpu, offset, *msr); @@ -1086,18 +1139,6 @@ int update_cpu_msrs(int cpu) return 0; } -/* - * Open a file, and exit on failure - */ -FILE *fopen_or_die(const char *path, const char *mode) -{ - FILE *filep = fopen(path, "r"); - - if (!filep) - err(1, "%s: open failed", path); - return filep; -} - unsigned int get_pkg_num(int cpu) { FILE *fp; diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index 8019e3dd4c32..84a1af2581f5 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -66,7 +66,6 @@ def isolate_kunit_output(kernel_output): def raw_output(kernel_output): for line in kernel_output: print(line) - yield line DIVIDER = '=' * 60 @@ -242,7 +241,7 @@ def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite: return None test_suite.name = name expected_test_case_num = parse_subtest_plan(lines) - if not expected_test_case_num: + if expected_test_case_num is None: return None while expected_test_case_num > 0: test_case = parse_test_case(lines) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 99c3c5671ea4..0b60855fb819 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -179,7 +179,7 @@ class KUnitParserTest(unittest.TestCase): print_mock = mock.patch('builtins.print').start() result = kunit_parser.parse_run_tests( kunit_parser.isolate_kunit_output(file.readlines())) - print_mock.assert_any_call(StrContains("no kunit output detected")) + print_mock.assert_any_call(StrContains('no tests run!')) print_mock.stop() file.close() @@ -198,39 +198,57 @@ class KUnitParserTest(unittest.TestCase): 'test_data/test_config_printk_time.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_ignores_multiple_prefixes(self): prefix_log = get_absolute_path( 'test_data/test_multiple_prefixes.log') with open(prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_prefix_mixed_kernel_output(self): mixed_prefix_log = get_absolute_path( 'test_data/test_interrupted_tap_output.log') with open(mixed_prefix_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_prefix_poundsign(self): pound_log = get_absolute_path('test_data/test_pound_sign.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_kernel_panic_end(self): panic_log = get_absolute_path('test_data/test_kernel_panic_interrupt.log') with open(panic_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.TEST_CRASHED, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) def test_pound_no_prefix(self): pound_log = get_absolute_path('test_data/test_pound_no_prefix.log') with open(pound_log) as file: result = kunit_parser.parse_run_tests(file.readlines()) - self.assertEqual('kunit-resource-test', result.suites[0].name) + self.assertEqual( + kunit_parser.TestStatus.SUCCESS, + result.status) + self.assertEqual('kunit-resource-test', result.suites[0].name) class KUnitJsonTest(unittest.TestCase): diff --git a/tools/testing/kunit/test_data/test_config_printk_time.log b/tools/testing/kunit/test_data/test_config_printk_time.log index c02ca773946d..6bdb57f76eac 100644 --- a/tools/testing/kunit/test_data/test_config_printk_time.log +++ b/tools/testing/kunit/test_data/test_config_printk_time.log @@ -1,6 +1,7 @@ [ 0.060000] printk: console [mc-1] enabled [ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 [ 0.060000] TAP version 14 +[ 0.060000] 1..3 [ 0.060000] # Subtest: kunit-resource-test [ 0.060000] 1..5 [ 0.060000] ok 1 - kunit_resource_test_init_resources @@ -28,4 +29,4 @@ [ 0.060000] Stack: [ 0.060000] 602086f8 601bc260 705c0000 705c0000 [ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab -[ 0.060000] 6005fcec 601bc260 705c0000 3000000010
\ No newline at end of file +[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 diff --git a/tools/testing/kunit/test_data/test_interrupted_tap_output.log b/tools/testing/kunit/test_data/test_interrupted_tap_output.log index 5c73fb3a1c6f..1fb677728abe 100644 --- a/tools/testing/kunit/test_data/test_interrupted_tap_output.log +++ b/tools/testing/kunit/test_data/test_interrupted_tap_output.log @@ -1,6 +1,7 @@ [ 0.060000] printk: console [mc-1] enabled [ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 [ 0.060000] TAP version 14 +[ 0.060000] 1..3 [ 0.060000] # Subtest: kunit-resource-test [ 0.060000] 1..5 [ 0.060000] ok 1 - kunit_resource_test_init_resources @@ -34,4 +35,4 @@ [ 0.060000] Stack: [ 0.060000] 602086f8 601bc260 705c0000 705c0000 [ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab -[ 0.060000] 6005fcec 601bc260 705c0000 3000000010
\ No newline at end of file +[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 diff --git a/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log index c045eee75f27..a014ffe9725e 100644 --- a/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log +++ b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log @@ -1,6 +1,7 @@ [ 0.060000] printk: console [mc-1] enabled [ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 [ 0.060000] TAP version 14 +[ 0.060000] 1..3 [ 0.060000] # Subtest: kunit-resource-test [ 0.060000] 1..5 [ 0.060000] ok 1 - kunit_resource_test_init_resources @@ -22,4 +23,4 @@ [ 0.060000] Stack: [ 0.060000] 602086f8 601bc260 705c0000 705c0000 [ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab -[ 0.060000] 6005fcec 601bc260 705c0000 3000000010
\ No newline at end of file +[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 diff --git a/tools/testing/kunit/test_data/test_multiple_prefixes.log b/tools/testing/kunit/test_data/test_multiple_prefixes.log index bc48407dcc36..0ad78481a0b4 100644 --- a/tools/testing/kunit/test_data/test_multiple_prefixes.log +++ b/tools/testing/kunit/test_data/test_multiple_prefixes.log @@ -1,6 +1,7 @@ [ 0.060000][ T1] printk: console [mc-1] enabled [ 0.060000][ T1] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 [ 0.060000][ T1] TAP version 14 +[ 0.060000][ T1] 1..3 [ 0.060000][ T1] # Subtest: kunit-resource-test [ 0.060000][ T1] 1..5 [ 0.060000][ T1] ok 1 - kunit_resource_test_init_resources @@ -28,4 +29,4 @@ [ 0.060000][ T1] Stack: [ 0.060000][ T1] 602086f8 601bc260 705c0000 705c0000 [ 0.060000][ T1] 602086f8 6005fcec 705c0000 6002c6ab -[ 0.060000][ T1] 6005fcec 601bc260 705c0000 3000000010
\ No newline at end of file +[ 0.060000][ T1] 6005fcec 601bc260 705c0000 3000000010 diff --git a/tools/testing/kunit/test_data/test_pound_no_prefix.log b/tools/testing/kunit/test_data/test_pound_no_prefix.log index 2ceb360be7d5..dc4cf09a96d0 100644 --- a/tools/testing/kunit/test_data/test_pound_no_prefix.log +++ b/tools/testing/kunit/test_data/test_pound_no_prefix.log @@ -1,6 +1,7 @@ printk: console [mc-1] enabled random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 TAP version 14 + 1..3 # Subtest: kunit-resource-test 1..5 ok 1 - kunit_resource_test_init_resources @@ -30,4 +31,4 @@ Stack: 602086f8 601bc260 705c0000 705c0000 602086f8 6005fcec 705c0000 6002c6ab - 6005fcec 601bc260 705c0000 3000000010
\ No newline at end of file + 6005fcec 601bc260 705c0000 3000000010 diff --git a/tools/testing/kunit/test_data/test_pound_sign.log b/tools/testing/kunit/test_data/test_pound_sign.log index 28ffa5ba03bf..3f358e3a7ba0 100644 --- a/tools/testing/kunit/test_data/test_pound_sign.log +++ b/tools/testing/kunit/test_data/test_pound_sign.log @@ -1,6 +1,7 @@ [ 0.060000] printk: console [mc-1] enabled [ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0 [ 0.060000] TAP version 14 +[ 0.060000] 1..3 [ 0.060000] # Subtest: kunit-resource-test [ 0.060000] 1..5 [ 0.060000] ok 1 - kunit_resource_test_init_resources diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index 242635d79035..c9fa141ebdcc 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -417,6 +417,9 @@ int main(int argc, char *argv[]) /* Register SIGSEGV handler */ mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(20); + /* Buffer by byte tests */ evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR), "Check buffer correctness by byte with sync err mode and mmap memory\n"); diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c index 97bebdecd29e..43bd94f853ba 100644 --- a/tools/testing/selftests/arm64/mte/check_child_memory.c +++ b/tools/testing/selftests/arm64/mte/check_child_memory.c @@ -163,6 +163,9 @@ int main(int argc, char *argv[]) mte_register_signal(SIGSEGV, mte_default_handler); mte_register_signal(SIGBUS, mte_default_handler); + /* Set test plan */ + ksft_set_plan(12); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), "Check child anonymous memory with private mapping, precise mode and mmap memory\n"); evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c index bc41ae630c86..3b23c4d61d38 100644 --- a/tools/testing/selftests/arm64/mte/check_ksm_options.c +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -140,6 +140,10 @@ int main(int argc, char *argv[]) /* Register signal handlers */ mte_register_signal(SIGBUS, mte_default_handler); mte_register_signal(SIGSEGV, mte_default_handler); + + /* Set test plan */ + ksft_set_plan(4); + /* Enable KSM */ mte_ksm_setup(); diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index 33b13b86199b..a04b12c21ac9 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -205,7 +205,11 @@ int main(int argc, char *argv[]) mte_register_signal(SIGBUS, mte_default_handler); mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(22); + mte_enable_pstate_tco(); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n"); evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c index 94d245a0ed56..deaef1f61076 100644 --- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -170,6 +170,9 @@ int main(int argc, char *argv[]) /* Register SIGSEGV handler */ mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(4); + evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR), "Check an included tag value with sync mode\n"); evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR), diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index 594e98e76880..4bfa80f2a8c3 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -92,9 +92,13 @@ int main(int argc, char *argv[]) err = mte_default_setup(); if (err) return err; + /* Register signal handlers */ mte_register_signal(SIGSEGV, mte_default_handler); + /* Set test plan */ + ksft_set_plan(4); + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), "Check memory access from kernel in sync mode, private mapping and mmap memory\n"); evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c new file mode 100644 index 000000000000..14a31109dd0e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_init.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */ + +#include <test_progs.h> +#include "test_map_init.skel.h" + +#define TEST_VALUE 0x1234 +#define FILL_VALUE 0xdeadbeef + +static int nr_cpus; +static int duration; + +typedef unsigned long long map_key_t; +typedef unsigned long long map_value_t; +typedef struct { + map_value_t v; /* padding */ +} __bpf_percpu_val_align pcpu_map_value_t; + + +static int map_populate(int map_fd, int num) +{ + pcpu_map_value_t value[nr_cpus]; + int i, err; + map_key_t key; + + for (i = 0; i < nr_cpus; i++) + bpf_percpu(value, i) = FILL_VALUE; + + for (key = 1; key <= num; key++) { + err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem")) + return -1; + } + + return 0; +} + +static struct test_map_init *setup(enum bpf_map_type map_type, int map_sz, + int *map_fd, int populate) +{ + struct test_map_init *skel; + int err; + + skel = test_map_init__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return NULL; + + err = bpf_map__set_type(skel->maps.hashmap1, map_type); + if (!ASSERT_OK(err, "bpf_map__set_type")) + goto error; + + err = bpf_map__set_max_entries(skel->maps.hashmap1, map_sz); + if (!ASSERT_OK(err, "bpf_map__set_max_entries")) + goto error; + + err = test_map_init__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto error; + + *map_fd = bpf_map__fd(skel->maps.hashmap1); + if (CHECK(*map_fd < 0, "bpf_map__fd", "failed\n")) + goto error; + + err = map_populate(*map_fd, populate); + if (!ASSERT_OK(err, "map_populate")) + goto error_map; + + return skel; + +error_map: + close(*map_fd); +error: + test_map_init__destroy(skel); + return NULL; +} + +/* executes bpf program that updates map with key, value */ +static int prog_run_insert_elem(struct test_map_init *skel, map_key_t key, + map_value_t value) +{ + struct test_map_init__bss *bss; + + bss = skel->bss; + + bss->inKey = key; + bss->inValue = value; + bss->inPid = getpid(); + + if (!ASSERT_OK(test_map_init__attach(skel), "skel_attach")) + return -1; + + /* Let tracepoint trigger */ + syscall(__NR_getpgid); + + test_map_init__detach(skel); + + return 0; +} + +static int check_values_one_cpu(pcpu_map_value_t *value, map_value_t expected) +{ + int i, nzCnt = 0; + map_value_t val; + + for (i = 0; i < nr_cpus; i++) { + val = bpf_percpu(value, i); + if (val) { + if (CHECK(val != expected, "map value", + "unexpected for cpu %d: 0x%llx\n", i, val)) + return -1; + nzCnt++; + } + } + + if (CHECK(nzCnt != 1, "map value", "set for %d CPUs instead of 1!\n", + nzCnt)) + return -1; + + return 0; +} + +/* Add key=1 elem with values set for all CPUs + * Delete elem key=1 + * Run bpf prog that inserts new key=1 elem with value=0x1234 + * (bpf prog can only set value for current CPU) + * Lookup Key=1 and check value is as expected for all CPUs: + * value set by bpf prog for one CPU, 0 for all others + */ +static void test_pcpu_map_init(void) +{ + pcpu_map_value_t value[nr_cpus]; + struct test_map_init *skel; + int map_fd, err; + map_key_t key; + + /* max 1 elem in map so insertion is forced to reuse freed entry */ + skel = setup(BPF_MAP_TYPE_PERCPU_HASH, 1, &map_fd, 1); + if (!ASSERT_OK_PTR(skel, "prog_setup")) + return; + + /* delete element so the entry can be re-used*/ + key = 1; + err = bpf_map_delete_elem(map_fd, &key); + if (!ASSERT_OK(err, "bpf_map_delete_elem")) + goto cleanup; + + /* run bpf prog that inserts new elem, re-using the slot just freed */ + err = prog_run_insert_elem(skel, key, TEST_VALUE); + if (!ASSERT_OK(err, "prog_run_insert_elem")) + goto cleanup; + + /* check that key=1 was re-created by bpf prog */ + err = bpf_map_lookup_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto cleanup; + + /* and has expected values */ + check_values_one_cpu(value, TEST_VALUE); + +cleanup: + test_map_init__destroy(skel); +} + +/* Add key=1 and key=2 elems with values set for all CPUs + * Run bpf prog that inserts new key=3 elem + * (only for current cpu; other cpus should have initial value = 0) + * Lookup Key=1 and check value is as expected for all CPUs + */ +static void test_pcpu_lru_map_init(void) +{ + pcpu_map_value_t value[nr_cpus]; + struct test_map_init *skel; + int map_fd, err; + map_key_t key; + + /* Set up LRU map with 2 elements, values filled for all CPUs. + * With these 2 elements, the LRU map is full + */ + skel = setup(BPF_MAP_TYPE_LRU_PERCPU_HASH, 2, &map_fd, 2); + if (!ASSERT_OK_PTR(skel, "prog_setup")) + return; + + /* run bpf prog that inserts new key=3 element, re-using LRU slot */ + key = 3; + err = prog_run_insert_elem(skel, key, TEST_VALUE); + if (!ASSERT_OK(err, "prog_run_insert_elem")) + goto cleanup; + + /* check that key=3 replaced one of earlier elements */ + err = bpf_map_lookup_elem(map_fd, &key, value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto cleanup; + + /* and has expected values */ + check_values_one_cpu(value, TEST_VALUE); + +cleanup: + test_map_init__destroy(skel); +} + +void test_map_init(void) +{ + nr_cpus = bpf_num_possible_cpus(); + if (nr_cpus <= 1) { + printf("%s:SKIP: >1 cpu needed for this test\n", __func__); + test__skip(); + return; + } + + if (test__start_subtest("pcpu_map_init")) + test_pcpu_map_init(); + if (test__start_subtest("pcpu_lru_map_init")) + test_pcpu_lru_map_init(); +} diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h index 00578311a423..30982a7e4d0f 100644 --- a/tools/testing/selftests/bpf/progs/profiler.inc.h +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -243,7 +243,10 @@ static ino_t get_inode_from_kernfs(struct kernfs_node* node) } } -int pids_cgrp_id = 1; +extern bool CONFIG_CGROUP_PIDS __kconfig __weak; +enum cgroup_subsys_id___local { + pids_cgrp_id___local = 123, /* value doesn't matter */ +}; static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, struct task_struct* task, @@ -253,7 +256,9 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn); struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); - if (ENABLE_CGROUP_V1_RESOLVER) { + if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) { + int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local, + pids_cgrp_id___local); #ifdef UNROLL #pragma unroll #endif @@ -262,7 +267,7 @@ static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, BPF_CORE_READ(task, cgroups, subsys[i]); if (subsys != NULL) { int subsys_id = BPF_CORE_READ(subsys, ss, id); - if (subsys_id == pids_cgrp_id) { + if (subsys_id == cgrp_id) { proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn); root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn); break; diff --git a/tools/testing/selftests/bpf/progs/test_map_init.c b/tools/testing/selftests/bpf/progs/test_map_init.c new file mode 100644 index 000000000000..c89d28ead673 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_map_init.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +__u64 inKey = 0; +__u64 inValue = 0; +__u32 inPid = 0; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, 2); + __type(key, __u64); + __type(value, __u64); +} hashmap1 SEC(".maps"); + + +SEC("tp/syscalls/sys_enter_getpgid") +int sysenter_getpgid(const void *ctx) +{ + /* Just do it for once, when called from our own test prog. This + * ensures the map value is only updated for a single CPU. + */ + int cur_pid = bpf_get_current_pid_tgid() >> 32; + + if (cur_pid == inPid) + bpf_map_update_elem(&hashmap1, &inKey, &inValue, BPF_NOEXIST); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c index 55bd387ce7ec..52d3f0364bda 100644 --- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c +++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c @@ -145,7 +145,7 @@ TEST(clone3_cap_checkpoint_restore) test_clone3_supported(); EXPECT_EQ(getuid(), 0) - XFAIL(return, "Skipping all tests as non-root\n"); + SKIP(return, "Skipping all tests as non-root"); memset(&set_tid, 0, sizeof(set_tid)); diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index c99b98b0d461..575b391ddc78 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -44,7 +44,7 @@ TEST(close_range) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -52,7 +52,7 @@ TEST(close_range) EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) { if (errno == ENOSYS) - XFAIL(return, "close_range() syscall not supported"); + SKIP(return, "close_range() syscall not supported"); } EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0)); @@ -108,7 +108,7 @@ TEST(close_range_unshare) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; @@ -197,7 +197,7 @@ TEST(close_range_unshare_capped) fd = open("/dev/null", O_RDONLY | O_CLOEXEC); ASSERT_GE(fd, 0) { if (errno == ENOENT) - XFAIL(return, "Skipping test since /dev/null does not exist"); + SKIP(return, "Skipping test since /dev/null does not exist"); } open_fds[i] = fd; diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh new file mode 100755 index 000000000000..be0c1b5ee6b8 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh @@ -0,0 +1,436 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking the nexthop offload API. It makes use of netdevsim +# which registers a listener to the nexthop notification chain. + +lib_dir=$(dirname $0)/../../../net/forwarding + +ALL_TESTS=" + nexthop_single_add_test + nexthop_single_add_err_test + nexthop_group_add_test + nexthop_group_add_err_test + nexthop_group_replace_test + nexthop_group_replace_err_test + nexthop_single_replace_test + nexthop_single_replace_err_test + nexthop_single_in_group_replace_test + nexthop_single_in_group_replace_err_test + nexthop_single_in_group_delete_test + nexthop_single_in_group_delete_err_test + nexthop_replay_test + nexthop_replay_err_test +" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} +SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/ +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh + +nexthop_check() +{ + local nharg="$1"; shift + local expected="$1"; shift + + out=$($IP nexthop show ${nharg} | sed -e 's/ *$//') + if [[ "$out" != "$expected" ]]; then + return 1 + fi + + return 0 +} + +nexthop_resource_check() +{ + local expected_occ=$1; shift + + occ=$($DEVLINK -jp resource show $DEVLINK_DEV \ + | jq '.[][][] | select(.name=="nexthops") | .["occ"]') + + if [ $expected_occ -ne $occ ]; then + return 1 + fi + + return 0 +} + +nexthop_resource_set() +{ + local size=$1; shift + + $DEVLINK resource set $DEVLINK_DEV path nexthops size $size + $DEVLINK dev reload $DEVLINK_DEV +} + +nexthop_single_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 1 + nexthop_resource_check 0 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Single nexthop add and delete" +} + +nexthop_single_add_err_test() +{ + RET=0 + + nexthop_resource_set 1 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 &> /dev/null + check_fail $? "Nexthop addition succeeded when should fail" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_group_add_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + $IP nexthop add id 10 group 1,20/2,39 + nexthop_check "id 10" "id 10 group 1,20/2,39 trap" + check_err $? "Unexpected weighted nexthop group entry" + + nexthop_resource_check 61 + check_err $? "Wrong weighted nexthop occupancy" + + $IP nexthop del id 10 + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy after delete" + + log_test "Nexthop group add and delete" + + $IP nexthop flush &> /dev/null +} + +nexthop_group_add_err_test() +{ + RET=0 + + nexthop_resource_set 2 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + + $IP nexthop add id 10 group 1/2 &> /dev/null + check_fail $? "Nexthop group addition succeeded when should fail" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop group add failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 10 group 1/2/3 + nexthop_check "id 10" "id 10 group 1/2/3 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 6 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop group replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_group_replace_err_test() +{ + RET=0 + + nexthop_resource_set 5 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 10 group 1/2/3 &> /dev/null + check_fail $? "Nexthop group replacement succeeded when should fail" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Nexthop group replace failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 + nexthop_check "id 1" "id 1 via 192.0.2.3 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_replace_err_test() +{ + RET=0 + + # This is supposed to cause the replace to fail because the new nexthop + # is programmed before deleting the replaced one. + nexthop_resource_set 1 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + + $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 &> /dev/null + check_fail $? "Nexthop replace succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_resource_check 1 + check_err $? "Wrong nexthop occupancy after failure" + + log_test "Single nexthop replace failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_in_group_replace_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 + check_err $? "Failed to replace nexthop when should not" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_group_replace_err_test() +{ + RET=0 + + nexthop_resource_set 5 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null + check_fail $? "Nexthop replacement succeeded when should fail" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after failure" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after failure" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop replace while in group failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_single_in_group_delete_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $IP nexthop del id 1 + nexthop_check "id 10" "id 10 group 2 trap" + check_err $? "Unexpected nexthop group entry" + + nexthop_resource_check 2 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in group" + + $IP nexthop flush &> /dev/null +} + +nexthop_single_in_group_delete_err_test() +{ + RET=0 + + # First, nexthop 1 will be deleted, which will reduce the occupancy to + # 5. Afterwards, a replace notification will be sent for nexthop group + # 10 with only two nexthops. Since the new group is allocated before + # the old is deleted, the replacement will fail as it will result in an + # occupancy of 7. + nexthop_resource_set 6 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 3 via 192.0.2.4 dev dummy1 + $IP nexthop add id 10 group 1/2/3 + + $IP nexthop del id 1 + + nexthop_resource_check 5 + check_err $? "Wrong nexthop occupancy" + + log_test "Single nexthop delete while in group failure" + + $IP nexthop flush &> /dev/null + nexthop_resource_set 9999 +} + +nexthop_replay_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + $DEVLINK dev reload $DEVLINK_DEV + check_err $? "Failed to reload when should not" + + nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after reload" + + nexthop_check "id 2" "id 2 via 192.0.2.3 dev dummy1 scope link trap" + check_err $? "Unexpected nexthop entry after reload" + + nexthop_check "id 10" "id 10 group 1/2 trap" + check_err $? "Unexpected nexthop group entry after reload" + + nexthop_resource_check 4 + check_err $? "Wrong nexthop occupancy" + + log_test "Nexthop replay" + + $IP nexthop flush &> /dev/null +} + +nexthop_replay_err_test() +{ + RET=0 + + $IP nexthop add id 1 via 192.0.2.2 dev dummy1 + $IP nexthop add id 2 via 192.0.2.3 dev dummy1 + $IP nexthop add id 10 group 1/2 + + # Reduce size of nexthop resource so that reload will fail. + $DEVLINK resource set $DEVLINK_DEV path nexthops size 3 + $DEVLINK dev reload $DEVLINK_DEV &> /dev/null + check_fail $? "Reload succeeded when should fail" + + $DEVLINK resource set $DEVLINK_DEV path nexthops size 9999 + $DEVLINK dev reload $DEVLINK_DEV + check_err $? "Failed to reload when should not" + + log_test "Nexthop replay failure" + + $IP nexthop flush &> /dev/null +} + +setup_prepare() +{ + local netdev + + modprobe netdevsim &> /dev/null + + echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + while [ ! -d $SYSFS_NET_DIR ] ; do :; done + + set -e + + ip netns add testns1 + devlink dev reload $DEVLINK_DEV netns testns1 + + IP="ip -netns testns1" + DEVLINK="devlink -N testns1" + + $IP link add name dummy1 up type dummy + $IP address add 192.0.2.1/24 dev dummy1 + + set +e +} + +cleanup() +{ + pre_cleanup + ip netns del testns1 + echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + modprobe -r netdevsim &> /dev/null +} + +trap cleanup EXIT + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 1d27f52c61e6..477cbb042f5b 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -74,7 +74,7 @@ static int __do_binderfs_test(struct __test_metadata *_metadata) ret = mount(NULL, binderfs_mntpt, "binder", 0, 0); EXPECT_EQ(ret, 0) { if (errno == ENODEV) - XFAIL(goto out, "binderfs missing"); + SKIP(goto out, "binderfs missing"); TH_LOG("%s - Failed to mount binderfs", strerror(errno)); goto rmdir; } @@ -475,10 +475,10 @@ TEST(binderfs_stress) TEST(binderfs_test_privileged) { if (geteuid() != 0) - XFAIL(return, "Tests are not run as root. Skipping privileged tests"); + SKIP(return, "Tests are not run as root. Skipping privileged tests"); if (__do_binderfs_test(_metadata)) - XFAIL(return, "The Android binderfs filesystem is not available"); + SKIP(return, "The Android binderfs filesystem is not available"); } TEST(binderfs_test_unprivileged) @@ -511,7 +511,7 @@ TEST(binderfs_test_unprivileged) ret = wait_for_pid(pid); if (ret) { if (ret == 2) - XFAIL(return, "The Android binderfs filesystem is not available"); + SKIP(return, "The Android binderfs filesystem is not available"); ASSERT_EQ(ret, 0) { TH_LOG("wait_for_pid() failed"); } diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c index d979ff14775a..8f82f99f7748 100644 --- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c +++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c @@ -3282,4 +3282,99 @@ TEST(epoll60) close(ctx.epfd); } +struct epoll61_ctx { + int epfd; + int evfd; +}; + +static void *epoll61_write_eventfd(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + int64_t l = 1; + + usleep(10950); + write(ctx->evfd, &l, sizeof(l)); + return NULL; +} + +static void *epoll61_epoll_with_timeout(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + struct epoll_event events[1]; + int n; + + n = epoll_wait(ctx->epfd, events, 1, 11); + /* + * If epoll returned the eventfd, write on the eventfd to wake up the + * blocking poller. + */ + if (n == 1) { + int64_t l = 1; + + write(ctx->evfd, &l, sizeof(l)); + } + return NULL; +} + +static void *epoll61_blocking_epoll(void *ctx_) +{ + struct epoll61_ctx *ctx = ctx_; + struct epoll_event events[1]; + + epoll_wait(ctx->epfd, events, 1, -1); + return NULL; +} + +TEST(epoll61) +{ + struct epoll61_ctx ctx; + struct epoll_event ev; + int i, r; + + ctx.epfd = epoll_create1(0); + ASSERT_GE(ctx.epfd, 0); + ctx.evfd = eventfd(0, EFD_NONBLOCK); + ASSERT_GE(ctx.evfd, 0); + + ev.events = EPOLLIN | EPOLLET | EPOLLERR | EPOLLHUP; + ev.data.ptr = NULL; + r = epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd, &ev); + ASSERT_EQ(r, 0); + + /* + * We are testing a race. Repeat the test case 1000 times to make it + * more likely to fail in case of a bug. + */ + for (i = 0; i < 1000; i++) { + pthread_t threads[3]; + int n; + + /* + * Start 3 threads: + * Thread 1 sleeps for 10.9ms and writes to the evenfd. + * Thread 2 calls epoll with a timeout of 11ms. + * Thread 3 calls epoll with a timeout of -1. + * + * The eventfd write by Thread 1 should either wakeup Thread 2 + * or Thread 3. If it wakes up Thread 2, Thread 2 writes on the + * eventfd to wake up Thread 3. + * + * If no events are missed, all three threads should eventually + * be joinable. + */ + ASSERT_EQ(pthread_create(&threads[0], NULL, + epoll61_write_eventfd, &ctx), 0); + ASSERT_EQ(pthread_create(&threads[1], NULL, + epoll61_epoll_with_timeout, &ctx), 0); + ASSERT_EQ(pthread_create(&threads[2], NULL, + epoll61_blocking_epoll, &ctx), 0); + + for (n = 0; n < ARRAY_SIZE(threads); ++n) + ASSERT_EQ(pthread_join(threads[n], NULL), 0); + } + + close(ctx.epfd); + close(ctx.evfd); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc index 3bcd4c3624ee..b4da41d126d5 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=kernel_clone +PLACE=$FUNCTION_FORK echo "p:myevent1 $PLACE" >> dynamic_events echo "r:myevent2 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc index 438961971b7e..3a0e2885fff5 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=kernel_clone +PLACE=$FUNCTION_FORK setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc index a8603bd23e0d..d3e138e8377f 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=kernel_clone +PLACE=$FUNCTION_FORK setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc index acb17ce543d2..80541964b927 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc @@ -39,7 +39,7 @@ do_test() { disable_tracing echo do_execve* > set_ftrace_filter - echo *do_fork >> set_ftrace_filter + echo $FUNCTION_FORK >> set_ftrace_filter echo $PID > set_ftrace_notrace_pid echo function > current_tracer diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc index 9f0a9687c773..2f7211254529 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc @@ -39,7 +39,7 @@ do_test() { disable_tracing echo do_execve* > set_ftrace_filter - echo *do_fork >> set_ftrace_filter + echo $FUNCTION_FORK >> set_ftrace_filter echo $PID > set_ftrace_pid echo function > current_tracer diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc index 98305d76bd04..191d116b7883 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc @@ -4,9 +4,9 @@ # requires: set_ftrace_filter # flags: instance -echo kernel_clone:stacktrace >> set_ftrace_filter +echo $FUNCTION_FORK:stacktrace >> set_ftrace_filter -grep -q "kernel_clone:stacktrace:unlimited" set_ftrace_filter +grep -q "$FUNCTION_FORK:stacktrace:unlimited" set_ftrace_filter (echo "forked"; sleep 1) diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index c5dec55b7d95..a6fac927ee82 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -133,6 +133,13 @@ yield() { ping $LOCALHOST -c 1 || sleep .001 || usleep 1 || sleep 1 } +# The fork function in the kernel was renamed from "_do_fork" to +# "kernel_fork". As older tests should still work with older kernels +# as well as newer kernels, check which version of fork is used on this +# kernel so that the tests can use the fork function for the running kernel. +FUNCTION_FORK=`(if grep '\bkernel_clone\b' /proc/kallsyms > /dev/null; then + echo kernel_clone; else echo '_do_fork'; fi)` + # Since probe event command may include backslash, explicitly use printf "%s" # to NOT interpret it. ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc index 9737cd0578a7..2428a3ed78c9 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - adding and removing # requires: kprobe_events -echo p:myevent kernel_clone > kprobe_events +echo p:myevent $FUNCTION_FORK > kprobe_events grep myevent kprobe_events test -d events/kprobes/myevent echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc index f9a40af76888..010a8b1d6c1d 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - busy event check # requires: kprobe_events -echo p:myevent kernel_clone > kprobe_events +echo p:myevent $FUNCTION_FORK > kprobe_events test -d events/kprobes/myevent echo 1 > events/kprobes/myevent/enable echo > kprobe_events && exit_fail # this must fail diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc index eb543d3cfe5f..a96a1dc7014f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc @@ -3,13 +3,13 @@ # description: Kprobe dynamic event with arguments # requires: kprobe_events -echo 'p:testprobe kernel_clone $stack $stack0 +0($stack)' > kprobe_events +echo "p:testprobe $FUNCTION_FORK \$stack \$stack0 +0(\$stack)" > kprobe_events grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)' test -d events/kprobes/testprobe echo 1 > events/kprobes/testprobe/enable ( echo "forked") -grep testprobe trace | grep 'kernel_clone' | \ +grep testprobe trace | grep "$FUNCTION_FORK" | \ grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$' echo 0 > events/kprobes/testprobe/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc index 4e5b63be51c9..a053ee2e7d77 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc @@ -5,7 +5,7 @@ grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old -echo 'p:testprobe kernel_clone comm=$comm ' > kprobe_events +echo "p:testprobe $FUNCTION_FORK comm=\$comm " > kprobe_events grep testprobe kprobe_events | grep -q 'comm=$comm' test -d events/kprobes/testprobe diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index a1d70588ab21..84285a6f60b0 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -30,13 +30,13 @@ esac : "Test get argument (1)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test kernel_clone" >> kprobe_events +echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1=\"test\"" trace echo 0 > events/kprobes/testprobe/enable : "Test get argument (2)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test kernel_clone" >> kprobe_events +echo "p:test $FUNCTION_FORK" >> kprobe_events grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc index bd25dd0ba0d0..717130ed4feb 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc @@ -14,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then fi : "Test get basic types symbol argument" -echo "p:testprobe_u kernel_clone arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events -echo "p:testprobe_s kernel_clone arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events +echo "p:testprobe_u $FUNCTION_FORK arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events +echo "p:testprobe_s $FUNCTION_FORK arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events if grep -q "x8/16/32/64" README; then - echo "p:testprobe_x kernel_clone arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events + echo "p:testprobe_x $FUNCTION_FORK arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events fi -echo "p:testprobe_bf kernel_clone arg1=@linux_proc_banner:b8@4/32" >> kprobe_events +echo "p:testprobe_bf $FUNCTION_FORK arg1=@linux_proc_banner:b8@4/32" >> kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable @@ -27,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace grep "testprobe_bf:.* arg1=.*" trace : "Test get string symbol argument" -echo "p:testprobe_str kernel_clone arg1=@linux_proc_banner:string" > kprobe_events +echo "p:testprobe_str $FUNCTION_FORK arg1=@linux_proc_banner:string" > kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc index 91fcce1c241c..25b7708eb559 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "x8/16/32/64":README gen_event() { # Bitsize - echo "p:testprobe kernel_clone \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" + echo "p:testprobe $FUNCTION_FORK \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" } check_types() { # s-type u-type x-type bf-type width diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc index a30a9c07290d..d25d01a19778 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc @@ -9,12 +9,16 @@ grep -A10 "fetcharg:" README | grep -q '\[u\]<offset>' || exit_unsupported :;: "user-memory access syntax and ustring working on user memory";: echo 'p:myevent do_sys_open path=+0($arg2):ustring path2=+u0($arg2):string' \ > kprobe_events +echo 'p:myevent2 do_sys_openat2 path=+0($arg2):ustring path2=+u0($arg2):string' \ + >> kprobe_events grep myevent kprobe_events | \ grep -q 'path=+0($arg2):ustring path2=+u0($arg2):string' echo 1 > events/kprobes/myevent/enable +echo 1 > events/kprobes/myevent2/enable echo > /dev/null echo 0 > events/kprobes/myevent/enable +echo 0 > events/kprobes/myevent2/enable grep myevent trace | grep -q 'path="/dev/null" path2="/dev/null"' diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc index 0d179094191f..5556292601a4 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc @@ -5,29 +5,29 @@ # prepare echo nop > current_tracer -echo kernel_clone > set_ftrace_filter -echo 'p:testprobe kernel_clone' > kprobe_events +echo $FUNCTION_FORK > set_ftrace_filter +echo "p:testprobe $FUNCTION_FORK" > kprobe_events # kprobe on / ftrace off echo 1 > events/kprobes/testprobe/enable echo > trace ( echo "forked") grep testprobe trace -! grep 'kernel_clone <-' trace +! grep "$FUNCTION_FORK <-" trace # kprobe on / ftrace on echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep 'kernel_clone <-' trace +grep "$FUNCTION_FORK <-" trace # kprobe off / ftrace on echo 0 > events/kprobes/testprobe/enable echo > trace ( echo "forked") ! grep testprobe trace -grep 'kernel_clone <-' trace +grep "$FUNCTION_FORK <-" trace # kprobe on / ftrace on echo 1 > events/kprobes/testprobe/enable @@ -35,11 +35,11 @@ echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep 'kernel_clone <-' trace +grep "$FUNCTION_FORK <-" trace # kprobe on / ftrace off echo nop > current_tracer echo > trace ( echo "forked") grep testprobe trace -! grep 'kernel_clone <-' trace +! grep "$FUNCTION_FORK <-" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc index 45d90b6c763d..f0d5b7777ed7 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "Create/append/":README # Choose 2 symbols for target -SYM1=kernel_clone +SYM1=$FUNCTION_FORK SYM2=do_exit EVENT_NAME=kprobes/testevent diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index 1b5550ef8a9b..fa928b431555 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -86,15 +86,15 @@ esac # multiprobe errors if grep -q "Create/append/" README && grep -q "imm-value" README; then -echo 'p:kprobes/testevent kernel_clone' > kprobe_events +echo "p:kprobes/testevent $FUNCTION_FORK" > kprobe_events check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE # Explicitly use printf "%s" to not interpret \1 -printf "%s" 'p:kprobes/testevent kernel_clone abcd=\1' > kprobe_events -check_error 'p:kprobes/testevent kernel_clone ^bcd=\1' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent kernel_clone ^abcd=\1:u8' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent kernel_clone ^abcd=\"foo"' # DIFF_ARG_TYPE -check_error '^p:kprobes/testevent kernel_clone abcd=\1' # SAME_PROBE +printf "%s" "p:kprobes/testevent $FUNCTION_FORK abcd=\\1" > kprobe_events +check_error "p:kprobes/testevent $FUNCTION_FORK ^bcd=\\1" # DIFF_ARG_TYPE +check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\1:u8" # DIFF_ARG_TYPE +check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\\"foo\"" # DIFF_ARG_TYPE +check_error "^p:kprobes/testevent $FUNCTION_FORK abcd=\\1" # SAME_PROBE fi # %return suffix errors diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc index 7ae492c204a4..197cc2afd404 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc @@ -4,14 +4,14 @@ # requires: kprobe_events # Add new kretprobe event -echo 'r:testprobe2 kernel_clone $retval' > kprobe_events +echo "r:testprobe2 $FUNCTION_FORK \$retval" > kprobe_events grep testprobe2 kprobe_events | grep -q 'arg1=\$retval' test -d events/kprobes/testprobe2 echo 1 > events/kprobes/testprobe2/enable ( echo "forked") -cat trace | grep testprobe2 | grep -q '<- kernel_clone' +cat trace | grep testprobe2 | grep -q "<- $FUNCTION_FORK" echo 0 > events/kprobes/testprobe2/enable echo '-:testprobe2' >> kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc index c4093fc1a773..98166fa3eb91 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc @@ -4,7 +4,7 @@ # requires: kprobe_events ! grep -q 'myevent' kprobe_profile -echo p:myevent kernel_clone > kprobe_events +echo "p:myevent $FUNCTION_FORK" > kprobe_events grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile echo 1 > events/kprobes/myevent/enable ( echo "forked" ) diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index f19804df244c..edce85420d19 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -126,7 +126,7 @@ snprintf(_metadata->results->reason, \ sizeof(_metadata->results->reason), fmt, ##__VA_ARGS__); \ if (TH_LOG_ENABLED) { \ - fprintf(TH_LOG_STREAM, "# SKIP %s\n", \ + fprintf(TH_LOG_STREAM, "# SKIP %s\n", \ _metadata->results->reason); \ } \ _metadata->passed = 1; \ @@ -432,7 +432,7 @@ */ /** - * ASSERT_EQ(expected, seen) + * ASSERT_EQ() * * @expected: expected value * @seen: measured value @@ -443,7 +443,7 @@ __EXPECT(expected, #expected, seen, #seen, ==, 1) /** - * ASSERT_NE(expected, seen) + * ASSERT_NE() * * @expected: expected value * @seen: measured value @@ -454,7 +454,7 @@ __EXPECT(expected, #expected, seen, #seen, !=, 1) /** - * ASSERT_LT(expected, seen) + * ASSERT_LT() * * @expected: expected value * @seen: measured value @@ -465,7 +465,7 @@ __EXPECT(expected, #expected, seen, #seen, <, 1) /** - * ASSERT_LE(expected, seen) + * ASSERT_LE() * * @expected: expected value * @seen: measured value @@ -476,7 +476,7 @@ __EXPECT(expected, #expected, seen, #seen, <=, 1) /** - * ASSERT_GT(expected, seen) + * ASSERT_GT() * * @expected: expected value * @seen: measured value @@ -487,7 +487,7 @@ __EXPECT(expected, #expected, seen, #seen, >, 1) /** - * ASSERT_GE(expected, seen) + * ASSERT_GE() * * @expected: expected value * @seen: measured value @@ -498,7 +498,7 @@ __EXPECT(expected, #expected, seen, #seen, >=, 1) /** - * ASSERT_NULL(seen) + * ASSERT_NULL() * * @seen: measured value * @@ -508,7 +508,7 @@ __EXPECT(NULL, "NULL", seen, #seen, ==, 1) /** - * ASSERT_TRUE(seen) + * ASSERT_TRUE() * * @seen: measured value * @@ -518,7 +518,7 @@ __EXPECT(0, "0", seen, #seen, !=, 1) /** - * ASSERT_FALSE(seen) + * ASSERT_FALSE() * * @seen: measured value * @@ -528,7 +528,7 @@ __EXPECT(0, "0", seen, #seen, ==, 1) /** - * ASSERT_STREQ(expected, seen) + * ASSERT_STREQ() * * @expected: expected value * @seen: measured value @@ -539,7 +539,7 @@ __EXPECT_STR(expected, seen, ==, 1) /** - * ASSERT_STRNE(expected, seen) + * ASSERT_STRNE() * * @expected: expected value * @seen: measured value @@ -550,7 +550,7 @@ __EXPECT_STR(expected, seen, !=, 1) /** - * EXPECT_EQ(expected, seen) + * EXPECT_EQ() * * @expected: expected value * @seen: measured value @@ -561,7 +561,7 @@ __EXPECT(expected, #expected, seen, #seen, ==, 0) /** - * EXPECT_NE(expected, seen) + * EXPECT_NE() * * @expected: expected value * @seen: measured value @@ -572,7 +572,7 @@ __EXPECT(expected, #expected, seen, #seen, !=, 0) /** - * EXPECT_LT(expected, seen) + * EXPECT_LT() * * @expected: expected value * @seen: measured value @@ -583,7 +583,7 @@ __EXPECT(expected, #expected, seen, #seen, <, 0) /** - * EXPECT_LE(expected, seen) + * EXPECT_LE() * * @expected: expected value * @seen: measured value @@ -594,7 +594,7 @@ __EXPECT(expected, #expected, seen, #seen, <=, 0) /** - * EXPECT_GT(expected, seen) + * EXPECT_GT() * * @expected: expected value * @seen: measured value @@ -605,7 +605,7 @@ __EXPECT(expected, #expected, seen, #seen, >, 0) /** - * EXPECT_GE(expected, seen) + * EXPECT_GE() * * @expected: expected value * @seen: measured value @@ -616,7 +616,7 @@ __EXPECT(expected, #expected, seen, #seen, >=, 0) /** - * EXPECT_NULL(seen) + * EXPECT_NULL() * * @seen: measured value * @@ -626,7 +626,7 @@ __EXPECT(NULL, "NULL", seen, #seen, ==, 0) /** - * EXPECT_TRUE(seen) + * EXPECT_TRUE() * * @seen: measured value * @@ -636,7 +636,7 @@ __EXPECT(0, "0", seen, #seen, !=, 0) /** - * EXPECT_FALSE(seen) + * EXPECT_FALSE() * * @seen: measured value * @@ -646,7 +646,7 @@ __EXPECT(0, "0", seen, #seen, ==, 0) /** - * EXPECT_STREQ(expected, seen) + * EXPECT_STREQ() * * @expected: expected value * @seen: measured value @@ -657,7 +657,7 @@ __EXPECT_STR(expected, seen, ==, 0) /** - * EXPECT_STRNE(expected, seen) + * EXPECT_STRNE() * * @expected: expected value * @seen: measured value diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 307ceaadbbb9..7a2c242b7152 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -1,10 +1,13 @@ # SPDX-License-Identifier: GPL-2.0-only +/aarch64/get-reg-list +/aarch64/get-reg-list-sve /s390x/memop /s390x/resets /s390x/sync_regs_test /x86_64/cr4_cpuid_sync_test /x86_64/debug_regs /x86_64/evmcs_test +/x86_64/kvm_pv_test /x86_64/hyperv_cpuid /x86_64/mmio_warning_test /x86_64/platform_info_test @@ -15,6 +18,7 @@ /x86_64/vmx_preemption_timer_test /x86_64/svm_vmcall_test /x86_64/sync_regs_test +/x86_64/vmx_apic_access_test /x86_64/vmx_close_while_nested_test /x86_64/vmx_dirty_log_test /x86_64/vmx_set_nested_state_test @@ -23,6 +27,7 @@ /clear_dirty_log_test /demand_paging_test /dirty_log_test +/dirty_log_perf_test /kvm_create_max_vcpus /set_memory_region_test /steal_time diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 7ebe71fbca53..3d14ef77755e 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -34,13 +34,14 @@ ifeq ($(ARCH),s390) endif LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c -LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c +LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid +TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test @@ -49,6 +50,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test +TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test @@ -57,14 +59,15 @@ TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test TEST_GEN_PROGS_x86_64 += x86_64/debug_regs TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/user_msr_test -TEST_GEN_PROGS_x86_64 += clear_dirty_log_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test +TEST_GEN_PROGS_x86_64 += dirty_log_perf_test TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time -TEST_GEN_PROGS_aarch64 += clear_dirty_log_test +TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list +TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list-sve TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus @@ -110,14 +113,21 @@ LDFLAGS += -pthread $(no-pie-option) $(pgste-option) include ../lib.mk STATIC_LIBS := $(OUTPUT)/libkvm.a -LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM)) -EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.* +LIBKVM_C := $(filter %.c,$(LIBKVM)) +LIBKVM_S := $(filter %.S,$(LIBKVM)) +LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C)) +LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S)) +EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.* + +x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)))) +$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c + $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ -x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ)))) -$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c +$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@ -$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ) +LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) +$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS) $(AR) crs $@ $^ x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS)))) diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c new file mode 100644 index 000000000000..efba76682b4b --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list-sve.c @@ -0,0 +1,3 @@ +// SPDX-License-Identifier: GPL-2.0 +#define REG_LIST_SVE +#include "get-reg-list.c" diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c new file mode 100644 index 000000000000..33218a395d9f --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -0,0 +1,841 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Check for KVM_GET_REG_LIST regressions. + * + * Copyright (C) 2020, Red Hat, Inc. + * + * When attempting to migrate from a host with an older kernel to a host + * with a newer kernel we allow the newer kernel on the destination to + * list new registers with get-reg-list. We assume they'll be unused, at + * least until the guest reboots, and so they're relatively harmless. + * However, if the destination host with the newer kernel is missing + * registers which the source host with the older kernel has, then that's + * a regression in get-reg-list. This test checks for that regression by + * checking the current list against a blessed list. We should never have + * missing registers, but if new ones appear then they can probably be + * added to the blessed list. A completely new blessed list can be created + * by running the test with the --list command line argument. + * + * Note, the blessed list should be created from the oldest possible + * kernel. We can't go older than v4.15, though, because that's the first + * release to expose the ID system registers in KVM_GET_REG_LIST, see + * commit 93390c0a1b20 ("arm64: KVM: Hide unsupported AArch64 CPU features + * from guests"). Also, one must use the --core-reg-fixup command line + * option when running on an older kernel that doesn't include df205b5c6328 + * ("KVM: arm64: Filter out invalid core register IDs in KVM_GET_REG_LIST") + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "kvm_util.h" +#include "test_util.h" +#include "processor.h" + +#ifdef REG_LIST_SVE +#define reg_list_sve() (true) +#else +#define reg_list_sve() (false) +#endif + +#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK) + +#define for_each_reg(i) \ + for ((i) = 0; (i) < reg_list->n; ++(i)) + +#define for_each_missing_reg(i) \ + for ((i) = 0; (i) < blessed_n; ++(i)) \ + if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) + +#define for_each_new_reg(i) \ + for ((i) = 0; (i) < reg_list->n; ++(i)) \ + if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i])) + + +static struct kvm_reg_list *reg_list; + +static __u64 base_regs[], vregs[], sve_regs[], rejects_set[]; +static __u64 base_regs_n, vregs_n, sve_regs_n, rejects_set_n; +static __u64 *blessed_reg, blessed_n; + +static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg) +{ + int i; + + for (i = 0; i < nr_regs; ++i) + if (reg == regs[i]) + return true; + return false; +} + +static const char *str_with_index(const char *template, __u64 index) +{ + char *str, *p; + int n; + + str = strdup(template); + p = strstr(str, "##"); + n = sprintf(p, "%lld", index); + strcat(p + n, strstr(template, "##") + 2); + + return (const char *)str; +} + +#define CORE_REGS_XX_NR_WORDS 2 +#define CORE_SPSR_XX_NR_WORDS 2 +#define CORE_FPREGS_XX_NR_WORDS 4 + +static const char *core_id_to_str(__u64 id) +{ + __u64 core_off = id & ~REG_MASK, idx; + + /* + * core_off is the offset into struct kvm_regs + */ + switch (core_off) { + case KVM_REG_ARM_CORE_REG(regs.regs[0]) ... + KVM_REG_ARM_CORE_REG(regs.regs[30]): + idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS; + TEST_ASSERT(idx < 31, "Unexpected regs.regs index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(regs.regs[##])", idx); + case KVM_REG_ARM_CORE_REG(regs.sp): + return "KVM_REG_ARM_CORE_REG(regs.sp)"; + case KVM_REG_ARM_CORE_REG(regs.pc): + return "KVM_REG_ARM_CORE_REG(regs.pc)"; + case KVM_REG_ARM_CORE_REG(regs.pstate): + return "KVM_REG_ARM_CORE_REG(regs.pstate)"; + case KVM_REG_ARM_CORE_REG(sp_el1): + return "KVM_REG_ARM_CORE_REG(sp_el1)"; + case KVM_REG_ARM_CORE_REG(elr_el1): + return "KVM_REG_ARM_CORE_REG(elr_el1)"; + case KVM_REG_ARM_CORE_REG(spsr[0]) ... + KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]): + idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS; + TEST_ASSERT(idx < KVM_NR_SPSR, "Unexpected spsr index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(spsr[##])", idx); + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS; + TEST_ASSERT(idx < 32, "Unexpected fp_regs.vregs index: %lld", idx); + return str_with_index("KVM_REG_ARM_CORE_REG(fp_regs.vregs[##])", idx); + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)"; + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)"; + } + + TEST_FAIL("Unknown core reg id: 0x%llx", id); + return NULL; +} + +static const char *sve_id_to_str(__u64 id) +{ + __u64 sve_off, n, i; + + if (id == KVM_REG_ARM64_SVE_VLS) + return "KVM_REG_ARM64_SVE_VLS"; + + sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1)); + i = id & (KVM_ARM64_SVE_MAX_SLICES - 1); + + TEST_ASSERT(i == 0, "Currently we don't expect slice > 0, reg id 0x%llx", id); + + switch (sve_off) { + case KVM_REG_ARM64_SVE_ZREG_BASE ... + KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1: + n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1); + TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0), + "Unexpected bits set in SVE ZREG id: 0x%llx", id); + return str_with_index("KVM_REG_ARM64_SVE_ZREG(##, 0)", n); + case KVM_REG_ARM64_SVE_PREG_BASE ... + KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1: + n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1); + TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0), + "Unexpected bits set in SVE PREG id: 0x%llx", id); + return str_with_index("KVM_REG_ARM64_SVE_PREG(##, 0)", n); + case KVM_REG_ARM64_SVE_FFR_BASE: + TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0), + "Unexpected bits set in SVE FFR id: 0x%llx", id); + return "KVM_REG_ARM64_SVE_FFR(0)"; + } + + return NULL; +} + +static void print_reg(__u64 id) +{ + unsigned op0, op1, crn, crm, op2; + const char *reg_size = NULL; + + TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64, + "KVM_REG_ARM64 missing in reg id: 0x%llx", id); + + switch (id & KVM_REG_SIZE_MASK) { + case KVM_REG_SIZE_U8: + reg_size = "KVM_REG_SIZE_U8"; + break; + case KVM_REG_SIZE_U16: + reg_size = "KVM_REG_SIZE_U16"; + break; + case KVM_REG_SIZE_U32: + reg_size = "KVM_REG_SIZE_U32"; + break; + case KVM_REG_SIZE_U64: + reg_size = "KVM_REG_SIZE_U64"; + break; + case KVM_REG_SIZE_U128: + reg_size = "KVM_REG_SIZE_U128"; + break; + case KVM_REG_SIZE_U256: + reg_size = "KVM_REG_SIZE_U256"; + break; + case KVM_REG_SIZE_U512: + reg_size = "KVM_REG_SIZE_U512"; + break; + case KVM_REG_SIZE_U1024: + reg_size = "KVM_REG_SIZE_U1024"; + break; + case KVM_REG_SIZE_U2048: + reg_size = "KVM_REG_SIZE_U2048"; + break; + default: + TEST_FAIL("Unexpected reg size: 0x%llx in reg id: 0x%llx", + (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id); + } + + switch (id & KVM_REG_ARM_COPROC_MASK) { + case KVM_REG_ARM_CORE: + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(id)); + break; + case KVM_REG_ARM_DEMUX: + TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)), + "Unexpected bits set in DEMUX reg id: 0x%llx", id); + printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n", + reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK); + break; + case KVM_REG_ARM64_SYSREG: + op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT; + op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT; + crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT; + crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT; + op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT; + TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2), + "Unexpected bits set in SYSREG reg id: 0x%llx", id); + printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2); + break; + case KVM_REG_ARM_FW: + TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff), + "Unexpected bits set in FW reg id: 0x%llx", id); + printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff); + break; + case KVM_REG_ARM64_SVE: + if (reg_list_sve()) + printf("\t%s,\n", sve_id_to_str(id)); + else + TEST_FAIL("KVM_REG_ARM64_SVE is an unexpected coproc type in reg id: 0x%llx", id); + break; + default: + TEST_FAIL("Unexpected coproc type: 0x%llx in reg id: 0x%llx", + (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id); + } +} + +/* + * Older kernels listed each 32-bit word of CORE registers separately. + * For 64 and 128-bit registers we need to ignore the extra words. We + * also need to fixup the sizes, because the older kernels stated all + * registers were 64-bit, even when they weren't. + */ +static void core_reg_fixup(void) +{ + struct kvm_reg_list *tmp; + __u64 id, core_off; + int i; + + tmp = calloc(1, sizeof(*tmp) + reg_list->n * sizeof(__u64)); + + for (i = 0; i < reg_list->n; ++i) { + id = reg_list->reg[i]; + + if ((id & KVM_REG_ARM_COPROC_MASK) != KVM_REG_ARM_CORE) { + tmp->reg[tmp->n++] = id; + continue; + } + + core_off = id & ~REG_MASK; + + switch (core_off) { + case 0x52: case 0xd2: case 0xd6: + /* + * These offsets are pointing at padding. + * We need to ignore them too. + */ + continue; + case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ... + KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]): + if (core_off & 3) + continue; + id &= ~KVM_REG_SIZE_MASK; + id |= KVM_REG_SIZE_U128; + tmp->reg[tmp->n++] = id; + continue; + case KVM_REG_ARM_CORE_REG(fp_regs.fpsr): + case KVM_REG_ARM_CORE_REG(fp_regs.fpcr): + id &= ~KVM_REG_SIZE_MASK; + id |= KVM_REG_SIZE_U32; + tmp->reg[tmp->n++] = id; + continue; + default: + if (core_off & 1) + continue; + tmp->reg[tmp->n++] = id; + break; + } + } + + free(reg_list); + reg_list = tmp; +} + +static void prepare_vcpu_init(struct kvm_vcpu_init *init) +{ + if (reg_list_sve()) + init->features[0] |= 1 << KVM_ARM_VCPU_SVE; +} + +static void finalize_vcpu(struct kvm_vm *vm, uint32_t vcpuid) +{ + int feature; + + if (reg_list_sve()) { + feature = KVM_ARM_VCPU_SVE; + vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_FINALIZE, &feature); + } +} + +static void check_supported(void) +{ + if (reg_list_sve() && !kvm_check_cap(KVM_CAP_ARM_SVE)) { + fprintf(stderr, "SVE not available, skipping tests\n"); + exit(KSFT_SKIP); + } +} + +int main(int ac, char **av) +{ + struct kvm_vcpu_init init = { .target = -1, }; + int new_regs = 0, missing_regs = 0, i; + int failed_get = 0, failed_set = 0, failed_reject = 0; + bool print_list = false, fixup_core_regs = false; + struct kvm_vm *vm; + __u64 *vec_regs; + + check_supported(); + + for (i = 1; i < ac; ++i) { + if (strcmp(av[i], "--core-reg-fixup") == 0) + fixup_core_regs = true; + else if (strcmp(av[i], "--list") == 0) + print_list = true; + else + fprintf(stderr, "Ignoring unknown option: %s\n", av[i]); + } + + vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR); + prepare_vcpu_init(&init); + aarch64_vcpu_add_default(vm, 0, &init, NULL); + finalize_vcpu(vm, 0); + + reg_list = vcpu_get_reg_list(vm, 0); + + if (fixup_core_regs) + core_reg_fixup(); + + if (print_list) { + putchar('\n'); + for_each_reg(i) + print_reg(reg_list->reg[i]); + putchar('\n'); + return 0; + } + + /* + * We only test that we can get the register and then write back the + * same value. Some registers may allow other values to be written + * back, but others only allow some bits to be changed, and at least + * for ID registers set will fail if the value does not exactly match + * what was returned by get. If registers that allow other values to + * be written need to have the other values tested, then we should + * create a new set of tests for those in a new independent test + * executable. + */ + for_each_reg(i) { + uint8_t addr[2048 / 8]; + struct kvm_one_reg reg = { + .id = reg_list->reg[i], + .addr = (__u64)&addr, + }; + int ret; + + ret = _vcpu_ioctl(vm, 0, KVM_GET_ONE_REG, ®); + if (ret) { + puts("Failed to get "); + print_reg(reg.id); + putchar('\n'); + ++failed_get; + } + + /* rejects_set registers are rejected after KVM_ARM_VCPU_FINALIZE */ + if (find_reg(rejects_set, rejects_set_n, reg.id)) { + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + if (ret != -1 || errno != EPERM) { + printf("Failed to reject (ret=%d, errno=%d) ", ret, errno); + print_reg(reg.id); + putchar('\n'); + ++failed_reject; + } + continue; + } + + ret = _vcpu_ioctl(vm, 0, KVM_SET_ONE_REG, ®); + if (ret) { + puts("Failed to set "); + print_reg(reg.id); + putchar('\n'); + ++failed_set; + } + } + + if (reg_list_sve()) { + blessed_n = base_regs_n + sve_regs_n; + vec_regs = sve_regs; + } else { + blessed_n = base_regs_n + vregs_n; + vec_regs = vregs; + } + + blessed_reg = calloc(blessed_n, sizeof(__u64)); + for (i = 0; i < base_regs_n; ++i) + blessed_reg[i] = base_regs[i]; + for (i = 0; i < blessed_n - base_regs_n; ++i) + blessed_reg[base_regs_n + i] = vec_regs[i]; + + for_each_new_reg(i) + ++new_regs; + + for_each_missing_reg(i) + ++missing_regs; + + if (new_regs || missing_regs) { + printf("Number blessed registers: %5lld\n", blessed_n); + printf("Number registers: %5lld\n", reg_list->n); + } + + if (new_regs) { + printf("\nThere are %d new registers.\n" + "Consider adding them to the blessed reg " + "list with the following lines:\n\n", new_regs); + for_each_new_reg(i) + print_reg(reg_list->reg[i]); + putchar('\n'); + } + + if (missing_regs) { + printf("\nThere are %d missing registers.\n" + "The following lines are missing registers:\n\n", missing_regs); + for_each_missing_reg(i) + print_reg(blessed_reg[i]); + putchar('\n'); + } + + TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject, + "There are %d missing registers; " + "%d registers failed get; %d registers failed set; %d registers failed reject", + missing_regs, failed_get, failed_set, failed_reject); + + return 0; +} + +/* + * The current blessed list was primed with the output of kernel version + * v4.15 with --core-reg-fixup and then later updated with new registers. + */ +static __u64 base_regs[] = { + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr), + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr), + KVM_REG_ARM_FW_REG(0), + KVM_REG_ARM_FW_REG(1), + KVM_REG_ARM_FW_REG(2), + ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */ + ARM64_SYS_REG(3, 3, 14, 0, 2), + ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */ + ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */ + ARM64_SYS_REG(3, 1, 0, 0, 7), /* AIDR_EL1 */ + ARM64_SYS_REG(3, 3, 0, 0, 1), /* CTR_EL0 */ + ARM64_SYS_REG(2, 0, 0, 0, 4), + ARM64_SYS_REG(2, 0, 0, 0, 5), + ARM64_SYS_REG(2, 0, 0, 0, 6), + ARM64_SYS_REG(2, 0, 0, 0, 7), + ARM64_SYS_REG(2, 0, 0, 1, 4), + ARM64_SYS_REG(2, 0, 0, 1, 5), + ARM64_SYS_REG(2, 0, 0, 1, 6), + ARM64_SYS_REG(2, 0, 0, 1, 7), + ARM64_SYS_REG(2, 0, 0, 2, 0), /* MDCCINT_EL1 */ + ARM64_SYS_REG(2, 0, 0, 2, 2), /* MDSCR_EL1 */ + ARM64_SYS_REG(2, 0, 0, 2, 4), + ARM64_SYS_REG(2, 0, 0, 2, 5), + ARM64_SYS_REG(2, 0, 0, 2, 6), + ARM64_SYS_REG(2, 0, 0, 2, 7), + ARM64_SYS_REG(2, 0, 0, 3, 4), + ARM64_SYS_REG(2, 0, 0, 3, 5), + ARM64_SYS_REG(2, 0, 0, 3, 6), + ARM64_SYS_REG(2, 0, 0, 3, 7), + ARM64_SYS_REG(2, 0, 0, 4, 4), + ARM64_SYS_REG(2, 0, 0, 4, 5), + ARM64_SYS_REG(2, 0, 0, 4, 6), + ARM64_SYS_REG(2, 0, 0, 4, 7), + ARM64_SYS_REG(2, 0, 0, 5, 4), + ARM64_SYS_REG(2, 0, 0, 5, 5), + ARM64_SYS_REG(2, 0, 0, 5, 6), + ARM64_SYS_REG(2, 0, 0, 5, 7), + ARM64_SYS_REG(2, 0, 0, 6, 4), + ARM64_SYS_REG(2, 0, 0, 6, 5), + ARM64_SYS_REG(2, 0, 0, 6, 6), + ARM64_SYS_REG(2, 0, 0, 6, 7), + ARM64_SYS_REG(2, 0, 0, 7, 4), + ARM64_SYS_REG(2, 0, 0, 7, 5), + ARM64_SYS_REG(2, 0, 0, 7, 6), + ARM64_SYS_REG(2, 0, 0, 7, 7), + ARM64_SYS_REG(2, 0, 0, 8, 4), + ARM64_SYS_REG(2, 0, 0, 8, 5), + ARM64_SYS_REG(2, 0, 0, 8, 6), + ARM64_SYS_REG(2, 0, 0, 8, 7), + ARM64_SYS_REG(2, 0, 0, 9, 4), + ARM64_SYS_REG(2, 0, 0, 9, 5), + ARM64_SYS_REG(2, 0, 0, 9, 6), + ARM64_SYS_REG(2, 0, 0, 9, 7), + ARM64_SYS_REG(2, 0, 0, 10, 4), + ARM64_SYS_REG(2, 0, 0, 10, 5), + ARM64_SYS_REG(2, 0, 0, 10, 6), + ARM64_SYS_REG(2, 0, 0, 10, 7), + ARM64_SYS_REG(2, 0, 0, 11, 4), + ARM64_SYS_REG(2, 0, 0, 11, 5), + ARM64_SYS_REG(2, 0, 0, 11, 6), + ARM64_SYS_REG(2, 0, 0, 11, 7), + ARM64_SYS_REG(2, 0, 0, 12, 4), + ARM64_SYS_REG(2, 0, 0, 12, 5), + ARM64_SYS_REG(2, 0, 0, 12, 6), + ARM64_SYS_REG(2, 0, 0, 12, 7), + ARM64_SYS_REG(2, 0, 0, 13, 4), + ARM64_SYS_REG(2, 0, 0, 13, 5), + ARM64_SYS_REG(2, 0, 0, 13, 6), + ARM64_SYS_REG(2, 0, 0, 13, 7), + ARM64_SYS_REG(2, 0, 0, 14, 4), + ARM64_SYS_REG(2, 0, 0, 14, 5), + ARM64_SYS_REG(2, 0, 0, 14, 6), + ARM64_SYS_REG(2, 0, 0, 14, 7), + ARM64_SYS_REG(2, 0, 0, 15, 4), + ARM64_SYS_REG(2, 0, 0, 15, 5), + ARM64_SYS_REG(2, 0, 0, 15, 6), + ARM64_SYS_REG(2, 0, 0, 15, 7), + ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */ + ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 1), /* ID_PFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 2), /* ID_DFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 3), /* ID_AFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 4), /* ID_MMFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 5), /* ID_MMFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 6), /* ID_MMFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 1, 7), /* ID_MMFR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 0), /* ID_ISAR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 1), /* ID_ISAR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 2), /* ID_ISAR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 3), /* ID_ISAR3_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 4), /* ID_ISAR4_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 5), /* ID_ISAR5_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 6), /* ID_MMFR4_EL1 */ + ARM64_SYS_REG(3, 0, 0, 2, 7), /* ID_ISAR6_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 0), /* MVFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 1), /* MVFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 2), /* MVFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 3), + ARM64_SYS_REG(3, 0, 0, 3, 4), /* ID_PFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 5), /* ID_DFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 6), /* ID_MMFR5_EL1 */ + ARM64_SYS_REG(3, 0, 0, 3, 7), + ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 2), + ARM64_SYS_REG(3, 0, 0, 4, 3), + ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 4, 5), + ARM64_SYS_REG(3, 0, 0, 4, 6), + ARM64_SYS_REG(3, 0, 0, 4, 7), + ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 1), /* ID_AA64DFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 2), + ARM64_SYS_REG(3, 0, 0, 5, 3), + ARM64_SYS_REG(3, 0, 0, 5, 4), /* ID_AA64AFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 5), /* ID_AA64AFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 5, 6), + ARM64_SYS_REG(3, 0, 0, 5, 7), + ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 6, 2), + ARM64_SYS_REG(3, 0, 0, 6, 3), + ARM64_SYS_REG(3, 0, 0, 6, 4), + ARM64_SYS_REG(3, 0, 0, 6, 5), + ARM64_SYS_REG(3, 0, 0, 6, 6), + ARM64_SYS_REG(3, 0, 0, 6, 7), + ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), + ARM64_SYS_REG(3, 0, 0, 7, 4), + ARM64_SYS_REG(3, 0, 0, 7, 5), + ARM64_SYS_REG(3, 0, 0, 7, 6), + ARM64_SYS_REG(3, 0, 0, 7, 7), + ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ + ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */ + ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ + ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ + ARM64_SYS_REG(3, 0, 5, 1, 0), /* AFSR0_EL1 */ + ARM64_SYS_REG(3, 0, 5, 1, 1), /* AFSR1_EL1 */ + ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */ + ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */ + ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */ + ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */ + ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ + ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ + ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */ + ARM64_SYS_REG(3, 0, 13, 0, 1), /* CONTEXTIDR_EL1 */ + ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ + ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */ + ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ + ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 3), /* PMOVSCLR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 4), /* PMSWINC_EL0 */ + ARM64_SYS_REG(3, 3, 9, 12, 5), /* PMSELR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */ + ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ + ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ + ARM64_SYS_REG(3, 3, 14, 8, 0), + ARM64_SYS_REG(3, 3, 14, 8, 1), + ARM64_SYS_REG(3, 3, 14, 8, 2), + ARM64_SYS_REG(3, 3, 14, 8, 3), + ARM64_SYS_REG(3, 3, 14, 8, 4), + ARM64_SYS_REG(3, 3, 14, 8, 5), + ARM64_SYS_REG(3, 3, 14, 8, 6), + ARM64_SYS_REG(3, 3, 14, 8, 7), + ARM64_SYS_REG(3, 3, 14, 9, 0), + ARM64_SYS_REG(3, 3, 14, 9, 1), + ARM64_SYS_REG(3, 3, 14, 9, 2), + ARM64_SYS_REG(3, 3, 14, 9, 3), + ARM64_SYS_REG(3, 3, 14, 9, 4), + ARM64_SYS_REG(3, 3, 14, 9, 5), + ARM64_SYS_REG(3, 3, 14, 9, 6), + ARM64_SYS_REG(3, 3, 14, 9, 7), + ARM64_SYS_REG(3, 3, 14, 10, 0), + ARM64_SYS_REG(3, 3, 14, 10, 1), + ARM64_SYS_REG(3, 3, 14, 10, 2), + ARM64_SYS_REG(3, 3, 14, 10, 3), + ARM64_SYS_REG(3, 3, 14, 10, 4), + ARM64_SYS_REG(3, 3, 14, 10, 5), + ARM64_SYS_REG(3, 3, 14, 10, 6), + ARM64_SYS_REG(3, 3, 14, 10, 7), + ARM64_SYS_REG(3, 3, 14, 11, 0), + ARM64_SYS_REG(3, 3, 14, 11, 1), + ARM64_SYS_REG(3, 3, 14, 11, 2), + ARM64_SYS_REG(3, 3, 14, 11, 3), + ARM64_SYS_REG(3, 3, 14, 11, 4), + ARM64_SYS_REG(3, 3, 14, 11, 5), + ARM64_SYS_REG(3, 3, 14, 11, 6), + ARM64_SYS_REG(3, 3, 14, 12, 0), + ARM64_SYS_REG(3, 3, 14, 12, 1), + ARM64_SYS_REG(3, 3, 14, 12, 2), + ARM64_SYS_REG(3, 3, 14, 12, 3), + ARM64_SYS_REG(3, 3, 14, 12, 4), + ARM64_SYS_REG(3, 3, 14, 12, 5), + ARM64_SYS_REG(3, 3, 14, 12, 6), + ARM64_SYS_REG(3, 3, 14, 12, 7), + ARM64_SYS_REG(3, 3, 14, 13, 0), + ARM64_SYS_REG(3, 3, 14, 13, 1), + ARM64_SYS_REG(3, 3, 14, 13, 2), + ARM64_SYS_REG(3, 3, 14, 13, 3), + ARM64_SYS_REG(3, 3, 14, 13, 4), + ARM64_SYS_REG(3, 3, 14, 13, 5), + ARM64_SYS_REG(3, 3, 14, 13, 6), + ARM64_SYS_REG(3, 3, 14, 13, 7), + ARM64_SYS_REG(3, 3, 14, 14, 0), + ARM64_SYS_REG(3, 3, 14, 14, 1), + ARM64_SYS_REG(3, 3, 14, 14, 2), + ARM64_SYS_REG(3, 3, 14, 14, 3), + ARM64_SYS_REG(3, 3, 14, 14, 4), + ARM64_SYS_REG(3, 3, 14, 14, 5), + ARM64_SYS_REG(3, 3, 14, 14, 6), + ARM64_SYS_REG(3, 3, 14, 14, 7), + ARM64_SYS_REG(3, 3, 14, 15, 0), + ARM64_SYS_REG(3, 3, 14, 15, 1), + ARM64_SYS_REG(3, 3, 14, 15, 2), + ARM64_SYS_REG(3, 3, 14, 15, 3), + ARM64_SYS_REG(3, 3, 14, 15, 4), + ARM64_SYS_REG(3, 3, 14, 15, 5), + ARM64_SYS_REG(3, 3, 14, 15, 6), + ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */ + ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */ + ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */ + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 0, + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 1, + KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | 2, +}; +static __u64 base_regs_n = ARRAY_SIZE(base_regs); + +static __u64 vregs[] = { + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]), + KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]), +}; +static __u64 vregs_n = ARRAY_SIZE(vregs); + +static __u64 sve_regs[] = { + KVM_REG_ARM64_SVE_VLS, + KVM_REG_ARM64_SVE_ZREG(0, 0), + KVM_REG_ARM64_SVE_ZREG(1, 0), + KVM_REG_ARM64_SVE_ZREG(2, 0), + KVM_REG_ARM64_SVE_ZREG(3, 0), + KVM_REG_ARM64_SVE_ZREG(4, 0), + KVM_REG_ARM64_SVE_ZREG(5, 0), + KVM_REG_ARM64_SVE_ZREG(6, 0), + KVM_REG_ARM64_SVE_ZREG(7, 0), + KVM_REG_ARM64_SVE_ZREG(8, 0), + KVM_REG_ARM64_SVE_ZREG(9, 0), + KVM_REG_ARM64_SVE_ZREG(10, 0), + KVM_REG_ARM64_SVE_ZREG(11, 0), + KVM_REG_ARM64_SVE_ZREG(12, 0), + KVM_REG_ARM64_SVE_ZREG(13, 0), + KVM_REG_ARM64_SVE_ZREG(14, 0), + KVM_REG_ARM64_SVE_ZREG(15, 0), + KVM_REG_ARM64_SVE_ZREG(16, 0), + KVM_REG_ARM64_SVE_ZREG(17, 0), + KVM_REG_ARM64_SVE_ZREG(18, 0), + KVM_REG_ARM64_SVE_ZREG(19, 0), + KVM_REG_ARM64_SVE_ZREG(20, 0), + KVM_REG_ARM64_SVE_ZREG(21, 0), + KVM_REG_ARM64_SVE_ZREG(22, 0), + KVM_REG_ARM64_SVE_ZREG(23, 0), + KVM_REG_ARM64_SVE_ZREG(24, 0), + KVM_REG_ARM64_SVE_ZREG(25, 0), + KVM_REG_ARM64_SVE_ZREG(26, 0), + KVM_REG_ARM64_SVE_ZREG(27, 0), + KVM_REG_ARM64_SVE_ZREG(28, 0), + KVM_REG_ARM64_SVE_ZREG(29, 0), + KVM_REG_ARM64_SVE_ZREG(30, 0), + KVM_REG_ARM64_SVE_ZREG(31, 0), + KVM_REG_ARM64_SVE_PREG(0, 0), + KVM_REG_ARM64_SVE_PREG(1, 0), + KVM_REG_ARM64_SVE_PREG(2, 0), + KVM_REG_ARM64_SVE_PREG(3, 0), + KVM_REG_ARM64_SVE_PREG(4, 0), + KVM_REG_ARM64_SVE_PREG(5, 0), + KVM_REG_ARM64_SVE_PREG(6, 0), + KVM_REG_ARM64_SVE_PREG(7, 0), + KVM_REG_ARM64_SVE_PREG(8, 0), + KVM_REG_ARM64_SVE_PREG(9, 0), + KVM_REG_ARM64_SVE_PREG(10, 0), + KVM_REG_ARM64_SVE_PREG(11, 0), + KVM_REG_ARM64_SVE_PREG(12, 0), + KVM_REG_ARM64_SVE_PREG(13, 0), + KVM_REG_ARM64_SVE_PREG(14, 0), + KVM_REG_ARM64_SVE_PREG(15, 0), + KVM_REG_ARM64_SVE_FFR(0), + ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */ +}; +static __u64 sve_regs_n = ARRAY_SIZE(sve_regs); + +static __u64 rejects_set[] = { +#ifdef REG_LIST_SVE + KVM_REG_ARM64_SVE_VLS, +#endif +}; +static __u64 rejects_set_n = ARRAY_SIZE(rejects_set); diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c deleted file mode 100644 index 11672ec6f74e..000000000000 --- a/tools/testing/selftests/kvm/clear_dirty_log_test.c +++ /dev/null @@ -1,6 +0,0 @@ -#define USE_CLEAR_DIRTY_LOG -#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0) -#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1) -#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ - KVM_DIRTY_LOG_INITIALLY_SET) -#include "dirty_log_test.c" diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index 360cd3ea4cd6..3d96a7bfaff3 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -21,20 +21,12 @@ #include <linux/bitops.h> #include <linux/userfaultfd.h> -#include "test_util.h" -#include "kvm_util.h" +#include "perf_test_util.h" #include "processor.h" +#include "test_util.h" #ifdef __NR_userfaultfd -/* The memory slot index demand page */ -#define TEST_MEM_SLOT_INDEX 1 - -/* Default guest test virtual memory offset */ -#define DEFAULT_GUEST_TEST_MEM 0xc0000000 - -#define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */ - #ifdef PRINT_PER_PAGE_UPDATES #define PER_PAGE_DEBUG(...) printf(__VA_ARGS__) #else @@ -47,77 +39,17 @@ #define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__) #endif -#define MAX_VCPUS 512 - -/* - * Guest/Host shared variables. Ensure addr_gva2hva() and/or - * sync_global_to/from_guest() are used when accessing from - * the host. READ/WRITE_ONCE() should also be used with anything - * that may change. - */ -static uint64_t host_page_size; -static uint64_t guest_page_size; - static char *guest_data_prototype; -/* - * Guest physical memory offset of the testing memory slot. - * This will be set to the topmost valid physical address minus - * the test memory size. - */ -static uint64_t guest_test_phys_mem; - -/* - * Guest virtual memory offset of the testing memory slot. - * Must not conflict with identity mapped test code. - */ -static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; - -struct vcpu_args { - uint64_t gva; - uint64_t pages; - - /* Only used by the host userspace part of the vCPU thread */ - int vcpu_id; - struct kvm_vm *vm; -}; - -static struct vcpu_args vcpu_args[MAX_VCPUS]; - -/* - * Continuously write to the first 8 bytes of each page in the demand paging - * memory region. - */ -static void guest_code(uint32_t vcpu_id) -{ - uint64_t gva; - uint64_t pages; - int i; - - /* Make sure vCPU args data structure is not corrupt. */ - GUEST_ASSERT(vcpu_args[vcpu_id].vcpu_id == vcpu_id); - - gva = vcpu_args[vcpu_id].gva; - pages = vcpu_args[vcpu_id].pages; - - for (i = 0; i < pages; i++) { - uint64_t addr = gva + (i * guest_page_size); - - addr &= ~(host_page_size - 1); - *(uint64_t *)addr = 0x0123456789ABCDEF; - } - - GUEST_SYNC(1); -} - static void *vcpu_worker(void *data) { int ret; - struct vcpu_args *args = (struct vcpu_args *)data; - struct kvm_vm *vm = args->vm; - int vcpu_id = args->vcpu_id; + struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + int vcpu_id = vcpu_args->vcpu_id; + struct kvm_vm *vm = perf_test_args.vm; struct kvm_run *run; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; vcpu_args_set(vm, vcpu_id, 1, vcpu_id); run = vcpu_state(vm, vcpu_id); @@ -133,52 +65,18 @@ static void *vcpu_worker(void *data) exit_reason_str(run->exit_reason)); } - clock_gettime(CLOCK_MONOTONIC, &end); - ts_diff = timespec_sub(end, start); + ts_diff = timespec_diff_now(start); PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id, ts_diff.tv_sec, ts_diff.tv_nsec); return NULL; } -#define PAGE_SHIFT_4K 12 -#define PTES_PER_4K_PT 512 - -static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, - uint64_t vcpu_memory_bytes) -{ - struct kvm_vm *vm; - uint64_t pages = DEFAULT_GUEST_PHY_PAGES; - - /* Account for a few pages per-vCPU for stacks */ - pages += DEFAULT_STACK_PGS * vcpus; - - /* - * Reserve twice the ammount of memory needed to map the test region and - * the page table / stacks region, at 4k, for page tables. Do the - * calculation with 4K page size: the smallest of all archs. (e.g., 64K - * page size guest will need even less memory for page tables). - */ - pages += (2 * pages) / PTES_PER_4K_PT; - pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / - PTES_PER_4K_PT; - pages = vm_adjust_num_guest_pages(mode, pages); - - pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - - vm = _vm_create(mode, pages, O_RDWR); - kvm_vm_elf_load(vm, program_invocation_name, 0, 0); -#ifdef __x86_64__ - vm_create_irqchip(vm); -#endif - return vm; -} - static int handle_uffd_page_request(int uffd, uint64_t addr) { pid_t tid; struct timespec start; - struct timespec end; + struct timespec ts_diff; struct uffdio_copy copy; int r; @@ -186,7 +84,7 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) copy.src = (uint64_t)guest_data_prototype; copy.dst = addr; - copy.len = host_page_size; + copy.len = perf_test_args.host_page_size; copy.mode = 0; clock_gettime(CLOCK_MONOTONIC, &start); @@ -198,12 +96,12 @@ static int handle_uffd_page_request(int uffd, uint64_t addr) return r; } - clock_gettime(CLOCK_MONOTONIC, &end); + ts_diff = timespec_diff_now(start); PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid, - timespec_to_ns(timespec_sub(end, start))); + timespec_to_ns(ts_diff)); PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n", - host_page_size, addr, tid); + perf_test_args.host_page_size, addr, tid); return 0; } @@ -223,7 +121,8 @@ static void *uffd_handler_thread_fn(void *arg) int pipefd = uffd_args->pipefd; useconds_t delay = uffd_args->delay; int64_t pages = 0; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; clock_gettime(CLOCK_MONOTONIC, &start); while (!quit_uffd_thread) { @@ -292,8 +191,7 @@ static void *uffd_handler_thread_fn(void *arg) pages++; } - clock_gettime(CLOCK_MONOTONIC, &end); - ts_diff = timespec_sub(end, start); + ts_diff = timespec_diff_now(start); PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n", pages, ts_diff.tv_sec, ts_diff.tv_nsec, pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); @@ -351,99 +249,54 @@ static int setup_demand_paging(struct kvm_vm *vm, } static void run_test(enum vm_guest_mode mode, bool use_uffd, - useconds_t uffd_delay, int vcpus, - uint64_t vcpu_memory_bytes) + useconds_t uffd_delay) { pthread_t *vcpu_threads; pthread_t *uffd_handler_threads = NULL; struct uffd_handler_args *uffd_args = NULL; - struct timespec start, end, ts_diff; + struct timespec start; + struct timespec ts_diff; int *pipefds = NULL; struct kvm_vm *vm; - uint64_t guest_num_pages; int vcpu_id; int r; - vm = create_vm(mode, vcpus, vcpu_memory_bytes); - - guest_page_size = vm_get_page_size(vm); - - TEST_ASSERT(vcpu_memory_bytes % guest_page_size == 0, - "Guest memory size is not guest page size aligned."); - - guest_num_pages = (vcpus * vcpu_memory_bytes) / guest_page_size; - guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); - - /* - * If there should be more memory in the guest test region than there - * can be pages in the guest, it will definitely cause problems. - */ - TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), - "Requested more guest memory than address space allows.\n" - " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", - guest_num_pages, vm_get_max_gfn(vm), vcpus, - vcpu_memory_bytes); - - host_page_size = getpagesize(); - TEST_ASSERT(vcpu_memory_bytes % host_page_size == 0, - "Guest memory size is not host page size aligned."); + vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); - guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * - guest_page_size; - guest_test_phys_mem &= ~(host_page_size - 1); + perf_test_args.wr_fract = 1; -#ifdef __s390x__ - /* Align to 1M (segment size) */ - guest_test_phys_mem &= ~((1 << 20) - 1); -#endif - - pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); - - /* Add an extra memory slot for testing demand paging */ - vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, - guest_test_phys_mem, - TEST_MEM_SLOT_INDEX, - guest_num_pages, 0); - - /* Do mapping for the demand paging memory slot */ - virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); - - ucall_init(vm, NULL); - - guest_data_prototype = malloc(host_page_size); + guest_data_prototype = malloc(perf_test_args.host_page_size); TEST_ASSERT(guest_data_prototype, "Failed to allocate buffer for guest data pattern"); - memset(guest_data_prototype, 0xAB, host_page_size); + memset(guest_data_prototype, 0xAB, perf_test_args.host_page_size); - vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads)); + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + if (use_uffd) { uffd_handler_threads = - malloc(vcpus * sizeof(*uffd_handler_threads)); + malloc(nr_vcpus * sizeof(*uffd_handler_threads)); TEST_ASSERT(uffd_handler_threads, "Memory allocation failed"); - uffd_args = malloc(vcpus * sizeof(*uffd_args)); + uffd_args = malloc(nr_vcpus * sizeof(*uffd_args)); TEST_ASSERT(uffd_args, "Memory allocation failed"); - pipefds = malloc(sizeof(int) * vcpus * 2); + pipefds = malloc(sizeof(int) * nr_vcpus * 2); TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd"); - } - - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { - vm_paddr_t vcpu_gpa; - void *vcpu_hva; - vm_vcpu_add_default(vm, vcpu_id, guest_code); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + vm_paddr_t vcpu_gpa; + void *vcpu_hva; - vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); - PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", - vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + vcpu_gpa = guest_test_phys_mem + (vcpu_id * guest_percpu_mem_size); + PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + guest_percpu_mem_size); - /* Cache the HVA pointer of the region */ - vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); + /* Cache the HVA pointer of the region */ + vcpu_hva = addr_gpa2hva(vm, vcpu_gpa); - if (use_uffd) { /* * Set up user fault fd to handle demand paging * requests. @@ -456,53 +309,41 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, &uffd_handler_threads[vcpu_id], pipefds[vcpu_id * 2], uffd_delay, &uffd_args[vcpu_id], - vcpu_hva, vcpu_memory_bytes); + vcpu_hva, guest_percpu_mem_size); if (r < 0) exit(-r); } - -#ifdef __x86_64__ - vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid()); -#endif - - vcpu_args[vcpu_id].vm = vm; - vcpu_args[vcpu_id].vcpu_id = vcpu_id; - vcpu_args[vcpu_id].gva = guest_test_virt_mem + - (vcpu_id * vcpu_memory_bytes); - vcpu_args[vcpu_id].pages = vcpu_memory_bytes / guest_page_size; } /* Export the shared variables to the guest */ - sync_global_to_guest(vm, host_page_size); - sync_global_to_guest(vm, guest_page_size); - sync_global_to_guest(vm, vcpu_args); + sync_global_to_guest(vm, perf_test_args); pr_info("Finished creating vCPUs and starting uffd threads\n"); clock_gettime(CLOCK_MONOTONIC, &start); - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, - &vcpu_args[vcpu_id]); + &perf_test_args.vcpu_args[vcpu_id]); } pr_info("Started all vCPUs\n"); /* Wait for the vcpu threads to quit */ - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { pthread_join(vcpu_threads[vcpu_id], NULL); PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id); } - pr_info("All vCPU threads joined\n"); + ts_diff = timespec_diff_now(start); - clock_gettime(CLOCK_MONOTONIC, &end); + pr_info("All vCPU threads joined\n"); if (use_uffd) { char c; /* Tell the user fault fd handler threads to quit */ - for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { r = write(pipefds[vcpu_id * 2 + 1], &c, 1); TEST_ASSERT(r == 1, "Unable to write to pipefd"); @@ -510,11 +351,11 @@ static void run_test(enum vm_guest_mode mode, bool use_uffd, } } - ts_diff = timespec_sub(end, start); pr_info("Total guest execution time: %ld.%.9lds\n", ts_diff.tv_sec, ts_diff.tv_nsec); pr_info("Overall demand paging rate: %f pgs/sec\n", - guest_num_pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); + perf_test_args.vcpu_args[0].pages * nr_vcpus / + ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0)); ucall_uninit(vm); kvm_vm_free(vm); @@ -568,9 +409,8 @@ static void help(char *name) int main(int argc, char *argv[]) { + int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); bool mode_selected = false; - uint64_t vcpu_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE; - int vcpus = 1; unsigned int mode; int opt, i; bool use_uffd = false; @@ -619,15 +459,12 @@ int main(int argc, char *argv[]) "A negative UFFD delay is not supported."); break; case 'b': - vcpu_memory_bytes = parse_size(optarg); + guest_percpu_mem_size = parse_size(optarg); break; case 'v': - vcpus = atoi(optarg); - TEST_ASSERT(vcpus > 0, - "Must have a positive number of vCPUs"); - TEST_ASSERT(vcpus <= MAX_VCPUS, - "This test does not currently support\n" - "more than %d vCPUs.", MAX_VCPUS); + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus, + "Invalid number of vcpus, must be between 1 and %d", max_vcpus); break; case 'h': default: @@ -642,7 +479,7 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, use_uffd, uffd_delay, vcpus, vcpu_memory_bytes); + run_test(i, use_uffd, uffd_delay); } return 0; diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c new file mode 100644 index 000000000000..85c9b8f73142 --- /dev/null +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -0,0 +1,376 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KVM dirty page logging performance test + * + * Based on dirty_log_test.c + * + * Copyright (C) 2018, Red Hat, Inc. + * Copyright (C) 2020, Google, Inc. + */ + +#define _GNU_SOURCE /* for program_invocation_name */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <time.h> +#include <pthread.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> + +#include "kvm_util.h" +#include "perf_test_util.h" +#include "processor.h" +#include "test_util.h" + +/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/ +#define TEST_HOST_LOOP_N 2UL + +/* Host variables */ +static bool host_quit; +static uint64_t iteration; +static uint64_t vcpu_last_completed_iteration[MAX_VCPUS]; + +static void *vcpu_worker(void *data) +{ + int ret; + struct kvm_vm *vm = perf_test_args.vm; + uint64_t pages_count = 0; + struct kvm_run *run; + struct timespec start; + struct timespec ts_diff; + struct timespec total = (struct timespec){0}; + struct timespec avg; + struct vcpu_args *vcpu_args = (struct vcpu_args *)data; + int vcpu_id = vcpu_args->vcpu_id; + + vcpu_args_set(vm, vcpu_id, 1, vcpu_id); + run = vcpu_state(vm, vcpu_id); + + while (!READ_ONCE(host_quit)) { + uint64_t current_iteration = READ_ONCE(iteration); + + clock_gettime(CLOCK_MONOTONIC, &start); + ret = _vcpu_run(vm, vcpu_id); + ts_diff = timespec_diff_now(start); + + TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret); + TEST_ASSERT(get_ucall(vm, vcpu_id, NULL) == UCALL_SYNC, + "Invalid guest sync status: exit_reason=%s\n", + exit_reason_str(run->exit_reason)); + + pr_debug("Got sync event from vCPU %d\n", vcpu_id); + vcpu_last_completed_iteration[vcpu_id] = current_iteration; + pr_debug("vCPU %d updated last completed iteration to %lu\n", + vcpu_id, vcpu_last_completed_iteration[vcpu_id]); + + if (current_iteration) { + pages_count += vcpu_args->pages; + total = timespec_add(total, ts_diff); + pr_debug("vCPU %d iteration %lu dirty memory time: %ld.%.9lds\n", + vcpu_id, current_iteration, ts_diff.tv_sec, + ts_diff.tv_nsec); + } else { + pr_debug("vCPU %d iteration %lu populate memory time: %ld.%.9lds\n", + vcpu_id, current_iteration, ts_diff.tv_sec, + ts_diff.tv_nsec); + } + + while (current_iteration == READ_ONCE(iteration) && + !READ_ONCE(host_quit)) {} + } + + avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_id]); + pr_debug("\nvCPU %d dirtied 0x%lx pages over %lu iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + vcpu_id, pages_count, vcpu_last_completed_iteration[vcpu_id], + total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec); + + return NULL; +} + +#ifdef USE_CLEAR_DIRTY_LOG +static u64 dirty_log_manual_caps; +#endif + +static void run_test(enum vm_guest_mode mode, unsigned long iterations, + uint64_t phys_offset, int wr_fract) +{ + pthread_t *vcpu_threads; + struct kvm_vm *vm; + unsigned long *bmap; + uint64_t guest_num_pages; + uint64_t host_num_pages; + int vcpu_id; + struct timespec start; + struct timespec ts_diff; + struct timespec get_dirty_log_total = (struct timespec){0}; + struct timespec vcpu_dirty_total = (struct timespec){0}; + struct timespec avg; +#ifdef USE_CLEAR_DIRTY_LOG + struct kvm_enable_cap cap = {}; + struct timespec clear_dirty_log_total = (struct timespec){0}; +#endif + + vm = create_vm(mode, nr_vcpus, guest_percpu_mem_size); + + perf_test_args.wr_fract = wr_fract; + + guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + host_num_pages = vm_num_host_pages(mode, guest_num_pages); + bmap = bitmap_alloc(host_num_pages); + +#ifdef USE_CLEAR_DIRTY_LOG + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; + cap.args[0] = dirty_log_manual_caps; + vm_enable_cap(vm, &cap); +#endif + + vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads)); + TEST_ASSERT(vcpu_threads, "Memory allocation failed"); + + add_vcpus(vm, nr_vcpus, guest_percpu_mem_size); + + sync_global_to_guest(vm, perf_test_args); + + /* Start the iterations */ + iteration = 0; + host_quit = false; + + clock_gettime(CLOCK_MONOTONIC, &start); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker, + &perf_test_args.vcpu_args[vcpu_id]); + } + + /* Allow the vCPU to populate memory */ + pr_debug("Starting iteration %lu - Populating\n", iteration); + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) + pr_debug("Waiting for vcpu_last_completed_iteration == %lu\n", + iteration); + + ts_diff = timespec_diff_now(start); + pr_info("Populate memory time: %ld.%.9lds\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + /* Enable dirty logging */ + clock_gettime(CLOCK_MONOTONIC, &start); + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, + KVM_MEM_LOG_DIRTY_PAGES); + ts_diff = timespec_diff_now(start); + pr_info("Enabling dirty logging time: %ld.%.9lds\n\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + while (iteration < iterations) { + /* + * Incrementing the iteration number will start the vCPUs + * dirtying memory again. + */ + clock_gettime(CLOCK_MONOTONIC, &start); + iteration++; + + pr_debug("Starting iteration %lu\n", iteration); + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) { + while (READ_ONCE(vcpu_last_completed_iteration[vcpu_id]) != iteration) + pr_debug("Waiting for vCPU %d vcpu_last_completed_iteration == %lu\n", + vcpu_id, iteration); + } + + ts_diff = timespec_diff_now(start); + vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff); + pr_info("Iteration %lu dirty memory time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); + + clock_gettime(CLOCK_MONOTONIC, &start); + kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); + + ts_diff = timespec_diff_now(start); + get_dirty_log_total = timespec_add(get_dirty_log_total, + ts_diff); + pr_info("Iteration %lu get dirty log time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); + +#ifdef USE_CLEAR_DIRTY_LOG + clock_gettime(CLOCK_MONOTONIC, &start); + kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, + host_num_pages); + + ts_diff = timespec_diff_now(start); + clear_dirty_log_total = timespec_add(clear_dirty_log_total, + ts_diff); + pr_info("Iteration %lu clear dirty log time: %ld.%.9lds\n", + iteration, ts_diff.tv_sec, ts_diff.tv_nsec); +#endif + } + + /* Tell the vcpu thread to quit */ + host_quit = true; + for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) + pthread_join(vcpu_threads[vcpu_id], NULL); + + /* Disable dirty logging */ + clock_gettime(CLOCK_MONOTONIC, &start); + vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0); + ts_diff = timespec_diff_now(start); + pr_info("Disabling dirty logging time: %ld.%.9lds\n", + ts_diff.tv_sec, ts_diff.tv_nsec); + + avg = timespec_div(get_dirty_log_total, iterations); + pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + iterations, get_dirty_log_total.tv_sec, + get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); + +#ifdef USE_CLEAR_DIRTY_LOG + avg = timespec_div(clear_dirty_log_total, iterations); + pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n", + iterations, clear_dirty_log_total.tv_sec, + clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); +#endif + + free(bmap); + free(vcpu_threads); + ucall_uninit(vm); + kvm_vm_free(vm); +} + +struct guest_mode { + bool supported; + bool enabled; +}; +static struct guest_mode guest_modes[NUM_VM_MODES]; + +#define guest_mode_init(mode, supported, enabled) ({ \ + guest_modes[mode] = (struct guest_mode){ supported, enabled }; \ +}) + +static void help(char *name) +{ + int i; + + puts(""); + printf("usage: %s [-h] [-i iterations] [-p offset] " + "[-m mode] [-b vcpu bytes] [-v vcpus]\n", name); + puts(""); + printf(" -i: specify iteration counts (default: %"PRIu64")\n", + TEST_HOST_LOOP_N); + printf(" -p: specify guest physical test memory offset\n" + " Warning: a low offset can conflict with the loaded test code.\n"); + printf(" -m: specify the guest mode ID to test " + "(default: test all supported modes)\n" + " This option may be used multiple times.\n" + " Guest mode IDs:\n"); + for (i = 0; i < NUM_VM_MODES; ++i) { + printf(" %d: %s%s\n", i, vm_guest_mode_string(i), + guest_modes[i].supported ? " (supported)" : ""); + } + printf(" -b: specify the size of the memory region which should be\n" + " dirtied by each vCPU. e.g. 10M or 3G.\n" + " (default: 1G)\n"); + printf(" -f: specify the fraction of pages which should be written to\n" + " as opposed to simply read, in the form\n" + " 1/<fraction of pages to write>.\n" + " (default: 1 i.e. all pages are written to.)\n"); + printf(" -v: specify the number of vCPUs to run.\n"); + puts(""); + exit(0); +} + +int main(int argc, char *argv[]) +{ + unsigned long iterations = TEST_HOST_LOOP_N; + bool mode_selected = false; + uint64_t phys_offset = 0; + unsigned int mode; + int opt, i; + int wr_fract = 1; + +#ifdef USE_CLEAR_DIRTY_LOG + dirty_log_manual_caps = + kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + if (!dirty_log_manual_caps) { + print_skip("KVM_CLEAR_DIRTY_LOG not available"); + exit(KSFT_SKIP); + } + dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); +#endif + +#ifdef __x86_64__ + guest_mode_init(VM_MODE_PXXV48_4K, true, true); +#endif +#ifdef __aarch64__ + guest_mode_init(VM_MODE_P40V48_4K, true, true); + guest_mode_init(VM_MODE_P40V48_64K, true, true); + + { + unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE); + + if (limit >= 52) + guest_mode_init(VM_MODE_P52V48_64K, true, true); + if (limit >= 48) { + guest_mode_init(VM_MODE_P48V48_4K, true, true); + guest_mode_init(VM_MODE_P48V48_64K, true, true); + } + } +#endif +#ifdef __s390x__ + guest_mode_init(VM_MODE_P40V48_4K, true, true); +#endif + + while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:")) != -1) { + switch (opt) { + case 'i': + iterations = strtol(optarg, NULL, 10); + break; + case 'p': + phys_offset = strtoull(optarg, NULL, 0); + break; + case 'm': + if (!mode_selected) { + for (i = 0; i < NUM_VM_MODES; ++i) + guest_modes[i].enabled = false; + mode_selected = true; + } + mode = strtoul(optarg, NULL, 10); + TEST_ASSERT(mode < NUM_VM_MODES, + "Guest mode ID %d too big", mode); + guest_modes[mode].enabled = true; + break; + case 'b': + guest_percpu_mem_size = parse_size(optarg); + break; + case 'f': + wr_fract = atoi(optarg); + TEST_ASSERT(wr_fract >= 1, + "Write fraction cannot be less than one"); + break; + case 'v': + nr_vcpus = atoi(optarg); + TEST_ASSERT(nr_vcpus > 0, + "Must have a positive number of vCPUs"); + TEST_ASSERT(nr_vcpus <= MAX_VCPUS, + "This test does not currently support\n" + "more than %d vCPUs.", MAX_VCPUS); + break; + case 'h': + default: + help(argv[0]); + break; + } + } + + TEST_ASSERT(iterations >= 2, "The test should have at least two iterations"); + + pr_info("Test iterations: %"PRIu64"\n", iterations); + + for (i = 0; i < NUM_VM_MODES; ++i) { + if (!guest_modes[i].enabled) + continue; + TEST_ASSERT(guest_modes[i].supported, + "Guest mode ID %d (%s) not supported.", + i, vm_guest_mode_string(i)); + run_test(i, iterations, phys_offset, wr_fract); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c index 752ec158ac59..54da9cc20db4 100644 --- a/tools/testing/selftests/kvm/dirty_log_test.c +++ b/tools/testing/selftests/kvm/dirty_log_test.c @@ -128,6 +128,78 @@ static uint64_t host_dirty_count; static uint64_t host_clear_count; static uint64_t host_track_next_count; +enum log_mode_t { + /* Only use KVM_GET_DIRTY_LOG for logging */ + LOG_MODE_DIRTY_LOG = 0, + + /* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */ + LOG_MODE_CLEAR_LOG = 1, + + LOG_MODE_NUM, + + /* Run all supported modes */ + LOG_MODE_ALL = LOG_MODE_NUM, +}; + +/* Mode of logging to test. Default is to run all supported modes */ +static enum log_mode_t host_log_mode_option = LOG_MODE_ALL; +/* Logging mode for current run */ +static enum log_mode_t host_log_mode; + +static bool clear_log_supported(void) +{ + return kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); +} + +static void clear_log_create_vm_done(struct kvm_vm *vm) +{ + struct kvm_enable_cap cap = {}; + u64 manual_caps; + + manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); + TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!"); + manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | + KVM_DIRTY_LOG_INITIALLY_SET); + cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; + cap.args[0] = manual_caps; + vm_enable_cap(vm, &cap); +} + +static void dirty_log_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + kvm_vm_get_dirty_log(vm, slot, bitmap); +} + +static void clear_log_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + kvm_vm_get_dirty_log(vm, slot, bitmap); + kvm_vm_clear_dirty_log(vm, slot, bitmap, 0, num_pages); +} + +struct log_mode { + const char *name; + /* Return true if this mode is supported, otherwise false */ + bool (*supported)(void); + /* Hook when the vm creation is done (before vcpu creation) */ + void (*create_vm_done)(struct kvm_vm *vm); + /* Hook to collect the dirty pages into the bitmap provided */ + void (*collect_dirty_pages) (struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages); +} log_modes[LOG_MODE_NUM] = { + { + .name = "dirty-log", + .collect_dirty_pages = dirty_log_collect_dirty_pages, + }, + { + .name = "clear-log", + .supported = clear_log_supported, + .create_vm_done = clear_log_create_vm_done, + .collect_dirty_pages = clear_log_collect_dirty_pages, + }, +}; + /* * We use this bitmap to track some pages that should have its dirty * bit set in the _next_ iteration. For example, if we detected the @@ -137,6 +209,44 @@ static uint64_t host_track_next_count; */ static unsigned long *host_bmap_track; +static void log_modes_dump(void) +{ + int i; + + printf("all"); + for (i = 0; i < LOG_MODE_NUM; i++) + printf(", %s", log_modes[i].name); + printf("\n"); +} + +static bool log_mode_supported(void) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->supported) + return mode->supported(); + + return true; +} + +static void log_mode_create_vm_done(struct kvm_vm *vm) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + if (mode->create_vm_done) + mode->create_vm_done(vm); +} + +static void log_mode_collect_dirty_pages(struct kvm_vm *vm, int slot, + void *bitmap, uint32_t num_pages) +{ + struct log_mode *mode = &log_modes[host_log_mode]; + + TEST_ASSERT(mode->collect_dirty_pages != NULL, + "collect_dirty_pages() is required for any log mode!"); + mode->collect_dirty_pages(vm, slot, bitmap, num_pages); +} + static void generate_random_array(uint64_t *guest_array, uint64_t size) { uint64_t i; @@ -195,7 +305,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap) page); } - if (test_bit_le(page, bmap)) { + if (test_and_clear_bit_le(page, bmap)) { host_dirty_count++; /* * If the bit is set, the value written onto @@ -252,11 +362,12 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); - vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); + vm = vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR); kvm_vm_elf_load(vm, program_invocation_name, 0, 0); #ifdef __x86_64__ vm_create_irqchip(vm); #endif + log_mode_create_vm_done(vm); vm_vcpu_add_default(vm, vcpuid, guest_code); return vm; } @@ -264,10 +375,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid, #define DIRTY_MEM_BITS 30 /* 1G */ #define PAGE_SHIFT_4K 12 -#ifdef USE_CLEAR_DIRTY_LOG -static u64 dirty_log_manual_caps; -#endif - static void run_test(enum vm_guest_mode mode, unsigned long iterations, unsigned long interval, uint64_t phys_offset) { @@ -275,6 +382,12 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, struct kvm_vm *vm; unsigned long *bmap; + if (!log_mode_supported()) { + print_skip("Log mode '%s' not supported", + log_modes[host_log_mode].name); + return; + } + /* * We reserve page table for 2 times of extra dirty mem which * will definitely cover the original (1G+) test range. Here @@ -317,14 +430,6 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, bmap = bitmap_alloc(host_num_pages); host_bmap_track = bitmap_alloc(host_num_pages); -#ifdef USE_CLEAR_DIRTY_LOG - struct kvm_enable_cap cap = {}; - - cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; - cap.args[0] = dirty_log_manual_caps; - vm_enable_cap(vm, &cap); -#endif - /* Add an extra memory slot for testing dirty logging */ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, guest_test_phys_mem, @@ -362,11 +467,8 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations, while (iteration < iterations) { /* Give the vcpu thread some time to dirty some pages */ usleep(interval * 1000); - kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap); -#ifdef USE_CLEAR_DIRTY_LOG - kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0, - host_num_pages); -#endif + log_mode_collect_dirty_pages(vm, TEST_MEM_SLOT_INDEX, + bmap, host_num_pages); vm_dirty_log_verify(mode, bmap); iteration++; sync_global_to_guest(vm, iteration); @@ -410,6 +512,9 @@ static void help(char *name) TEST_HOST_LOOP_INTERVAL); printf(" -p: specify guest physical test memory offset\n" " Warning: a low offset can conflict with the loaded test code.\n"); + printf(" -M: specify the host logging mode " + "(default: run all log modes). Supported modes: \n\t"); + log_modes_dump(); printf(" -m: specify the guest mode ID to test " "(default: test all supported modes)\n" " This option may be used multiple times.\n" @@ -429,18 +534,7 @@ int main(int argc, char *argv[]) bool mode_selected = false; uint64_t phys_offset = 0; unsigned int mode; - int opt, i; - -#ifdef USE_CLEAR_DIRTY_LOG - dirty_log_manual_caps = - kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2); - if (!dirty_log_manual_caps) { - print_skip("KVM_CLEAR_DIRTY_LOG not available"); - exit(KSFT_SKIP); - } - dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | - KVM_DIRTY_LOG_INITIALLY_SET); -#endif + int opt, i, j; #ifdef __x86_64__ guest_mode_init(VM_MODE_PXXV48_4K, true, true); @@ -464,7 +558,7 @@ int main(int argc, char *argv[]) guest_mode_init(VM_MODE_P40V48_4K, true, true); #endif - while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) { + while ((opt = getopt(argc, argv, "hi:I:p:m:M:")) != -1) { switch (opt) { case 'i': iterations = strtol(optarg, NULL, 10); @@ -486,6 +580,26 @@ int main(int argc, char *argv[]) "Guest mode ID %d too big", mode); guest_modes[mode].enabled = true; break; + case 'M': + if (!strcmp(optarg, "all")) { + host_log_mode_option = LOG_MODE_ALL; + break; + } + for (i = 0; i < LOG_MODE_NUM; i++) { + if (!strcmp(optarg, log_modes[i].name)) { + pr_info("Setting log mode to: '%s'\n", + optarg); + host_log_mode_option = i; + break; + } + } + if (i == LOG_MODE_NUM) { + printf("Log mode '%s' invalid. Please choose " + "from: ", optarg); + log_modes_dump(); + exit(1); + } + break; case 'h': default: help(argv[0]); @@ -507,7 +621,18 @@ int main(int argc, char *argv[]) TEST_ASSERT(guest_modes[i].supported, "Guest mode ID %d (%s) not supported.", i, vm_guest_mode_string(i)); - run_test(i, iterations, interval, phys_offset); + if (host_log_mode_option == LOG_MODE_ALL) { + /* Run each log mode */ + for (j = 0; j < LOG_MODE_NUM; j++) { + pr_info("Testing Log Mode '%s'\n", + log_modes[j].name); + host_log_mode = j; + run_test(i, iterations, interval, phys_offset); + } + } else { + host_log_mode = host_log_mode_option; + run_test(i, iterations, interval, phys_offset); + } } return 0; diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 919e161dd289..7d29aa786959 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -63,9 +63,11 @@ enum vm_mem_backing_src_type { int kvm_check_cap(long cap); int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap); +int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_enable_cap *cap); +void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size); struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp, int perm); void kvm_vm_release(struct kvm_vm *vmp); @@ -149,6 +151,7 @@ void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_guest_debug *debug); void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_mp_state *mp_state); +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid); void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs); @@ -294,6 +297,8 @@ int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd); memcpy(&(g), _p, sizeof(g)); \ }) +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid); + /* Common ucalls */ enum { UCALL_NONE, diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h new file mode 100644 index 000000000000..2618052057b1 --- /dev/null +++ b/tools/testing/selftests/kvm/include/perf_test_util.h @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * tools/testing/selftests/kvm/include/perf_test_util.h + * + * Copyright (C) 2020, Google LLC. + */ + +#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H +#define SELFTEST_KVM_PERF_TEST_UTIL_H + +#include "kvm_util.h" +#include "processor.h" + +#define MAX_VCPUS 512 + +#define PAGE_SHIFT_4K 12 +#define PTES_PER_4K_PT 512 + +#define TEST_MEM_SLOT_INDEX 1 + +/* Default guest test virtual memory offset */ +#define DEFAULT_GUEST_TEST_MEM 0xc0000000 + +#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */ + +/* + * Guest physical memory offset of the testing memory slot. + * This will be set to the topmost valid physical address minus + * the test memory size. + */ +static uint64_t guest_test_phys_mem; + +/* + * Guest virtual memory offset of the testing memory slot. + * Must not conflict with identity mapped test code. + */ +static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM; +static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE; + +/* Number of VCPUs for the test */ +static int nr_vcpus = 1; + +struct vcpu_args { + uint64_t gva; + uint64_t pages; + + /* Only used by the host userspace part of the vCPU thread */ + int vcpu_id; +}; + +struct perf_test_args { + struct kvm_vm *vm; + uint64_t host_page_size; + uint64_t guest_page_size; + int wr_fract; + + struct vcpu_args vcpu_args[MAX_VCPUS]; +}; + +static struct perf_test_args perf_test_args; + +/* + * Continuously write to the first 8 bytes of each page in the + * specified region. + */ +static void guest_code(uint32_t vcpu_id) +{ + struct vcpu_args *vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + uint64_t gva; + uint64_t pages; + int i; + + /* Make sure vCPU args data structure is not corrupt. */ + GUEST_ASSERT(vcpu_args->vcpu_id == vcpu_id); + + gva = vcpu_args->gva; + pages = vcpu_args->pages; + + while (true) { + for (i = 0; i < pages; i++) { + uint64_t addr = gva + (i * perf_test_args.guest_page_size); + + if (i % perf_test_args.wr_fract == 0) + *(uint64_t *)addr = 0x0123456789ABCDEF; + else + READ_ONCE(*(uint64_t *)addr); + } + + GUEST_SYNC(1); + } +} + +static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus, + uint64_t vcpu_memory_bytes) +{ + struct kvm_vm *vm; + uint64_t pages = DEFAULT_GUEST_PHY_PAGES; + uint64_t guest_num_pages; + + /* Account for a few pages per-vCPU for stacks */ + pages += DEFAULT_STACK_PGS * vcpus; + + /* + * Reserve twice the ammount of memory needed to map the test region and + * the page table / stacks region, at 4k, for page tables. Do the + * calculation with 4K page size: the smallest of all archs. (e.g., 64K + * page size guest will need even less memory for page tables). + */ + pages += (2 * pages) / PTES_PER_4K_PT; + pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) / + PTES_PER_4K_PT; + pages = vm_adjust_num_guest_pages(mode, pages); + + pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode)); + + vm = vm_create(mode, pages, O_RDWR); + kvm_vm_elf_load(vm, program_invocation_name, 0, 0); +#ifdef __x86_64__ + vm_create_irqchip(vm); +#endif + + perf_test_args.vm = vm; + perf_test_args.guest_page_size = vm_get_page_size(vm); + perf_test_args.host_page_size = getpagesize(); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.guest_page_size == 0, + "Guest memory size is not guest page size aligned."); + + guest_num_pages = (vcpus * vcpu_memory_bytes) / + perf_test_args.guest_page_size; + guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); + + /* + * If there should be more memory in the guest test region than there + * can be pages in the guest, it will definitely cause problems. + */ + TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm), + "Requested more guest memory than address space allows.\n" + " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n", + guest_num_pages, vm_get_max_gfn(vm), vcpus, + vcpu_memory_bytes); + + TEST_ASSERT(vcpu_memory_bytes % perf_test_args.host_page_size == 0, + "Guest memory size is not host page size aligned."); + + guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) * + perf_test_args.guest_page_size; + guest_test_phys_mem &= ~(perf_test_args.host_page_size - 1); + +#ifdef __s390x__ + /* Align to 1M (segment size) */ + guest_test_phys_mem &= ~((1 << 20) - 1); +#endif + + pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem); + + /* Add an extra memory slot for testing */ + vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, + guest_test_phys_mem, + TEST_MEM_SLOT_INDEX, + guest_num_pages, 0); + + /* Do mapping for the demand paging memory slot */ + virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0); + + ucall_init(vm, NULL); + + return vm; +} + +static void add_vcpus(struct kvm_vm *vm, int vcpus, uint64_t vcpu_memory_bytes) +{ + vm_paddr_t vcpu_gpa; + struct vcpu_args *vcpu_args; + int vcpu_id; + + for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) { + vcpu_args = &perf_test_args.vcpu_args[vcpu_id]; + + vm_vcpu_add_default(vm, vcpu_id, guest_code); + +#ifdef __x86_64__ + vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid()); +#endif + + vcpu_args->vcpu_id = vcpu_id; + vcpu_args->gva = guest_test_virt_mem + + (vcpu_id * vcpu_memory_bytes); + vcpu_args->pages = vcpu_memory_bytes / + perf_test_args.guest_page_size; + + vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes); + pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n", + vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes); + } +} + +#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index 5eb01bf51b86..ffffa560436b 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -64,5 +64,7 @@ int64_t timespec_to_ns(struct timespec ts); struct timespec timespec_add_ns(struct timespec ts, int64_t ns); struct timespec timespec_add(struct timespec ts1, struct timespec ts2); struct timespec timespec_sub(struct timespec ts1, struct timespec ts2); +struct timespec timespec_diff_now(struct timespec start); +struct timespec timespec_div(struct timespec ts, int divisor); #endif /* SELFTEST_KVM_TEST_UTIL_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 82b7fe16a824..8e61340b3911 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -36,6 +36,8 @@ #define X86_CR4_SMAP (1ul << 21) #define X86_CR4_PKE (1ul << 22) +#define UNEXPECTED_VECTOR_PORT 0xfff0u + /* General Registers in 64-Bit Mode */ struct gpr64_regs { u64 rax; @@ -59,7 +61,7 @@ struct gpr64_regs { struct desc64 { uint16_t limit0; uint16_t base0; - unsigned base1:8, s:1, type:4, dpl:2, p:1; + unsigned base1:8, type:4, s:1, dpl:2, p:1; unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8; uint32_t base3; uint32_t zero1; @@ -239,6 +241,11 @@ static inline struct desc_ptr get_idt(void) return idt; } +static inline void outl(uint16_t port, uint32_t value) +{ + __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value)); +} + #define SET_XMM(__var, __xmm) \ asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm) @@ -338,6 +345,35 @@ uint32_t kvm_get_cpuid_max_basic(void); uint32_t kvm_get_cpuid_max_extended(void); void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); +struct ex_regs { + uint64_t rax, rcx, rdx, rbx; + uint64_t rbp, rsi, rdi; + uint64_t r8, r9, r10, r11; + uint64_t r12, r13, r14, r15; + uint64_t vector; + uint64_t error_code; + uint64_t rip; + uint64_t cs; + uint64_t rflags; +}; + +void vm_init_descriptor_tables(struct kvm_vm *vm); +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid); +void vm_handle_exception(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)); + +/* + * set_cpuid() - overwrites a matching cpuid entry with the provided value. + * matches based on ent->function && ent->index. returns true + * if a match was found and successfully overwritten. + * @cpuid: the kvm cpuid list to modify. + * @ent: cpuid entry to insert + */ +bool set_cpuid(struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 *ent); + +uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t a3); + /* * Basic CPU control in CR0 */ diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h index 54d624dd6c10..e78d7e26ba61 100644 --- a/tools/testing/selftests/kvm/include/x86_64/vmx.h +++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h @@ -573,6 +573,10 @@ struct vmx_pages { void *eptp_hva; uint64_t eptp_gpa; void *eptp; + + void *apic_access_hva; + uint64_t apic_access_gpa; + void *apic_access; }; union vmx_basic { @@ -615,5 +619,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t memslot, uint32_t eptp_memslot); void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, uint32_t eptp_memslot); +void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot); #endif /* SELFTEST_KVM_VMX_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 2afa6618b396..d6c32c328e9a 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -350,3 +350,7 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...) va_end(ap); } + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ +} diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c index c8e0ec20d3bf..2f37b90ee1a9 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c +++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c @@ -94,6 +94,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_MMIO && run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) { vm_vaddr_t gva; diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 74776ee228f2..126c6727a6b0 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -14,6 +14,7 @@ #include <sys/mman.h> #include <sys/types.h> #include <sys/stat.h> +#include <unistd.h> #include <linux/kernel.h> #define KVM_UTIL_PGS_PER_HUGEPG 512 @@ -85,6 +86,34 @@ int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap) return ret; } +/* VCPU Enable Capability + * + * Input Args: + * vm - Virtual Machine + * vcpu_id - VCPU + * cap - Capability + * + * Output Args: None + * + * Return: On success, 0. On failure a TEST_ASSERT failure is produced. + * + * Enables a capability (KVM_CAP_*) on the VCPU. + */ +int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id, + struct kvm_enable_cap *cap) +{ + struct vcpu *vcpu = vcpu_find(vm, vcpu_id); + int r; + + TEST_ASSERT(vcpu, "cannot find vcpu %d", vcpu_id); + + r = ioctl(vcpu->fd, KVM_ENABLE_CAP, cap); + TEST_ASSERT(!r, "KVM_ENABLE_CAP vCPU ioctl failed,\n" + " rc: %i, errno: %i", r, errno); + + return r; +} + static void vm_open(struct kvm_vm *vm, int perm) { vm->kvm_fd = open(KVM_DEV_PATH, perm); @@ -151,7 +180,7 @@ _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) * descriptor to control the created VM is created with the permissions * given by perm (e.g. O_RDWR). */ -struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) +struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) { struct kvm_vm *vm; @@ -242,11 +271,6 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) return vm; } -struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) -{ - return _vm_create(mode, phy_pages, perm); -} - /* * VM Restart * @@ -664,13 +688,21 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm, /* As needed perform madvise */ if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) { - ret = madvise(region->host_mem, npages * vm->page_size, - src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); - TEST_ASSERT(ret == 0, "madvise failed,\n" - " addr: %p\n" - " length: 0x%lx\n" - " src_type: %x", - region->host_mem, npages * vm->page_size, src_type); + struct stat statbuf; + + ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf); + TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT), + "stat /sys/kernel/mm/transparent_hugepage"); + + TEST_ASSERT(ret == 0 || src_type != VM_MEM_SRC_ANONYMOUS_THP, + "VM_MEM_SRC_ANONYMOUS_THP requires THP to be configured in the host kernel"); + + if (ret == 0) { + ret = madvise(region->host_mem, npages * vm->page_size, + src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE); + TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %x", + region->host_mem, npages * vm->page_size, src_type); + } } region->unused_phy_pages = sparsebit_alloc(); @@ -1195,6 +1227,9 @@ int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid) do { rc = ioctl(vcpu->fd, KVM_RUN, NULL); } while (rc == -1 && errno == EINTR); + + assert_on_unhandled_exception(vm, vcpuid); + return rc; } @@ -1252,6 +1287,35 @@ void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid, } /* + * VM VCPU Get Reg List + * + * Input Args: + * vm - Virtual Machine + * vcpuid - VCPU ID + * + * Output Args: + * None + * + * Return: + * A pointer to an allocated struct kvm_reg_list + * + * Get the list of guest registers which are supported for + * KVM_GET_ONE_REG/KVM_SET_ONE_REG calls + */ +struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list; + int ret; + + ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, ®_list_n); + TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0"); + reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64)); + reg_list->n = reg_list_n.n; + vcpu_ioctl(vm, vcpuid, KVM_GET_REG_LIST, reg_list); + return reg_list; +} + +/* * VM VCPU Regs Get * * Input Args: diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h index 2ef446520748..f07d383d03a1 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h +++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h @@ -50,6 +50,8 @@ struct kvm_vm { vm_paddr_t pgd; vm_vaddr_t gdt; vm_vaddr_t tss; + vm_vaddr_t idt; + vm_vaddr_t handlers; }; struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid); diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index a88c5d665725..7349bb2e1a24 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -241,3 +241,7 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent) fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n", indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr); } + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ +} diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c index fd589dc9bfab..9d3b0f15249a 100644 --- a/tools/testing/selftests/kvm/lib/s390x/ucall.c +++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c @@ -38,6 +38,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_S390_SIEIC && run->s390_sieic.icptcode == 4 && (run->s390_sieic.ipa >> 8) == 0x83 && /* 0x83 means DIAGNOSE */ diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index 689e97c27ee2..8e04c0b1608e 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -4,10 +4,13 @@ * * Copyright (C) 2020, Google LLC. */ -#include <stdlib.h> + +#include <assert.h> #include <ctype.h> #include <limits.h> -#include <assert.h> +#include <stdlib.h> +#include <time.h> + #include "test_util.h" /* @@ -81,6 +84,21 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2) return timespec_add_ns((struct timespec){0}, ns1 - ns2); } +struct timespec timespec_diff_now(struct timespec start) +{ + struct timespec end; + + clock_gettime(CLOCK_MONOTONIC, &end); + return timespec_sub(end, start); +} + +struct timespec timespec_div(struct timespec ts, int divisor) +{ + int64_t ns = timespec_to_ns(ts) / divisor; + + return timespec_add_ns((struct timespec){0}, ns); +} + void print_skip(const char *fmt, ...) { va_list ap; diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S new file mode 100644 index 000000000000..aaf7bc7d2ce1 --- /dev/null +++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S @@ -0,0 +1,81 @@ +handle_exception: + push %r15 + push %r14 + push %r13 + push %r12 + push %r11 + push %r10 + push %r9 + push %r8 + + push %rdi + push %rsi + push %rbp + push %rbx + push %rdx + push %rcx + push %rax + mov %rsp, %rdi + + call route_exception + + pop %rax + pop %rcx + pop %rdx + pop %rbx + pop %rbp + pop %rsi + pop %rdi + pop %r8 + pop %r9 + pop %r10 + pop %r11 + pop %r12 + pop %r13 + pop %r14 + pop %r15 + + /* Discard vector and error code. */ + add $16, %rsp + iretq + +/* + * Build the handle_exception wrappers which push the vector/error code on the + * stack and an array of pointers to those wrappers. + */ +.pushsection .rodata +.globl idt_handlers +idt_handlers: +.popsection + +.macro HANDLERS has_error from to + vector = \from + .rept \to - \from + 1 + .align 8 + + /* Fetch current address and append it to idt_handlers. */ + current_handler = . +.pushsection .rodata +.quad current_handler +.popsection + + .if ! \has_error + pushq $0 + .endif + pushq $vector + jmp handle_exception + vector = vector + 1 + .endr +.endm + +.global idt_handler_code +idt_handler_code: + HANDLERS has_error=0 from=0 to=7 + HANDLERS has_error=1 from=8 to=8 + HANDLERS has_error=0 from=9 to=9 + HANDLERS has_error=1 from=10 to=14 + HANDLERS has_error=0 from=15 to=16 + HANDLERS has_error=1 from=17 to=17 + HANDLERS has_error=0 from=18 to=255 + +.section .note.GNU-stack, "", %progbits diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index f6eb34eaa0d2..d10c5c05bdf0 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -12,9 +12,18 @@ #include "../kvm_util_internal.h" #include "processor.h" +#ifndef NUM_INTERRUPTS +#define NUM_INTERRUPTS 256 +#endif + +#define DEFAULT_CODE_SELECTOR 0x8 +#define DEFAULT_DATA_SELECTOR 0x10 + /* Minimum physical address used for virtual translation tables. */ #define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000 +vm_vaddr_t exception_handlers; + /* Virtual translation table structure declarations */ struct pageMapL4Entry { uint64_t present:1; @@ -392,11 +401,12 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp) desc->limit0 = segp->limit & 0xFFFF; desc->base0 = segp->base & 0xFFFF; desc->base1 = segp->base >> 16; - desc->s = segp->s; desc->type = segp->type; + desc->s = segp->s; desc->dpl = segp->dpl; desc->p = segp->present; desc->limit1 = segp->limit >> 16; + desc->avl = segp->avl; desc->l = segp->l; desc->db = segp->db; desc->g = segp->g; @@ -556,9 +566,9 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); - kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs); - kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds); - kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es); + kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs); + kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds); + kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es); kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot); break; @@ -1118,3 +1128,131 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits) *va_bits = (entry->eax >> 8) & 0xff; } } + +struct idt_entry { + uint16_t offset0; + uint16_t selector; + uint16_t ist : 3; + uint16_t : 5; + uint16_t type : 4; + uint16_t : 1; + uint16_t dpl : 2; + uint16_t p : 1; + uint16_t offset1; + uint32_t offset2; uint32_t reserved; +}; + +static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, + int dpl, unsigned short selector) +{ + struct idt_entry *base = + (struct idt_entry *)addr_gva2hva(vm, vm->idt); + struct idt_entry *e = &base[vector]; + + memset(e, 0, sizeof(*e)); + e->offset0 = addr; + e->selector = selector; + e->ist = 0; + e->type = 14; + e->dpl = dpl; + e->p = 1; + e->offset1 = addr >> 16; + e->offset2 = addr >> 32; +} + +void kvm_exit_unexpected_vector(uint32_t value) +{ + outl(UNEXPECTED_VECTOR_PORT, value); +} + +void route_exception(struct ex_regs *regs) +{ + typedef void(*handler)(struct ex_regs *); + handler *handlers = (handler *)exception_handlers; + + if (handlers && handlers[regs->vector]) { + handlers[regs->vector](regs); + return; + } + + kvm_exit_unexpected_vector(regs->vector); +} + +void vm_init_descriptor_tables(struct kvm_vm *vm) +{ + extern void *idt_handlers; + int i; + + vm->idt = vm_vaddr_alloc(vm, getpagesize(), 0x2000, 0, 0); + vm->handlers = vm_vaddr_alloc(vm, 256 * sizeof(void *), 0x2000, 0, 0); + /* Handlers have the same address in both address spaces.*/ + for (i = 0; i < NUM_INTERRUPTS; i++) + set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, + DEFAULT_CODE_SELECTOR); +} + +void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid) +{ + struct kvm_sregs sregs; + + vcpu_sregs_get(vm, vcpuid, &sregs); + sregs.idt.base = vm->idt; + sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1; + sregs.gdt.base = vm->gdt; + sregs.gdt.limit = getpagesize() - 1; + kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs); + vcpu_sregs_set(vm, vcpuid, &sregs); + *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers; +} + +void vm_handle_exception(struct kvm_vm *vm, int vector, + void (*handler)(struct ex_regs *)) +{ + vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers); + + handlers[vector] = (vm_vaddr_t)handler; +} + +void assert_on_unhandled_exception(struct kvm_vm *vm, uint32_t vcpuid) +{ + if (vcpu_state(vm, vcpuid)->exit_reason == KVM_EXIT_IO + && vcpu_state(vm, vcpuid)->io.port == UNEXPECTED_VECTOR_PORT + && vcpu_state(vm, vcpuid)->io.size == 4) { + /* Grab pointer to io data */ + uint32_t *data = (void *)vcpu_state(vm, vcpuid) + + vcpu_state(vm, vcpuid)->io.data_offset; + + TEST_ASSERT(false, + "Unexpected vectored event in guest (vector:0x%x)", + *data); + } +} + +bool set_cpuid(struct kvm_cpuid2 *cpuid, + struct kvm_cpuid_entry2 *ent) +{ + int i; + + for (i = 0; i < cpuid->nent; i++) { + struct kvm_cpuid_entry2 *cur = &cpuid->entries[i]; + + if (cur->function != ent->function || cur->index != ent->index) + continue; + + memcpy(cur, ent, sizeof(struct kvm_cpuid_entry2)); + return true; + } + + return false; +} + +uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2, + uint64_t a3) +{ + uint64_t r; + + asm volatile("vmcall" + : "=a"(r) + : "b"(a0), "c"(a1), "d"(a2), "S"(a3)); + return r; +} diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c index da4d89ad5419..a3489973e290 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c +++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c @@ -40,6 +40,9 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc) struct kvm_run *run = vcpu_state(vm, vcpu_id); struct ucall ucall = {}; + if (uc) + memset(uc, 0, sizeof(*uc)); + if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) { struct kvm_regs regs; diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index f1e00d43eea2..2448b30e8efa 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -542,3 +542,12 @@ void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm, vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp); vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp); } + +void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm, + uint32_t eptp_memslot) +{ + vmx->apic_access = (void *)vm_vaddr_alloc(vm, getpagesize(), + 0x10000, 0, 0); + vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access); + vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access); +} diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c new file mode 100644 index 000000000000..b10a27485bad --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020, Google LLC. + * + * Tests for KVM paravirtual feature disablement + */ +#include <asm/kvm_para.h> +#include <linux/kvm_para.h> +#include <stdint.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +extern unsigned char rdmsr_start; +extern unsigned char rdmsr_end; + +static u64 do_rdmsr(u32 idx) +{ + u32 lo, hi; + + asm volatile("rdmsr_start: rdmsr;" + "rdmsr_end:" + : "=a"(lo), "=c"(hi) + : "c"(idx)); + + return (((u64) hi) << 32) | lo; +} + +extern unsigned char wrmsr_start; +extern unsigned char wrmsr_end; + +static void do_wrmsr(u32 idx, u64 val) +{ + u32 lo, hi; + + lo = val; + hi = val >> 32; + + asm volatile("wrmsr_start: wrmsr;" + "wrmsr_end:" + : : "a"(lo), "c"(idx), "d"(hi)); +} + +static int nr_gp; + +static void guest_gp_handler(struct ex_regs *regs) +{ + unsigned char *rip = (unsigned char *)regs->rip; + bool r, w; + + r = rip == &rdmsr_start; + w = rip == &wrmsr_start; + GUEST_ASSERT(r || w); + + nr_gp++; + + if (r) + regs->rip = (uint64_t)&rdmsr_end; + else + regs->rip = (uint64_t)&wrmsr_end; +} + +struct msr_data { + uint32_t idx; + const char *name; +}; + +#define TEST_MSR(msr) { .idx = msr, .name = #msr } +#define UCALL_PR_MSR 0xdeadbeef +#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr) + +/* + * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or + * written, as the KVM_CPUID_FEATURES leaf is cleared. + */ +static struct msr_data msrs_to_test[] = { + TEST_MSR(MSR_KVM_SYSTEM_TIME), + TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW), + TEST_MSR(MSR_KVM_WALL_CLOCK), + TEST_MSR(MSR_KVM_WALL_CLOCK_NEW), + TEST_MSR(MSR_KVM_ASYNC_PF_EN), + TEST_MSR(MSR_KVM_STEAL_TIME), + TEST_MSR(MSR_KVM_PV_EOI_EN), + TEST_MSR(MSR_KVM_POLL_CONTROL), + TEST_MSR(MSR_KVM_ASYNC_PF_INT), + TEST_MSR(MSR_KVM_ASYNC_PF_ACK), +}; + +static void test_msr(struct msr_data *msr) +{ + PR_MSR(msr); + do_rdmsr(msr->idx); + GUEST_ASSERT(READ_ONCE(nr_gp) == 1); + + nr_gp = 0; + do_wrmsr(msr->idx, 0); + GUEST_ASSERT(READ_ONCE(nr_gp) == 1); + nr_gp = 0; +} + +struct hcall_data { + uint64_t nr; + const char *name; +}; + +#define TEST_HCALL(hc) { .nr = hc, .name = #hc } +#define UCALL_PR_HCALL 0xdeadc0de +#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc) + +/* + * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding + * features have been cleared in KVM_CPUID_FEATURES. + */ +static struct hcall_data hcalls_to_test[] = { + TEST_HCALL(KVM_HC_KICK_CPU), + TEST_HCALL(KVM_HC_SEND_IPI), + TEST_HCALL(KVM_HC_SCHED_YIELD), +}; + +static void test_hcall(struct hcall_data *hc) +{ + uint64_t r; + + PR_HCALL(hc); + r = kvm_hypercall(hc->nr, 0, 0, 0, 0); + GUEST_ASSERT(r == -KVM_ENOSYS); +} + +static void guest_main(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) { + test_msr(&msrs_to_test[i]); + } + + for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) { + test_hcall(&hcalls_to_test[i]); + } + + GUEST_DONE(); +} + +static void clear_kvm_cpuid_features(struct kvm_cpuid2 *cpuid) +{ + struct kvm_cpuid_entry2 ent = {0}; + + ent.function = KVM_CPUID_FEATURES; + TEST_ASSERT(set_cpuid(cpuid, &ent), + "failed to clear KVM_CPUID_FEATURES leaf"); +} + +static void pr_msr(struct ucall *uc) +{ + struct msr_data *msr = (struct msr_data *)uc->args[0]; + + pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx); +} + +static void pr_hcall(struct ucall *uc) +{ + struct hcall_data *hc = (struct hcall_data *)uc->args[0]; + + pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr); +} + +static void handle_abort(struct ucall *uc) +{ + TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0], + __FILE__, uc->args[1]); +} + +#define VCPU_ID 0 + +static void enter_guest(struct kvm_vm *vm) +{ + struct kvm_run *run; + struct ucall uc; + int r; + + run = vcpu_state(vm, VCPU_ID); + + while (true) { + r = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(!r, "vcpu_run failed: %d\n", r); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "unexpected exit reason: %u (%s)", + run->exit_reason, exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_PR_MSR: + pr_msr(&uc); + break; + case UCALL_PR_HCALL: + pr_hcall(&uc); + break; + case UCALL_ABORT: + handle_abort(&uc); + return; + case UCALL_DONE: + return; + } + } +} + +int main(void) +{ + struct kvm_enable_cap cap = {0}; + struct kvm_cpuid2 *best; + struct kvm_vm *vm; + + if (!kvm_check_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID)) { + pr_info("will skip kvm paravirt restriction tests.\n"); + return 0; + } + + vm = vm_create_default(VCPU_ID, 0, guest_main); + + cap.cap = KVM_CAP_ENFORCE_PV_FEATURE_CPUID; + cap.args[0] = 1; + vcpu_enable_cap(vm, VCPU_ID, &cap); + + best = kvm_get_supported_cpuid(); + clear_kvm_cpuid_features(best); + vcpu_set_cpuid(vm, VCPU_ID, best); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + vm_handle_exception(vm, GP_VECTOR, guest_gp_handler); + + enter_guest(vm); + kvm_vm_free(vm); +} diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c new file mode 100644 index 000000000000..1f65342d6cb7 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * vmx_apic_access_test + * + * Copyright (C) 2020, Google LLC. + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * The first subtest simply checks to see that an L2 guest can be + * launched with a valid APIC-access address that is backed by a + * page of L1 physical memory. + * + * The second subtest sets the APIC-access address to a (valid) L1 + * physical address that is not backed by memory. KVM can't handle + * this situation, so resuming L2 should result in a KVM exit for + * internal error (emulation). This is not an architectural + * requirement. It is just a shortcoming of KVM. The internal error + * is unfortunate, but it's better than what used to happen! + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "vmx.h" + +#include <string.h> +#include <sys/ioctl.h> + +#include "kselftest.h" + +#define VCPU_ID 0 + +/* The virtual machine object. */ +static struct kvm_vm *vm; + +static void l2_guest_code(void) +{ + /* Exit to L1 */ + __asm__ __volatile__("vmcall"); +} + +static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa) +{ +#define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + uint32_t control; + + GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); + GUEST_ASSERT(load_vmcs(vmx_pages)); + + /* Prepare the VMCS for L2 execution. */ + prepare_vmcs(vmx_pages, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + control = vmreadz(CPU_BASED_VM_EXEC_CONTROL); + control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS; + vmwrite(CPU_BASED_VM_EXEC_CONTROL, control); + control = vmreadz(SECONDARY_VM_EXEC_CONTROL); + control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmwrite(SECONDARY_VM_EXEC_CONTROL, control); + vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa); + + /* Try to launch L2 with the memory-backed APIC-access address. */ + GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR)); + GUEST_ASSERT(!vmlaunch()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + vmwrite(APIC_ACCESS_ADDR, high_gpa); + + /* Try to resume L2 with the unbacked APIC-access address. */ + GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR)); + GUEST_ASSERT(!vmresume()); + GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + unsigned long apic_access_addr = ~0ul; + unsigned int paddr_width; + unsigned int vaddr_width; + vm_vaddr_t vmx_pages_gva; + unsigned long high_gpa; + struct vmx_pages *vmx; + bool done = false; + + nested_vmx_check_supported(); + + vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid()); + + kvm_get_cpu_address_width(&paddr_width, &vaddr_width); + high_gpa = (1ul << paddr_width) - getpagesize(); + if ((unsigned long)DEFAULT_GUEST_PHY_PAGES * getpagesize() > high_gpa) { + print_skip("No unbacked physical page available"); + exit(KSFT_SKIP); + } + + vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva); + prepare_virtualize_apic_accesses(vmx, vm, 0); + vcpu_args_set(vm, VCPU_ID, 2, vmx_pages_gva, high_gpa); + + while (!done) { + volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + vcpu_run(vm, VCPU_ID); + if (apic_access_addr == high_gpa) { + TEST_ASSERT(run->exit_reason == + KVM_EXIT_INTERNAL_ERROR, + "Got exit reason other than KVM_EXIT_INTERNAL_ERROR: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + TEST_ASSERT(run->internal.suberror == + KVM_INTERNAL_ERROR_EMULATION, + "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u\n", + run->internal.suberror); + break; + } + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0], + __FILE__, uc.args[1]); + /* NOT REACHED */ + case UCALL_SYNC: + apic_access_addr = uc.args[1]; + break; + case UCALL_DONE: + done = true; + break; + default: + TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd); + } + } + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 30848ca36555..a5ce26d548e4 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -136,7 +136,7 @@ endif ifeq ($(OVERRIDE_TARGETS),) LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h $(OUTPUT)/%:%.c $(LOCAL_HDRS) - $(LINK.c) $^ $(LDLIBS) -o $@ + $(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@ $(OUTPUT)/%.o:%.S $(COMPILE.S) $^ -o $@ diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore index 0bc64a6d4c18..17f2d8415162 100644 --- a/tools/testing/selftests/mount/.gitignore +++ b/tools/testing/selftests/mount/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only unprivileged-remount-test +nosymfollow-test diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile index 026890744215..2d9454841644 100644 --- a/tools/testing/selftests/mount/Makefile +++ b/tools/testing/selftests/mount/Makefile @@ -3,7 +3,7 @@ CFLAGS = -Wall \ -O2 -TEST_PROGS := run_tests.sh -TEST_GEN_FILES := unprivileged-remount-test +TEST_PROGS := run_unprivileged_remount.sh run_nosymfollow.sh +TEST_GEN_FILES := unprivileged-remount-test nosymfollow-test include ../lib.mk diff --git a/tools/testing/selftests/mount/nosymfollow-test.c b/tools/testing/selftests/mount/nosymfollow-test.c new file mode 100644 index 000000000000..650d6d80a1d2 --- /dev/null +++ b/tools/testing/selftests/mount/nosymfollow-test.c @@ -0,0 +1,218 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <sched.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/vfs.h> +#include <unistd.h> + +#ifndef MS_NOSYMFOLLOW +# define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ +#endif + +#ifndef ST_NOSYMFOLLOW +# define ST_NOSYMFOLLOW 0x2000 /* Do not follow symlinks */ +#endif + +#define DATA "/tmp/data" +#define LINK "/tmp/symlink" +#define TMP "/tmp" + +static void die(char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(EXIT_FAILURE); +} + +static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt, + va_list ap) +{ + ssize_t written; + char buf[4096]; + int buf_len; + int fd; + + buf_len = vsnprintf(buf, sizeof(buf), fmt, ap); + if (buf_len < 0) + die("vsnprintf failed: %s\n", strerror(errno)); + + if (buf_len >= sizeof(buf)) + die("vsnprintf output truncated\n"); + + fd = open(filename, O_WRONLY); + if (fd < 0) { + if ((errno == ENOENT) && enoent_ok) + return; + die("open of %s failed: %s\n", filename, strerror(errno)); + } + + written = write(fd, buf, buf_len); + if (written != buf_len) { + if (written >= 0) { + die("short write to %s\n", filename); + } else { + die("write to %s failed: %s\n", + filename, strerror(errno)); + } + } + + if (close(fd) != 0) + die("close of %s failed: %s\n", filename, strerror(errno)); +} + +static void maybe_write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(true, filename, fmt, ap); + va_end(ap); +} + +static void write_file(char *filename, char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vmaybe_write_file(false, filename, fmt, ap); + va_end(ap); +} + +static void create_and_enter_ns(void) +{ + uid_t uid = getuid(); + gid_t gid = getgid(); + + if (unshare(CLONE_NEWUSER) != 0) + die("unshare(CLONE_NEWUSER) failed: %s\n", strerror(errno)); + + maybe_write_file("/proc/self/setgroups", "deny"); + write_file("/proc/self/uid_map", "0 %d 1", uid); + write_file("/proc/self/gid_map", "0 %d 1", gid); + + if (setgid(0) != 0) + die("setgid(0) failed %s\n", strerror(errno)); + if (setuid(0) != 0) + die("setuid(0) failed %s\n", strerror(errno)); + + if (unshare(CLONE_NEWNS) != 0) + die("unshare(CLONE_NEWNS) failed: %s\n", strerror(errno)); +} + +static void setup_symlink(void) +{ + int data, err; + + data = creat(DATA, O_RDWR); + if (data < 0) + die("creat failed: %s\n", strerror(errno)); + + err = symlink(DATA, LINK); + if (err < 0) + die("symlink failed: %s\n", strerror(errno)); + + if (close(data) != 0) + die("close of %s failed: %s\n", DATA, strerror(errno)); +} + +static void test_link_traversal(bool nosymfollow) +{ + int link; + + link = open(LINK, 0, O_RDWR); + if (nosymfollow) { + if ((link != -1 || errno != ELOOP)) { + die("link traversal unexpected result: %d, %s\n", + link, strerror(errno)); + } + } else { + if (link < 0) + die("link traversal failed: %s\n", strerror(errno)); + + if (close(link) != 0) + die("close of link failed: %s\n", strerror(errno)); + } +} + +static void test_readlink(void) +{ + char buf[4096]; + ssize_t ret; + + bzero(buf, sizeof(buf)); + + ret = readlink(LINK, buf, sizeof(buf)); + if (ret < 0) + die("readlink failed: %s\n", strerror(errno)); + if (strcmp(buf, DATA) != 0) + die("readlink strcmp failed: '%s' '%s'\n", buf, DATA); +} + +static void test_realpath(void) +{ + char *path = realpath(LINK, NULL); + + if (!path) + die("realpath failed: %s\n", strerror(errno)); + if (strcmp(path, DATA) != 0) + die("realpath strcmp failed\n"); + + free(path); +} + +static void test_statfs(bool nosymfollow) +{ + struct statfs buf; + int ret; + + ret = statfs(TMP, &buf); + if (ret) + die("statfs failed: %s\n", strerror(errno)); + + if (nosymfollow) { + if ((buf.f_flags & ST_NOSYMFOLLOW) == 0) + die("ST_NOSYMFOLLOW not set on %s\n", TMP); + } else { + if ((buf.f_flags & ST_NOSYMFOLLOW) != 0) + die("ST_NOSYMFOLLOW set on %s\n", TMP); + } +} + +static void run_tests(bool nosymfollow) +{ + test_link_traversal(nosymfollow); + test_readlink(); + test_realpath(); + test_statfs(nosymfollow); +} + +int main(int argc, char **argv) +{ + create_and_enter_ns(); + + if (mount("testing", TMP, "ramfs", 0, NULL) != 0) + die("mount failed: %s\n", strerror(errno)); + + setup_symlink(); + run_tests(false); + + if (mount("testing", TMP, "ramfs", MS_REMOUNT|MS_NOSYMFOLLOW, NULL) != 0) + die("remount failed: %s\n", strerror(errno)); + + run_tests(true); + + return EXIT_SUCCESS; +} diff --git a/tools/testing/selftests/mount/run_nosymfollow.sh b/tools/testing/selftests/mount/run_nosymfollow.sh new file mode 100755 index 000000000000..5fbbf03043a2 --- /dev/null +++ b/tools/testing/selftests/mount/run_nosymfollow.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +./nosymfollow-test diff --git a/tools/testing/selftests/mount/run_tests.sh b/tools/testing/selftests/mount/run_unprivileged_remount.sh index 4ab8f507dcba..4ab8f507dcba 100755 --- a/tools/testing/selftests/mount/run_tests.sh +++ b/tools/testing/selftests/mount/run_unprivileged_remount.sh diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index ef352477cac6..fa5fa425d148 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -21,6 +21,7 @@ TEST_PROGS += rxtimestamp.sh TEST_PROGS += devlink_port_split.py TEST_PROGS += drop_monitor_tests.sh TEST_PROGS += vrf_route_leaking.sh +TEST_PROGS += bareudp.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh new file mode 100755 index 000000000000..f366cadbc5e8 --- /dev/null +++ b/tools/testing/selftests/net/bareudp.sh @@ -0,0 +1,546 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Test various bareudp tunnel configurations. +# +# The bareudp module allows to tunnel network protocols like IP or MPLS over +# UDP, without adding any intermediate header. This scripts tests several +# configurations of bareudp (using IPv4 or IPv6 as underlay and transporting +# IPv4, IPv6 or MPLS packets on the overlay). +# +# Network topology: +# +# * A chain of 4 network namespaces, connected with veth pairs. Each veth +# is assigned an IPv4 and an IPv6 address. A host-route allows a veth to +# join its peer. +# +# * NS0 and NS3 are at the extremities of the chain. They have additional +# IPv4 and IPv6 addresses on their loopback device. Routes are added in NS0 +# and NS3, so that they can communicate using these overlay IP addresses. +# For IPv4 and IPv6 reachability tests, the route simply sets the peer's +# veth address as gateway. For MPLS reachability tests, an MPLS header is +# also pushed before the IP header. +# +# * NS1 and NS2 are the intermediate namespaces. They use a bareudp device to +# encapsulate the traffic into UDP. +# +# +-----------------------------------------------------------------------+ +# | NS0 | +# | | +# | lo: | +# | * IPv4 address: 192.0.2.100/32 | +# | * IPv6 address: 2001:db8::100/128 | +# | * IPv6 address: 2001:db8::200/128 | +# | * IPv4 route: 192.0.2.103/32 reachable via 192.0.2.11 | +# | * IPv6 route: 2001:db8::103/128 reachable via 2001:db8::11 | +# | * IPv6 route: 2001:db8::203/128 reachable via 2001:db8::11 | +# | (encapsulated with MPLS label 203) | +# | | +# | veth01: | +# | ^ * IPv4 address: 192.0.2.10, peer 192.0.2.11/32 | +# | | * IPv6 address: 2001:db8::10, peer 2001:db8::11/128 | +# | | | +# +---+-------------------------------------------------------------------+ +# | +# | Traffic type: IP or MPLS (depending on test) +# | +# +---+-------------------------------------------------------------------+ +# | | NS1 | +# | | | +# | v | +# | veth10: | +# | * IPv4 address: 192.0.2.11, peer 192.0.2.10/32 | +# | * IPv6 address: 2001:db8::11, peer 2001:db8::10/128 | +# | | +# | bareudp_ns1: | +# | * Encapsulate IP or MPLS packets received on veth10 into UDP | +# | and send the resulting packets through veth12. | +# | * Decapsulate bareudp packets (either IP or MPLS, over UDP) | +# | received on veth12 and send the inner packets through veth10. | +# | | +# | veth12: | +# | ^ * IPv4 address: 192.0.2.21, peer 192.0.2.22/32 | +# | | * IPv6 address: 2001:db8::21, peer 2001:db8::22/128 | +# | | | +# +---+-------------------------------------------------------------------+ +# | +# | Traffic type: IP or MPLS (depending on test), over UDP +# | +# +---+-------------------------------------------------------------------+ +# | | NS2 | +# | | | +# | v | +# | veth21: | +# | * IPv4 address: 192.0.2.22, peer 192.0.2.21/32 | +# | * IPv6 address: 2001:db8::22, peer 2001:db8::21/128 | +# | | +# | bareudp_ns2: | +# | * Decapsulate bareudp packets (either IP or MPLS, over UDP) | +# | received on veth21 and send the inner packets through veth23. | +# | * Encapsulate IP or MPLS packets received on veth23 into UDP | +# | and send the resulting packets through veth21. | +# | | +# | veth23: | +# | ^ * IPv4 address: 192.0.2.32, peer 192.0.2.33/32 | +# | | * IPv6 address: 2001:db8::32, peer 2001:db8::33/128 | +# | | | +# +---+-------------------------------------------------------------------+ +# | +# | Traffic type: IP or MPLS (depending on test) +# | +# +---+-------------------------------------------------------------------+ +# | | NS3 | +# | v | +# | veth32: | +# | * IPv4 address: 192.0.2.33, peer 192.0.2.32/32 | +# | * IPv6 address: 2001:db8::33, peer 2001:db8::32/128 | +# | | +# | lo: | +# | * IPv4 address: 192.0.2.103/32 | +# | * IPv6 address: 2001:db8::103/128 | +# | * IPv6 address: 2001:db8::203/128 | +# | * IPv4 route: 192.0.2.100/32 reachable via 192.0.2.32 | +# | * IPv6 route: 2001:db8::100/128 reachable via 2001:db8::32 | +# | * IPv6 route: 2001:db8::200/128 reachable via 2001:db8::32 | +# | (encapsulated with MPLS label 200) | +# | | +# +-----------------------------------------------------------------------+ + +ERR=4 # Return 4 by default, which is the SKIP code for kselftest +PING6="ping" +PAUSE_ON_FAIL="no" + +readonly NS0=$(mktemp -u ns0-XXXXXXXX) +readonly NS1=$(mktemp -u ns1-XXXXXXXX) +readonly NS2=$(mktemp -u ns2-XXXXXXXX) +readonly NS3=$(mktemp -u ns3-XXXXXXXX) + +# Exit the script after having removed the network namespaces it created +# +# Parameters: +# +# * The list of network namespaces to delete before exiting. +# +exit_cleanup() +{ + for ns in "$@"; do + ip netns delete "${ns}" 2>/dev/null || true + done + + if [ "${ERR}" -eq 4 ]; then + echo "Error: Setting up the testing environment failed." >&2 + fi + + exit "${ERR}" +} + +# Create the four network namespaces used by the script (NS0, NS1, NS2 and NS3) +# +# New namespaces are cleaned up manually in case of error, to ensure that only +# namespaces created by this script are deleted. +create_namespaces() +{ + ip netns add "${NS0}" || exit_cleanup + ip netns add "${NS1}" || exit_cleanup "${NS0}" + ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}" + ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}" +} + +# The trap function handler +# +exit_cleanup_all() +{ + exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}" +} + +# Configure a network interface using a host route +# +# Parameters +# +# * $1: the netns the network interface resides in, +# * $2: the network interface name, +# * $3: the local IPv4 address to assign to this interface, +# * $4: the IPv4 address of the remote network interface, +# * $5: the local IPv6 address to assign to this interface, +# * $6: the IPv6 address of the remote network interface. +# +iface_config() +{ + local NS="${1}"; readonly NS + local DEV="${2}"; readonly DEV + local LOCAL_IP4="${3}"; readonly LOCAL_IP4 + local PEER_IP4="${4}"; readonly PEER_IP4 + local LOCAL_IP6="${5}"; readonly LOCAL_IP6 + local PEER_IP6="${6}"; readonly PEER_IP6 + + ip -netns "${NS}" link set dev "${DEV}" up + ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP4}" peer "${PEER_IP4}" + ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP6}" peer "${PEER_IP6}" nodad +} + +# Create base networking topology: +# +# * set up the loopback device in all network namespaces (NS0..NS3), +# * set up a veth pair to connect each netns in sequence (NS0 with NS1, +# NS1 with NS2, etc.), +# * add and IPv4 and an IPv6 address on each veth interface, +# * prepare the ingress qdiscs in the intermediate namespaces. +# +setup_underlay() +{ + for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do + ip -netns "${ns}" link set dev lo up + done; + + ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}" + ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}" + ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}" + iface_config "${NS0}" veth01 192.0.2.10 192.0.2.11/32 2001:db8::10 2001:db8::11/128 + iface_config "${NS1}" veth10 192.0.2.11 192.0.2.10/32 2001:db8::11 2001:db8::10/128 + iface_config "${NS1}" veth12 192.0.2.21 192.0.2.22/32 2001:db8::21 2001:db8::22/128 + iface_config "${NS2}" veth21 192.0.2.22 192.0.2.21/32 2001:db8::22 2001:db8::21/128 + iface_config "${NS2}" veth23 192.0.2.32 192.0.2.33/32 2001:db8::32 2001:db8::33/128 + iface_config "${NS3}" veth32 192.0.2.33 192.0.2.32/32 2001:db8::33 2001:db8::32/128 + + tc -netns "${NS1}" qdisc add dev veth10 ingress + tc -netns "${NS2}" qdisc add dev veth23 ingress +} + +# Set up the IPv4, IPv6 and MPLS overlays. +# +# Configuration is similar for all protocols: +# +# * add an overlay IP address on the loopback interface of each edge +# namespace, +# * route these IP addresses via the intermediate namespaces (for the MPLS +# tests, this is also where MPLS encapsulation is done), +# * add routes for these IP addresses (or MPLS labels) in the intermediate +# namespaces. +# +# The bareudp encapsulation isn't configured in setup_overlay_*(). That will be +# done just before running the reachability tests. + +setup_overlay_ipv4() +{ + # Add the overlay IP addresses and route them through the veth devices + ip -netns "${NS0}" address add 192.0.2.100/32 dev lo + ip -netns "${NS3}" address add 192.0.2.103/32 dev lo + ip -netns "${NS0}" route add 192.0.2.103/32 src 192.0.2.100 via 192.0.2.11 + ip -netns "${NS3}" route add 192.0.2.100/32 src 192.0.2.103 via 192.0.2.32 + + # Route the overlay addresses in the intermediate namespaces + # (used after bareudp decapsulation) + ip netns exec "${NS1}" sysctl -qw net.ipv4.ip_forward=1 + ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1 + ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10 + ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33 + + # The intermediate namespaces don't have routes for the reverse path, + # as it will be handled by tc. So we need to ensure that rp_filter is + # not going to block the traffic. + ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0 + ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 + ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0 + ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0 +} + +setup_overlay_ipv6() +{ + # Add the overlay IP addresses and route them through the veth devices + ip -netns "${NS0}" address add 2001:db8::100/128 dev lo + ip -netns "${NS3}" address add 2001:db8::103/128 dev lo + ip -netns "${NS0}" route add 2001:db8::103/128 src 2001:db8::100 via 2001:db8::11 + ip -netns "${NS3}" route add 2001:db8::100/128 src 2001:db8::103 via 2001:db8::32 + + # Route the overlay addresses in the intermediate namespaces + # (used after bareudp decapsulation) + ip netns exec "${NS1}" sysctl -qw net.ipv6.conf.all.forwarding=1 + ip netns exec "${NS2}" sysctl -qw net.ipv6.conf.all.forwarding=1 + ip -netns "${NS1}" route add 2001:db8::100/128 via 2001:db8::10 + ip -netns "${NS2}" route add 2001:db8::103/128 via 2001:db8::33 +} + +setup_overlay_mpls() +{ + # Add specific overlay IP addresses, routed over MPLS + ip -netns "${NS0}" address add 2001:db8::200/128 dev lo + ip -netns "${NS3}" address add 2001:db8::203/128 dev lo + ip -netns "${NS0}" route add 2001:db8::203/128 src 2001:db8::200 encap mpls 203 via 2001:db8::11 + ip -netns "${NS3}" route add 2001:db8::200/128 src 2001:db8::203 encap mpls 200 via 2001:db8::32 + + # Route the MPLS packets in the intermediate namespaces + # (used after bareudp decapsulation) + ip netns exec "${NS1}" sysctl -qw net.mpls.platform_labels=256 + ip netns exec "${NS2}" sysctl -qw net.mpls.platform_labels=256 + ip -netns "${NS1}" -family mpls route add 200 via inet6 2001:db8::10 + ip -netns "${NS2}" -family mpls route add 203 via inet6 2001:db8::33 +} + +# Run "ping" from NS0 and print the result +# +# Parameters: +# +# * $1: the variant of ping to use (normally either "ping" or "ping6"), +# * $2: the IP address to ping, +# * $3: a human readable description of the purpose of the test. +# +# If the test fails and PAUSE_ON_FAIL is active, the user is given the +# possibility to continue with the next test or to quit immediately. +# +ping_test_one() +{ + local PING="$1"; readonly PING + local IP="$2"; readonly IP + local MSG="$3"; readonly MSG + local RET + + printf "TEST: %-60s " "${MSG}" + + set +e + ip netns exec "${NS0}" "${PING}" -w 5 -c 1 "${IP}" > /dev/null 2>&1 + RET=$? + set -e + + if [ "${RET}" -eq 0 ]; then + printf "[ OK ]\n" + else + ERR=1 + printf "[FAIL]\n" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + printf "\nHit enter to continue, 'q' to quit\n" + read a + if [ "$a" = "q" ]; then + exit 1 + fi + fi + fi +} + +# Run reachability tests +# +# Parameters: +# +# * $1: human readable string describing the underlay protocol. +# +# $IPV4, $IPV6, $MPLS_UC and $MULTIPROTO are inherited from the calling +# function. +# +ping_test() +{ + local UNDERLAY="$1"; readonly UNDERLAY + local MODE + local MSG + + if [ "${MULTIPROTO}" = "multiproto" ]; then + MODE=" (multiproto mode)" + else + MODE="" + fi + + if [ $IPV4 ]; then + ping_test_one "ping" "192.0.2.103" "IPv4 packets over ${UNDERLAY}${MODE}" + fi + if [ $IPV6 ]; then + ping_test_one "${PING6}" "2001:db8::103" "IPv6 packets over ${UNDERLAY}${MODE}" + fi + if [ $MPLS_UC ]; then + ping_test_one "${PING6}" "2001:db8::203" "Unicast MPLS packets over ${UNDERLAY}${MODE}" + fi +} + +# Set up a bareudp overlay and run reachability tests over IPv4 and IPv6 +# +# Parameters: +# +# * $1: the packet type (protocol) to be handled by bareudp, +# * $2: a flag to activate or deactivate bareudp's "multiproto" mode. +# +test_overlay() +{ + local ETHERTYPE="$1"; readonly ETHERTYPE + local MULTIPROTO="$2"; readonly MULTIPROTO + local IPV4 + local IPV6 + local MPLS_UC + + case "${ETHERTYPE}" in + "ipv4") + IPV4="ipv4" + if [ "${MULTIPROTO}" = "multiproto" ]; then + IPV6="ipv6" + else + IPV6="" + fi + MPLS_UC="" + ;; + "ipv6") + IPV6="ipv6" + IPV4="" + MPLS_UC="" + ;; + "mpls_uc") + MPLS_UC="mpls_uc" + IPV4="" + IPV6="" + ;; + *) + exit 1 + ;; + esac + readonly IPV4 + readonly IPV6 + readonly MPLS_UC + + # Create the bareudp devices in the intermediate namespaces + ip -netns "${NS1}" link add name bareudp_ns1 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}" + ip -netns "${NS2}" link add name bareudp_ns2 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}" + + # IPv4 over UDPv4 + if [ $IPV4 ]; then + # Encapsulation instructions for bareudp over IPv4 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4 \ + flower dst_ip 192.0.2.103/32 \ + action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4 \ + flower dst_ip 192.0.2.100/32 \ + action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # IPv6 over UDPv4 + if [ $IPV6 ]; then + # Encapsulation instructions for bareudp over IPv4 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6 \ + flower dst_ip 2001:db8::103/128 \ + action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6 \ + flower dst_ip 2001:db8::100/128 \ + action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # MPLS (unicast) over UDPv4 + if [ $MPLS_UC ]; then + ip netns exec "${NS1}" sysctl -qw net.mpls.conf.bareudp_ns1.input=1 + ip netns exec "${NS2}" sysctl -qw net.mpls.conf.bareudp_ns2.input=1 + + # Encapsulation instructions for bareudp over IPv4 + tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc \ + flower mpls_label 203 \ + action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc \ + flower mpls_label 200 \ + action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # Test IPv4 underlay + ping_test "UDPv4" + + # Cleanup bareudp encapsulation instructions, as they were specific to + # the IPv4 underlay, before setting up and testing the IPv6 underlay + tc -netns "${NS1}" filter delete dev veth10 ingress + tc -netns "${NS2}" filter delete dev veth23 ingress + + # IPv4 over UDPv6 + if [ $IPV4 ]; then + # New encapsulation instructions for bareudp over IPv6 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4 \ + flower dst_ip 192.0.2.103/32 \ + action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4 \ + flower dst_ip 192.0.2.100/32 \ + action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # IPv6 over UDPv6 + if [ $IPV6 ]; then + # New encapsulation instructions for bareudp over IPv6 + tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6 \ + flower dst_ip 2001:db8::103/128 \ + action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6 \ + flower dst_ip 2001:db8::100/128 \ + action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # MPLS (unicast) over UDPv6 + if [ $MPLS_UC ]; then + # New encapsulation instructions for bareudp over IPv6 + tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc \ + flower mpls_label 203 \ + action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \ + action mirred egress redirect dev bareudp_ns1 + tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc \ + flower mpls_label 200 \ + action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \ + action mirred egress redirect dev bareudp_ns2 + fi + + # Test IPv6 underlay + ping_test "UDPv6" + + tc -netns "${NS1}" filter delete dev veth10 ingress + tc -netns "${NS2}" filter delete dev veth23 ingress + ip -netns "${NS1}" link delete bareudp_ns1 + ip -netns "${NS2}" link delete bareudp_ns2 +} + +check_features() +{ + ip link help 2>&1 | grep -q bareudp + if [ $? -ne 0 ]; then + echo "Missing bareudp support in iproute2" >&2 + exit_cleanup + fi + + # Use ping6 on systems where ping doesn't handle IPv6 + ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING6="ping6" +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +check_features + +# Create namespaces before setting up the exit trap. +# Otherwise, exit_cleanup_all() could delete namespaces that were not created +# by this script. +create_namespaces + +set -e +trap exit_cleanup_all EXIT + +setup_underlay +setup_overlay_ipv4 +setup_overlay_ipv6 +setup_overlay_mpls + +test_overlay ipv4 nomultiproto +test_overlay ipv6 nomultiproto +test_overlay ipv4 multiproto +test_overlay mpls_uc nomultiproto + +if [ "${ERR}" -eq 1 ]; then + echo "Some tests failed." >&2 +else + ERR=0 +fi diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 4d5df8e1eee7..614d5477365a 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -34,3 +34,10 @@ CONFIG_TRACEPOINTS=y CONFIG_NET_DROP_MONITOR=m CONFIG_NETDEVSIM=m CONFIG_NET_FOU=m +CONFIG_MPLS_ROUTING=m +CONFIG_MPLS_IPTUNNEL=m +CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_CLS_FLOWER=m +CONFIG_NET_ACT_TUNNEL_KEY=m +CONFIG_NET_ACT_MIRRED=m +CONFIG_BAREUDP=m diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index 88d2472ba151..675eff45b037 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -1,11 +1,37 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="reportleave_test" +ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ + v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \ + v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \ + v3exc_timeout_test v3star_ex_auto_add_test" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" TEST_GROUP_MAC="01:00:5e:0a:0a:0a" + +ALL_GROUP="224.0.0.1" +ALL_MAC="01:00:5e:00:00:01" + +# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.1,192.0.2.2,192.0.2.3 +MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:03" +# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.10,192.0.2.11,192.0.2.12 +MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" +# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.20,192.0.2.30 +MZPKT_IS_INC3="22:00:5f:b4:00:00:00:01:01:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e" +# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12 +MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c" +# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.20,192.0.2.30 +MZPKT_ALLOW2="22:00:5b:b4:00:00:00:01:05:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e" +# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21 +MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15" +# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.20,192.0.2.30 +MZPKT_IS_EXC2="22:00:5e:b4:00:00:00:01:02:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e" +# IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30 +MZPKT_TO_EXC="22:00:9a:b1:00:00:00:01:04:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e" +# IGMPv3 block report: grp 239.10.10.10 block 192.0.2.1,192.0.2.20,192.0.2.30 +MZPKT_BLOCK="22:00:98:b1:00:00:00:01:06:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e" + source lib.sh h1_create() @@ -79,38 +105,7 @@ cleanup() vrf_cleanup } -# return 0 if the packet wasn't seen on host2_if or 1 if it was -mcast_packet_test() -{ - local mac=$1 - local ip=$2 - local host1_if=$3 - local host2_if=$4 - local seen=0 - - # Add an ACL on `host2_if` which will tell us whether the packet - # was received by it or not. - tc qdisc add dev $host2_if ingress - tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \ - flower dst_mac $mac action drop - - $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t udp "dp=4096,sp=2048" -q - sleep 1 - - tc -j -s filter show dev $host2_if ingress \ - | jq -e ".[] | select(.options.handle == 101) \ - | select(.options.actions[0].stats.packets == 1)" &> /dev/null - if [[ $? -eq 0 ]]; then - seen=1 - fi - - tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower - tc qdisc del dev $host2_if ingress - - return $seen -} - -reportleave_test() +v2reportleave_test() { RET=0 ip address add dev $h2 $TEST_GROUP/32 autojoin @@ -118,12 +113,12 @@ reportleave_test() sleep 5 bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null - check_err $? "Report didn't create mdb entry for $TEST_GROUP" + check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP" - mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 check_fail $? "Traffic to $TEST_GROUP wasn't forwarded" - log_test "IGMP report $TEST_GROUP" + log_test "IGMPv2 report $TEST_GROUP" RET=0 bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null @@ -136,10 +131,424 @@ reportleave_test() bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null check_fail $? "Leave didn't delete mdb entry for $TEST_GROUP" - mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2 + mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2 check_err $? "Traffic to $TEST_GROUP was forwarded without mdb entry" - log_test "IGMP leave $TEST_GROUP" + log_test "IGMPv2 leave $TEST_GROUP" +} + +v3include_prepare() +{ + local host1_if=$1 + local mac=$2 + local group=$3 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.3") + + ip link set dev br0 type bridge mcast_igmp_version 3 + check_err $? "Could not change bridge IGMP version to 3" + + $MZ $host1_if -b $mac -c 1 -B $group -t ip "proto=2,p=$MZPKT_IS_INC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null + check_err $? "Missing *,G entry with source list" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + brmcast_check_sg_entries "is_include" "${X[@]}" +} + +v3exclude_prepare() +{ + local host1_if=$1 + local mac=$2 + local group=$3 + local pkt=$4 + local X=("192.0.2.1" "192.0.2.2") + local Y=("192.0.2.20" "192.0.2.21") + + v3include_prepare $host1_if $mac $group + + $MZ $host1_if -c 1 -b $mac -B $group -t ip "proto=2,p=$MZPKT_IS_EXC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.3\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.3 entry still exists" +} + +v3cleanup() +{ + local port=$1 + local group=$2 + + bridge mdb del dev br0 port $port grp $group + ip link set dev br0 type bridge mcast_igmp_version 2 +} + +v3include_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.3") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP is_include" + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_allow_test() +{ + RET=0 + local X=("192.0.2.10" "192.0.2.11" "192.0.2.12") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW" -q + sleep 1 + brmcast_check_sg_entries "allow" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP include -> allow" + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_is_include_test() +{ + RET=0 + local X=("192.0.2.10" "192.0.2.11" "192.0.2.12") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC2" -q + sleep 1 + brmcast_check_sg_entries "is_include" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP include -> is_include" + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_is_exclude_test() +{ + RET=0 + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP include -> is_exclude" + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_to_exclude_test() +{ + RET=0 + local X=("192.0.2.1") + local Y=("192.0.2.20" "192.0.2.30") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.2\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.21\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.21 entry still exists" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP include -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_allow_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30") + local Y=("192.0.2.21") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q + sleep 1 + brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> allow" + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_is_include_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30") + local Y=("192.0.2.21") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC3" -q + sleep 1 + brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> is_include" + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_is_exclude_test() +{ + RET=0 + local X=("192.0.2.30") + local Y=("192.0.2.20") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC2" -q + sleep 1 + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> is_exclude" + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_to_exclude_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.30") + local Y=("192.0.2.20") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q + sleep 1 + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + v3cleanup $swp1 $TEST_GROUP +} + +v3inc_block_test() +{ + RET=0 + local X=("192.0.2.2" "192.0.2.3") + + v3include_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q + # make sure the lowered timers have expired (by default 2 seconds) + sleep 3 + brmcast_check_sg_entries "block" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.1\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "192.0.2.100" + + log_test "IGMPv3 report $TEST_GROUP include -> block" + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_block_test() +{ + RET=0 + local X=("192.0.2.1" "192.0.2.2" "192.0.2.30") + local Y=("192.0.2.20" "192.0.2.21") + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q + sleep 1 + brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "IGMPv3 report $TEST_GROUP exclude -> block" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + v3cleanup $swp1 $TEST_GROUP +} + +v3exc_timeout_test() +{ + RET=0 + local X=("192.0.2.20" "192.0.2.30") + + # GMI should be 3 seconds + ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100 + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500 + $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q + sleep 3 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.1\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"192.0.2.2\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists" + + brmcast_check_sg_entries "allow" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 192.0.2.100 + + log_test "IGMPv3 group $TEST_GROUP exclude timeout" + + ip link set dev br0 type bridge mcast_query_interval 12500 \ + mcast_query_response_interval 1000 + + v3cleanup $swp1 $TEST_GROUP +} + +v3star_ex_auto_add_test() +{ + RET=0 + + v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP + + $MZ $h2 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC" -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \ + .port == \"$swp1\")" &>/dev/null + check_err $? "S,G entry for *,G port doesn't exist" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \ + .port == \"$swp1\" and \ + .flags[] == \"added_by_star_ex\")" &>/dev/null + check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag" + + brmcast_check_sg_fwding 1 192.0.2.3 + + log_test "IGMPv3 S,G port entry automatic add to a *,G port" + + v3cleanup $swp1 $TEST_GROUP + v3cleanup $swp2 $TEST_GROUP } trap cleanup EXIT diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh new file mode 100755 index 000000000000..ffdcfa87ca2b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -0,0 +1,558 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ + mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \ + mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \ + mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test" +NUM_NETIFS=4 +CHECK_TC="yes" +TEST_GROUP="ff02::cc" +TEST_GROUP_MAC="33:33:00:00:00:cc" + +# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::1,2001:db8:1::2,2001:db8:1::3 +MZPKT_IS_INC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\ +00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:\ +00:05:02:00:00:00:00:8f:00:8e:d9:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:\ +00:00:00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:\ +00:00:00:00:00:00:02:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03" +# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12 +MZPKT_IS_INC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\ +00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:\ +05:02:00:00:00:00:8f:00:8e:ac:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:\ +00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:\ +00:00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12" +# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::20,2001:db8:1::30 +MZPKT_IS_INC3="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:bc:5a:00:00:00:01:01:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" +# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12 +MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:\ +00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:\ +02:00:00:00:00:8f:00:8a:ac:00:00:00:01:05:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:\ +00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:00:\ +00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12" +# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::20,2001:db8:1::30 +MZPKT_ALLOW2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:b8:5a:00:00:00:01:05:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" +# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::1,2001:db8:1::2,2001:db8:1::20,2001:db8:1::21 +MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:5f:d0:00:00:00:01:02:00:00:04:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:21" +# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::20,2001:db8:1::30 +MZPKT_IS_EXC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:bb:5a:00:00:00:01:02:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" +# MLDv2 to_ex report: grp ff02::cc to_exclude 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30 +MZPKT_TO_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:\ +00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\ +00:00:00:8f:00:8b:8e:00:00:00:01:04:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:\ +01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" +# MLDv2 block report: grp ff02::cc block 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30 +MZPKT_BLOCK="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:00:\ +00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:00:\ +00:00:8f:00:89:8e:00:00:00:01:06:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:01:\ +0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:\ +0d:b8:00:01:00:00:00:00:00:00:00:00:00:30" + +source lib.sh + +h1_create() +{ + simple_if_init $h1 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 2001:db8:1::2/64 +} + +h2_destroy() +{ + simple_if_fini $h2 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add dev br0 type bridge mcast_snooping 1 mcast_query_response_interval 100 \ + mcast_mld_version 2 mcast_startup_query_interval 300 \ + mcast_querier 1 + + ip link set dev $swp1 master br0 + ip link set dev $swp2 master br0 + + ip link set dev br0 up + ip link set dev $swp1 up + ip link set dev $swp2 up + + # make sure a query has been generated + sleep 5 +} + +switch_destroy() +{ + ip link set dev $swp2 down + ip link set dev $swp1 down + + ip link del dev br0 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + + h1_create + h2_create + + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + + h2_destroy + h1_destroy + + vrf_cleanup +} + +mldv2include_prepare() +{ + local host1_if=$1 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3") + + ip link set dev br0 type bridge mcast_mld_version 2 + check_err $? "Could not change bridge MLD version to 2" + + $MZ $host1_if $MZPKT_IS_INC -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null + check_err $? "Missing *,G entry with source list" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + brmcast_check_sg_entries "is_include" "${X[@]}" +} + +mldv2exclude_prepare() +{ + local host1_if=$1 + local mac=$2 + local group=$3 + local pkt=$4 + local X=("2001:db8:1::1" "2001:db8:1::2") + local Y=("2001:db8:1::20" "2001:db8:1::21") + + mldv2include_prepare $h1 + + $MZ $host1_if -c 1 $MZPKT_IS_EXC -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::3\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::3 entry still exists" +} + +mldv2cleanup() +{ + local port=$1 + + bridge mdb del dev br0 port $port grp $TEST_GROUP + ip link set dev br0 type bridge mcast_mld_version 1 +} + +mldv2include_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3") + + mldv2include_prepare $h1 + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "2001:db8:1::100" + + log_test "MLDv2 report $TEST_GROUP is_include" + + mldv2cleanup $swp1 +} + +mldv2inc_allow_test() +{ + RET=0 + local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12") + + mldv2include_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_ALLOW -q + sleep 1 + brmcast_check_sg_entries "allow" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "2001:db8:1::100" + + log_test "MLDv2 report $TEST_GROUP include -> allow" + + mldv2cleanup $swp1 +} + +mldv2inc_is_include_test() +{ + RET=0 + local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12") + + mldv2include_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_IS_INC2 -q + sleep 1 + brmcast_check_sg_entries "is_include" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 "2001:db8:1::100" + + log_test "MLDv2 report $TEST_GROUP include -> is_include" + + mldv2cleanup $swp1 +} + +mldv2inc_is_exclude_test() +{ + RET=0 + + mldv2exclude_prepare $h1 + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP include -> is_exclude" + + mldv2cleanup $swp1 +} + +mldv2inc_to_exclude_test() +{ + RET=0 + local X=("2001:db8:1::1") + local Y=("2001:db8:1::20" "2001:db8:1::30") + + mldv2include_prepare $h1 + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 $MZPKT_TO_EXC -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"exclude\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::2\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::21\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::21 entry still exists" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP include -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + mldv2cleanup $swp1 +} + +mldv2exc_allow_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30") + local Y=("2001:db8:1::21") + + mldv2exclude_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_ALLOW2 -q + sleep 1 + brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> allow" + + mldv2cleanup $swp1 +} + +mldv2exc_is_include_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30") + local Y=("2001:db8:1::21") + + mldv2exclude_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_IS_INC3 -q + sleep 1 + brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> is_include" + + mldv2cleanup $swp1 +} + +mldv2exc_is_exclude_test() +{ + RET=0 + local X=("2001:db8:1::30") + local Y=("2001:db8:1::20") + + mldv2exclude_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_IS_EXC2 -q + sleep 1 + brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> is_exclude" + + mldv2cleanup $swp1 +} + +mldv2exc_to_exclude_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::30") + local Y=("2001:db8:1::20") + + mldv2exclude_prepare $h1 + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 $MZPKT_TO_EXC -q + sleep 1 + brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> to_exclude" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + mldv2cleanup $swp1 +} + +mldv2inc_block_test() +{ + RET=0 + local X=("2001:db8:1::2" "2001:db8:1::3") + + mldv2include_prepare $h1 + + $MZ $h1 -c 1 $MZPKT_BLOCK -q + # make sure the lowered timers have expired (by default 2 seconds) + sleep 3 + brmcast_check_sg_entries "block" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::1\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 2001:db8:1::100 + + log_test "MLDv2 report $TEST_GROUP include -> block" + + mldv2cleanup $swp1 +} + +mldv2exc_block_test() +{ + RET=0 + local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::30") + local Y=("2001:db8:1::20" "2001:db8:1::21") + + mldv2exclude_prepare $h1 + + ip link set dev br0 type bridge mcast_last_member_interval 500 + check_err $? "Could not change mcast_last_member_interval to 5s" + + $MZ $h1 -c 1 $MZPKT_BLOCK -q + sleep 1 + brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + brmcast_check_sg_state 1 "${Y[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100 + brmcast_check_sg_fwding 0 "${Y[@]}" + + log_test "MLDv2 report $TEST_GROUP exclude -> block" + + ip link set dev br0 type bridge mcast_last_member_interval 100 + + mldv2cleanup $swp1 +} + +mldv2exc_timeout_test() +{ + RET=0 + local X=("2001:db8:1::20" "2001:db8:1::30") + + # GMI should be 3 seconds + ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100 + + mldv2exclude_prepare $h1 + ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500 + $MZ $h1 -c 1 $MZPKT_ALLOW2 -q + sleep 3 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and .filter_mode == \"include\")" &>/dev/null + check_err $? "Wrong *,G entry filter mode" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::1\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists" + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and \ + .source_list != null and + .source_list[].address == \"2001:db8:1::2\")" &>/dev/null + check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists" + + brmcast_check_sg_entries "allow" "${X[@]}" + + brmcast_check_sg_state 0 "${X[@]}" + + brmcast_check_sg_fwding 1 "${X[@]}" + brmcast_check_sg_fwding 0 2001:db8:1::100 + + log_test "MLDv2 group $TEST_GROUP exclude timeout" + + ip link set dev br0 type bridge mcast_query_interval 12500 \ + mcast_query_response_interval 1000 + + mldv2cleanup $swp1 +} + +mldv2star_ex_auto_add_test() +{ + RET=0 + + mldv2exclude_prepare $h1 + + $MZ $h2 -c 1 $MZPKT_IS_INC -q + sleep 1 + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \ + .port == \"$swp1\")" &>/dev/null + check_err $? "S,G entry for *,G port doesn't exist" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \ + .port == \"$swp1\" and \ + .flags[] == \"added_by_star_ex\")" &>/dev/null + check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag" + + brmcast_check_sg_fwding 1 2001:db8:1::3 + + log_test "MLDv2 S,G port entry automatic add to a *,G port" + + mldv2cleanup $swp1 + mldv2cleanup $swp2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 927f9ba49e08..98ea37d26c44 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1270,3 +1270,110 @@ tcpdump_show() { tcpdump -e -n -r $capfile 2>&1 } + +# return 0 if the packet wasn't seen on host2_if or 1 if it was +mcast_packet_test() +{ + local mac=$1 + local src_ip=$2 + local ip=$3 + local host1_if=$4 + local host2_if=$5 + local seen=0 + local tc_proto="ip" + local mz_v6arg="" + + # basic check to see if we were passed an IPv4 address, if not assume IPv6 + if [[ ! $ip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then + tc_proto="ipv6" + mz_v6arg="-6" + fi + + # Add an ACL on `host2_if` which will tell us whether the packet + # was received by it or not. + tc qdisc add dev $host2_if ingress + tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \ + flower ip_proto udp dst_mac $mac action drop + + $MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q + sleep 1 + + tc -j -s filter show dev $host2_if ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + if [[ $? -eq 0 ]]; then + seen=1 + fi + + tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower + tc qdisc del dev $host2_if ingress + + return $seen +} + +brmcast_check_sg_entries() +{ + local report=$1; shift + local slist=("$@") + local sarg="" + + for src in "${slist[@]}"; do + sarg="${sarg} and .source_list[].address == \"$src\"" + done + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null + check_err $? "Wrong *,G entry source list after $report report" + + for sgent in "${slist[@]}"; do + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null + check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)" + done +} + +brmcast_check_sg_fwding() +{ + local should_fwd=$1; shift + local sources=("$@") + + for src in "${sources[@]}"; do + local retval=0 + + mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1 + retval=$? + if [ $should_fwd -eq 1 ]; then + check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)" + else + check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)" + fi + done +} + +brmcast_check_sg_state() +{ + local is_blocked=$1; shift + local sources=("$@") + local should_fail=1 + + if [ $is_blocked -eq 1 ]; then + should_fail=0 + fi + + for src in "${sources[@]}"; do + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .source_list != null) | + .source_list[] | + select(.address == \"$src\") | + select(.timer == \"0.00\")" &>/dev/null + check_err_fail $should_fail $? "Entry $src has zero timer" + + bridge -j -d -s mdb show dev br0 \ + | jq -e ".[].mdb[] | \ + select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \ + .flags[] == \"blocked\")" &>/dev/null + check_err_fail $should_fail $? "Entry $src has blocked flag" + done +} diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 741a1c4f4ae8..0faaccd21447 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -5,3 +5,13 @@ CONFIG_INET_DIAG=m CONFIG_INET_MPTCP_DIAG=m CONFIG_VETH=y CONFIG_NET_SCH_NETEM=m +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_NETLINK=m +CONFIG_NF_TABLES=m +CONFIG_NFT_COUNTER=m +CONFIG_NFT_COMPAT=m +CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NF_TABLES_IPV4=y +CONFIG_NF_TABLES_IPV6=y diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 08f53d86dedc..0d93b243695f 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -13,6 +13,24 @@ capture=0 TEST_COUNT=0 +# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || +# (ip6 && (ip6[74] & 0xf0) == 0x30)'" +CBPF_MPTCP_SUBOPTION_ADD_ADDR="14, + 48 0 0 0, + 84 0 0 240, + 21 0 3 64, + 48 0 0 54, + 84 0 0 240, + 21 6 7 48, + 48 0 0 0, + 84 0 0 240, + 21 0 4 96, + 48 0 0 74, + 84 0 0 240, + 21 0 1 48, + 6 0 0 65535, + 6 0 0 0" + init() { capout=$(mktemp) @@ -82,6 +100,26 @@ reset_with_cookies() done } +reset_with_add_addr_timeout() +{ + local ip="${1:-4}" + local tables + + tables="iptables" + if [ $ip -eq 6 ]; then + tables="ip6tables" + fi + + reset + + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 + ip netns exec $ns2 $tables -A OUTPUT -p tcp \ + -m tcp --tcp-option 30 \ + -m bpf --bytecode \ + "$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \ + -j DROP +} + for arg in "$@"; do if [ "$arg" = "-c" ]; then capture=1 @@ -94,6 +132,17 @@ if [ $? -ne 0 ];then exit $ksft_skip fi +iptables -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run all tests without iptables tool" + exit $ksft_skip +fi + +ip6tables -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run all tests without ip6tables tool" + exit $ksft_skip +fi check_transfer() { @@ -135,6 +184,7 @@ do_transfer() connect_addr="$5" rm_nr_ns1="$6" rm_nr_ns2="$7" + speed="$8" port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) @@ -159,7 +209,7 @@ do_transfer() sleep 1 fi - if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then + if [ $speed = "fast" ]; then mptcp_connect="./mptcp_connect -j" else mptcp_connect="./mptcp_connect -r" @@ -250,26 +300,13 @@ run_tests() listener_ns="$1" connector_ns="$2" connect_addr="$3" + rm_nr_ns1="${4:-0}" + rm_nr_ns2="${5:-0}" + speed="${6:-fast}" lret=0 - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0 - lret=$? - if [ $lret -ne 0 ]; then - ret=$lret - return - fi -} - -run_remove_tests() -{ - listener_ns="$1" - connector_ns="$2" - connect_addr="$3" - rm_nr_ns1="$4" - rm_nr_ns2="$5" - lret=0 - - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2} + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} \ + ${rm_nr_ns1} ${rm_nr_ns2} ${speed} lret=$? if [ $lret -ne 0 ]; then ret=$lret @@ -491,12 +528,21 @@ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 chk_add_nr 1 1 +# add_addr timeout +reset_with_add_addr_timeout +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +run_tests $ns1 $ns2 10.0.1.1 0 0 slow +chk_join_nr "signal address, ADD_ADDR timeout" 1 1 1 +chk_add_nr 4 0 + # single subflow, remove reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl limits 0 1 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 0 1 +run_tests $ns1 $ns2 10.0.1.1 0 1 slow chk_join_nr "remove single subflow" 1 1 1 chk_rm_nr 1 1 @@ -506,7 +552,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns2 ./pm_nl_ctl limits 0 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 0 2 +run_tests $ns1 $ns2 10.0.1.1 0 2 slow chk_join_nr "remove multiple subflows" 2 2 2 chk_rm_nr 2 2 @@ -515,7 +561,7 @@ reset ip netns exec $ns1 ./pm_nl_ctl limits 0 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 1 -run_remove_tests $ns1 $ns2 10.0.1.1 1 0 +run_tests $ns1 $ns2 10.0.1.1 1 0 slow chk_join_nr "remove single address" 1 1 1 chk_add_nr 1 1 chk_rm_nr 0 0 @@ -526,7 +572,7 @@ ip netns exec $ns1 ./pm_nl_ctl limits 0 2 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 1 1 +run_tests $ns1 $ns2 10.0.1.1 1 1 slow chk_join_nr "remove subflow and signal" 2 2 2 chk_add_nr 1 1 chk_rm_nr 1 1 @@ -538,7 +584,7 @@ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal ip netns exec $ns2 ./pm_nl_ctl limits 1 3 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow -run_remove_tests $ns1 $ns2 10.0.1.1 1 2 +run_tests $ns1 $ns2 10.0.1.1 1 2 slow chk_join_nr "remove subflows and signal" 3 3 3 chk_add_nr 1 1 chk_rm_nr 2 2 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 6bbf69a28e12..464e31eabc73 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -355,7 +355,7 @@ setup_fou_or_gue() { encap="${3}" if [ "${outer}" = "4" ]; then - modprobe fou || return 2 + modprobe fou || return $ksft_skip a_addr="${prefix4}.${a_r1}.1" b_addr="${prefix4}.${b_r1}.1" if [ "${inner}" = "4" ]; then @@ -366,7 +366,7 @@ setup_fou_or_gue() { ipproto="41" fi else - modprobe fou6 || return 2 + modprobe fou6 || return $ksft_skip a_addr="${prefix6}:${a_r1}::1" b_addr="${prefix6}:${b_r1}::1" if [ "${inner}" = "4" ]; then @@ -380,8 +380,8 @@ setup_fou_or_gue() { fi fi - run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2 - run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2 + run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return $ksft_skip + run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return $ksft_skip run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto} run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555 @@ -455,7 +455,7 @@ setup_ipvX_over_ipvY() { fi fi - run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2 + run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return $ksft_skip run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode} run_cmd ${ns_a} ip link set ip_a up @@ -713,7 +713,7 @@ setup_routing() { } setup_bridge() { - run_cmd ${ns_a} ip link add br0 type bridge || return 2 + run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip run_cmd ${ns_a} ip link set br0 up run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C @@ -765,7 +765,7 @@ setup_ovs_vxlan6() { } setup_ovs_bridge() { - run_cmd ovs-vsctl add-br ovs_br0 || return 2 + run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip run_cmd ip link set ovs_br0 up run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C @@ -887,7 +887,7 @@ check_pmtu_value() { test_pmtu_ipvX() { family=${1} - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -985,11 +985,11 @@ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing ${type}4 || return 2 + setup namespaces routing ${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing ${type}6 || return 2 + setup namespaces routing ${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi @@ -1060,11 +1060,11 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing bridge bridged_${type}4 || return 2 + setup namespaces routing bridge bridged_${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing bridge bridged_${type}6 || return 2 + setup namespaces routing bridge bridged_${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi @@ -1144,11 +1144,11 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() { ll_mtu=4000 if [ ${outer_family} -eq 4 ]; then - setup namespaces routing ovs_bridge ovs_${type}4 || return 2 + setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip # IPv4 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14)) else - setup namespaces routing ovs_bridge ovs_${type}6 || return 2 + setup namespaces routing ovs_bridge ovs_${type}6 || return $ksft_skip # IPv6 header UDP header VXLAN/GENEVE header Ethernet header exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14)) fi @@ -1230,7 +1230,7 @@ test_pmtu_ipvX_over_fouY_or_gueY() { encap=${3} ll_mtu=4000 - setup namespaces routing ${encap}${outer_family}${inner_family} || return 2 + setup namespaces routing ${encap}${outer_family}${inner_family} || return $ksft_skip trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B @@ -1309,7 +1309,7 @@ test_pmtu_ipvX_over_ipvY_exception() { outer=${2} ll_mtu=4000 - setup namespaces routing ip${inner}ip${outer} || return 2 + setup namespaces routing ip${inner}ip${outer} || return $ksft_skip trace "${ns_a}" ip_a "${ns_b}" ip_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ @@ -1363,7 +1363,7 @@ test_pmtu_ipv6_ipv6_exception() { } test_pmtu_vti4_exception() { - setup namespaces veth vti4 xfrm4 || return 2 + setup namespaces veth vti4 xfrm4 || return $ksft_skip trace "${ns_a}" veth_a "${ns_b}" veth_b \ "${ns_a}" vti4_a "${ns_b}" vti4_b @@ -1393,7 +1393,7 @@ test_pmtu_vti4_exception() { } test_pmtu_vti6_exception() { - setup namespaces veth vti6 xfrm6 || return 2 + setup namespaces veth vti6 xfrm6 || return $ksft_skip trace "${ns_a}" veth_a "${ns_b}" veth_b \ "${ns_a}" vti6_a "${ns_b}" vti6_b fail=0 @@ -1423,7 +1423,7 @@ test_pmtu_vti6_exception() { } test_pmtu_vti4_default_mtu() { - setup namespaces veth vti4 || return 2 + setup namespaces veth vti4 || return $ksft_skip # Check that MTU of vti device is MTU of veth minus IPv4 header length veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" @@ -1435,7 +1435,7 @@ test_pmtu_vti4_default_mtu() { } test_pmtu_vti6_default_mtu() { - setup namespaces veth vti6 || return 2 + setup namespaces veth vti6 || return $ksft_skip # Check that MTU of vti device is MTU of veth minus IPv6 header length veth_mtu="$(link_get_mtu "${ns_a}" veth_a)" @@ -1447,10 +1447,10 @@ test_pmtu_vti6_default_mtu() { } test_pmtu_vti4_link_add_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10 - [ $? -ne 0 ] && err " vti not supported" && return 2 + [ $? -ne 0 ] && err " vti not supported" && return $ksft_skip run_cmd ${ns_a} ip link del vti4_a fail=0 @@ -1485,10 +1485,10 @@ test_pmtu_vti4_link_add_mtu() { } test_pmtu_vti6_link_add_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10 - [ $? -ne 0 ] && err " vti6 not supported" && return 2 + [ $? -ne 0 ] && err " vti6 not supported" && return $ksft_skip run_cmd ${ns_a} ip link del vti6_a fail=0 @@ -1523,10 +1523,10 @@ test_pmtu_vti6_link_add_mtu() { } test_pmtu_vti6_link_change_mtu() { - setup namespaces || return 2 + setup namespaces || return $ksft_skip run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy - [ $? -ne 0 ] && err " dummy not supported" && return 2 + [ $? -ne 0 ] && err " dummy not supported" && return $ksft_skip run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy run_cmd ${ns_a} ip link set dummy0 up run_cmd ${ns_a} ip link set dummy1 up @@ -1579,10 +1579,10 @@ test_cleanup_vxlanX_exception() { encap="vxlan" ll_mtu=4000 - check_command taskset || return 2 + check_command taskset || return $ksft_skip cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2) - setup namespaces routing ${encap}${outer} || return 2 + setup namespaces routing ${encap}${outer} || return $ksft_skip trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B @@ -1644,7 +1644,7 @@ run_test() { fi err_flush exit 1 - elif [ $ret -eq 2 ]; then + elif [ $ret -eq $ksft_skip ]; then printf "TEST: %-60s [SKIP]\n" "${tdesc}" err_flush fi @@ -1652,7 +1652,19 @@ run_test() { return $ret ) ret=$? - [ $ret -ne 0 ] && exitcode=1 + case $ret in + 0) + all_skipped=false + [ $exitcode=$ksft_skip ] && exitcode=0 + ;; + $ksft_skip) + [ $all_skipped = true ] && exitcode=$ksft_skip + ;; + *) + all_skipped=false + exitcode=1 + ;; + esac return $ret } @@ -1667,7 +1679,7 @@ run_test_nh() { } test_list_flush_ipv4_exception() { - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -1721,7 +1733,7 @@ test_list_flush_ipv4_exception() { } test_list_flush_ipv6_exception() { - setup namespaces routing || return 2 + setup namespaces routing || return $ksft_skip trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ @@ -1786,6 +1798,7 @@ usage() { # exitcode=0 desc=0 +all_skipped=true while getopts :ptv o do @@ -1840,7 +1853,7 @@ for t in ${tests}; do if [ $run_this -eq 1 ]; then run_test "${name}" "${desc}" # if test was skipped no need to retry with nexthop objects - [ $? -eq 2 ] && rerun_nh=0 + [ $? -eq $ksft_skip ] && rerun_nh=0 if [ "${rerun_nh}" = "1" ]; then run_test_nh "${name}" "${desc}" diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 2c522f7a0aec..db4521335722 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -56,12 +56,15 @@ #define RING_NUM_FRAMES 20 +static uint32_t cfg_max_num_members; + /* Open a socket in a given fanout mode. * @return -1 if mode is bad, a valid socket otherwise */ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) { struct sockaddr_ll addr = {0}; - int fd, val; + struct fanout_args args; + int fd, val, err; fd = socket(PF_PACKET, SOCK_RAW, 0); if (fd < 0) { @@ -83,8 +86,18 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) exit(1); } - val = (((int) typeflags) << 16) | group_id; - if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) { + if (cfg_max_num_members) { + args.id = group_id; + args.type_flags = typeflags; + args.max_num_members = cfg_max_num_members; + err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, + sizeof(args)); + } else { + val = (((int) typeflags) << 16) | group_id; + err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, + sizeof(val)); + } + if (err) { if (close(fd)) { perror("close packet"); exit(1); @@ -286,6 +299,56 @@ static void test_control_group(void) } } +/* Test illegal max_num_members values */ +static void test_control_group_max_num_members(void) +{ + int fds[3]; + + fprintf(stderr, "test: control multiple sockets, max_num_members\n"); + + /* expected failure on greater than PACKET_FANOUT_MAX */ + cfg_max_num_members = (1 << 16) + 1; + if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) { + fprintf(stderr, "ERROR: max_num_members > PACKET_FANOUT_MAX\n"); + exit(1); + } + + cfg_max_num_members = 256; + fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0); + if (fds[0] == -1) { + fprintf(stderr, "ERROR: failed open\n"); + exit(1); + } + + /* expected failure on joining group with different max_num_members */ + cfg_max_num_members = 257; + if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) { + fprintf(stderr, "ERROR: set different max_num_members\n"); + exit(1); + } + + /* success on joining group with same max_num_members */ + cfg_max_num_members = 256; + fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0); + if (fds[1] == -1) { + fprintf(stderr, "ERROR: failed to join group\n"); + exit(1); + } + + /* success on joining group with max_num_members unspecified */ + cfg_max_num_members = 0; + fds[2] = sock_fanout_open(PACKET_FANOUT_HASH, 0); + if (fds[2] == -1) { + fprintf(stderr, "ERROR: failed to join group\n"); + exit(1); + } + + if (close(fds[2]) || close(fds[1]) || close(fds[0])) { + fprintf(stderr, "ERROR: closing sockets\n"); + exit(1); + } +} + /* Test creating a unique fanout group ids */ static void test_unique_fanout_group_ids(void) { @@ -426,8 +489,11 @@ int main(int argc, char **argv) test_control_single(); test_control_group(); + test_control_group_max_num_members(); test_unique_fanout_group_ids(); + /* PACKET_FANOUT_MAX */ + cfg_max_num_members = 1 << 16; /* find a set of ports that do not collide onto the same socket */ ret = test_datapath(PACKET_FANOUT_HASH, port_off, expect_hash[0], expect_hash[1]); diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c index f4bb4fef0f39..21091be70688 100644 --- a/tools/testing/selftests/net/timestamping.c +++ b/tools/testing/selftests/net/timestamping.c @@ -59,7 +59,8 @@ static void usage(const char *error) " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" " SIOCGSTAMP - check last socket time stamp\n" - " SIOCGSTAMPNS - more accurate socket time stamp\n"); + " SIOCGSTAMPNS - more accurate socket time stamp\n" + " PTPV2 - use PTPv2 messages\n"); exit(1); } @@ -115,13 +116,28 @@ static const unsigned char sync[] = { 0x00, 0x00, 0x00, 0x00 }; -static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len) +static const unsigned char sync_v2[] = { + 0x00, 0x02, 0x00, 0x2C, + 0x00, 0x00, 0x02, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0xFF, + 0xFE, 0x00, 0x00, 0x00, + 0x00, 0x01, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, +}; + +static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len, int ptpv2) { + size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync); + const void *sync_p = ptpv2 ? sync_v2 : sync; struct timeval now; int res; - res = sendto(sock, sync, sizeof(sync), 0, - addr, addr_len); + res = sendto(sock, sync_p, sync_len, 0, addr, addr_len); gettimeofday(&now, 0); if (res < 0) printf("%s: %s\n", "send", strerror(errno)); @@ -134,9 +150,11 @@ static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len) static void printpacket(struct msghdr *msg, int res, char *data, int sock, int recvmsg_flags, - int siocgstamp, int siocgstampns) + int siocgstamp, int siocgstampns, int ptpv2) { struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name; + size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync); + const void *sync_p = ptpv2 ? sync_v2 : sync; struct cmsghdr *cmsg; struct timeval tv; struct timespec ts; @@ -210,10 +228,9 @@ static void printpacket(struct msghdr *msg, int res, "probably SO_EE_ORIGIN_TIMESTAMPING" #endif ); - if (res < sizeof(sync)) + if (res < sync_len) printf(" => truncated data?!"); - else if (!memcmp(sync, data + res - sizeof(sync), - sizeof(sync))) + else if (!memcmp(sync_p, data + res - sync_len, sync_len)) printf(" => GOT OUR DATA BACK (HURRAY!)"); break; } @@ -257,7 +274,7 @@ static void printpacket(struct msghdr *msg, int res, } static void recvpacket(int sock, int recvmsg_flags, - int siocgstamp, int siocgstampns) + int siocgstamp, int siocgstampns, int ptpv2) { char data[256]; struct msghdr msg; @@ -288,7 +305,7 @@ static void recvpacket(int sock, int recvmsg_flags, } else { printpacket(&msg, res, data, sock, recvmsg_flags, - siocgstamp, siocgstampns); + siocgstamp, siocgstampns, ptpv2); } } @@ -300,6 +317,7 @@ int main(int argc, char **argv) int siocgstamp = 0; int siocgstampns = 0; int ip_multicast_loop = 0; + int ptpv2 = 0; char *interface; int i; int enabled = 1; @@ -335,6 +353,8 @@ int main(int argc, char **argv) siocgstampns = 1; else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP")) ip_multicast_loop = 1; + else if (!strcasecmp(argv[i], "PTPV2")) + ptpv2 = 1; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE")) so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE; else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE")) @@ -369,6 +389,7 @@ int main(int argc, char **argv) HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; hwconfig.rx_filter = (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ? + ptpv2 ? HWTSTAMP_FILTER_PTP_V2_L4_SYNC : HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE; hwconfig_requested = hwconfig; if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) { @@ -496,16 +517,16 @@ int main(int argc, char **argv) printf("has error\n"); recvpacket(sock, 0, siocgstamp, - siocgstampns); + siocgstampns, ptpv2); recvpacket(sock, MSG_ERRQUEUE, siocgstamp, - siocgstampns); + siocgstampns, ptpv2); } } else { /* write one packet */ sendpacket(sock, (struct sockaddr *)&addr, - sizeof(addr)); + sizeof(addr), ptpv2); next.tv_sec += 5; continue; } diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config index bb11de90c0c9..f6f2965e17af 100644 --- a/tools/testing/selftests/pidfd/config +++ b/tools/testing/selftests/pidfd/config @@ -4,3 +4,4 @@ CONFIG_USER_NS=y CONFIG_PID_NS=y CONFIG_NET_NS=y CONFIG_CGROUPS=y +CONFIG_CHECKPOINT_RESTORE=y diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c index 7758c98be015..0930e2411dfb 100644 --- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c @@ -204,7 +204,10 @@ TEST_F(child, fetch_fd) fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0); ASSERT_GE(fd, 0); - EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd)); + ret = sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd); + if (ret < 0 && errno == ENOSYS) + SKIP(return, "kcmp() syscall not supported"); + EXPECT_EQ(ret, 0); ret = fcntl(fd, F_GETFD); ASSERT_GE(ret, 0); diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index b9fe75fc3e51..8a59438ccc78 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -6,7 +6,6 @@ #include <inttypes.h> #include <limits.h> #include <linux/types.h> -#include <linux/wait.h> #include <sched.h> #include <signal.h> #include <stdbool.h> diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c index 4b115444dfe9..610811275357 100644 --- a/tools/testing/selftests/pidfd/pidfd_poll_test.c +++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c @@ -3,7 +3,6 @@ #define _GNU_SOURCE #include <errno.h> #include <linux/types.h> -#include <linux/wait.h> #include <poll.h> #include <signal.h> #include <stdbool.h> diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 1f085b922c6e..6e2f2cd400ca 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -16,7 +16,6 @@ #include <unistd.h> #include <sys/socket.h> #include <sys/stat.h> -#include <linux/kcmp.h> #include "pidfd.h" #include "../clone3/clone3_selftests.h" diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c index c585aaa2acd8..529eb700ac26 100644 --- a/tools/testing/selftests/pidfd/pidfd_test.c +++ b/tools/testing/selftests/pidfd/pidfd_test.c @@ -330,7 +330,7 @@ static int test_pidfd_send_signal_recycled_pid_fail(void) ksft_exit_fail_msg("%s test: Failed to recycle pid %d\n", test_name, PID_RECYCLE); case PIDFD_SKIP: - ksft_print_msg("%s test: Skipping test\n", test_name); + ksft_test_result_skip("%s test: Skipping test\n", test_name); ret = 0; break; case PIDFD_XFAIL: diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index 2a0503bc7e49..cb53a8b777e6 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -266,8 +266,12 @@ int do_test(char *test_name, void (*test_func)(char *, char *)) } rc = 0; - /* offset = 0 no alignment fault, so skip */ - for (offset = 1; offset < 16; offset++) { + /* + * offset = 0 is aligned but tests the workaround for the P9N + * DD2.1 vector CI load issue (see 5080332c2c89 "powerpc/64s: + * Add workaround for P9 vector CI load issue") + */ + for (offset = 0; offset < 16; offset++) { width = 16; /* vsx == 16 bytes */ r = 0; diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c index 471e2aa28077..fb4fe9188806 100644 --- a/tools/testing/selftests/proc/proc-loadavg-001.c +++ b/tools/testing/selftests/proc/proc-loadavg-001.c @@ -14,7 +14,6 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ /* Test that /proc/loadavg correctly reports last pid in pid namespace. */ -#define _GNU_SOURCE #include <errno.h> #include <sched.h> #include <sys/types.h> diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c index 9f6d000c0245..8511dcfe67c7 100644 --- a/tools/testing/selftests/proc/proc-self-syscall.c +++ b/tools/testing/selftests/proc/proc-self-syscall.c @@ -13,7 +13,6 @@ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ -#define _GNU_SOURCE #include <unistd.h> #include <sys/syscall.h> #include <sys/types.h> diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c index 30e2b7849089..e7ceabed7f51 100644 --- a/tools/testing/selftests/proc/proc-uptime-002.c +++ b/tools/testing/selftests/proc/proc-uptime-002.c @@ -15,7 +15,6 @@ */ // Test that values in /proc/uptime increment monotonically // while shifting across CPUs. -#define _GNU_SOURCE #undef NDEBUG #include <assert.h> #include <unistd.h> diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json index bb543bf69d69..361235ad574b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json @@ -100,7 +100,7 @@ ], "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", "expExitCode": "0", - "verifyCmd": "$TC filter show terse dev $DEV2 ingress", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", "matchPattern": "filter protocol ip pref 1 flower.*handle", "matchCount": "1", "teardown": [ @@ -119,7 +119,7 @@ ], "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop", "expExitCode": "0", - "verifyCmd": "$TC filter show terse dev $DEV2 ingress", + "verifyCmd": "$TC -br filter show dev $DEV2 ingress", "matchPattern": " dst_mac e4:11:22:11:4a:51", "matchCount": "0", "teardown": [ diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile index b4fd9a934654..3a5936cc10ab 100644 --- a/tools/testing/selftests/timens/Makefile +++ b/tools/testing/selftests/timens/Makefile @@ -1,4 +1,4 @@ -TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec +TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex TEST_GEN_PROGS_EXTENDED := gettime_perf CFLAGS := -Wall -Werror -pthread diff --git a/tools/testing/selftests/timens/futex.c b/tools/testing/selftests/timens/futex.c new file mode 100644 index 000000000000..6b2b9264e851 --- /dev/null +++ b/tools/testing/selftests/timens/futex.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <sched.h> + +#include <linux/unistd.h> +#include <linux/futex.h> +#include <stdio.h> +#include <string.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> + +#include "log.h" +#include "timens.h" + +#define NSEC_PER_SEC 1000000000ULL + +static int run_test(int clockid) +{ + int futex_op = FUTEX_WAIT_BITSET; + struct timespec timeout, end; + int val = 0; + + if (clockid == CLOCK_REALTIME) + futex_op |= FUTEX_CLOCK_REALTIME; + + clock_gettime(clockid, &timeout); + timeout.tv_nsec += NSEC_PER_SEC / 10; // 100ms + if (timeout.tv_nsec > NSEC_PER_SEC) { + timeout.tv_sec++; + timeout.tv_nsec -= NSEC_PER_SEC; + } + + if (syscall(__NR_futex, &val, futex_op, 0, + &timeout, 0, FUTEX_BITSET_MATCH_ANY) >= 0) { + ksft_test_result_fail("futex didn't return ETIMEDOUT\n"); + return 1; + } + + if (errno != ETIMEDOUT) { + ksft_test_result_fail("futex didn't return ETIMEDOUT: %s\n", + strerror(errno)); + return 1; + } + + clock_gettime(clockid, &end); + + if (end.tv_sec < timeout.tv_sec || + (end.tv_sec == timeout.tv_sec && end.tv_nsec < timeout.tv_nsec)) { + ksft_test_result_fail("futex slept less than 100ms\n"); + return 1; + } + + + ksft_test_result_pass("futex with the %d clockid\n", clockid); + + return 0; +} + +int main(int argc, char *argv[]) +{ + int status, len, fd; + char buf[4096]; + pid_t pid; + struct timespec mtime_now; + + nscheck(); + + ksft_set_plan(2); + + clock_gettime(CLOCK_MONOTONIC, &mtime_now); + + if (unshare_timens()) + return 1; + + len = snprintf(buf, sizeof(buf), "%d %d 0", + CLOCK_MONOTONIC, 70 * 24 * 3600); + fd = open("/proc/self/timens_offsets", O_WRONLY); + if (fd < 0) + return pr_perror("/proc/self/timens_offsets"); + + if (write(fd, buf, len) != len) + return pr_perror("/proc/self/timens_offsets"); + + close(fd); + + pid = fork(); + if (pid < 0) + return pr_perror("Unable to fork"); + if (pid == 0) { + int ret = 0; + + ret |= run_test(CLOCK_REALTIME); + ret |= run_test(CLOCK_MONOTONIC); + if (ret) + ksft_exit_fail(); + ksft_exit_pass(); + return 0; + } + + if (waitpid(pid, &status, 0) != pid) + return pr_perror("Unable to wait the child process"); + + if (WIFEXITED(status)) + return WEXITSTATUS(status); + + return 1; +} diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index d77f4829f1e0..74c69b75f6f5 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -316,6 +316,14 @@ pp sleep 3 n2 ping -W 1 -c 1 192.168.241.1 n1 wg set wg0 peer "$pub2" persistent-keepalive 0 +# Test that sk_bound_dev_if works +n1 ping -I wg0 -c 1 -W 1 192.168.241.2 +# What about when the mark changes and the packet must be rerouted? +n1 iptables -t mangle -I OUTPUT -j MARK --set-xmark 1 +n1 ping -c 1 -W 1 192.168.241.2 # First the boring case +n1 ping -I wg0 -c 1 -W 1 192.168.241.2 # Then the sk_bound_dev_if case +n1 iptables -t mangle -D OUTPUT -j MARK --set-xmark 1 + # Test that onion routing works, even when it loops n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5 ip1 addr add 192.168.242.1/24 dev wg0 diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index d531de13c95b..4eecb432a66c 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -18,10 +18,12 @@ CONFIG_NF_NAT=y CONFIG_NETFILTER_XTABLES=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y +CONFIG_NETFILTER_XT_MARK=y CONFIG_NF_CONNTRACK_IPV4=y CONFIG_NF_NAT_IPV4=y CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y +CONFIG_IP_NF_MANGLE=y CONFIG_IP_NF_NAT=y CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y |