diff options
Diffstat (limited to 'tools')
251 files changed, 11145 insertions, 1725 deletions
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 7b32b99023a2..5fd7caea4419 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -86,9 +86,14 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_CORTEX_X1C 0xD4C +#define ARM_CPU_PART_CORTEX_X3 0xD4E #define ARM_CPU_PART_NEOVERSE_V2 0xD4F +#define ARM_CPU_PART_CORTEX_A720 0xD81 #define ARM_CPU_PART_CORTEX_X4 0xD82 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 +#define ARM_CPU_PART_CORTEX_X925 0xD85 +#define ARM_CPU_PART_CORTEX_A725 0xD87 #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -162,9 +167,14 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) +#define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) +#define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) +#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) +#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 1691297a766a..eaeda001784e 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -645,6 +645,9 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) #define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) #define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) +#define KVM_REG_PPC_DEXCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc6) +#define KVM_REG_PPC_HASHKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc7) +#define KVM_REG_PPC_HASHPKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc8) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 3c7434329661..dd4682857c12 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -18,170 +18,170 @@ /* * Note: If the comment begins with a quoted string, that string is used - * in /proc/cpuinfo instead of the macro name. If the string is "", - * this feature bit is not displayed in /proc/cpuinfo at all. + * in /proc/cpuinfo instead of the macro name. Otherwise, this feature + * bit is not displayed in /proc/cpuinfo at all. * * When adding new features here that depend on other features, * please update the table in kernel/cpu/cpuid-deps.c as well. */ /* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* "fpu" Onboard FPU */ +#define X86_FEATURE_VME ( 0*32+ 1) /* "vme" Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* "de" Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* "pse" Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* "tsc" Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* "msr" Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* "pae" Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* "mce" Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* "cx8" CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* "apic" Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* "sep" SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* "mtrr" Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* "pge" Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* "mca" Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* "cmov" CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT ( 0*32+16) /* "pat" Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* "pse36" 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* "pn" Processor serial number */ +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* "clflush" CLFLUSH instruction */ #define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_ACPI ( 0*32+22) /* "acpi" ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* "mmx" Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* "fxsr" FXSAVE/FXRSTOR, CR4.OSFXSR */ #define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ #define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ #define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_HT ( 0*32+28) /* "ht" Hyper-Threading */ #define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ +#define X86_FEATURE_IA64 ( 0*32+30) /* "ia64" IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* "pbe" Pending Break Enable */ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ /* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */ -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* "syscall" SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* "mp" MP Capable */ +#define X86_FEATURE_NX ( 1*32+20) /* "nx" Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* "mmxext" AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* "fxsr_opt" FXSAVE/FXRSTOR optimizations */ #define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */ -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */ -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* "rdtscp" RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* "lm" Long Mode (x86-64, 64-bit support) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* "3dnowext" AMD 3DNow extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* "3dnow" 3DNow */ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* "recovery" CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* "longrun" Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* "lrti" LongRun table interface */ /* Other features, Linux-defined mapping, word 3 */ /* This range is used for feature bits which conflict or are synthesized */ -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ -#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */ -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ -#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */ -#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */ -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ -#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */ -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* "cxmmx" Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* "k6_mtrr" AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* "cyrix_arr" Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ +#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ +#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */ +#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ +#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* "arch_perfmon" Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */ +#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */ +#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */ +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* "acc_power" AMD Accumulated Power Mechanism */ +#define X86_FEATURE_NOPL ( 3*32+20) /* "nopl" The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* "xtopology" CPU topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* "tsc_reliable" TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* "nonstop_tsc" TSC does not stop in C states */ +#define X86_FEATURE_CPUID ( 3*32+25) /* "cpuid" CPU has CPUID instruction itself */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* "extd_apicid" Extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* "amd_dcm" AMD multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* "aperfmperf" P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ +#define X86_FEATURE_RAPL ( 3*32+29) /* "rapl" AMD/Hygon RAPL interface */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* "nonstop_tsc_s3" TSC doesn't stop in S3 state */ +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* "tsc_known_freq" TSC has known frequency */ /* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* "pclmulqdq" PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* "dtes64" 64-bit Debug Store */ #define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ #define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */ -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */ -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */ -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* "vmx" Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* "smx" Safer Mode eXtensions */ +#define X86_FEATURE_EST ( 4*32+ 7) /* "est" Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* "tm2" Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* "ssse3" Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* "cid" Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* "sdbg" Silicon Debug */ +#define X86_FEATURE_FMA ( 4*32+12) /* "fma" Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* "cx16" CMPXCHG16B instruction */ +#define X86_FEATURE_XTPR ( 4*32+14) /* "xtpr" Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* "pdcm" Perf/Debug Capabilities MSR */ +#define X86_FEATURE_PCID ( 4*32+17) /* "pcid" Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* "dca" Direct Cache Access */ #define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ #define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */ -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */ -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */ -#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */ -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* "x2apic" X2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* "movbe" MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* "popcnt" POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* "tsc_deadline_timer" TSC deadline timer */ +#define X86_FEATURE_AES ( 4*32+25) /* "aes" AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* "xsave" XSAVE/XRSTOR/XSETBV/XGETBV instructions */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* XSAVE instruction enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* "avx" Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* "f16c" 16-bit FP conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* "rdrand" RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* "hypervisor" Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ #define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ #define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ #define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* "ace2" Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* "ace2_en" ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* "phe" PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* "phe_en" PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* "pmm" PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* "pmm_en" PMM enabled */ /* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */ -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ -#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ -#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */ -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ -#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */ -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */ -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */ -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ -#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */ -#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */ -#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* "lahf_lm" LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* "cmp_legacy" If yes HyperThreading not valid */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* "svm" Secure Virtual Machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* "extapic" Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* "cr8_legacy" CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* "abm" Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* "sse4a" SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* "misalignsse" Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* "3dnowprefetch" 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* "osvw" OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* "ibs" Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* "xop" Extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* "skinit" SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* "wdt" Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* "lwp" Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* "fma4" 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* "tce" Translation Cache Extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* "nodeid_msr" NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* "tbm" Trailing Bit Manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* "topoext" Topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* "perfctr_core" Core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* "perfctr_nb" NB performance counter extensions */ +#define X86_FEATURE_BPEXT ( 6*32+26) /* "bpext" Data breakpoint extension */ +#define X86_FEATURE_PTSC ( 6*32+27) /* "ptsc" Performance time-stamp counter */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* "perfctr_llc" Last Level Cache performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* "mwaitx" MWAIT extension (MONITORX/MWAITX instructions) */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -189,93 +189,93 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */ -#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ -#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ -#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ -#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */ -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ -#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ -#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ -#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ -#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ -#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ -#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */ -#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ -#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ -#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ -#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ -#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ -#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ -#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ -#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* "ring3mwait" Ring 3 MONITOR/MWAIT instructions */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* "cpuid_fault" Intel CPUID faulting */ +#define X86_FEATURE_CPB ( 7*32+ 2) /* "cpb" AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* "epb" IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* "cat_l3" Cache Allocation Technology L3 */ +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* "cat_l2" Cache Allocation Technology L2 */ +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* "cdp_l3" Code and Data Prioritization L3 */ +#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* "tdx_host_platform" Platform supports being a TDX host */ +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* "hw_pstate" AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* "proc_feedback" AMD ProcFeedbackInterface */ +#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* Use compacted XSTATE (XSAVES or XSAVEC) */ +#define X86_FEATURE_PTI ( 7*32+11) /* "pti" Kernel Page Table Isolation enabled */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* Fill RSB on VM-Exit */ +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* "intel_ppin" Intel Processor Inventory Number */ +#define X86_FEATURE_CDP_L2 ( 7*32+15) /* "cdp_l2" Code and Data Prioritization L2 */ +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* "ssbd" Speculative Store Bypass Disable */ +#define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */ +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */ +#define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */ +#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* "ibrs_enhanced" Enhanced IBRS */ +#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* MSR IA32_FEAT_CTL configured */ /* Virtualization flags: Linux defined, word 8 */ -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */ -#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* "tpr_shadow" Intel TPR Shadow */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ -#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ -#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ -#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ -#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ -#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ -#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ +#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ +#define X86_FEATURE_EPT_AD ( 8*32+17) /* "ept_ad" Intel Extended Page Table access-dirty bit */ +#define X86_FEATURE_VMCALL ( 8*32+18) /* Hypervisor supports the VMCALL instruction */ +#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* PV vcpu_is_preempted function */ +#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* "tdx_guest" Intel Trust Domain Extensions Guest */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ -#define X86_FEATURE_SGX ( 9*32+ 2) /* Software Guard Extensions */ -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ -#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */ -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ -#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */ -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ -#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ -#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ -#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */ -#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */ -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ -#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ -#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ -#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ -#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* "fsgsbase" RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* "tsc_adjust" TSC adjustment MSR 0x3B */ +#define X86_FEATURE_SGX ( 9*32+ 2) /* "sgx" Software Guard Extensions */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* "bmi1" 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* "hle" Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* "avx2" AVX2 instructions */ +#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* FPU data pointer updated only on x87 exceptions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* "smep" Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* "bmi2" 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* "erms" Enhanced REP MOVSB/STOSB instructions */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* "invpcid" Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* "rtm" Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* "cqm" Cache QoS Monitoring */ +#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* Zero out FPU CS and FPU DS */ +#define X86_FEATURE_MPX ( 9*32+14) /* "mpx" Memory Protection Extension */ +#define X86_FEATURE_RDT_A ( 9*32+15) /* "rdt_a" Resource Director Technology Allocation */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* "avx512f" AVX-512 Foundation */ +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* "avx512dq" AVX-512 DQ (Double/Quad granular) Instructions */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* "rdseed" RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* "adx" ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* "smap" Supervisor Mode Access Prevention */ +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* "avx512ifma" AVX-512 Integer Fused Multiply-Add instructions */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* "clflushopt" CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* "clwb" CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* "intel_pt" Intel Processor Trace */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* "avx512pf" AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* "avx512er" AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD ( 9*32+28) /* "avx512cd" AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* "sha_ni" SHA1/SHA256 Instruction Extensions */ +#define X86_FEATURE_AVX512BW ( 9*32+30) /* "avx512bw" AVX-512 BW (Byte/Word granular) Instructions */ +#define X86_FEATURE_AVX512VL ( 9*32+31) /* "avx512vl" AVX-512 VL (128/256 Vector Length) Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */ -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */ -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ -#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* "xsaveopt" XSAVEOPT instruction */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* "xsavec" XSAVEC instruction */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* "xgetbv1" XGETBV with ECX = 1 instruction */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* "xsaves" XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* eXtended Feature Disabling */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -283,181 +283,183 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */ -#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */ -#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */ -#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ -#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ -#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ -#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ -#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ -#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ -#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ -#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ -#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ -#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ -#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ -#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ -#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ -#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ -#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */ -#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */ -#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ -#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ -#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ -#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ -#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ -#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ -#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ -#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ -#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ -#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ -#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ -#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ +#define X86_FEATURE_CQM_LLC (11*32+ 0) /* "cqm_llc" LLC QoS if 1 */ +#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* "cqm_occup_llc" LLC occupancy monitoring */ +#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* "cqm_mbm_total" LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* "cqm_mbm_local" LLC Local MBM monitoring */ +#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* LFENCE in user entry SWAPGS path */ +#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* "split_lock_detect" #AC for split lock */ +#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* AMD BTB untrain return */ +#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* Use IBPB during runtime firmware calls */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* Fill RSB on VM exit when EIBRS is enabled */ +#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* SGX EDECCSSA user leaf function */ +#define X86_FEATURE_CALL_DEPTH (11*32+19) /* Call depth tracking for RSB stuffing */ +#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* MSR IA32_TSX_CTRL (Intel) implemented */ +#define X86_FEATURE_SMBA (11*32+21) /* Slow Memory Bandwidth Allocation */ +#define X86_FEATURE_BMEC (11*32+22) /* Bandwidth Monitoring Event Configuration */ +#define X86_FEATURE_USER_SHSTK (11*32+23) /* "user_shstk" Shadow stack support for user mode applications */ +#define X86_FEATURE_SRSO (11*32+24) /* AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* Issue an IBPB only on VMEXIT */ +#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ +#define X86_FEATURE_ZEN2 (11*32+28) /* CPU based on Zen2 microarchitecture */ +#define X86_FEATURE_ZEN3 (11*32+29) /* CPU based on Zen3 microarchitecture */ +#define X86_FEATURE_ZEN4 (11*32+30) /* CPU based on Zen4 microarchitecture */ +#define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ -#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ -#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ -#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ -#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */ -#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */ -#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */ -#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */ -#define X86_FEATURE_FRED (12*32+17) /* Flexible Return and Event Delivery */ -#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */ -#define X86_FEATURE_WRMSRNS (12*32+19) /* "" Non-serializing WRMSR */ -#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ -#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ -#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */ +#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */ +#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */ +#define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */ +#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */ +#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */ +#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */ +#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */ +#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */ +#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */ +#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */ +#define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ -#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ -#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ -#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */ -#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */ -#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ -#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ -#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */ -#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ -#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ -#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ -#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ -#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */ -#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ -#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ +#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ +#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ +#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */ +#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */ +#define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ +#define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */ +#define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ -#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* "dtherm" Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* "ida" Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* "arat" Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* "pln" Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* "pts" Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* "hwp" Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* "hwp_notify" HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* "hwp_act_window" HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* "hwp_epp" HWP Energy Perf. Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* "hwp_pkg_req" HWP Package Level Request */ +#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* HWP Highest perf change */ +#define X86_FEATURE_HFI (14*32+19) /* "hfi" Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_NPT (15*32+ 0) /* "npt" Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* "lbrv" LBR Virtualization support */ #define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ #define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ #define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ #define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ -#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ -#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ -#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ -#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */ -#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ -#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */ -#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* "flushbyasid" Flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* "decodeassists" Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* "pausefilter" Filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* "pfthreshold" Pause filter threshold */ +#define X86_FEATURE_AVIC (15*32+13) /* "avic" Virtual Interrupt Controller */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* "v_vmsave_vmload" Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* "vgif" Virtual GIF */ +#define X86_FEATURE_X2AVIC (15*32+18) /* "x2avic" Virtual x2apic */ +#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ +#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ +#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ -#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ -#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ -#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ -#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */ -#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ -#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */ -#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ -#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ -#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ -#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ -#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ -#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */ -#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ -#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ -#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ -#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ -#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ -#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ -#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ -#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */ +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/ +#define X86_FEATURE_UMIP (16*32+ 2) /* "umip" User Mode Instruction Protection */ +#define X86_FEATURE_PKU (16*32+ 3) /* "pku" Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (16*32+ 4) /* "ospke" OS Protection Keys Enable */ +#define X86_FEATURE_WAITPKG (16*32+ 5) /* "waitpkg" UMONITOR/UMWAIT/TPAUSE Instructions */ +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* "avx512_vbmi2" Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_SHSTK (16*32+ 7) /* Shadow stack */ +#define X86_FEATURE_GFNI (16*32+ 8) /* "gfni" Galois Field New Instructions */ +#define X86_FEATURE_VAES (16*32+ 9) /* "vaes" Vector AES */ +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* "vpclmulqdq" Carry-Less Multiplication Double Quadword */ +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* "avx512_vnni" Vector Neural Network Instructions */ +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* "avx512_bitalg" Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME (16*32+13) /* "tme" Intel Total Memory Encryption */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* "avx512_vpopcntdq" POPCNT for vectors of DW/QW */ +#define X86_FEATURE_LA57 (16*32+16) /* "la57" 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* "rdpid" RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* "bus_lock_detect" Bus Lock detect */ +#define X86_FEATURE_CLDEMOTE (16*32+25) /* "cldemote" CLDEMOTE instruction */ +#define X86_FEATURE_MOVDIRI (16*32+27) /* "movdiri" MOVDIRI instruction */ +#define X86_FEATURE_MOVDIR64B (16*32+28) /* "movdir64b" MOVDIR64B instruction */ +#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */ +#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ -#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ -#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ -#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */ +#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */ +#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ -#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */ -#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ -#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ -#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ -#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */ -#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ -#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ -#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ -#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ -#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ -#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ -#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */ -#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ -#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ -#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ -#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ -#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ -#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ -#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ -#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* "avx512_4vnniw" AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* "avx512_4fmaps" AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_FSRM (18*32+ 4) /* "fsrm" Fast Short Rep Mov */ +#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* "avx512_vp2intersect" AVX-512 Intersect for D/Q */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* SRBDS mitigation MSR available */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* "md_clear" VERW clears CPU buffers */ +#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* RTM transaction always aborts */ +#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* TSX_FORCE_ABORT */ +#define X86_FEATURE_SERIALIZE (18*32+14) /* "serialize" SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* This part has CPUs of more than one type */ +#define X86_FEATURE_TSXLDTRK (18*32+16) /* "tsxldtrk" TSX Suspend Load Address Tracking */ +#define X86_FEATURE_PCONFIG (18*32+18) /* "pconfig" Intel PCONFIG */ +#define X86_FEATURE_ARCH_LBR (18*32+19) /* "arch_lbr" Intel ARCH LBR */ +#define X86_FEATURE_IBT (18*32+20) /* "ibt" Indirect Branch Tracking */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* "amx_bf16" AMX bf16 Support */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* "avx512_fp16" AVX512 FP16 */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* "amx_tile" AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* "amx_int8" AMX int8 Support */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_FLUSH_L1D (18*32+28) /* "flush_l1d" Flush L1D cache */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* "arch_capabilities" IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* IA32_CORE_CAPABILITIES MSR */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ -#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */ -#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ -#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ -#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_SEV_SNP (19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */ -#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ -#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ -#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ -#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */ -#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ -#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */ -#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */ -#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ -#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */ +#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ +#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ +#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ +#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ -#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ -#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ -#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ +#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ +#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ +#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -465,59 +467,60 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ -#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ -#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ -#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ -#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* "amd_lbr_pmc_freeze" AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ /* * BUG word(s) */ #define X86_BUG(x) (NCAPINTS*32 + (x)) -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_F00F X86_BUG(0) /* "f00f" Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* "fdiv" FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* "coma" Cyrix 6x86 coma */ #define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ #define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#define X86_BUG_11AP X86_BUG(5) /* "11ap" Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* "fxsave_leak" FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* "clflush_monitor" AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* "sysret_ss_attrs" SYSRET doesn't fix up SS attrs */ #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional * to avoid confusion. */ -#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ +#define X86_BUG_ESPFIX X86_BUG(9) /* IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif -#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ -#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ -#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ -#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ -#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ -#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ -#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ -#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ -#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ -#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ -#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ -#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ -#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ -#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ -#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ -#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ -#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ -#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ -#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ -#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ -#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */ +#define X86_BUG_NULL_SEG X86_BUG(10) /* "null_seg" Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* "swapgs_fence" SWAPGS without input dep on GS */ +#define X86_BUG_MONITOR X86_BUG(12) /* "monitor" IPI required to wake up remote CPU */ +#define X86_BUG_AMD_E400 X86_BUG(13) /* "amd_e400" CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* "cpu_meltdown" CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* "spectre_v1" CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* "spectre_v2" CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* "spec_store_bypass" CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* "l1tf" CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* "mds" CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* "msbds_only" CPU is only affected by the MSDBS variant of BUG_MDS */ +#define X86_BUG_SWAPGS X86_BUG(21) /* "swapgs" CPU is affected by speculation through SWAPGS */ +#define X86_BUG_TAA X86_BUG(22) /* "taa" CPU is affected by TSX Async Abort(TAA) */ +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ +#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */ +#define X86_BUG_GDS X86_BUG(30) /* "gds" CPU is affected by Gather Data Sampling */ +#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */ /* BUG word 2 */ -#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ -#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ -#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ -#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ +#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index e022e6eb766c..82c6a4d350e0 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -566,6 +566,12 @@ #define MSR_RELOAD_PMC0 0x000014c1 #define MSR_RELOAD_FIXED_CTR0 0x00001309 +/* V6 PMON MSR range */ +#define MSR_IA32_PMC_V6_GP0_CTR 0x1900 +#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901 +#define MSR_IA32_PMC_V6_FX0_CTR 0x1980 +#define MSR_IA32_PMC_V6_STEP 4 + /* KeyID partitioning between MKTME and TDX */ #define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087 @@ -660,6 +666,8 @@ #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_SVSM_CAA 0xc001f000 + /* AMD Collaborative Processor Performance Control MSRs */ #define MSR_AMD_CPPC_CAP1 0xc00102b0 #define MSR_AMD_CPPC_ENABLE 0xc00102b1 @@ -781,6 +789,8 @@ #define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 +#define MSR_K7_HWCR_CPB_DIS_BIT 25 +#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT) /* K6 MSRs */ #define MSR_K6_WHCR 0xc0000082 @@ -1164,6 +1174,7 @@ #define MSR_IA32_QM_CTR 0xc8e #define MSR_IA32_PQR_ASSOC 0xc8f #define MSR_IA32_L3_CBM_BASE 0xc90 +#define MSR_RMID_SNC_CONFIG 0xca0 #define MSR_IA32_L2_CBM_BASE 0xd10 #define MSR_IA32_MBA_THRTL_BASE 0xd50 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 9fae1b73b529..bf57a824f722 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -106,6 +106,7 @@ struct kvm_ioapic_state { #define KVM_RUN_X86_SMM (1 << 0) #define KVM_RUN_X86_BUS_LOCK (1 << 1) +#define KVM_RUN_X86_GUEST_MODE (1 << 2) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { @@ -697,6 +698,11 @@ enum sev_cmd_id { /* Second time is the charm; improved versions of the above ioctls. */ KVM_SEV_INIT2, + /* SNP-specific commands */ + KVM_SEV_SNP_LAUNCH_START = 100, + KVM_SEV_SNP_LAUNCH_UPDATE, + KVM_SEV_SNP_LAUNCH_FINISH, + KVM_SEV_NR_MAX, }; @@ -824,6 +830,48 @@ struct kvm_sev_receive_update_data { __u32 pad2; }; +struct kvm_sev_snp_launch_start { + __u64 policy; + __u8 gosvw[16]; + __u16 flags; + __u8 pad0[6]; + __u64 pad1[4]; +}; + +/* Kept in sync with firmware values for simplicity. */ +#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 +#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 +#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 +#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 +#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 + +struct kvm_sev_snp_launch_update { + __u64 gfn_start; + __u64 uaddr; + __u64 len; + __u8 type; + __u8 pad0; + __u16 flags; + __u32 pad1; + __u64 pad2[4]; +}; + +#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 +#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 +#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 + +struct kvm_sev_snp_launch_finish { + __u64 id_block_uaddr; + __u64 id_auth_uaddr; + __u8 id_block_en; + __u8 auth_key_en; + __u8 vcek_disabled; + __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; + __u8 pad0[3]; + __u16 flags; + __u64 pad1[4]; +}; + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) @@ -874,5 +922,6 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SW_PROTECTED_VM 1 #define KVM_X86_SEV_VM 2 #define KVM_X86_SEV_ES_VM 3 +#define KVM_X86_SNP_VM 4 #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 80e1df482337..1814b413fd57 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -115,6 +115,7 @@ #define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 +#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 489cbed7e82a..12796808f07a 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -82,7 +82,30 @@ FILES= \ FILES := $(addprefix $(OUTPUT),$(FILES)) -PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config +# Some distros provide the command $(CROSS_COMPILE)pkg-config for +# searching packges installed with Multiarch. Use it for cross +# compilation if it is existed. +ifneq (, $(shell which $(CROSS_COMPILE)pkg-config)) + PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config +else + PKG_CONFIG ?= pkg-config + + # PKG_CONFIG_PATH or PKG_CONFIG_LIBDIR, alongside PKG_CONFIG_SYSROOT_DIR + # for modified system root, are required for the cross compilation. + # If these PKG_CONFIG environment variables are not set, Multiarch library + # paths are used instead. + ifdef CROSS_COMPILE + ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),) + CROSS_ARCH = $(shell $(CC) -dumpmachine) + PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/share/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/share/pkgconfig/ + export PKG_CONFIG_LIBDIR + endif + endif +endif all: $(FILES) @@ -147,7 +170,17 @@ $(OUTPUT)test-libopencsd.bin: DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) -DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 + DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd + + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) + LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) + + # Elfutils merged libebl.a into libdw.a starting from version 0.177, + # Link libebl.a only if libdw is older than this version. + ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) + DWARFLIBS += -lebl + endif endif $(OUTPUT)test-dwarf.bin: @@ -178,27 +211,27 @@ $(OUTPUT)test-numa_num_possible_cpus.bin: $(BUILD) -lnuma $(OUTPUT)test-libunwind.bin: - $(BUILD) -lelf + $(BUILD) -lelf -llzma $(OUTPUT)test-libunwind-debug-frame.bin: - $(BUILD) -lelf + $(BUILD) -lelf -llzma $(OUTPUT)test-libunwind-x86.bin: - $(BUILD) -lelf -lunwind-x86 + $(BUILD) -lelf -llzma -lunwind-x86 $(OUTPUT)test-libunwind-x86_64.bin: - $(BUILD) -lelf -lunwind-x86_64 + $(BUILD) -lelf -llzma -lunwind-x86_64 $(OUTPUT)test-libunwind-arm.bin: - $(BUILD) -lelf -lunwind-arm + $(BUILD) -lelf -llzma -lunwind-arm $(OUTPUT)test-libunwind-aarch64.bin: - $(BUILD) -lelf -lunwind-aarch64 + $(BUILD) -lelf -llzma -lunwind-aarch64 $(OUTPUT)test-libunwind-debug-frame-arm.bin: - $(BUILD) -lelf -lunwind-arm + $(BUILD) -lelf -llzma -lunwind-arm $(OUTPUT)test-libunwind-debug-frame-aarch64.bin: - $(BUILD) -lelf -lunwind-aarch64 + $(BUILD) -lelf -llzma -lunwind-aarch64 $(OUTPUT)test-libaudit.bin: $(BUILD) -laudit diff --git a/tools/crypto/ccp/dbc.c b/tools/crypto/ccp/dbc.c index a807df0f0597..80248d3d3a5a 100644 --- a/tools/crypto/ccp/dbc.c +++ b/tools/crypto/ccp/dbc.c @@ -57,7 +57,6 @@ int process_param(int fd, int msg_index, __u8 *signature, int *data) .msg_index = msg_index, .param = *data, }; - int ret; assert(signature); assert(data); diff --git a/tools/hv/Makefile b/tools/hv/Makefile index 2e60e2c212cd..34ffcec264ab 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -52,7 +52,7 @@ $(OUTPUT)hv_fcopy_uio_daemon: $(HV_FCOPY_UIO_DAEMON_IN) clean: rm -f $(ALL_PROGRAMS) - find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete + find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete install: $(ALL_PROGRAMS) install -d -m 755 $(DESTDIR)$(sbindir); \ diff --git a/tools/hv/lsvmbus b/tools/hv/lsvmbus index 099f2c44dbed..f83698f14da2 100644..100755 --- a/tools/hv/lsvmbus +++ b/tools/hv/lsvmbus @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 import os diff --git a/tools/include/uapi/README b/tools/include/uapi/README new file mode 100644 index 000000000000..7147b1b2cb28 --- /dev/null +++ b/tools/include/uapi/README @@ -0,0 +1,73 @@ +Why we want a copy of kernel headers in tools? +============================================== + +There used to be no copies, with tools/ code using kernel headers +directly. From time to time tools/perf/ broke due to legitimate kernel +hacking. At some point Linus complained about such direct usage. Then we +adopted the current model. + +The way these headers are used in perf are not restricted to just +including them to compile something. + +There are sometimes used in scripts that convert defines into string +tables, etc, so some change may break one of these scripts, or new MSRs +may use some different #define pattern, etc. + +E.g.: + + $ ls -1 tools/perf/trace/beauty/*.sh | head -5 + tools/perf/trace/beauty/arch_errno_names.sh + tools/perf/trace/beauty/drm_ioctl.sh + tools/perf/trace/beauty/fadvise.sh + tools/perf/trace/beauty/fsconfig.sh + tools/perf/trace/beauty/fsmount.sh + $ + $ tools/perf/trace/beauty/fadvise.sh + static const char *fadvise_advices[] = { + [0] = "NORMAL", + [1] = "RANDOM", + [2] = "SEQUENTIAL", + [3] = "WILLNEED", + [4] = "DONTNEED", + [5] = "NOREUSE", + }; + $ + +The tools/perf/check-headers.sh script, part of the tools/ build +process, points out changes in the original files. + +So its important not to touch the copies in tools/ when doing changes in +the original kernel headers, that will be done later, when +check-headers.sh inform about the change to the perf tools hackers. + +Another explanation from Ingo Molnar: +It's better than all the alternatives we tried so far: + + - Symbolic links and direct #includes: this was the original approach but + was pushed back on from the kernel side, when tooling modified the + headers and broke them accidentally for kernel builds. + + - Duplicate self-defined ABI headers like glibc: double the maintenance + burden, double the chance for mistakes, plus there's no tech-driven + notification mechanism to look at new kernel side changes. + +What we are doing now is a third option: + + - A software-enforced copy-on-write mechanism of kernel headers to + tooling, driven by non-fatal warnings on the tooling side build when + kernel headers get modified: + + Warning: Kernel ABI header differences: + diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h + diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h + diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h + ... + + The tooling policy is to always pick up the kernel side headers as-is, + and integate them into the tooling build. The warnings above serve as a + notification to tooling maintainers that there's changes on the kernel + side. + +We've been using this for many years now, and it might seem hacky, but +works surprisingly well. + diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index a00d53d02723..5bf6148cac2b 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -737,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64) #define __NR_ppoll_time64 414 __SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64) #define __NR_io_pgetevents_time64 416 -__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64) #define __NR_recvmmsg_time64 417 __SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64) #define __NR_mq_timedsend_time64 418 diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index d4d86e566e07..535cb68fdb5c 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -2163,6 +2163,15 @@ struct drm_i915_gem_context_param { * supports this per context flag. */ #define I915_CONTEXT_PARAM_LOW_LATENCY 0xe + +/* + * I915_CONTEXT_PARAM_CONTEXT_IMAGE: + * + * Allows userspace to provide own context images. + * + * Note that this is a debug API not available on production kernel builds. + */ +#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf /* Must be kept compact -- no holes and well documented */ /** @value: Context parameter value to be set or queried */ @@ -2564,6 +2573,24 @@ struct i915_context_param_engines { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +struct i915_gem_context_param_context_image { + /** @engine: Engine class & instance to be configured. */ + struct i915_engine_class_instance engine; + + /** @flags: One of the supported flags or zero. */ + __u32 flags; +#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0) + + /** @size: Size of the image blob pointed to by @image. */ + __u32 size; + + /** @mbz: Must be zero. */ + __u32 mbz; + + /** @image: Userspace memory containing the context image. */ + __u64 image; +} __attribute__((packed)); + /** * struct drm_i915_gem_context_create_ext_setparam - Context parameter * to set or query during context creation. diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 35bcf52dbc65..e05b39e39c3f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2851,7 +2851,7 @@ union bpf_attr { * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**, **TCP_NOTSENT_LOWAT**, * **TCP_NODELAY**, **TCP_MAXSEG**, **TCP_WINDOW_CLAMP**, * **TCP_THIN_LINEAR_TIMEOUTS**, **TCP_BPF_DELACK_MAX**, - * **TCP_BPF_RTO_MIN**. + * **TCP_BPF_RTO_MIN**, **TCP_BPF_SOCK_OPS_CB_FLAGS**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports the following *optname*\ s: * **IPV6_TCLASS**, **IPV6_AUTOFLOWLABEL**. @@ -7080,6 +7080,7 @@ enum { TCP_BPF_SYN = 1005, /* Copy the TCP header */ TCP_BPF_SYN_IP = 1006, /* Copy the IP[46] and TCP header */ TCP_BPF_SYN_MAC = 1007, /* Copy the MAC, IP[46], and TCP header */ + TCP_BPF_SOCK_OPS_CB_FLAGS = 1008, /* Get or Set TCP sock ops flags */ }; enum { diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index e682ab628dfa..d358add1611c 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -81,6 +81,8 @@ enum { #define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_SMC = 256, /* Shared Memory Communications */ +#define IPPROTO_SMC IPPROTO_SMC IPPROTO_MPTCP = 262, /* Multipath TCP connection */ #define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index e5af8c692dc0..637efc055145 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -192,11 +192,24 @@ struct kvm_xen_exit { /* Flags that describe what fields in emulation_failure hold valid data. */ #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) +/* + * struct kvm_run can be modified by userspace at any time, so KVM must be + * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM() + * renames fields in struct kvm_run from <symbol> to <symbol>__unsafe when + * compiled into the kernel, ensuring that any use within KVM is obvious and + * gets extra scrutiny. + */ +#ifdef __KERNEL__ +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe +#else +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol +#endif + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 immediate_exit; + __u8 HINT_UNSAFE_IN_KVM(immediate_exit); __u8 padding1[6]; /* out */ @@ -918,6 +931,8 @@ struct kvm_enable_cap { #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 #define KVM_CAP_PRE_FAULT_MEMORY 236 +#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 +#define KVM_CAP_X86_GUEST_MODE 238 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 43742ac5b00d..7c308f04e7a0 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -93,6 +93,7 @@ enum { NETDEV_A_PAGE_POOL_INFLIGHT, NETDEV_A_PAGE_POOL_INFLIGHT_MEM, NETDEV_A_PAGE_POOL_DETACH_TIME, + NETDEV_A_PAGE_POOL_DMABUF, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) @@ -131,6 +132,7 @@ enum { NETDEV_A_QUEUE_IFINDEX, NETDEV_A_QUEUE_TYPE, NETDEV_A_QUEUE_NAPI_ID, + NETDEV_A_QUEUE_DMABUF, __NETDEV_A_QUEUE_MAX, NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) @@ -174,6 +176,16 @@ enum { }; enum { + NETDEV_A_DMABUF_IFINDEX = 1, + NETDEV_A_DMABUF_QUEUES, + NETDEV_A_DMABUF_FD, + NETDEV_A_DMABUF_ID, + + __NETDEV_A_DMABUF_MAX, + NETDEV_A_DMABUF_MAX = (__NETDEV_A_DMABUF_MAX - 1) +}; + +enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, NETDEV_CMD_DEV_DEL_NTF, @@ -186,6 +198,7 @@ enum { NETDEV_CMD_QUEUE_GET, NETDEV_CMD_NAPI_GET, NETDEV_CMD_QSTATS_GET, + NETDEV_CMD_BIND_RX, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 3a64499b0f5d..4842c36fdf80 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1349,12 +1349,14 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0x7 available */ +#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ +/* 0x7 available */ #define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ #define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ +#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ #define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ #define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 67626d535316..887a25286441 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/net/ynl/lib/.gitignore b/tools/net/ynl/lib/.gitignore index c18dd8d83cee..296c4035dbf2 100644 --- a/tools/net/ynl/lib/.gitignore +++ b/tools/net/ynl/lib/.gitignore @@ -1 +1,2 @@ __pycache__/ +*.d diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index fcb18a5a6d70..e16cef160bc2 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -696,14 +696,14 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse) addr.nl_family = AF_NETLINK; if (bind(ys->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0) { __perr(yse, "unable to bind to a socket address"); - goto err_close_sock;; + goto err_close_sock; } memset(&addr, 0, sizeof(addr)); addrlen = sizeof(addr); if (getsockname(ys->socket, (struct sockaddr *)&addr, &addrlen) < 0) { __perr(yse, "unable to read socket address"); - goto err_close_sock;; + goto err_close_sock; } ys->portid = addr.nl_pid; ys->seq = random(); diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index d42c1d605969..c22c22bf2cb7 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -388,6 +388,8 @@ class NetlinkProtocol: def decode(self, ynl, nl_msg, op): msg = self._decode(nl_msg) + if op is None: + op = ynl.rsp_by_value[msg.cmd()] fixed_header_size = ynl._struct_size(op.fixed_header) msg.raw_attrs = NlAttrs(msg.raw, fixed_header_size) return msg @@ -921,8 +923,7 @@ class YnlFamily(SpecFamily): print("Netlink done while checking for ntf!?") continue - op = self.rsp_by_value[nl_msg.cmd()] - decoded = self.nlproto.decode(self, nl_msg, op) + decoded = self.nlproto.decode(self, nl_msg, None) if decoded.cmd() not in self.async_msg_ids: print("Unexpected msg id done while checking for ntf", decoded) continue @@ -980,7 +981,7 @@ class YnlFamily(SpecFamily): if nl_msg.extack: self._decode_extack(req_msg, op, nl_msg.extack) else: - op = self.rsp_by_value[nl_msg.cmd()] + op = None req_flags = [] if nl_msg.error: diff --git a/tools/net/ynl/samples/netdev.c b/tools/net/ynl/samples/netdev.c index 3e7b29bd55d5..22609d44c89a 100644 --- a/tools/net/ynl/samples/netdev.c +++ b/tools/net/ynl/samples/netdev.c @@ -79,7 +79,10 @@ int main(int argc, char **argv) goto err_close; printf("Select ifc ($ifindex; or 0 = dump; or -2 ntf check): "); - scanf("%d", &ifindex); + if (scanf("%d", &ifindex) != 1) { + fprintf(stderr, "Error: unable to parse input\n"); + goto err_destroy; + } if (ifindex > 0) { struct netdev_dev_get_req *req; @@ -119,6 +122,7 @@ int main(int argc, char **argv) err_close: fprintf(stderr, "YNL: %s\n", ys->err.msg); +err_destroy: ynl_sock_destroy(ys); return 2; } diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 51529fabd517..717530bc9c52 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -2668,13 +2668,15 @@ def main(): cw.p('#define ' + hdr_prot) cw.nl() + hdr_file=os.path.basename(args.out_file[:-2]) + ".h" + if args.mode == 'kernel': cw.p('#include <net/netlink.h>') cw.p('#include <net/genetlink.h>') cw.nl() if not args.header: if args.out_file: - cw.p(f'#include "{os.path.basename(args.out_file[:-2])}.h"') + cw.p(f'#include "{hdr_file}"') cw.nl() headers = ['uapi/' + parsed.uapi_header] headers += parsed.kernel_family.get('headers', []) @@ -2686,7 +2688,7 @@ def main(): if family_contains_bitfield32(parsed): cw.p('#include <linux/netlink.h>') else: - cw.p(f'#include "{parsed.name}-user.h"') + cw.p(f'#include "{hdr_file}"') cw.p('#include "ynl.h"') headers = [parsed.uapi_header] for definition in parsed['definitions']: diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt index 3766886c4bca..83dc87c662b6 100644 --- a/tools/perf/Documentation/Build.txt +++ b/tools/perf/Documentation/Build.txt @@ -71,3 +71,31 @@ supported by GCC. UBSan detects undefined behaviors of programs at runtime. $ UBSAN_OPTIONS=print_stacktrace=1 ./perf record -a If UBSan detects any problem at runtime, it outputs a “runtime error:” message. + +4) Cross compilation +==================== +As Multiarch is commonly supported in Linux distributions, we can install +libraries for multiple architectures on the same system and then cross-compile +Linux perf. For example, Aarch64 libraries and toolchains can be installed on +an x86_64 machine, allowing us to compile perf for an Aarch64 target. + +Below is the command for building the perf with dynamic linking. + + $ cd /path/to/Linux + $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- -C tools/perf + +For static linking, the option `LDFLAGS="-static"` is required. + + $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- \ + LDFLAGS="-static" -C tools/perf + +In the embedded system world, a use case is to explicitly specify the package +configuration paths for cross building: + + $ PKG_CONFIG_SYSROOT_DIR="/path/to/cross/build/sysroot" \ + PKG_CONFIG_LIBDIR="/usr/lib/:/usr/local/lib" \ + make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- -C tools/perf + +In this case, the variable PKG_CONFIG_SYSROOT_DIR can be used alongside the +variable PKG_CONFIG_LIBDIR or PKG_CONFIG_PATH to prepend the sysroot path to +the library paths for cross compilation. diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c896babf7a74..fa679db61f62 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -152,7 +152,17 @@ ifdef LIBDW_DIR endif DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) - DWARFLIBS += -lelf -lebl -ldl -lz -llzma -lbz2 + DWARFLIBS += -lelf -ldl -lz -llzma -lbz2 -lzstd + + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) + LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) + + # Elfutils merged libebl.a into libdw.a starting from version 0.177, + # Link libebl.a only if libdw is older than this version. + ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) + DWARFLIBS += -lebl + endif endif FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) $(DWARFLIBS) @@ -296,6 +306,11 @@ endif ifdef PYTHON_CONFIG PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null) + # Update the python flags for cross compilation + ifdef CROSS_COMPILE + PYTHON_NATIVE := $(shell echo $(PYTHON_EMBED_LDOPTS) | sed 's/\(-L.*\/\)\(.*-linux-gnu\).*/\2/') + PYTHON_EMBED_LDOPTS := $(subst $(PYTHON_NATIVE),$(shell $(CC) -dumpmachine),$(PYTHON_EMBED_LDOPTS)) + endif PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null) @@ -897,6 +912,9 @@ else PYTHON_SETUPTOOLS_INSTALLED := $(shell $(PYTHON) -c 'import setuptools;' 2> /dev/null && echo "yes" || echo "no") ifeq ($(PYTHON_SETUPTOOLS_INSTALLED), yes) PYTHON_EXTENSION_SUFFIX := $(shell $(PYTHON) -c 'from importlib import machinery; print(machinery.EXTENSION_SUFFIXES[0])') + ifdef CROSS_COMPILE + PYTHON_EXTENSION_SUFFIX := $(subst $(PYTHON_NATIVE),$(shell $(CC) -dumpmachine),$(PYTHON_EXTENSION_SUFFIX)) + endif LANG_BINDINGS += $(obj-perf)python/perf$(PYTHON_EXTENSION_SUFFIX) else $(warning Missing python setuptools, the python binding won't be built, please install python3-setuptools or equivalent) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 175e4c7898f0..f8148db5fc38 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -193,7 +193,32 @@ HOSTLD ?= ld HOSTAR ?= ar CLANG ?= clang -PKG_CONFIG = $(CROSS_COMPILE)pkg-config +# Some distros provide the command $(CROSS_COMPILE)pkg-config for +# searching packges installed with Multiarch. Use it for cross +# compilation if it is existed. +ifneq (, $(shell which $(CROSS_COMPILE)pkg-config)) + PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config +else + PKG_CONFIG ?= pkg-config + + # PKG_CONFIG_PATH or PKG_CONFIG_LIBDIR, alongside PKG_CONFIG_SYSROOT_DIR + # for modified system root, is required for the cross compilation. + # If these PKG_CONFIG environment variables are not set, Multiarch library + # paths are used instead. + ifdef CROSS_COMPILE + ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),) + CROSS_ARCH = $(shell $(CC) -dumpmachine) + PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/share/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/share/pkgconfig/ + export PKG_CONFIG_LIBDIR + $(warning Missing PKG_CONFIG_LIBDIR, PKG_CONFIG_PATH and PKG_CONFIG_SYSROOT_DIR for cross compilation,) + $(warning set PKG_CONFIG_LIBDIR for using Multiarch libs.) + endif + endif +endif RM = rm -f LN = ln -f diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 3656f1ca7a21..ebae8415dfbb 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -230,8 +230,10 @@ 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64 179 64 pread64 sys_pread64 +179 spu pread64 sys_pread64 180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64 180 64 pwrite64 sys_pwrite64 +180 spu pwrite64 sys_pwrite64 181 common chown sys_chown 182 common getcwd sys_getcwd 183 common capget sys_capget @@ -246,6 +248,7 @@ 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead 191 64 readahead sys_readahead +191 spu readahead sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64 194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64 @@ -293,6 +296,7 @@ 232 nospu set_tid_address sys_set_tid_address 233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 233 64 fadvise64 sys_fadvise64 +233 spu fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group 235 nospu lookup_dcookie sys_ni_syscall 236 common epoll_create sys_epoll_create @@ -502,7 +506,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index bd0fee24ad10..01071182763e 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -418,7 +418,7 @@ 412 32 utimensat_time64 - sys_utimensat 413 32 pselect6_time64 - compat_sys_pselect6_time64 414 32 ppoll_time64 - compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 - sys_io_pgetevents +416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 - sys_mq_timedsend 419 32 mq_timedreceive_time64 - sys_mq_timedreceive diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index a396f6e6ab5b..7093ee21c0d1 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -1,8 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # # 64-bit system call numbers and entry vectors # # The format is: -# <number> <abi> <name> <entry point> +# <number> <abi> <name> <entry point> [<compat entry point> [noreturn]] # # The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls # @@ -68,7 +69,7 @@ 57 common fork sys_fork 58 common vfork sys_vfork 59 64 execve sys_execve -60 common exit sys_exit +60 common exit sys_exit - noreturn 61 common wait4 sys_wait4 62 common kill sys_kill 63 common uname sys_newuname @@ -239,7 +240,7 @@ 228 common clock_gettime sys_clock_gettime 229 common clock_getres sys_clock_getres 230 common clock_nanosleep sys_clock_nanosleep -231 common exit_group sys_exit_group +231 common exit_group sys_exit_group - noreturn 232 common epoll_wait sys_epoll_wait 233 common epoll_ctl sys_epoll_ctl 234 common tgkill sys_tgkill @@ -343,6 +344,7 @@ 332 common statx sys_statx 333 common io_pgetevents sys_io_pgetevents 334 common rseq sys_rseq +335 common uretprobe sys_uretprobe # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal sys_pidfd_send_signal diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index de76bbc50bfb..9a95871afc95 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <internal/lib.h> +#include <inttypes.h> #include <subcmd/parse-options.h> #include <api/fd/array.h> #include <api/fs/fs.h> @@ -688,9 +689,9 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) /* lock */ csv_sep, daemon->base, "lock"); - fprintf(out, "%c%lu", + fprintf(out, "%c%" PRIu64, /* session up time */ - csv_sep, (curr - daemon->start) / 60); + csv_sep, (uint64_t)((curr - daemon->start) / 60)); fprintf(out, "\n"); } else { @@ -700,8 +701,8 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) daemon->base, SESSION_OUTPUT); fprintf(out, " lock: %s/lock\n", daemon->base); - fprintf(out, " up: %lu minutes\n", - (curr - daemon->start) / 60); + fprintf(out, " up: %" PRIu64 " minutes\n", + (uint64_t)((curr - daemon->start) / 60)); } } @@ -727,9 +728,9 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) /* session ack */ csv_sep, session->base, SESSION_ACK); - fprintf(out, "%c%lu", + fprintf(out, "%c%" PRIu64, /* session up time */ - csv_sep, (curr - session->start) / 60); + csv_sep, (uint64_t)((curr - session->start) / 60)); fprintf(out, "\n"); } else { @@ -745,8 +746,8 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) session->base, SESSION_CONTROL); fprintf(out, " ack: %s/%s\n", session->base, SESSION_ACK); - fprintf(out, " up: %lu minutes\n", - (curr - session->start) / 60); + fprintf(out, " up: %" PRIu64 " minutes\n", + (uint64_t)((curr - session->start) / 60)); } } diff --git a/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json b/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json index 9b4a032186a7..7149caec4f80 100644 --- a/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json index a9939823b14b..0c9b9a2d2958 100644 --- a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json +++ b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json @@ -74,7 +74,7 @@ { "PublicDescription": "Sent SFENCE.VMA with ASID request to other HART event", "ConfigCode": "0x800000000000000c", - "EventName": "FW_SFENCE_VMA_RECEIVED", + "EventName": "FW_SFENCE_VMA_ASID_SENT", "BriefDescription": "Sent SFENCE.VMA with ASID request to other HART event" }, { diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json index 9b4a032186a7..7149caec4f80 100644 --- a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json index 9b4a032186a7..7149caec4f80 100644 --- a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json index 9b4a032186a7..7149caec4f80 100644 --- a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index 40132655ccd1..c76f53a90a7b 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -456,11 +456,13 @@ static int test__name_cmp(struct test_suite *test __maybe_unused, int subtest __ /** * Test perf_pmu__match() that's used to search for a PMU given a name passed * on the command line. The name that's passed may also be a filename type glob - * match. + * match. If the name does not match, perf_pmu__match() attempts to match the + * alias of the PMU, if provided. */ static int test__pmu_match(struct test_suite *test __maybe_unused, int subtest __maybe_unused) { struct perf_pmu test_pmu; + test_pmu.alias_name = NULL; test_pmu.name = "pmuname"; TEST_ASSERT_EQUAL("Exact match", perf_pmu__match(&test_pmu, "pmuname"), true); diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 89d16b90370b..df9cdb8bbfb8 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -76,7 +76,7 @@ struct msghdr { __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ struct ubuf_info *msg_ubuf; - int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb, + int (*sg_from_iter)(struct sk_buff *skb, struct iov_iter *from, size_t length); }; @@ -442,11 +442,14 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, extern int __sys_socket(int family, int type, int protocol); extern struct file *__sys_socket_file(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); +extern int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address, + int addrlen); extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, int addrlen, int file_flags); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); extern int __sys_listen(int fd, int backlog); +extern int __sys_listen_socket(struct socket *sock, int backlog); extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 45e4e64fd664..753971770733 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -329,12 +329,17 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO negation of O_APPEND */ #define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) +/* Atomic Write */ +#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND | RWF_NOAPPEND) + RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC) + +#define PROCFS_IOCTL_MAGIC 'f' /* Pagemap ioctl */ -#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) +#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg) /* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */ #define PAGE_IS_WPALLOWED (1 << 0) @@ -393,4 +398,158 @@ struct pm_scan_arg { __u64 return_mask; }; +/* /proc/<pid>/maps ioctl */ +#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query) + +enum procmap_query_flags { + /* + * VMA permission flags. + * + * Can be used as part of procmap_query.query_flags field to look up + * only VMAs satisfying specified subset of permissions. E.g., specifying + * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs, + * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only + * return read/write VMAs, though both executable/non-executable and + * private/shared will be ignored. + * + * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags + * field to specify actual VMA permissions. + */ + PROCMAP_QUERY_VMA_READABLE = 0x01, + PROCMAP_QUERY_VMA_WRITABLE = 0x02, + PROCMAP_QUERY_VMA_EXECUTABLE = 0x04, + PROCMAP_QUERY_VMA_SHARED = 0x08, + /* + * Query modifier flags. + * + * By default VMA that covers provided address is returned, or -ENOENT + * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest + * VMA with vma_start > addr will be returned if no covering VMA is + * found. + * + * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that + * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA + * to iterate all VMAs with file backing. + */ + PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10, + PROCMAP_QUERY_FILE_BACKED_VMA = 0x20, +}; + +/* + * Input/output argument structured passed into ioctl() call. It can be used + * to query a set of VMAs (Virtual Memory Areas) of a process. + * + * Each field can be one of three kinds, marked in a short comment to the + * right of the field: + * - "in", input argument, user has to provide this value, kernel doesn't modify it; + * - "out", output argument, kernel sets this field with VMA data; + * - "in/out", input and output argument; user provides initial value (used + * to specify maximum allowable buffer size), and kernel sets it to actual + * amount of data written (or zero, if there is no data). + * + * If matching VMA is found (according to criterias specified by + * query_addr/query_flags, all the out fields are filled out, and ioctl() + * returns 0. If there is no matching VMA, -ENOENT will be returned. + * In case of any other error, negative error code other than -ENOENT is + * returned. + * + * Most of the data is similar to the one returned as text in /proc/<pid>/maps + * file, but procmap_query provides more querying flexibility. There are no + * consistency guarantees between subsequent ioctl() calls, but data returned + * for matched VMA is self-consistent. + */ +struct procmap_query { + /* Query struct size, for backwards/forward compatibility */ + __u64 size; + /* + * Query flags, a combination of enum procmap_query_flags values. + * Defines query filtering and behavior, see enum procmap_query_flags. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_flags; /* in */ + /* + * Query address. By default, VMA that covers this address will + * be looked up. PROCMAP_QUERY_* flags above modify this default + * behavior further. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_addr; /* in */ + /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */ + __u64 vma_start; /* out */ + __u64 vma_end; /* out */ + /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */ + __u64 vma_flags; /* out */ + /* VMA backing page size granularity. */ + __u64 vma_page_size; /* out */ + /* + * VMA file offset. If VMA has file backing, this specifies offset + * within the file that VMA's start address corresponds to. + * Is set to zero if VMA has no backing file. + */ + __u64 vma_offset; /* out */ + /* Backing file's inode number, or zero, if VMA has no backing file. */ + __u64 inode; /* out */ + /* Backing file's device major/minor number, or zero, if VMA has no backing file. */ + __u32 dev_major; /* out */ + __u32 dev_minor; /* out */ + /* + * If set to non-zero value, signals the request to return VMA name + * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix + * appended, if file was unlinked from FS) for matched VMA. VMA name + * can also be some special name (e.g., "[heap]", "[stack]") or could + * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME). + * + * Kernel will set this field to zero, if VMA has no associated name. + * Otherwise kernel will return actual amount of bytes filled in + * user-supplied buffer (see vma_name_addr field below), including the + * terminating zero. + * + * If VMA name is longer that user-supplied maximum buffer size, + * -E2BIG error is returned. + * + * If this field is set to non-zero value, vma_name_addr should point + * to valid user space memory buffer of at least vma_name_size bytes. + * If set to zero, vma_name_addr should be set to zero as well + */ + __u32 vma_name_size; /* in/out */ + /* + * If set to non-zero value, signals the request to extract and return + * VMA's backing file's build ID, if the backing file is an ELF file + * and it contains embedded build ID. + * + * Kernel will set this field to zero, if VMA has no backing file, + * backing file is not an ELF file, or ELF file has no build ID + * embedded. + * + * Build ID is a binary value (not a string). Kernel will set + * build_id_size field to exact number of bytes used for build ID. + * If build ID is requested and present, but needs more bytes than + * user-supplied maximum buffer size (see build_id_addr field below), + * -E2BIG error will be returned. + * + * If this field is set to non-zero value, build_id_addr should point + * to valid user space memory buffer of at least build_id_size bytes. + * If set to zero, build_id_addr should be set to zero as well + */ + __u32 build_id_size; /* in/out */ + /* + * User-supplied address of a buffer of at least vma_name_size bytes + * for kernel to fill with matched VMA's name (see vma_name_size field + * description above for details). + * + * Should be set to zero if VMA name should not be returned. + */ + __u64 vma_name_addr; /* in */ + /* + * User-supplied address of a buffer of at least build_id_size bytes + * for kernel to fill with matched VMA's ELF build ID, if available + * (see build_id_size field description above for details). + * + * Should be set to zero if build ID should not be returned. + */ + __u64 build_id_addr; /* in */ +}; + #endif /* _UAPI_LINUX_FS_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h index ad5478dbad00..225bc366ffcb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/mount.h +++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h @@ -154,7 +154,7 @@ struct mount_attr { */ struct statmount { __u32 size; /* Total size, including strings */ - __u32 __spare1; + __u32 mnt_opts; /* [str] Mount options of the mount */ __u64 mask; /* What results were written */ __u32 sb_dev_major; /* Device ID */ __u32 sb_dev_minor; @@ -172,7 +172,8 @@ struct statmount { __u64 propagate_from; /* Propagation from in current namespace */ __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ - __u64 __spare2[50]; + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u64 __spare2[49]; char str[]; /* Variable size part containing strings */ }; @@ -188,10 +189,12 @@ struct mnt_id_req { __u32 spare; __u64 mnt_id; __u64 param; + __u64 mnt_ns_id; }; /* List of all mnt_id_req versions. */ #define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ /* * @mask bits for statmount(2) @@ -202,10 +205,13 @@ struct mnt_id_req { #define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ /* * Special @mnt_id values that can be passed to listmount */ #define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h index 67626d535316..887a25286441 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/stat.h +++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h index 628d46a0da92..8bf7e8a0eb6f 100644 --- a/tools/perf/trace/beauty/include/uapi/sound/asound.h +++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h @@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 18) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; @@ -334,7 +334,7 @@ union snd_pcm_sync_id { unsigned char id[16]; unsigned short id16[8]; unsigned int id32[4]; -}; +} __attribute__((deprecated)); struct snd_pcm_info { unsigned int device; /* RO/WR (control): device number */ @@ -348,7 +348,7 @@ struct snd_pcm_info { int dev_subclass; /* SNDRV_PCM_SUBCLASS_* */ unsigned int subdevices_count; unsigned int subdevices_avail; - union snd_pcm_sync_id sync; /* hardware synchronization ID */ + unsigned char pad1[16]; /* was: hardware synchronization ID */ unsigned char reserved[64]; /* reserved for future... */ }; @@ -420,7 +420,8 @@ struct snd_pcm_hw_params { unsigned int rate_num; /* R: rate numerator */ unsigned int rate_den; /* R: rate denominator */ snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */ - unsigned char reserved[64]; /* reserved for future */ + unsigned char sync[16]; /* R: synchronization ID (perfect sync - one clock source) */ + unsigned char reserved[48]; /* reserved for future */ }; enum { diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index b4cb3fe5cc25..bc4e92c0c08b 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -286,6 +286,9 @@ static void account_end_timestamp(struct lock_contention *con) goto next; for (int i = 0; i < total_cpus; i++) { + if (cpu_data[i].lock == 0) + continue; + update_lock_stat(stat_fd, -1, end_ts, aggr_mode, &cpu_data[i]); } diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 1730b852a947..6d075648d2cc 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1141,7 +1141,7 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, bool hide_unresolved) { - struct machine *machine = maps__machine(node->ms.maps); + struct machine *machine = node->ms.maps ? maps__machine(node->ms.maps) : NULL; maps__put(al->maps); al->maps = maps__get(node->ms.maps); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 3be882b2e845..31a223eaf8e6 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -20,6 +20,7 @@ #include "util/env.h" #include "util/kvm-stat.h" #include "util/kwork.h" +#include "util/sample.h" #include "util/lock-contention.h" #include <internal/lib.h> #include "../builtin.h" diff --git a/tools/power/cpupower/bindings/python/.gitignore b/tools/power/cpupower/bindings/python/.gitignore new file mode 100644 index 000000000000..5c9a1f0212dd --- /dev/null +++ b/tools/power/cpupower/bindings/python/.gitignore @@ -0,0 +1,8 @@ +__pycache__/ +raw_pylibcpupower_wrap.c +*.o +*.so +*.py +!test_raw_pylibcpupower.py +# git keeps ignoring this file, use git add -f raw_libcpupower.i +!raw_pylibcpupower.i diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile new file mode 100644 index 000000000000..dc09c5b66ead --- /dev/null +++ b/tools/power/cpupower/bindings/python/Makefile @@ -0,0 +1,33 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Makefile for libcpupower's Python bindings +# +# This Makefile expects you have already run the makefile for cpupower to build +# the .o files in the lib directory for the bindings to be created. + +CC := gcc +HAVE_SWIG := $(shell if which swig >/dev/null 2>&1; then echo 1; else echo 0; fi) +HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; else echo 0; fi) + +LIB_DIR := ../../lib +PY_INCLUDE = $(firstword $(shell python-config --includes)) +OBJECTS_LIB = $(wildcard $(LIB_DIR)/*.o) + +all: _raw_pylibcpupower.so + +_raw_pylibcpupower.so: raw_pylibcpupower_wrap.o + $(CC) -shared $(OBJECTS_LIB) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so + +raw_pylibcpupower_wrap.o: raw_pylibcpupower_wrap.c + $(CC) -fPIC -c raw_pylibcpupower_wrap.c $(PY_INCLUDE) + +raw_pylibcpupower_wrap.c: raw_pylibcpupower.i +ifeq ($(HAVE_SWIG),0) + $(error "swig was not found. Make sure you have it installed and in the PATH to generate the bindings.") +else ifeq ($(HAVE_PYCONFIG),0) + $(error "python-config was not found. Make sure you have it installed and in the PATH to generate the bindings.") +endif + swig -python raw_pylibcpupower.i + +# Will only clean the bindings folder; will not clean the actual cpupower folder +clean: + rm -f raw_pylibcpupower.py raw_pylibcpupower_wrap.c raw_pylibcpupower_wrap.o _raw_pylibcpupower.so diff --git a/tools/power/cpupower/bindings/python/README b/tools/power/cpupower/bindings/python/README new file mode 100644 index 000000000000..0a4bb2581e8a --- /dev/null +++ b/tools/power/cpupower/bindings/python/README @@ -0,0 +1,59 @@ +This folder contains the necessary files to build the Python bindings for +libcpupower (aside from the libcpupower object files). + + +requirements +------------ + +* You need the object files in the libcpupower directory compiled by +cpupower's makefile. +* The SWIG program must be installed. +* The Python's development libraries installed. + +Please check that your version of SWIG is compatible with the version of Python +installed on your machine by checking the SWIG changelog on their website. +https://swig.org/ + +Note that while SWIG itself is GPL v3+ licensed; the resulting output, +the bindings code: is permissively licensed + the license of libcpupower's .o +files. For these bindings that means GPL v2. + +Please see https://swig.org/legal.html and the discussion [1] for more details. + +[1] +https://lore.kernel.org/linux-pm/Zqv9BOjxLAgyNP5B@hatbackup/ + + +build +----- + +Install SWIG and the Python development files provided by your distribution. + +Build the object files for libcpupower by running make in the cpupower +directory. + +Return to the directory this README is in to run: + +$ make + + +testing +------- + +Please verify the _raw_pylibcpupower.so and raw_pylibcpupower.py files have +been created. + +To run the test script: + +$ python test_raw_pylibcpupower.py + + +credits +------- + +Original Bindings Author: +John B. Wyatt IV +jwyatt@redhat.com +sageofredondo@gmail.com + +Copyright (C) 2024 Red Hat diff --git a/tools/power/cpupower/bindings/python/raw_pylibcpupower.i b/tools/power/cpupower/bindings/python/raw_pylibcpupower.i new file mode 100644 index 000000000000..96556d87a745 --- /dev/null +++ b/tools/power/cpupower/bindings/python/raw_pylibcpupower.i @@ -0,0 +1,247 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +%module raw_pylibcpupower +%{ +#include "../../lib/cpupower_intern.h" +#include "../../lib/acpi_cppc.h" +#include "../../lib/cpufreq.h" +#include "../../lib/cpuidle.h" +#include "../../lib/cpupower.h" +#include "../../lib/powercap.h" +%} + +/* + * cpupower_intern.h + */ + +#define PATH_TO_CPU "/sys/devices/system/cpu/" +#define MAX_LINE_LEN 4096 +#define SYSFS_PATH_MAX 255 + +int is_valid_path(const char *path); + +unsigned int cpupower_read_sysfs(const char *path, char *buf, size_t buflen); + +unsigned int cpupower_write_sysfs(const char *path, char *buf, size_t buflen); + +/* + * acpi_cppc.h + */ + +enum acpi_cppc_value { + HIGHEST_PERF, + LOWEST_PERF, + NOMINAL_PERF, + LOWEST_NONLINEAR_PERF, + LOWEST_FREQ, + NOMINAL_FREQ, + REFERENCE_PERF, + WRAPAROUND_TIME, + MAX_CPPC_VALUE_FILES +}; + +unsigned long acpi_cppc_get_data(unsigned int cpu, + enum acpi_cppc_value which); + +/* + * cpufreq.h + */ + +struct cpufreq_policy { + unsigned long min; + unsigned long max; + char *governor; +}; + +struct cpufreq_available_governors { + char *governor; + struct cpufreq_available_governors *next; + struct cpufreq_available_governors *first; +}; + +struct cpufreq_available_frequencies { + unsigned long frequency; + struct cpufreq_available_frequencies *next; + struct cpufreq_available_frequencies *first; +}; + + +struct cpufreq_affected_cpus { + unsigned int cpu; + struct cpufreq_affected_cpus *next; + struct cpufreq_affected_cpus *first; +}; + +struct cpufreq_stats { + unsigned long frequency; + unsigned long long time_in_state; + struct cpufreq_stats *next; + struct cpufreq_stats *first; +}; + +unsigned long cpufreq_get_freq_kernel(unsigned int cpu); + +unsigned long cpufreq_get_freq_hardware(unsigned int cpu); + +#define cpufreq_get(cpu) cpufreq_get_freq_kernel(cpu); + +unsigned long cpufreq_get_transition_latency(unsigned int cpu); + +int cpufreq_get_hardware_limits(unsigned int cpu, + unsigned long *min, + unsigned long *max); + +char *cpufreq_get_driver(unsigned int cpu); + +void cpufreq_put_driver(char *ptr); + +struct cpufreq_policy *cpufreq_get_policy(unsigned int cpu); + +void cpufreq_put_policy(struct cpufreq_policy *policy); + +struct cpufreq_available_governors +*cpufreq_get_available_governors(unsigned int cpu); + +void cpufreq_put_available_governors( + struct cpufreq_available_governors *first); + +struct cpufreq_available_frequencies +*cpufreq_get_available_frequencies(unsigned int cpu); + +void cpufreq_put_available_frequencies( + struct cpufreq_available_frequencies *first); + +struct cpufreq_available_frequencies +*cpufreq_get_boost_frequencies(unsigned int cpu); + +void cpufreq_put_boost_frequencies( + struct cpufreq_available_frequencies *first); + +struct cpufreq_affected_cpus *cpufreq_get_affected_cpus(unsigned + int cpu); + +void cpufreq_put_affected_cpus(struct cpufreq_affected_cpus *first); + +struct cpufreq_affected_cpus *cpufreq_get_related_cpus(unsigned + int cpu); + +void cpufreq_put_related_cpus(struct cpufreq_affected_cpus *first); + +struct cpufreq_stats *cpufreq_get_stats(unsigned int cpu, + unsigned long long *total_time); + +void cpufreq_put_stats(struct cpufreq_stats *stats); + +unsigned long cpufreq_get_transitions(unsigned int cpu); + +int cpufreq_set_policy(unsigned int cpu, struct cpufreq_policy *policy); + +int cpufreq_modify_policy_min(unsigned int cpu, unsigned long min_freq); + +int cpufreq_modify_policy_max(unsigned int cpu, unsigned long max_freq); + +int cpufreq_modify_policy_governor(unsigned int cpu, char *governor); + +int cpufreq_set_frequency(unsigned int cpu, + unsigned long target_frequency); + +unsigned long cpufreq_get_sysfs_value_from_table(unsigned int cpu, + const char **table, + unsigned int index, + unsigned int size); + +/* + * cpuidle.h + */ + +int cpuidle_is_state_disabled(unsigned int cpu, + unsigned int idlestate); +int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate, + unsigned int disable); +unsigned long cpuidle_state_latency(unsigned int cpu, + unsigned int idlestate); +unsigned long cpuidle_state_usage(unsigned int cpu, + unsigned int idlestate); +unsigned long long cpuidle_state_time(unsigned int cpu, + unsigned int idlestate); +char *cpuidle_state_name(unsigned int cpu, + unsigned int idlestate); +char *cpuidle_state_desc(unsigned int cpu, + unsigned int idlestate); +unsigned int cpuidle_state_count(unsigned int cpu); + +char *cpuidle_get_governor(void); + +char *cpuidle_get_driver(void); + +/* + * cpupower.h + */ + +struct cpupower_topology { + /* Amount of CPU cores, packages and threads per core in the system */ + unsigned int cores; + unsigned int pkgs; + unsigned int threads; /* per core */ + + /* Array gets mallocated with cores entries, holding per core info */ + struct cpuid_core_info *core_info; +}; + +struct cpuid_core_info { + int pkg; + int core; + int cpu; + + /* flags */ + unsigned int is_online:1; +}; + +int get_cpu_topology(struct cpupower_topology *cpu_top); + +void cpu_topology_release(struct cpupower_topology cpu_top); + +int cpupower_is_cpu_online(unsigned int cpu); + +/* + * powercap.h + */ + +struct powercap_zone { + char name[MAX_LINE_LEN]; + /* + * sys_name relative to PATH_TO_POWERCAP, + * do not forget the / in between + */ + char sys_name[SYSFS_PATH_MAX]; + int tree_depth; + struct powercap_zone *parent; + struct powercap_zone *children[POWERCAP_MAX_CHILD_ZONES]; + /* More possible caps or attributes to be added? */ + uint32_t has_power_uw:1, + has_energy_uj:1; + +}; + +int powercap_walk_zones(struct powercap_zone *zone, + int (*f)(struct powercap_zone *zone)); + +struct powercap_zone *powercap_init_zones(void); + +int powercap_get_enabled(int *mode); + +int powercap_set_enabled(int mode); + +int powercap_get_driver(char *driver, int buflen); + +int powercap_get_max_energy_range_uj(struct powercap_zone *zone, uint64_t *val); + +int powercap_get_energy_uj(struct powercap_zone *zone, uint64_t *val); + +int powercap_get_max_power_range_uw(struct powercap_zone *zone, uint64_t *val); + +int powercap_get_power_uw(struct powercap_zone *zone, uint64_t *val); + +int powercap_zone_get_enabled(struct powercap_zone *zone, int *mode); + +int powercap_zone_set_enabled(struct powercap_zone *zone, int mode); diff --git a/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py new file mode 100755 index 000000000000..3d6f62b9556a --- /dev/null +++ b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py @@ -0,0 +1,42 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only + +import raw_pylibcpupower as p + +# Simple function call + +""" +Get cstate count +""" +cpu_cstates_count = p.cpuidle_state_count(0) +if cpu_cstates_count > -1: + print(f"CPU 0 has {cpu_cstates_count} c-states") +else: + print(f"cstate count error: return code: {cpu_cstates_count}") + +""" +Disable cstate (will fail if the above is 0, ex: a virtual machine) +""" +cstate_disabled = p.cpuidle_state_disable(0, 0, 1) +if cpu_cstates_count == 0: + print(f"CPU 0 has {cpu_cstates_count} c-states") +else: + print(f"cstate count error: return code: {cpu_cstates_count}") + +match cstate_disabled: + case 0: + print(f"CPU state disabled") + case -1: + print(f"Idlestate not available") + case _: + print(f"Not documented") + + +# Pointer example + +topo = p.cpupower_topology() +total_cpus = p.get_cpu_topology(topo) +if total_cpus > 0: + print(f"Number of total cpus: {total_cpus} and number of cores: {topo.cores}") +else: + print(f"Error: could not get cpu topology") diff --git a/tools/power/cpupower/lib/cpuidle.c b/tools/power/cpupower/lib/cpuidle.c index 479c5971aa6d..0ecac009273c 100644 --- a/tools/power/cpupower/lib/cpuidle.c +++ b/tools/power/cpupower/lib/cpuidle.c @@ -116,6 +116,7 @@ enum idlestate_value { IDLESTATE_USAGE, IDLESTATE_POWER, IDLESTATE_LATENCY, + IDLESTATE_RESIDENCY, IDLESTATE_TIME, IDLESTATE_DISABLE, MAX_IDLESTATE_VALUE_FILES @@ -125,6 +126,7 @@ static const char *idlestate_value_files[MAX_IDLESTATE_VALUE_FILES] = { [IDLESTATE_USAGE] = "usage", [IDLESTATE_POWER] = "power", [IDLESTATE_LATENCY] = "latency", + [IDLESTATE_RESIDENCY] = "residency", [IDLESTATE_TIME] = "time", [IDLESTATE_DISABLE] = "disable", }; @@ -254,6 +256,12 @@ unsigned long cpuidle_state_latency(unsigned int cpu, return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_LATENCY); } +unsigned long cpuidle_state_residency(unsigned int cpu, + unsigned int idlestate) +{ + return cpuidle_state_get_one_value(cpu, idlestate, IDLESTATE_RESIDENCY); +} + unsigned long cpuidle_state_usage(unsigned int cpu, unsigned int idlestate) { diff --git a/tools/power/cpupower/lib/cpuidle.h b/tools/power/cpupower/lib/cpuidle.h index 2e10fead2e1e..2ab404d40259 100644 --- a/tools/power/cpupower/lib/cpuidle.h +++ b/tools/power/cpupower/lib/cpuidle.h @@ -8,6 +8,8 @@ int cpuidle_state_disable(unsigned int cpu, unsigned int idlestate, unsigned int disable); unsigned long cpuidle_state_latency(unsigned int cpu, unsigned int idlestate); +unsigned long cpuidle_state_residency(unsigned int cpu, + unsigned int idlestate); unsigned long cpuidle_state_usage(unsigned int cpu, unsigned int idlestate); unsigned long long cpuidle_state_time(unsigned int cpu, diff --git a/tools/power/cpupower/lib/powercap.c b/tools/power/cpupower/lib/powercap.c index a7a59c6bacda..94a0c69e55ef 100644 --- a/tools/power/cpupower/lib/powercap.c +++ b/tools/power/cpupower/lib/powercap.c @@ -78,6 +78,14 @@ int powercap_get_enabled(int *mode) } /* + * TODO: implement function. Returns dummy 0 for now. + */ +int powercap_set_enabled(int mode) +{ + return 0; +} + +/* * Hardcoded, because rapl is the only powercap implementation - * this needs to get more generic if more powercap implementations * should show up diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c index 44126a87fa7a..e0d17f0de3fe 100644 --- a/tools/power/cpupower/utils/cpuidle-info.c +++ b/tools/power/cpupower/utils/cpuidle-info.c @@ -64,6 +64,8 @@ static void cpuidle_cpu_output(unsigned int cpu, int verbose) printf(_("Latency: %lu\n"), cpuidle_state_latency(cpu, idlestate)); + printf(_("Residency: %lu\n"), + cpuidle_state_residency(cpu, idlestate)); printf(_("Usage: %lu\n"), cpuidle_state_usage(cpu, idlestate)); printf(_("Duration: %llu\n"), @@ -115,6 +117,8 @@ static void proc_cpuidle_cpu_output(unsigned int cpu) printf(_("promotion[--] demotion[--] ")); printf(_("latency[%03lu] "), cpuidle_state_latency(cpu, cstate)); + printf(_("residency[%05lu] "), + cpuidle_state_residency(cpu, cstate)); printf(_("usage[%08lu] "), cpuidle_state_usage(cpu, cstate)); printf(_("duration[%020Lu] \n"), diff --git a/tools/power/pm-graph/.gitignore b/tools/power/pm-graph/.gitignore new file mode 100644 index 000000000000..37762a8a06d6 --- /dev/null +++ b/tools/power/pm-graph/.gitignore @@ -0,0 +1,3 @@ +# sleepgraph.py artifacts +suspend-[0-9]*-[0-9]* +suspend-[0-9]*-[0-9]*-x[0-9]* diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile index b5310832c19c..aeddbaf2d4c4 100644 --- a/tools/power/pm-graph/Makefile +++ b/tools/power/pm-graph/Makefile @@ -1,51 +1,86 @@ # SPDX-License-Identifier: GPL-2.0 -PREFIX ?= /usr -DESTDIR ?= +# +# Copyright (c) 2013, Intel Corporation. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms and conditions of the GNU General Public License, +# version 2, as published by the Free Software Foundation. +# +# This program is distributed in the hope it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# Authors: +# Todd Brandt <todd.e.brandt@linux.intel.com> + +# Prefix to the directories we're installing to +DESTDIR ?= + +# Directory definitions. These are default and most probably +# do not need to be changed. Please note that DESTDIR is +# added in front of any of them + +BINDIR ?= /usr/bin +MANDIR ?= /usr/share/man +LIBDIR ?= /usr/lib + +# Toolchain: what tools do we use, and what options do they need: +INSTALL = /usr/bin/install +INSTALL_DATA = ${INSTALL} -m 644 all: @echo "Nothing to build" install : uninstall - install -d $(DESTDIR)$(PREFIX)/lib/pm-graph - install sleepgraph.py $(DESTDIR)$(PREFIX)/lib/pm-graph - install bootgraph.py $(DESTDIR)$(PREFIX)/lib/pm-graph - install -d $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/cgskip.txt $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/freeze-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/freeze.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/freeze-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/standby-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/standby.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/standby-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/suspend-callgraph.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/suspend.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/suspend-dev.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - install -m 644 config/suspend-x2-proc.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config - - install -d $(DESTDIR)$(PREFIX)/bin - ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(PREFIX)/bin/bootgraph - ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(PREFIX)/bin/sleepgraph - - install -d $(DESTDIR)$(PREFIX)/share/man/man8 - install bootgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8 - install sleepgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8 + $(INSTALL) -d $(DESTDIR)$(LIBDIR)/pm-graph + $(INSTALL) sleepgraph.py $(DESTDIR)$(LIBDIR)/pm-graph + $(INSTALL) bootgraph.py $(DESTDIR)$(LIBDIR)/pm-graph + $(INSTALL) -d $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/cgskip.txt $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/freeze-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/freeze.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/freeze-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/standby-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/standby.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/standby-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/suspend-callgraph.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/suspend.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/suspend-dev.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + $(INSTALL_DATA) config/suspend-x2-proc.cfg $(DESTDIR)$(LIBDIR)/pm-graph/config + + $(INSTALL) -d $(DESTDIR)$(BINDIR) + ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(BINDIR)/bootgraph + ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(BINDIR)/sleepgraph + + $(INSTALL) -d $(DESTDIR)$(MANDIR)/man8 + $(INSTALL) bootgraph.8 $(DESTDIR)$(MANDIR)/man8 + $(INSTALL) sleepgraph.8 $(DESTDIR)$(MANDIR)/man8 uninstall : - rm -f $(DESTDIR)$(PREFIX)/share/man/man8/bootgraph.8 - rm -f $(DESTDIR)$(PREFIX)/share/man/man8/sleepgraph.8 + rm -f $(DESTDIR)$(MANDIR)/man8/bootgraph.8 + rm -f $(DESTDIR)$(MANDIR)/man8/sleepgraph.8 - rm -f $(DESTDIR)$(PREFIX)/bin/bootgraph - rm -f $(DESTDIR)$(PREFIX)/bin/sleepgraph + rm -f $(DESTDIR)$(BINDIR)/bootgraph + rm -f $(DESTDIR)$(BINDIR)/sleepgraph - rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/config/* - if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/config ] ; then \ - rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/config; \ + rm -f $(DESTDIR)$(LIBDIR)/pm-graph/config/* + if [ -d $(DESTDIR)$(LIBDIR)/pm-graph/config ] ; then \ + rmdir $(DESTDIR)$(LIBDIR)/pm-graph/config; \ fi; - rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__/* - if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__ ] ; then \ - rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__; \ + rm -f $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__/* + if [ -d $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__ ] ; then \ + rmdir $(DESTDIR)$(LIBDIR)/pm-graph/__pycache__; \ fi; - rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/* - if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph ] ; then \ - rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph; \ + rm -f $(DESTDIR)$(LIBDIR)/pm-graph/* + if [ -d $(DESTDIR)$(LIBDIR)/pm-graph ] ; then \ + rmdir $(DESTDIR)$(LIBDIR)/pm-graph; \ fi; + +help: + @echo 'Building targets:' + @echo ' all - Nothing to build' + @echo ' install - Install the program and create necessary directories' + @echo ' uninstall - Remove installed files and directories' + +.PHONY: all install uninstall help diff --git a/tools/spi/spidev_fdx.c b/tools/spi/spidev_fdx.c index 7d2a867cd4ae..bc9c4f6c3ba8 100644 --- a/tools/spi/spidev_fdx.c +++ b/tools/spi/spidev_fdx.c @@ -99,7 +99,7 @@ static void dumpstat(const char *name, int fd) return; } - printf("%s: spi mode 0x%x, %d bits %sper word, %d Hz max\n", + printf("%s: spi mode 0x%x, %d bits %sper word, %u Hz max\n", name, mode, bits, lsb ? "(lsb first) " : "", speed); } diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 030b388800f0..3d1ca9e38b1f 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -14,6 +14,7 @@ ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat +ldflags-y += --wrap=cxl_setup_parent_dport DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 6f737941dc0e..d619672faa49 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -299,6 +299,18 @@ void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL); +void __wrap_cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (!ops || !ops->is_mock_port(dport->dport_dev)) + cxl_setup_parent_dport(host, dport); + + put_cxl_mock_ops(index); +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_setup_parent_dport, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index bc8fe9e8f7f2..3b7df5477317 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -65,9 +65,11 @@ TARGETS += net/af_unix TARGETS += net/forwarding TARGETS += net/hsr TARGETS += net/mptcp +TARGETS += net/netfilter TARGETS += net/openvswitch +TARGETS += net/packetdrill +TARGETS += net/rds TARGETS += net/tcp_ao -TARGETS += net/netfilter TARGETS += nsfs TARGETS += perf_events TARGETS += pidfd diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index d8909b2b535a..f2d6007a2b98 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -156,6 +156,12 @@ static void pmull_sigill(void) asm volatile(".inst 0x0ee0e000" : : : ); } +static void poe_sigill(void) +{ + /* mrs x0, POR_EL0 */ + asm volatile("mrs x0, S3_3_C10_C2_4" : : : "x0"); +} + static void rng_sigill(void) { asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0"); @@ -602,6 +608,14 @@ static const struct hwcap_data { .sigill_fn = pmull_sigill, }, { + .name = "POE", + .at_hwcap = AT_HWCAP2, + .hwcap_bit = HWCAP2_POE, + .cpuinfo = "poe", + .sigill_fn = poe_sigill, + .sigill_reliable = true, + }, + { .name = "RNG", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_RNG, diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c index e4fa507cbdd0..b51d21f78cf9 100644 --- a/tools/testing/selftests/arm64/abi/ptrace.c +++ b/tools/testing/selftests/arm64/abi/ptrace.c @@ -163,10 +163,10 @@ static void test_hw_debug(pid_t child, int type, const char *type_name) static int do_child(void) { if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) - ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + ksft_exit_fail_perror("PTRACE_TRACEME"); if (raise(SIGSTOP)) - ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + ksft_exit_fail_perror("raise(SIGSTOP)"); return EXIT_SUCCESS; } diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore index 1ce5b5eac386..b2f2bfd5c6aa 100644 --- a/tools/testing/selftests/arm64/signal/.gitignore +++ b/tools/testing/selftests/arm64/signal/.gitignore @@ -2,6 +2,7 @@ mangle_* fake_sigreturn_* fpmr_* +poe_* sme_* ssve_* sve_* diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile index 8f5febaf1a9a..edb3613513b8 100644 --- a/tools/testing/selftests/arm64/signal/Makefile +++ b/tools/testing/selftests/arm64/signal/Makefile @@ -23,7 +23,7 @@ $(TEST_GEN_PROGS): $(PROGS) # Common test-unit targets to build common-layout test-cases executables # Needs secondary expansion to properly include the testcase c-file in pre-reqs COMMON_SOURCES := test_signals.c test_signals_utils.c testcases/testcases.c \ - signals.S + signals.S sve_helpers.c COMMON_HEADERS := test_signals.h test_signals_utils.h testcases/testcases.h .SECONDEXPANSION: diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.c b/tools/testing/selftests/arm64/signal/sve_helpers.c new file mode 100644 index 000000000000..0acc121af306 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/sve_helpers.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2024 ARM Limited + * + * Common helper functions for SVE and SME functionality. + */ + +#include <stdbool.h> +#include <kselftest.h> +#include <asm/sigcontext.h> +#include <sys/prctl.h> + +unsigned int vls[SVE_VQ_MAX]; +unsigned int nvls; + +int sve_fill_vls(bool use_sme, int min_vls) +{ + int vq, vl; + int pr_set_vl = use_sme ? PR_SME_SET_VL : PR_SVE_SET_VL; + int len_mask = use_sme ? PR_SME_VL_LEN_MASK : PR_SVE_VL_LEN_MASK; + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(pr_set_vl, vq * 16); + if (vl == -1) + return KSFT_FAIL; + + vl &= len_mask; + + /* + * Unlike SVE, SME does not require the minimum vector length + * to be implemented, or the VLs to be consecutive, so any call + * to the prctl might return the single implemented VL, which + * might be larger than 16. So to avoid this loop never + * terminating, bail out here when we find a higher VL than + * we asked for. + * See the ARM ARM, DDI 0487K.a, B1.4.2: I_QQRNR and I_NWYBP. + */ + if (vq < sve_vq_from_vl(vl)) + break; + + /* Skip missing VLs */ + vq = sve_vq_from_vl(vl); + + vls[nvls++] = vl; + } + + if (nvls < min_vls) { + fprintf(stderr, "Only %d VL supported\n", nvls); + return KSFT_SKIP; + } + + return KSFT_PASS; +} diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.h b/tools/testing/selftests/arm64/signal/sve_helpers.h new file mode 100644 index 000000000000..50948ce471cc --- /dev/null +++ b/tools/testing/selftests/arm64/signal/sve_helpers.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 ARM Limited + * + * Common helper functions for SVE and SME functionality. + */ + +#ifndef __SVE_HELPERS_H__ +#define __SVE_HELPERS_H__ + +#include <stdbool.h> + +#define VLS_USE_SVE false +#define VLS_USE_SME true + +extern unsigned int vls[]; +extern unsigned int nvls; + +int sve_fill_vls(bool use_sme, int min_vls); + +#endif diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c index ebd5815b54bb..dfd6a2badf9f 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c @@ -6,44 +6,28 @@ * handler, this is not supported and is expected to segfault. */ +#include <kselftest.h> #include <signal.h> #include <ucontext.h> #include <sys/prctl.h> #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" struct fake_sigframe sf; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sme_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SME, 2); - /* - * Enumerate up to SVE_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SVE_SET_VL, vq * 16); - if (vl == -1) - return false; + if (!res) + return true; - vl &= PR_SME_VL_LEN_MASK; + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least two VLs */ - if (nvls < 2) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } - - return true; + return false; } static int fake_sigreturn_ssve_change_vl(struct tdescr *td, @@ -51,30 +35,30 @@ static int fake_sigreturn_ssve_change_vl(struct tdescr *td, { size_t resv_sz, offset; struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf); - struct sve_context *sve; + struct za_context *za; /* Get a signal context with a SME ZA frame in it */ if (!get_current_context(td, &sf.uc, sizeof(sf.uc))) return 1; resv_sz = GET_SF_RESV_SIZE(sf); - head = get_header(head, SVE_MAGIC, resv_sz, &offset); + head = get_header(head, ZA_MAGIC, resv_sz, &offset); if (!head) { - fprintf(stderr, "No SVE context\n"); + fprintf(stderr, "No ZA context\n"); return 1; } - if (head->size != sizeof(struct sve_context)) { + if (head->size != sizeof(struct za_context)) { fprintf(stderr, "Register data present, aborting\n"); return 1; } - sve = (struct sve_context *)head; + za = (struct za_context *)head; /* No changes are supported; init left us at minimum VL so go to max */ fprintf(stderr, "Attempting to change VL from %d to %d\n", - sve->vl, vls[0]); - sve->vl = vls[0]; + za->vl, vls[0]); + za->vl = vls[0]; fake_sigreturn(&sf, sizeof(sf), 0); diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c index e2a452190511..e1ccf8f85a70 100644 --- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c @@ -12,40 +12,22 @@ #include <sys/prctl.h> #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" struct fake_sigframe sf; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sve_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SVE, 2); - /* - * Enumerate up to SVE_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SVE_SET_VL, vq * 16); - if (vl == -1) - return false; + if (!res) + return true; - vl &= PR_SVE_VL_LEN_MASK; - - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least two VLs */ - if (nvls < 2) { - fprintf(stderr, "Only %d VL supported\n", nvls); + if (res == KSFT_SKIP) td->result = KSFT_SKIP; - return false; - } - return true; + return false; } static int fake_sigreturn_sve_change_vl(struct tdescr *td, diff --git a/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c b/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c new file mode 100644 index 000000000000..36bd9940ee05 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/poe_siginfo.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 Arm Limited + * + * Verify that the POR_EL0 register context in signal frames is set up as + * expected. + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <unistd.h> +#include <asm/sigcontext.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +static union { + ucontext_t uc; + char buf[1024 * 128]; +} context; + +#define SYS_POR_EL0 "S3_3_C10_C2_4" + +static uint64_t get_por_el0(void) +{ + uint64_t val; + + asm volatile( + "mrs %0, " SYS_POR_EL0 "\n" + : "=r"(val) + : + : ); + + return val; +} + +int poe_present(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); + struct poe_context *poe_ctx; + size_t offset; + bool in_sigframe; + bool have_poe; + __u64 orig_poe; + + have_poe = getauxval(AT_HWCAP2) & HWCAP2_POE; + if (have_poe) + orig_poe = get_por_el0(); + + if (!get_current_context(td, &context.uc, sizeof(context))) + return 1; + + poe_ctx = (struct poe_context *) + get_header(head, POE_MAGIC, td->live_sz, &offset); + + in_sigframe = poe_ctx != NULL; + + fprintf(stderr, "POR_EL0 sigframe %s on system %s POE\n", + in_sigframe ? "present" : "absent", + have_poe ? "with" : "without"); + + td->pass = (in_sigframe == have_poe); + + /* + * Check that the value we read back was the one present at + * the time that the signal was triggered. + */ + if (have_poe && poe_ctx) { + if (poe_ctx->por_el0 != orig_poe) { + fprintf(stderr, "POR_EL0 in frame is %llx, was %llx\n", + poe_ctx->por_el0, orig_poe); + td->pass = false; + } + } + + return 0; +} + +struct tdescr tde = { + .name = "POR_EL0", + .descr = "Validate that POR_EL0 is present as expected", + .timeout = 3, + .run = poe_present, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c index 3d37daafcff5..6dbe48cf8b09 100644 --- a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c @@ -6,51 +6,31 @@ * set up as expected. */ +#include <kselftest.h> #include <signal.h> #include <ucontext.h> #include <sys/prctl.h> #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" static union { ucontext_t uc; char buf[1024 * 64]; } context; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sme_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SME, 1); - /* - * Enumerate up to SVE_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SME_SET_VL, vq * 16); - if (vl == -1) - return false; - - vl &= PR_SME_VL_LEN_MASK; - - /* Did we find the lowest supported VL? */ - if (vq < sve_vq_from_vl(vl)) - break; + if (!res) + return true; - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least one VL */ - if (nvls < 1) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - return true; + return false; } static void setup_ssve_regs(void) diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c index 9dc5f128bbc0..5557e116e973 100644 --- a/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c @@ -6,51 +6,31 @@ * signal frames is set up as expected when enabled simultaneously. */ +#include <kselftest.h> #include <signal.h> #include <ucontext.h> #include <sys/prctl.h> #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" static union { ucontext_t uc; char buf[1024 * 128]; } context; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sme_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SME, 1); - /* - * Enumerate up to SVE_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SME_SET_VL, vq * 16); - if (vl == -1) - return false; - - vl &= PR_SME_VL_LEN_MASK; - - /* Did we find the lowest supported VL? */ - if (vq < sve_vq_from_vl(vl)) - break; + if (!res) + return true; - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least one VL */ - if (nvls < 1) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - return true; + return false; } static void setup_regs(void) diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c index 8b16eabbb769..8143eb1c58c1 100644 --- a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c @@ -6,47 +6,31 @@ * expected. */ +#include <kselftest.h> #include <signal.h> #include <ucontext.h> #include <sys/prctl.h> #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" static union { ucontext_t uc; char buf[1024 * 64]; } context; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sve_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SVE, 1); - /* - * Enumerate up to SVE_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SVE_SET_VL, vq * 16); - if (vl == -1) - return false; - - vl &= PR_SVE_VL_LEN_MASK; - - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); + if (!res) + return true; - vls[nvls++] = vl; - } - - /* We need at least one VL */ - if (nvls < 1) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - return true; + return false; } static void setup_sve_regs(void) diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index 674b88cc8c39..e6daa94fcd2e 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -6,29 +6,6 @@ #include "testcases.h" -struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, - size_t resv_sz, size_t *offset) -{ - size_t offs = 0; - struct _aarch64_ctx *found = NULL; - - if (!head || resv_sz < HDR_SZ) - return found; - - while (offs <= resv_sz - HDR_SZ && - head->magic != magic && head->magic) { - offs += head->size; - head = GET_RESV_NEXT_HEAD(head); - } - if (head->magic == magic) { - found = head; - if (offset) - *offset = offs; - } - - return found; -} - bool validate_extra_context(struct extra_context *extra, char **err, void **extra_data, size_t *extra_size) { @@ -184,6 +161,10 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) if (head->size != sizeof(struct esr_context)) *err = "Bad size for esr_context"; break; + case POE_MAGIC: + if (head->size != sizeof(struct poe_context)) + *err = "Bad size for poe_context"; + break; case TPIDR2_MAGIC: if (head->size != sizeof(struct tpidr2_context)) *err = "Bad size for tpidr2_context"; diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h index 7727126347e0..9872b8912714 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.h +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h @@ -26,6 +26,9 @@ #define HDR_SZ \ sizeof(struct _aarch64_ctx) +#define GET_UC_RESV_HEAD(uc) \ + (struct _aarch64_ctx *)(&(uc->uc_mcontext.__reserved)) + #define GET_SF_RESV_HEAD(sf) \ (struct _aarch64_ctx *)(&(sf).uc.uc_mcontext.__reserved) @@ -88,8 +91,29 @@ struct fake_sigframe { bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err); -struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, - size_t resv_sz, size_t *offset); +static inline struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, + size_t resv_sz, size_t *offset) +{ + size_t offs = 0; + struct _aarch64_ctx *found = NULL; + + if (!head || resv_sz < HDR_SZ) + return found; + + while (offs <= resv_sz - HDR_SZ && + head->magic != magic && head->magic) { + offs += head->size; + head = GET_RESV_NEXT_HEAD(head); + } + if (head->magic == magic) { + found = head; + if (offset) + *offset = offs; + } + + return found; +} + static inline struct _aarch64_ctx *get_terminator(struct _aarch64_ctx *head, size_t resv_sz, diff --git a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c index 4d6f94b6178f..ce26e9c2fa5e 100644 --- a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c @@ -6,47 +6,31 @@ * expected. */ +#include <kselftest.h> #include <signal.h> #include <ucontext.h> #include <sys/prctl.h> #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" static union { ucontext_t uc; char buf[1024 * 128]; } context; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sme_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SME, 1); - /* - * Enumerate up to SME_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SME_SET_VL, vq * 16); - if (vl == -1) - return false; - - vl &= PR_SME_VL_LEN_MASK; - - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); + if (!res) + return true; - vls[nvls++] = vl; - } - - /* We need at least one VL */ - if (nvls < 1) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - return true; + return false; } static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c index 174ad6656696..b9e13f27f1f9 100644 --- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c @@ -6,51 +6,31 @@ * expected. */ +#include <kselftest.h> #include <signal.h> #include <ucontext.h> #include <sys/prctl.h> #include "test_signals_utils.h" +#include "sve_helpers.h" #include "testcases.h" static union { ucontext_t uc; char buf[1024 * 128]; } context; -static unsigned int vls[SVE_VQ_MAX]; -unsigned int nvls = 0; static bool sme_get_vls(struct tdescr *td) { - int vq, vl; + int res = sve_fill_vls(VLS_USE_SME, 1); - /* - * Enumerate up to SME_VQ_MAX vector lengths - */ - for (vq = SVE_VQ_MAX; vq > 0; --vq) { - vl = prctl(PR_SME_SET_VL, vq * 16); - if (vl == -1) - return false; - - vl &= PR_SME_VL_LEN_MASK; - - /* Did we find the lowest supported VL? */ - if (vq < sve_vq_from_vl(vl)) - break; + if (!res) + return true; - /* Skip missing VLs */ - vq = sve_vq_from_vl(vl); - - vls[nvls++] = vl; - } - - /* We need at least one VL */ - if (nvls < 1) { - fprintf(stderr, "Only %d VL supported\n", nvls); - return false; - } + if (res == KSFT_SKIP) + td->result = KSFT_SKIP; - return true; + return false; } static void setup_za_regs(void) diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index dd49c1d23a60..81d4757ecd4c 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -713,7 +713,7 @@ $(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp # Make sure we are able to include and link libbpf against c++. $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) - $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ + $(Q)$(CXX) $(subst -D_GNU_SOURCE=,,$(CFLAGS)) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h index 11ee801e75e7..6c3b4d4f173a 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h @@ -34,6 +34,12 @@ DECLARE_TRACE(bpf_testmod_test_write_bare, TP_ARGS(task, ctx) ); +/* Used in bpf_testmod_test_read() to test __nullable suffix */ +DECLARE_TRACE(bpf_testmod_test_nullable_bare, + TP_PROTO(struct bpf_testmod_test_read_ctx *ctx__nullable), + TP_ARGS(ctx__nullable) +); + #undef BPF_TESTMOD_DECLARE_TRACE #ifdef DECLARE_TRACE_WRITABLE #define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index fd28c1157bd3..a32771da4293 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -356,6 +356,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, if (bpf_testmod_loop_test(101) > 100) trace_bpf_testmod_test_read(current, &ctx); + trace_bpf_testmod_test_nullable_bare(NULL); + /* Magic number to enable writable tp */ if (len == 64) { struct bpf_testmod_test_writable_ctx writable = { diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 00965a6e83bb..61de88cf4ad0 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -3551,6 +3551,40 @@ static struct btf_raw_test raw_tests[] = { BTF_STR_SEC("\0x\0?.foo bar:buz"), }, { + .descr = "datasec: name with non-printable first char not is ok", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC ?.data */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0x\0\7foo"), + .err_str = "Invalid name", + .btf_load_err = true, +}, +{ + .descr = "datasec: name '\\0' is not ok", + .raw_types = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC \0 */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + BTF_END_RAW, + }, + BTF_STR_SEC("\0x\0"), + .err_str = "Invalid name", + .btf_load_err = true, +}, +{ .descr = "type name '?foo' is not ok", .raw_types = { /* union ?foo; */ diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index 7cfac53c0d58..b614a5272dfd 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -9,6 +9,7 @@ enum test_setup_type { SETUP_SYSCALL_SLEEP, SETUP_SKB_PROG, + SETUP_SKB_PROG_TP, }; static struct { @@ -28,6 +29,7 @@ static struct { {"test_dynptr_clone", SETUP_SKB_PROG}, {"test_dynptr_skb_no_buff", SETUP_SKB_PROG}, {"test_dynptr_skb_strcmp", SETUP_SKB_PROG}, + {"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP}, }; static void verify_success(const char *prog_name, enum test_setup_type setup_type) @@ -35,7 +37,7 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ struct dynptr_success *skel; struct bpf_program *prog; struct bpf_link *link; - int err; + int err; skel = dynptr_success__open(); if (!ASSERT_OK_PTR(skel, "dynptr_success__open")) @@ -47,7 +49,7 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) goto cleanup; - bpf_program__set_autoload(prog, true); + bpf_program__set_autoload(prog, true); err = dynptr_success__load(skel); if (!ASSERT_OK(err, "dynptr_success__load")) @@ -87,6 +89,37 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ break; } + case SETUP_SKB_PROG_TP: + { + struct __sk_buff skb = {}; + struct bpf_object *obj; + int aux_prog_fd; + + /* Just use its test_run to trigger kfree_skb tracepoint */ + err = bpf_prog_test_load("./test_pkt_access.bpf.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &aux_prog_fd); + if (!ASSERT_OK(err, "prog_load sched cls")) + goto cleanup; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .ctx_in = &skb, + .ctx_size_in = sizeof(skb), + ); + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "bpf_program__attach")) + goto cleanup; + + err = bpf_prog_test_run_opts(aux_prog_fd, &topts); + bpf_link__destroy(link); + + if (!ASSERT_OK(err, "test_run")) + goto cleanup; + + break; + } } ASSERT_EQ(skel->bss->err, 0, "err"); diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c index 9e5f38739104..6b3078dd5645 100644 --- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c +++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c @@ -378,8 +378,8 @@ struct test tests[] = { .iph_inner.ihl = 5, .iph_inner.protocol = IPPROTO_TCP, .iph_inner.tot_len = - __bpf_constant_htons(MAGIC_BYTES) - - sizeof(struct iphdr), + __bpf_constant_htons(MAGIC_BYTES - + sizeof(struct iphdr)), .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, @@ -407,8 +407,8 @@ struct test tests[] = { .iph_inner.ihl = 5, .iph_inner.protocol = IPPROTO_TCP, .iph_inner.tot_len = - __bpf_constant_htons(MAGIC_BYTES) - - sizeof(struct iphdr), + __bpf_constant_htons(MAGIC_BYTES - + sizeof(struct iphdr)), .tcp.doff = 5, .tcp.source = 80, .tcp.dest = 8080, @@ -436,8 +436,8 @@ struct test tests[] = { .iph_inner.ihl = 5, .iph_inner.protocol = IPPROTO_TCP, .iph_inner.tot_len = - __bpf_constant_htons(MAGIC_BYTES) - - sizeof(struct iphdr), + __bpf_constant_htons(MAGIC_BYTES - + sizeof(struct iphdr)), .tcp.doff = 5, .tcp.source = 99, .tcp.dest = 9090, diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c index 7d4a9b3d3722..e12255121c15 100644 --- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c +++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c @@ -154,6 +154,51 @@ err_out: close(sfd); } +static void test_nonstandard_opt(int family) +{ + struct setget_sockopt__bss *bss = skel->bss; + struct bpf_link *getsockopt_link = NULL; + int sfd = -1, fd = -1, cfd = -1, flags; + socklen_t flagslen = sizeof(flags); + + memset(bss, 0, sizeof(*bss)); + + sfd = start_server(family, SOCK_STREAM, + family == AF_INET6 ? addr6_str : addr4_str, 0, 0); + if (!ASSERT_GE(sfd, 0, "start_server")) + return; + + fd = connect_to_fd(sfd, 0); + if (!ASSERT_GE(fd, 0, "connect_to_fd_server")) + goto err_out; + + /* cgroup/getsockopt prog will intercept getsockopt() below and + * retrieve the tcp socket bpf_sock_ops_cb_flags value for the + * accept()ed socket; this was set earlier in the passive established + * callback for the accept()ed socket via bpf_setsockopt(). + */ + getsockopt_link = bpf_program__attach_cgroup(skel->progs._getsockopt, cg_fd); + if (!ASSERT_OK_PTR(getsockopt_link, "getsockopt prog")) + goto err_out; + + cfd = accept(sfd, NULL, 0); + if (!ASSERT_GE(cfd, 0, "accept")) + goto err_out; + + if (!ASSERT_OK(getsockopt(cfd, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, &flagslen), + "getsockopt_flags")) + goto err_out; + ASSERT_EQ(flags & BPF_SOCK_OPS_STATE_CB_FLAG, BPF_SOCK_OPS_STATE_CB_FLAG, + "cb_flags_set"); +err_out: + close(sfd); + if (fd != -1) + close(fd); + if (cfd != -1) + close(cfd); + bpf_link__destroy(getsockopt_link); +} + void test_setget_sockopt(void) { cg_fd = test__join_cgroup(CG_NAME); @@ -191,6 +236,8 @@ void test_setget_sockopt(void) test_udp(AF_INET); test_ktls(AF_INET6); test_ktls(AF_INET); + test_nonstandard_opt(AF_INET); + test_nonstandard_opt(AF_INET6); done: setget_sockopt__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 1337153eb0ad..82bfb266741c 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -451,11 +451,11 @@ out: #define MAX_EVENTS 10 static void test_sockmap_skb_verdict_shutdown(void) { + int n, err, map, verdict, c1 = -1, p1 = -1; struct epoll_event ev, events[MAX_EVENTS]; - int n, err, map, verdict, s, c1 = -1, p1 = -1; struct test_sockmap_pass_prog *skel; - int epollfd; int zero = 0; + int epollfd; char b; skel = test_sockmap_pass_prog__open_and_load(); @@ -469,10 +469,7 @@ static void test_sockmap_skb_verdict_shutdown(void) if (!ASSERT_OK(err, "bpf_prog_attach")) goto out; - s = socket_loopback(AF_INET, SOCK_STREAM); - if (s < 0) - goto out; - err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1); + err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1); if (err < 0) goto out; @@ -506,8 +503,8 @@ out: static void test_sockmap_skb_verdict_fionread(bool pass_prog) { + int err, map, verdict, c0 = -1, c1 = -1, p0 = -1, p1 = -1; int expected, zero = 0, sent, recvd, avail; - int err, map, verdict, s, c0 = -1, c1 = -1, p0 = -1, p1 = -1; struct test_sockmap_pass_prog *pass = NULL; struct test_sockmap_drop_prog *drop = NULL; char buf[256] = "0123456789"; @@ -534,11 +531,8 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog) if (!ASSERT_OK(err, "bpf_prog_attach")) goto out; - s = socket_loopback(AF_INET, SOCK_STREAM); - if (!ASSERT_GT(s, -1, "socket_loopback(s)")) - goto out; - err = create_socket_pairs(s, AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1); - if (!ASSERT_OK(err, "create_socket_pairs(s)")) + err = create_socket_pairs(AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1); + if (!ASSERT_OK(err, "create_socket_pairs()")) goto out; err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST); @@ -570,16 +564,12 @@ out: static void test_sockmap_skb_verdict_peek_helper(int map) { - int err, s, c1, p1, zero = 0, sent, recvd, avail; + int err, c1, p1, zero = 0, sent, recvd, avail; char snd[256] = "0123456789"; char rcv[256] = "0"; - s = socket_loopback(AF_INET, SOCK_STREAM); - if (!ASSERT_GT(s, -1, "socket_loopback(s)")) - return; - - err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1); - if (!ASSERT_OK(err, "create_pairs(s)")) + err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1); + if (!ASSERT_OK(err, "create_pair()")) return; err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST); diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h index e880f97bc44d..38e35c72bdaa 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h @@ -3,6 +3,9 @@ #include <linux/vm_sockets.h> +/* include/linux/net.h */ +#define SOCK_TYPE_MASK 0xf + #define IO_TIMEOUT_SEC 30 #define MAX_STRERR_LEN 256 #define MAX_TEST_NAME 80 @@ -14,6 +17,17 @@ #define __always_unused __attribute__((__unused__)) +/* include/linux/cleanup.h */ +#define __get_and_null(p, nullvalue) \ + ({ \ + __auto_type __ptr = &(p); \ + __auto_type __val = *__ptr; \ + *__ptr = nullvalue; \ + __val; \ + }) + +#define take_fd(fd) __get_and_null(fd, -EBADF) + #define _FAIL(errnum, fmt...) \ ({ \ error_at_line(0, (errnum), __func__, __LINE__, fmt); \ @@ -179,6 +193,14 @@ __ret; \ }) +static inline void close_fd(int *fd) +{ + if (*fd >= 0) + xclose(*fd); +} + +#define __close_fd __attribute__((cleanup(close_fd))) + static inline int poll_connect(int fd, unsigned int timeout_sec) { struct timeval timeout = { .tv_sec = timeout_sec }; @@ -312,54 +334,6 @@ static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2) return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST); } -static inline int create_pair(int s, int family, int sotype, int *c, int *p) -{ - struct sockaddr_storage addr; - socklen_t len; - int err = 0; - - len = sizeof(addr); - err = xgetsockname(s, sockaddr(&addr), &len); - if (err) - return err; - - *c = xsocket(family, sotype, 0); - if (*c < 0) - return errno; - err = xconnect(*c, sockaddr(&addr), len); - if (err) { - err = errno; - goto close_cli0; - } - - *p = xaccept_nonblock(s, NULL, NULL); - if (*p < 0) { - err = errno; - goto close_cli0; - } - return err; -close_cli0: - close(*c); - return err; -} - -static inline int create_socket_pairs(int s, int family, int sotype, - int *c0, int *c1, int *p0, int *p1) -{ - int err; - - err = create_pair(s, family, sotype, c0, p0); - if (err) - return err; - - err = create_pair(s, family, sotype, c1, p1); - if (err) { - close(*c0); - close(*p0); - } - return err; -} - static inline int enable_reuseport(int s, int progfd) { int err, one = 1; @@ -412,5 +386,84 @@ static inline int socket_loopback(int family, int sotype) return socket_loopback_reuseport(family, sotype, -1); } +static inline int create_pair(int family, int sotype, int *p0, int *p1) +{ + __close_fd int s, c = -1, p = -1; + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); + int err; + + s = socket_loopback(family, sotype); + if (s < 0) + return s; + + err = xgetsockname(s, sockaddr(&addr), &len); + if (err) + return err; + + c = xsocket(family, sotype, 0); + if (c < 0) + return c; + + err = connect(c, sockaddr(&addr), len); + if (err) { + if (errno != EINPROGRESS) { + FAIL_ERRNO("connect"); + return err; + } + + err = poll_connect(c, IO_TIMEOUT_SEC); + if (err) { + FAIL_ERRNO("poll_connect"); + return err; + } + } + + switch (sotype & SOCK_TYPE_MASK) { + case SOCK_DGRAM: + err = xgetsockname(c, sockaddr(&addr), &len); + if (err) + return err; + + err = xconnect(s, sockaddr(&addr), len); + if (err) + return err; + + *p0 = take_fd(s); + break; + case SOCK_STREAM: + case SOCK_SEQPACKET: + p = xaccept_nonblock(s, NULL, NULL); + if (p < 0) + return p; + + *p0 = take_fd(p); + break; + default: + FAIL("Unsupported socket type %#x", sotype); + return -EOPNOTSUPP; + } + + *p1 = take_fd(c); + return 0; +} + +static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1, + int *p0, int *p1) +{ + int err; + + err = create_pair(family, sotype, c0, p0); + if (err) + return err; + + err = create_pair(family, sotype, c1, p1); + if (err) { + close(*c0); + close(*p0); + } + + return err; +} #endif // __SOCKMAP_HELPERS__ diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 9ce0e0e0b7da..da5a6fb03b69 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -677,7 +677,7 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, int verd_mapfd, enum redir_mode mode) { const char *log_prefix = redir_mode_str(mode); - int s, c0, c1, p0, p1; + int c0, c1, p0, p1; unsigned int pass; int err, n; u32 key; @@ -685,13 +685,10 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, zero_verdict_count(verd_mapfd); - s = socket_loopback(family, sotype | SOCK_NONBLOCK); - if (s < 0) - return; - - err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1); + err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1, + &p0, &p1); if (err) - goto close_srv; + return; err = add_to_sockmap(sock_mapfd, p0, p1); if (err) @@ -722,8 +719,6 @@ close: xclose(c1); xclose(p0); xclose(c0); -close_srv: - xclose(s); } static void test_skb_redir_to_connected(struct test_sockmap_listen *skel, @@ -909,7 +904,7 @@ static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *sk static void redir_partial(int family, int sotype, int sock_map, int parser_map) { - int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1; + int c0 = -1, c1 = -1, p0 = -1, p1 = -1; int err, n, key, value; char buf[] = "abc"; @@ -919,13 +914,10 @@ static void redir_partial(int family, int sotype, int sock_map, int parser_map) if (err) return; - s = socket_loopback(family, sotype | SOCK_NONBLOCK); - if (s < 0) - goto clean_parser_map; - - err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1); + err = create_socket_pairs(family, sotype | SOCK_NONBLOCK, &c0, &c1, + &p0, &p1); if (err) - goto close_srv; + goto clean_parser_map; err = add_to_sockmap(sock_map, p0, p1); if (err) @@ -944,8 +936,6 @@ close: xclose(p0); xclose(c1); xclose(p1); -close_srv: - xclose(s); clean_parser_map: key = 0; @@ -1500,49 +1490,7 @@ static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *ma /* Returns two connected loopback vsock sockets */ static int vsock_socketpair_connectible(int sotype, int *v0, int *v1) { - struct sockaddr_storage addr; - socklen_t len = sizeof(addr); - int s, p, c; - - s = socket_loopback(AF_VSOCK, sotype); - if (s < 0) - return -1; - - c = xsocket(AF_VSOCK, sotype | SOCK_NONBLOCK, 0); - if (c == -1) - goto close_srv; - - if (getsockname(s, sockaddr(&addr), &len) < 0) - goto close_cli; - - if (connect(c, sockaddr(&addr), len) < 0 && errno != EINPROGRESS) { - FAIL_ERRNO("connect"); - goto close_cli; - } - - len = sizeof(addr); - p = accept_timeout(s, sockaddr(&addr), &len, IO_TIMEOUT_SEC); - if (p < 0) - goto close_cli; - - if (poll_connect(c, IO_TIMEOUT_SEC) < 0) { - FAIL_ERRNO("poll_connect"); - goto close_acc; - } - - *v0 = p; - *v1 = c; - - return 0; - -close_acc: - close(p); -close_cli: - close(c); -close_srv: - close(s); - - return -1; + return create_pair(AF_VSOCK, sotype | SOCK_NONBLOCK, v0, v1); } static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd, @@ -1691,44 +1639,7 @@ static void test_reuseport(struct test_sockmap_listen *skel, static int inet_socketpair(int family, int type, int *s, int *c) { - struct sockaddr_storage addr; - socklen_t len; - int p0, c0; - int err; - - p0 = socket_loopback(family, type | SOCK_NONBLOCK); - if (p0 < 0) - return p0; - - len = sizeof(addr); - err = xgetsockname(p0, sockaddr(&addr), &len); - if (err) - goto close_peer0; - - c0 = xsocket(family, type | SOCK_NONBLOCK, 0); - if (c0 < 0) { - err = c0; - goto close_peer0; - } - err = xconnect(c0, sockaddr(&addr), len); - if (err) - goto close_cli0; - err = xgetsockname(c0, sockaddr(&addr), &len); - if (err) - goto close_cli0; - err = xconnect(p0, sockaddr(&addr), len); - if (err) - goto close_cli0; - - *s = p0; - *c = c0; - return 0; - -close_cli0: - xclose(c0); -close_peer0: - xclose(p0); - return err; + return create_pair(family, type | SOCK_NONBLOCK, s, c); } static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, @@ -1795,11 +1706,11 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, int sfd[2]; int err; - if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd)) + if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd)) return; c0 = sfd[0], p0 = sfd[1]; - err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1); + err = inet_socketpair(family, type, &p1, &c1); if (err) goto close; @@ -1847,7 +1758,7 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, int sfd[2]; int err; - err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0); + err = inet_socketpair(family, type, &p0, &c0); if (err) return; @@ -1882,7 +1793,7 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel, unix_inet_redir_to_connected(family, SOCK_DGRAM, sock_map, -1, verdict_map, REDIR_EGRESS, NO_FLAGS); - unix_inet_redir_to_connected(family, SOCK_DGRAM, + unix_inet_redir_to_connected(family, SOCK_STREAM, sock_map, -1, verdict_map, REDIR_EGRESS, NO_FLAGS); diff --git a/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c b/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c new file mode 100644 index 000000000000..accc42e01f8a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tp_btf_nullable.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "test_tp_btf_nullable.skel.h" + +void test_tp_btf_nullable(void) +{ + if (!env.has_testmod) { + test__skip(); + return; + } + + RUN_TESTS(test_tp_btf_nullable); +} diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c index bd8c75b620c2..c397336fe1ed 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c @@ -216,7 +216,7 @@ static void test_uretprobe_regs_change(void) } #ifndef __NR_uretprobe -#define __NR_uretprobe 467 +#define __NR_uretprobe 335 #endif __naked unsigned long uretprobe_syscall_call_1(void) @@ -253,7 +253,7 @@ static void test_uretprobe_syscall_call(void) struct uprobe_syscall_executed *skel; int pid, status, err, go[2], c; - if (ASSERT_OK(pipe(go), "pipe")) + if (!ASSERT_OK(pipe(go), "pipe")) return; skel = uprobe_syscall_executed__open_and_load(); diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index e35bc1eac52a..c3bc186af21e 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -6,6 +6,7 @@ #include <stdbool.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> #include <linux/if_ether.h> #include "bpf_misc.h" #include "bpf_kfuncs.h" @@ -1254,6 +1255,30 @@ int skb_invalid_ctx(void *ctx) return 0; } +SEC("fentry/skb_tx_error") +__failure __msg("must be referenced or trusted") +int BPF_PROG(skb_invalid_ctx_fentry, void *skb) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_from_skb(skb, 0, &ptr); + + return 0; +} + +SEC("fexit/skb_tx_error") +__failure __msg("must be referenced or trusted") +int BPF_PROG(skb_invalid_ctx_fexit, void *skb) +{ + struct bpf_dynptr ptr; + + /* this should fail */ + bpf_dynptr_from_skb(skb, 0, &ptr); + + return 0; +} + /* Reject writes to dynptr slot for uninit arg */ SEC("?raw_tp") __failure __msg("potential write to dynptr at off=-16") diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 5985920d162e..bfcc85686cf0 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -5,6 +5,7 @@ #include <stdbool.h> #include <linux/bpf.h> #include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> #include "bpf_misc.h" #include "bpf_kfuncs.h" #include "errno.h" @@ -544,3 +545,25 @@ int test_dynptr_skb_strcmp(struct __sk_buff *skb) return 1; } + +SEC("tp_btf/kfree_skb") +int BPF_PROG(test_dynptr_skb_tp_btf, void *skb, void *location) +{ + __u8 write_data[2] = {1, 2}; + struct bpf_dynptr ptr; + int ret; + + if (bpf_dynptr_from_skb(skb, 0, &ptr)) { + err = 1; + return 1; + } + + /* since tp_btf skbs are read only, writes should fail */ + ret = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0); + if (ret != -EINVAL) { + err = 2; + return 1; + } + + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 16bdc3e25591..ef70b88bccb2 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1432,4 +1432,58 @@ int iter_arr_with_actual_elem_count(const void *ctx) return sum; } +__u32 upper, select_n, result; +__u64 global; + +static __noinline bool nest_2(char *str) +{ + /* some insns (including branch insns) to ensure stacksafe() is triggered + * in nest_2(). This way, stacksafe() can compare frame associated with nest_1(). + */ + if (str[0] == 't') + return true; + if (str[1] == 'e') + return true; + if (str[2] == 's') + return true; + if (str[3] == 't') + return true; + return false; +} + +static __noinline bool nest_1(int n) +{ + /* case 0: allocate stack, case 1: no allocate stack */ + switch (n) { + case 0: { + char comm[16]; + + if (bpf_get_current_comm(comm, 16)) + return false; + return nest_2(comm); + } + case 1: + return nest_2((char *)&global); + default: + return false; + } +} + +SEC("raw_tp") +__success +int iter_subprog_check_stacksafe(const void *ctx) +{ + long i; + + bpf_for(i, 0, upper) { + if (!nest_1(select_n)) { + result = 1; + return 0; + } + } + + result = 2; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c index 60518aed1ffc..6dd4318debbf 100644 --- a/tools/testing/selftests/bpf/progs/setget_sockopt.c +++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c @@ -59,6 +59,8 @@ static const struct sockopt_test sol_tcp_tests[] = { { .opt = TCP_THIN_LINEAR_TIMEOUTS, .flip = 1, }, { .opt = TCP_USER_TIMEOUT, .new = 123400, .expected = 123400, }, { .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, }, + { .opt = TCP_BPF_SOCK_OPS_CB_FLAGS, .new = BPF_SOCK_OPS_ALL_CB_FLAGS, + .expected = BPF_SOCK_OPS_ALL_CB_FLAGS, }, { .opt = 0, }, }; @@ -353,11 +355,30 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, return 1; } +SEC("cgroup/getsockopt") +int _getsockopt(struct bpf_sockopt *ctx) +{ + struct bpf_sock *sk = ctx->sk; + int *optval = ctx->optval; + struct tcp_sock *tp; + + if (!sk || ctx->level != SOL_TCP || ctx->optname != TCP_BPF_SOCK_OPS_CB_FLAGS) + return 1; + + tp = bpf_core_cast(sk, struct tcp_sock); + if (ctx->optval + sizeof(int) <= ctx->optval_end) { + *optval = tp->bpf_sock_ops_cb_flags; + ctx->retval = 0; + } + return 1; +} + SEC("sockops") int skops_sockopt(struct bpf_sock_ops *skops) { struct bpf_sock *bpf_sk = skops->sk; struct sock *sk; + int flags; if (!bpf_sk) return 1; @@ -384,9 +405,8 @@ int skops_sockopt(struct bpf_sock_ops *skops) nr_passive += !(bpf_test_sockopt(skops, sk) || test_tcp_maxseg(skops, sk) || test_tcp_saved_syn(skops, sk)); - bpf_sock_ops_cb_flags_set(skops, - skops->bpf_sock_ops_cb_flags | - BPF_SOCK_OPS_STATE_CB_FLAG); + flags = skops->bpf_sock_ops_cb_flags | BPF_SOCK_OPS_STATE_CB_FLAG; + bpf_setsockopt(skops, SOL_TCP, TCP_BPF_SOCK_OPS_CB_FLAGS, &flags, sizeof(flags)); break; case BPF_SOCK_OPS_STATE_CB: if (skops->args[1] == BPF_TCP_CLOSE_WAIT) diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c index 44ee0d037f95..eb5cca1fce16 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c @@ -486,17 +486,10 @@ static int tcp_validate_cookie(struct tcp_syncookie *ctx) goto err; mssind = (cookie & (3 << 6)) >> 6; - if (ctx->ipv4) { - if (mssind > ARRAY_SIZE(msstab4)) - goto err; - + if (ctx->ipv4) ctx->attrs.mss = msstab4[mssind]; - } else { - if (mssind > ARRAY_SIZE(msstab6)) - goto err; - + else ctx->attrs.mss = msstab6[mssind]; - } ctx->attrs.snd_wscale = cookie & BPF_SYNCOOKIE_WSCALE_MASK; ctx->attrs.rcv_wscale = ctx->attrs.snd_wscale; diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c new file mode 100644 index 000000000000..bba3e37f749b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod.h" +#include "bpf_misc.h" + +SEC("tp_btf/bpf_testmod_test_nullable_bare") +__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") +int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx) +{ + return nullable_ctx->len; +} + +SEC("tp_btf/bpf_testmod_test_nullable_bare") +int BPF_PROG(handle_tp_btf_nullable_bare2, struct bpf_testmod_test_read_ctx *nullable_ctx) +{ + if (nullable_ctx) + return nullable_ctx->len; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 8144fd145237..1ee0ef114f9d 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -324,6 +324,25 @@ out: return zc_avail; } +#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags" +static unsigned int get_max_skb_frags(void) +{ + unsigned int max_skb_frags = 0; + FILE *file; + + file = fopen(MAX_SKB_FRAGS_PATH, "r"); + if (!file) { + ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH); + return 0; + } + + if (fscanf(file, "%u", &max_skb_frags) != 1) + ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH); + + fclose(file); + return max_skb_frags; +} + static struct option long_options[] = { {"interface", required_argument, 0, 'i'}, {"busy-poll", no_argument, 0, 'b'}, @@ -2244,13 +2263,24 @@ static int testapp_poll_rxq_tmout(struct test_spec *test) static int testapp_too_many_frags(struct test_spec *test) { - struct pkt pkts[2 * XSK_DESC__MAX_SKB_FRAGS + 2] = {}; + struct pkt *pkts; u32 max_frags, i; + int ret; - if (test->mode == TEST_MODE_ZC) + if (test->mode == TEST_MODE_ZC) { max_frags = test->ifobj_tx->xdp_zc_max_segs; - else - max_frags = XSK_DESC__MAX_SKB_FRAGS; + } else { + max_frags = get_max_skb_frags(); + if (!max_frags) { + ksft_print_msg("Couldn't retrieve MAX_SKB_FRAGS from system, using default (17) value\n"); + max_frags = 17; + } + max_frags += 1; + } + + pkts = calloc(2 * max_frags + 2, sizeof(struct pkt)); + if (!pkts) + return TEST_FAILURE; test->mtu = MAX_ETH_JUMBO_SIZE; @@ -2280,7 +2310,10 @@ static int testapp_too_many_frags(struct test_spec *test) pkts[2 * max_frags + 1].valid = true; pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2); - return testapp_validate_traffic(test); + ret = testapp_validate_traffic(test); + + free(pkts); + return ret; } static int xsk_load_xdp_programs(struct ifobject *ifobj) diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 885c948c5d83..e46e823f6a1a 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -55,7 +55,6 @@ #define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024) #define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024) #define XSK_DESC__INVALID_OPTION (0xffff) -#define XSK_DESC__MAX_SKB_FRAGS 18 #define HUGEPAGE_SIZE (2 * 1024 * 1024) #define PKT_DUMP_NB_TO_PRINT 16 #define RUN_ALL_TESTS UINT_MAX diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index 991c473e3859..e0d9851fe1c9 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -26,6 +26,10 @@ #define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3) #endif +#ifndef F_CREATED_QUERY +#define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4) +#endif + static inline int sys_close_range(unsigned int fd, unsigned int max_fd, unsigned int flags) { @@ -589,4 +593,74 @@ TEST(close_range_cloexec_unshare_syzbot) EXPECT_EQ(close(fd3), 0); } +TEST(close_range_bitmap_corruption) +{ + pid_t pid; + int status; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + /* get the first 128 descriptors open */ + for (int i = 2; i < 128; i++) + EXPECT_GE(dup2(0, i), 0); + + /* get descriptor table shared */ + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* unshare and truncate descriptor table down to 64 */ + if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE)) + exit(EXIT_FAILURE); + + ASSERT_EQ(fcntl(64, F_GETFD), -1); + /* ... and verify that the range 64..127 is not + stuck "fully used" according to secondary bitmap */ + EXPECT_EQ(dup(0), 64) + exit(EXIT_FAILURE); + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST(fcntl_created) +{ + for (int i = 0; i < 101; i++) { + int fd; + char path[PATH_MAX]; + + fd = open("/dev/null", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0) { + if (errno == ENOENT) + SKIP(return, + "Skipping test since /dev/null does not exist"); + } + + /* We didn't create "/dev/null". */ + EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0); + close(fd); + + sprintf(path, "aaaa_%d", i); + fd = open(path, O_CREAT | O_RDONLY | O_CLOEXEC, 0600); + ASSERT_GE(fd, 0); + + /* We created "aaaa_%d". */ + EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 1); + close(fd); + + fd = open(path, O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0); + + /* We're opening it again, so no positive creation check. */ + EXPECT_EQ(fcntl(fd, F_CREATED_QUERY, 0), 0); + close(fd); + unlink(path); + } +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index 5f541522364f..5d0a809dc2df 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -29,9 +29,11 @@ static int check_vgem(int fd) version.name = name; ret = ioctl(fd, DRM_IOCTL_VERSION, &version); - if (ret) + if (ret || version.name_len != 4) return 0; + name[4] = '\0'; + return !strcmp(name, "vgem"); } diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index e54f382bcb02..39fb97a8c1df 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -1,8 +1,11 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_INCLUDES := $(wildcard lib/py/*.py) +TEST_INCLUDES := $(wildcard lib/py/*.py) \ + ../../net/net_helper.sh \ + ../../net/lib.sh \ TEST_PROGS := \ + netcons_basic.sh \ ping.py \ queues.py \ stats.py \ diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config index f6a58ce8a230..a2d8af60876d 100644 --- a/tools/testing/selftests/drivers/net/config +++ b/tools/testing/selftests/drivers/net/config @@ -1,2 +1,6 @@ CONFIG_IPV6=y CONFIG_NETDEVSIM=m +CONFIG_CONFIGFS_FS=y +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETCONSOLE_EXTENDED_LOG=y diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py index 026d98976c35..05b6fbb3fcdd 100755 --- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py +++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +import errno import time import os from lib.py import ksft_run, ksft_exit, ksft_pr @@ -61,7 +62,7 @@ def test_pp_alloc(cfg, netdevnl): try: stats = get_stats() except NlError as e: - if e.nl_msg.error == -95: + if e.nl_msg.error == -errno.EOPNOTSUPP: stats = {} else: raise diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 931dbc36ca43..9d7adb3cf33b 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -3,7 +3,7 @@ import datetime import random -from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ge, ksft_lt +from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt from lib.py import NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily from lib.py import KsftSkipEx @@ -19,6 +19,15 @@ def _rss_key_rand(length): return [random.randint(0, 255) for _ in range(length)] +def _rss_key_check(cfg, data=None, context=0): + if data is None: + data = get_rss(cfg, context=context) + if 'rss-hash-key' not in data: + return + non_zero = [x for x in data['rss-hash-key'] if x != 0] + ksft_eq(bool(non_zero), True, comment=f"RSS key is all zero {data['rss-hash-key']}") + + def get_rss(cfg, context=0): return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0] @@ -81,17 +90,18 @@ def _send_traffic_check(cfg, port, name, params): ksft_ge(directed, 20000, f"traffic on {name}: " + str(cnts)) if params.get('noise'): ksft_lt(sum(cnts[i] for i in params['noise']), directed / 2, - "traffic on other queues:" + str(cnts)) + f"traffic on other queues ({name})':" + str(cnts)) if params.get('empty'): ksft_eq(sum(cnts[i] for i in params['empty']), 0, - "traffic on inactive queues: " + str(cnts)) + f"traffic on inactive queues ({name}): " + str(cnts)) def test_rss_key_indir(cfg): """Test basics like updating the main RSS key and indirection table.""" - if len(_get_rx_cnts(cfg)) < 2: - KsftSkipEx("Device has only one queue (or doesn't support queue stats)") + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 3: + KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") data = get_rss(cfg) want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table'] @@ -101,6 +111,7 @@ def test_rss_key_indir(cfg): if not data[k]: raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}") + _rss_key_check(cfg, data=data) key_len = len(data['rss-hash-key']) # Set the key @@ -110,9 +121,26 @@ def test_rss_key_indir(cfg): data = get_rss(cfg) ksft_eq(key, data['rss-hash-key']) + # Set the indirection table and the key together + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} equal 3 hkey " + _rss_key_str(key)) + reset_indir = defer(ethtool, f"-X {cfg.ifname} default") + + data = get_rss(cfg) + _rss_key_check(cfg, data=data) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(2, max(data['rss-indirection-table'])) + + # Reset indirection table and set the key + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} default hkey " + _rss_key_str(key)) + data = get_rss(cfg) + _rss_key_check(cfg, data=data) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(qcnt - 1, max(data['rss-indirection-table'])) + # Set the indirection table ethtool(f"-X {cfg.ifname} equal 2") - reset_indir = defer(ethtool, f"-X {cfg.ifname} default") data = get_rss(cfg) ksft_eq(0, min(data['rss-indirection-table'])) ksft_eq(1, max(data['rss-indirection-table'])) @@ -274,6 +302,78 @@ def test_hitless_key_update(cfg): ksft_eq(carrier1 - carrier0, 0) +def test_rss_context_dump(cfg): + """ + Test dumping RSS contexts. This tests mostly exercises the kernel APIs. + """ + + # Get a random key of the right size + data = get_rss(cfg) + if 'rss-hash-key' in data: + key_data = _rss_key_rand(len(data['rss-hash-key'])) + key = _rss_key_str(key_data) + else: + key_data = [] + key = "ba:ad" + + ids = [] + try: + ids.append(ethtool_create(cfg, "-X", f"context new")) + defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete") + + ids.append(ethtool_create(cfg, "-X", f"context new weight 1 1")) + defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete") + + ids.append(ethtool_create(cfg, "-X", f"context new hkey {key}")) + defer(ethtool, f"-X {cfg.ifname} context {ids[-1]} delete") + except CmdExitFailure: + if not ids: + raise KsftSkipEx("Unable to add any contexts") + ksft_pr(f"Added only {len(ids)} out of 3 contexts") + + expect_tuples = set([(cfg.ifname, -1)] + [(cfg.ifname, ctx_id) for ctx_id in ids]) + + # Dump all + ctxs = cfg.ethnl.rss_get({}, dump=True) + tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs] + ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump") + ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname]) + ksft_eq(expect_tuples, ctx_tuples) + + # Sanity-check the results + for data in ctxs: + ksft_ne(set(data['indir']), {0}, "indir table is all zero") + ksft_ne(set(data.get('hkey', [1])), {0}, "key is all zero") + + # More specific checks + if len(ids) > 1 and data.get('context') == ids[1]: + ksft_eq(set(data['indir']), {0, 1}, + "ctx1 - indir table mismatch") + if len(ids) > 2 and data.get('context') == ids[2]: + ksft_eq(data['hkey'], bytes(key_data), "ctx2 - key mismatch") + + # Ifindex filter + ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}, dump=True) + tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs] + ctx_tuples = set(tuples) + ksft_eq(len(tuples), len(ctx_tuples), "duplicates in context dump") + ksft_eq(expect_tuples, ctx_tuples) + + # Skip ctx 0 + expect_tuples.remove((cfg.ifname, -1)) + + ctxs = cfg.ethnl.rss_get({'start-context': 1}, dump=True) + tuples = [(c['header']['dev-name'], c.get('context', -1)) for c in ctxs] + ksft_eq(len(tuples), len(set(tuples)), "duplicates in context dump") + ctx_tuples = set([ctx for ctx in tuples if ctx[0] == cfg.ifname]) + ksft_eq(expect_tuples, ctx_tuples) + + # And finally both with ifindex and skip main + ctxs = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}, 'start-context': 1}, dump=True) + ctx_tuples = set([(c['header']['dev-name'], c.get('context', -1)) for c in ctxs]) + ksft_eq(expect_tuples, ctx_tuples) + + def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): """ Test separating traffic into RSS contexts. @@ -317,8 +417,11 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): ctx_cnt = i break + _rss_key_check(cfg, context=ctx_id) + if not create_with_cfg: ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}") + _rss_key_check(cfg, context=ctx_id) # Sanity check the context we just created data = get_rss(cfg, ctx_id) @@ -511,7 +614,7 @@ def main() -> None: ksft_run([test_rss_key_indir, test_rss_queue_reconfigure, test_rss_resize, test_hitless_key_update, test_rss_context, test_rss_context4, test_rss_context32, - test_rss_context_queue_reconfigure, + test_rss_context_dump, test_rss_context_queue_reconfigure, test_rss_context_overlap, test_rss_context_overlap2, test_rss_context_out_of_order, test_rss_context4_create_with_cfg], args=(cfg, )) diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index a5e800b8f103..1ea9bb695e94 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -4,6 +4,7 @@ import os import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx +from lib.py import ksft_setup from lib.py import cmd, ethtool, ip from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -14,7 +15,7 @@ def _load_env_file(src_path): src_dir = Path(src_path).parent.resolve() if not (src_dir / "net.config").exists(): - return env + return ksft_setup(env) with open((src_dir / "net.config").as_posix(), 'r') as fp: for line in fp.readlines(): @@ -30,7 +31,7 @@ def _load_env_file(src_path): if len(pair) != 2: raise Exception("Can't parse configuration line:", full_file) env[pair[0]] = pair[1] - return env + return ksft_setup(env) class NetDrvEnv: diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh index 877cd6df94a1..fe905a7f34b3 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh @@ -2,6 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 lib_dir=$(dirname $0)/../../../net/forwarding +ethtool_lib_dir=$(dirname $0)/../hw ALL_TESTS=" autoneg @@ -11,7 +12,7 @@ ALL_TESTS=" NUM_NETIFS=2 : ${TIMEOUT:=30000} # ms source $lib_dir/lib.sh -source $lib_dir/ethtool_lib.sh +source $ethtool_lib_dir/ethtool_lib.sh setup_prepare() { diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh new file mode 100755 index 000000000000..06021b2059b7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -0,0 +1,234 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This test creates two netdevsim virtual interfaces, assigns one of them (the +# "destination interface") to a new namespace, and assigns IP addresses to both +# interfaces. +# +# It listens on the destination interface using socat and configures a dynamic +# target on netconsole, pointing to the destination IP address. +# +# Finally, it checks whether the message was received properly on the +# destination interface. Note that this test may pollute the kernel log buffer +# (dmesg) and relies on dynamic configuration and namespaces being configured. +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +# Simple script to test dynamic targets in netconsole +SRCIF="" # to be populated later +SRCIP=192.168.1.1 +DSTIF="" # to be populated later +DSTIP=192.168.1.2 + +PORT="6666" +MSG="netconsole selftest" +TARGET=$(mktemp -u netcons_XXXXX) +DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk) +NETCONS_CONFIGFS="/sys/kernel/config/netconsole" +NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" +# NAMESPACE will be populated by setup_ns with a random value +NAMESPACE="" + +# IDs for netdevsim +NSIM_DEV_1_ID=$((256 + RANDOM % 256)) +NSIM_DEV_2_ID=$((512 + RANDOM % 256)) + +# Used to create and delete namespaces +source "${SCRIPTDIR}"/../../net/lib.sh +source "${SCRIPTDIR}"/../../net/net_helper.sh + +# Create netdevsim interfaces +create_ifaces() { + local NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device + + echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW" + echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW" + udevadm settle 2> /dev/null || true + + local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID" + local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID" + + # These are global variables + SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \ + -path "$NSIM1"/net -exec basename {} \;) + DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \ + -path "$NSIM2"/net -exec basename {} \;) +} + +link_ifaces() { + local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device" + local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex) + local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex) + + exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}" + exec {INITNS_FD}</proc/self/ns/net + + # Bind the dst interface to namespace + ip link set "${DSTIF}" netns "${NAMESPACE}" + + # Linking one device to the other one (on the other namespace} + if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX" > $NSIM_DEV_SYS_LINK + then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup + exit "${ksft_skip}" + fi +} + +function configure_ip() { + # Configure the IPs for both interfaces + ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}" + ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up + + ip addr add "${SRCIP}"/24 dev "${SRCIF}" + ip link set "${SRCIF}" up +} + +function set_network() { + # setup_ns function is coming from lib.sh + setup_ns NAMESPACE + + # Create both interfaces, and assign the destination to a different + # namespace + create_ifaces + + # Link both interfaces back to back + link_ifaces + + configure_ip +} + +function create_dynamic_target() { + DSTMAC=$(ip netns exec "${NAMESPACE}" \ + ip link show "${DSTIF}" | awk '/ether/ {print $2}') + + # Create a dynamic target + mkdir "${NETCONS_PATH}" + + echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip + echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip + echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac + echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name + + echo 1 > "${NETCONS_PATH}"/enabled +} + +function cleanup() { + local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" + + # delete netconsole dynamic reconfiguration + echo 0 > "${NETCONS_PATH}"/enabled + # Remove the configfs entry + rmdir "${NETCONS_PATH}" + + # Delete netdevsim devices + echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL" + echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL" + + # this is coming from lib.sh + cleanup_all_ns + + # Restoring printk configurations + echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk +} + +function listen_port_and_save_to() { + local OUTPUT=${1} + # Just wait for 2 seconds + timeout 2 ip netns exec "${NAMESPACE}" \ + socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}" +} + +function validate_result() { + local TMPFILENAME="$1" + + # Check if the file exists + if [ ! -f "$TMPFILENAME" ]; then + echo "FAIL: File was not generated." >&2 + exit "${ksft_fail}" + fi + + if ! grep -q "${MSG}" "${TMPFILENAME}"; then + echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi + + # Delete the file once it is validated, otherwise keep it + # for debugging purposes + rm "${TMPFILENAME}" + exit "${ksft_pass}" +} + +function check_for_dependencies() { + if [ "$(id -u)" -ne 0 ]; then + echo "This test must be run as root" >&2 + exit "${ksft_skip}" + fi + + if ! which socat > /dev/null ; then + echo "SKIP: socat(1) is not available" >&2 + exit "${ksft_skip}" + fi + + if ! which ip > /dev/null ; then + echo "SKIP: ip(1) is not available" >&2 + exit "${ksft_skip}" + fi + + if ! which udevadm > /dev/null ; then + echo "SKIP: udevadm(1) is not available" >&2 + exit "${ksft_skip}" + fi + + if [ ! -d "${NETCONS_CONFIGFS}" ]; then + echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2 + exit "${ksft_skip}" + fi + + if ip link show "${DSTIF}" 2> /dev/null; then + echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2 + exit "${ksft_skip}" + fi + + if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then + echo "SKIP: IPs already in use. Skipping it" >&2 + exit "${ksft_skip}" + fi +} + +# ========== # +# Start here # +# ========== # +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target +# Listed for netconsole port inside the namespace and destination interface +listen_port_and_save_to "${OUTPUT_FILE}" & +# Wait for socat to start and listen to the port. +wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +# Send the message +echo "${MSG}: ${TARGET}" > /dev/kmsg +# Wait until socat saves the file to disk +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + +# Make sure the message was received in the dst part +# and exit +validate_result "${OUTPUT_FILE}" diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index 820b8e0a22c6..63e3c045a3b2 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -1,10 +1,13 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +import errno from lib.py import ksft_run, ksft_exit, ksft_pr from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx +from lib.py import ksft_disruptive from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError from lib.py import NetDrvEnv +from lib.py import ip, defer ethnl = EthtoolFamily() netfam = NetdevFamily() @@ -17,7 +20,7 @@ def check_pause(cfg) -> None: try: ethnl.pause_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: - if e.error == 95: + if e.error == errno.EOPNOTSUPP: raise KsftXfailEx("pause not supported by the device") raise @@ -32,7 +35,7 @@ def check_fec(cfg) -> None: try: ethnl.fec_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: - if e.error == 95: + if e.error == errno.EOPNOTSUPP: raise KsftXfailEx("FEC not supported by the device") raise @@ -117,7 +120,7 @@ def qstat_by_ifindex(cfg) -> None: # loopback has no stats with ksft_raises(NlError) as cm: netfam.qstats_get({"ifindex": 1}, dump=True) - ksft_eq(cm.exception.nl_msg.error, -95) + ksft_eq(cm.exception.nl_msg.error, -errno.EOPNOTSUPP) ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex') # Try to get stats for lowest unused ifindex but not 0 @@ -133,9 +136,31 @@ def qstat_by_ifindex(cfg) -> None: ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex') +@ksft_disruptive +def check_down(cfg) -> None: + try: + qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + except NlError as e: + if e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("qstats not supported by the device") + raise + + ip(f"link set dev {cfg.dev['ifname']} down") + defer(ip, f"link set dev {cfg.dev['ifname']} up") + + qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + for k, v in qstat.items(): + ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down") + + # exercise per-queue API to make sure that "device down" state + # is handled correctly and doesn't crash + netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True) + + def main() -> None: with NetDrvEnv(__file__) as cfg: - ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex], + ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, + check_down], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c index dc0408a831d0..75b7b4ef6cfa 100644 --- a/tools/testing/selftests/hid/hid_bpf.c +++ b/tools/testing/selftests/hid/hid_bpf.c @@ -532,6 +532,7 @@ static void load_programs(const struct test_program programs[], FIXTURE_DATA(hid_bpf) * self, const FIXTURE_VARIANT(hid_bpf) * variant) { + struct bpf_map *iter_map; int err = -EINVAL; ASSERT_LE(progs_count, ARRAY_SIZE(self->hid_links)) @@ -564,6 +565,13 @@ static void load_programs(const struct test_program programs[], *ops_hid_id = self->hid_id; } + /* we disable the auto-attach feature of all maps because we + * only want the tested one to be manually attached in the next + * call to bpf_map__attach_struct_ops() + */ + bpf_object__for_each_map(iter_map, *self->skel->skeleton->obj) + bpf_map__set_autoattach(iter_map, false); + err = hid__load(self->skel); ASSERT_OK(err) TH_LOG("hid_skel_load failed: %d", err); @@ -687,6 +695,24 @@ TEST_F(hid_bpf, subprog_raw_event) } /* + * Attach hid_first_event to the given uhid device, + * attempt at re-attaching it, we should not lock and + * return an invalid struct bpf_link + */ +TEST_F(hid_bpf, multiple_attach) +{ + const struct test_program progs[] = { + { .name = "hid_first_event" }, + }; + struct bpf_link *link; + + LOAD_PROGRAMS(progs); + + link = bpf_map__attach_struct_ops(self->skel->maps.first_event); + ASSERT_NULL(link) TH_LOG("unexpected return value when re-attaching the struct_ops"); +} + +/* * Ensures that we can attach/detach programs */ TEST_F(hid_bpf, test_attach_detach) diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c index ee9bbbcf751b..5ecc845ef792 100644 --- a/tools/testing/selftests/hid/progs/hid.c +++ b/tools/testing/selftests/hid/progs/hid.c @@ -455,7 +455,7 @@ struct { __type(value, struct elem); } hmap SEC(".maps"); -static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work) +static int wq_cb_sleepable(void *map, int *key, void *work) { __u8 buf[9] = {2, 3, 4, 5, 6, 7, 8, 9, 10}; struct hid_bpf_ctx *hid_ctx; diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h index cfe37f491906..e5db897586bb 100644 --- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h +++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h @@ -114,7 +114,7 @@ extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx, extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym; extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym; extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, - int (callback_fn)(void *map, int *key, struct bpf_wq *wq), + int (callback_fn)(void *map, int *key, void *wq), unsigned int flags__k, void *aux__ign) __ksym; #define bpf_wq_set_callback(timer, cb, flags) \ bpf_wq_set_callback_impl(timer, cb, flags, NULL) diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 6343f4053bd4..4927b9add5ad 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -825,7 +825,7 @@ TEST_F(iommufd_ioas, copy_area) { struct iommu_ioas_copy copy_cmd = { .size = sizeof(copy_cmd), - .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE, .dst_ioas_id = self->ioas_id, .src_ioas_id = self->ioas_id, .length = PAGE_SIZE, @@ -1318,7 +1318,7 @@ TEST_F(iommufd_ioas, copy_sweep) { struct iommu_ioas_copy copy_cmd = { .size = sizeof(copy_cmd), - .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE, .src_ioas_id = self->ioas_id, .dst_iova = MOCK_APERTURE_START, .length = MOCK_PAGE_SIZE, @@ -1608,7 +1608,7 @@ TEST_F(iommufd_mock_domain, user_copy) }; struct iommu_ioas_copy copy_cmd = { .size = sizeof(copy_cmd), - .flags = IOMMU_IOAS_MAP_FIXED_IOVA, + .flags = IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE, .dst_ioas_id = self->ioas_id, .dst_iova = MOCK_APERTURE_START, .length = BUFFER_SIZE, diff --git a/tools/testing/selftests/kselftest/ksft.py b/tools/testing/selftests/kselftest/ksft.py index cd89fb2bc10e..bf215790a89d 100644 --- a/tools/testing/selftests/kselftest/ksft.py +++ b/tools/testing/selftests/kselftest/ksft.py @@ -70,7 +70,7 @@ def test_result(condition, description=""): def finished(): - if ksft_cnt["pass"] == ksft_num_tests: + if ksft_cnt["pass"] + ksft_cnt["skip"] == ksft_num_tests: exit_code = KSFT_PASS else: exit_code = KSFT_FAIL diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh index 74954f6a8f94..2c3c58e65a41 100644 --- a/tools/testing/selftests/kselftest/runner.sh +++ b/tools/testing/selftests/kselftest/runner.sh @@ -111,8 +111,11 @@ run_one() stdbuf="/usr/bin/stdbuf --output=L " fi eval kselftest_cmd_args="\$${kselftest_cmd_args_ref:-}" - cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args" - if [ ! -x "$TEST" ]; then + if [ -x "$TEST" ]; then + cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args" + elif [ -x "./ksft_runner.sh" ]; then + cmd="$stdbuf ./ksft_runner.sh ./$BASENAME_TEST" + else echo "# Warning: file $TEST is not executable" if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ] diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 48d32c5aa3eb..0c4b254ab56b 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -152,6 +152,7 @@ TEST_GEN_PROGS_x86_64 += pre_fault_memory_test TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs +TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/hypercalls TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test @@ -163,6 +164,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/vgic_init TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access +TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3 TEST_GEN_PROGS_aarch64 += access_tracking_perf_test TEST_GEN_PROGS_aarch64 += arch_timer TEST_GEN_PROGS_aarch64 += demand_paging_test diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c new file mode 100644 index 000000000000..a36a7e2db434 --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/arch_timer_edge_cases.c @@ -0,0 +1,1062 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arch_timer_edge_cases.c - Tests the aarch64 timer IRQ functionality. + * + * The test validates some edge cases related to the arch-timer: + * - timers above the max TVAL value. + * - timers in the past + * - moving counters ahead and behind pending timers. + * - reprograming timers. + * - timers fired multiple times. + * - masking/unmasking using the timer control mask. + * + * Copyright (c) 2021, Google LLC. + */ + +#define _GNU_SOURCE + +#include <pthread.h> +#include <sys/sysinfo.h> + +#include "arch_timer.h" +#include "gic.h" +#include "vgic.h" + +static const uint64_t CVAL_MAX = ~0ULL; +/* tval is a signed 32-bit int. */ +static const int32_t TVAL_MAX = INT32_MAX; +static const int32_t TVAL_MIN = INT32_MIN; + +/* After how much time we say there is no IRQ. */ +static const uint32_t TIMEOUT_NO_IRQ_US = 50000; + +/* A nice counter value to use as the starting one for most tests. */ +static const uint64_t DEF_CNT = (CVAL_MAX / 2); + +/* Number of runs. */ +static const uint32_t NR_TEST_ITERS_DEF = 5; + +/* Default wait test time in ms. */ +static const uint32_t WAIT_TEST_MS = 10; + +/* Default "long" wait test time in ms. */ +static const uint32_t LONG_WAIT_TEST_MS = 100; + +/* Shared with IRQ handler. */ +struct test_vcpu_shared_data { + atomic_t handled; + atomic_t spurious; +} shared_data; + +struct test_args { + /* Virtual or physical timer and counter tests. */ + enum arch_timer timer; + /* Delay used for most timer tests. */ + uint64_t wait_ms; + /* Delay used in the test_long_timer_delays test. */ + uint64_t long_wait_ms; + /* Number of iterations. */ + int iterations; + /* Whether to test the physical timer. */ + bool test_physical; + /* Whether to test the virtual timer. */ + bool test_virtual; +}; + +struct test_args test_args = { + .wait_ms = WAIT_TEST_MS, + .long_wait_ms = LONG_WAIT_TEST_MS, + .iterations = NR_TEST_ITERS_DEF, + .test_physical = true, + .test_virtual = true, +}; + +static int vtimer_irq, ptimer_irq; + +enum sync_cmd { + SET_COUNTER_VALUE, + USERSPACE_USLEEP, + USERSPACE_SCHED_YIELD, + USERSPACE_MIGRATE_SELF, + NO_USERSPACE_CMD, +}; + +typedef void (*sleep_method_t)(enum arch_timer timer, uint64_t usec); + +static void sleep_poll(enum arch_timer timer, uint64_t usec); +static void sleep_sched_poll(enum arch_timer timer, uint64_t usec); +static void sleep_in_userspace(enum arch_timer timer, uint64_t usec); +static void sleep_migrate(enum arch_timer timer, uint64_t usec); + +sleep_method_t sleep_method[] = { + sleep_poll, + sleep_sched_poll, + sleep_migrate, + sleep_in_userspace, +}; + +typedef void (*irq_wait_method_t)(void); + +static void wait_for_non_spurious_irq(void); +static void wait_poll_for_irq(void); +static void wait_sched_poll_for_irq(void); +static void wait_migrate_poll_for_irq(void); + +irq_wait_method_t irq_wait_method[] = { + wait_for_non_spurious_irq, + wait_poll_for_irq, + wait_sched_poll_for_irq, + wait_migrate_poll_for_irq, +}; + +enum timer_view { + TIMER_CVAL, + TIMER_TVAL, +}; + +static void assert_irqs_handled(uint32_t n) +{ + int h = atomic_read(&shared_data.handled); + + __GUEST_ASSERT(h == n, "Handled %d IRQS but expected %d", h, n); +} + +static void userspace_cmd(uint64_t cmd) +{ + GUEST_SYNC_ARGS(cmd, 0, 0, 0, 0); +} + +static void userspace_migrate_vcpu(void) +{ + userspace_cmd(USERSPACE_MIGRATE_SELF); +} + +static void userspace_sleep(uint64_t usecs) +{ + GUEST_SYNC_ARGS(USERSPACE_USLEEP, usecs, 0, 0, 0); +} + +static void set_counter(enum arch_timer timer, uint64_t counter) +{ + GUEST_SYNC_ARGS(SET_COUNTER_VALUE, counter, timer, 0, 0); +} + +static void guest_irq_handler(struct ex_regs *regs) +{ + unsigned int intid = gic_get_and_ack_irq(); + enum arch_timer timer; + uint64_t cnt, cval; + uint32_t ctl; + bool timer_condition, istatus; + + if (intid == IAR_SPURIOUS) { + atomic_inc(&shared_data.spurious); + goto out; + } + + if (intid == ptimer_irq) + timer = PHYSICAL; + else if (intid == vtimer_irq) + timer = VIRTUAL; + else + goto out; + + ctl = timer_get_ctl(timer); + cval = timer_get_cval(timer); + cnt = timer_get_cntct(timer); + timer_condition = cnt >= cval; + istatus = (ctl & CTL_ISTATUS) && (ctl & CTL_ENABLE); + GUEST_ASSERT_EQ(timer_condition, istatus); + + /* Disable and mask the timer. */ + timer_set_ctl(timer, CTL_IMASK); + + atomic_inc(&shared_data.handled); + +out: + gic_set_eoi(intid); +} + +static void set_cval_irq(enum arch_timer timer, uint64_t cval_cycles, + uint32_t ctl) +{ + atomic_set(&shared_data.handled, 0); + atomic_set(&shared_data.spurious, 0); + timer_set_cval(timer, cval_cycles); + timer_set_ctl(timer, ctl); +} + +static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles, + uint32_t ctl) +{ + atomic_set(&shared_data.handled, 0); + atomic_set(&shared_data.spurious, 0); + timer_set_ctl(timer, ctl); + timer_set_tval(timer, tval_cycles); +} + +static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl, + enum timer_view tv) +{ + switch (tv) { + case TIMER_CVAL: + set_cval_irq(timer, xval, ctl); + break; + case TIMER_TVAL: + set_tval_irq(timer, xval, ctl); + break; + default: + GUEST_FAIL("Could not get timer %d", timer); + } +} + +/* + * Note that this can theoretically hang forever, so we rely on having + * a timeout mechanism in the "runner", like: + * tools/testing/selftests/kselftest/runner.sh. + */ +static void wait_for_non_spurious_irq(void) +{ + int h; + + local_irq_disable(); + + for (h = atomic_read(&shared_data.handled); h == atomic_read(&shared_data.handled);) { + wfi(); + local_irq_enable(); + isb(); /* handle IRQ */ + local_irq_disable(); + } +} + +/* + * Wait for an non-spurious IRQ by polling in the guest or in + * userspace (e.g. userspace_cmd=USERSPACE_SCHED_YIELD). + * + * Note that this can theoretically hang forever, so we rely on having + * a timeout mechanism in the "runner", like: + * tools/testing/selftests/kselftest/runner.sh. + */ +static void poll_for_non_spurious_irq(enum sync_cmd usp_cmd) +{ + int h; + + local_irq_disable(); + + h = atomic_read(&shared_data.handled); + + local_irq_enable(); + while (h == atomic_read(&shared_data.handled)) { + if (usp_cmd == NO_USERSPACE_CMD) + cpu_relax(); + else + userspace_cmd(usp_cmd); + } + local_irq_disable(); +} + +static void wait_poll_for_irq(void) +{ + poll_for_non_spurious_irq(NO_USERSPACE_CMD); +} + +static void wait_sched_poll_for_irq(void) +{ + poll_for_non_spurious_irq(USERSPACE_SCHED_YIELD); +} + +static void wait_migrate_poll_for_irq(void) +{ + poll_for_non_spurious_irq(USERSPACE_MIGRATE_SELF); +} + +/* + * Sleep for usec microseconds by polling in the guest or in + * userspace (e.g. userspace_cmd=USERSPACE_SCHEDULE). + */ +static void guest_poll(enum arch_timer test_timer, uint64_t usec, + enum sync_cmd usp_cmd) +{ + uint64_t cycles = usec_to_cycles(usec); + /* Whichever timer we are testing with, sleep with the other. */ + enum arch_timer sleep_timer = 1 - test_timer; + uint64_t start = timer_get_cntct(sleep_timer); + + while ((timer_get_cntct(sleep_timer) - start) < cycles) { + if (usp_cmd == NO_USERSPACE_CMD) + cpu_relax(); + else + userspace_cmd(usp_cmd); + } +} + +static void sleep_poll(enum arch_timer timer, uint64_t usec) +{ + guest_poll(timer, usec, NO_USERSPACE_CMD); +} + +static void sleep_sched_poll(enum arch_timer timer, uint64_t usec) +{ + guest_poll(timer, usec, USERSPACE_SCHED_YIELD); +} + +static void sleep_migrate(enum arch_timer timer, uint64_t usec) +{ + guest_poll(timer, usec, USERSPACE_MIGRATE_SELF); +} + +static void sleep_in_userspace(enum arch_timer timer, uint64_t usec) +{ + userspace_sleep(usec); +} + +/* + * Reset the timer state to some nice values like the counter not being close + * to the edge, and the control register masked and disabled. + */ +static void reset_timer_state(enum arch_timer timer, uint64_t cnt) +{ + set_counter(timer, cnt); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_timer_xval(enum arch_timer timer, uint64_t xval, + enum timer_view tv, irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + local_irq_disable(); + + if (reset_state) + reset_timer_state(timer, reset_cnt); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* + * The test_timer_* functions will program the timer, wait for it, and assert + * the firing of the correct IRQ. + * + * These functions don't have a timeout and return as soon as they receive an + * IRQ. They can hang (forever), so we rely on having a timeout mechanism in + * the "runner", like: tools/testing/selftests/kselftest/runner.sh. + */ + +static void test_timer_cval(enum arch_timer timer, uint64_t cval, + irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + test_timer_xval(timer, cval, TIMER_CVAL, wm, reset_state, reset_cnt); +} + +static void test_timer_tval(enum arch_timer timer, int32_t tval, + irq_wait_method_t wm, bool reset_state, + uint64_t reset_cnt) +{ + test_timer_xval(timer, (uint64_t) tval, TIMER_TVAL, wm, reset_state, + reset_cnt); +} + +static void test_xval_check_no_irq(enum arch_timer timer, uint64_t xval, + uint64_t usec, enum timer_view timer_view, + sleep_method_t guest_sleep) +{ + local_irq_disable(); + + set_xval_irq(timer, xval, CTL_ENABLE | CTL_IMASK, timer_view); + guest_sleep(timer, usec); + + local_irq_enable(); + isb(); + + /* Assume success (no IRQ) after waiting usec microseconds */ + assert_irqs_handled(0); +} + +static void test_cval_no_irq(enum arch_timer timer, uint64_t cval, + uint64_t usec, sleep_method_t wm) +{ + test_xval_check_no_irq(timer, cval, usec, TIMER_CVAL, wm); +} + +static void test_tval_no_irq(enum arch_timer timer, int32_t tval, uint64_t usec, + sleep_method_t wm) +{ + /* tval will be cast to an int32_t in test_xval_check_no_irq */ + test_xval_check_no_irq(timer, (uint64_t) tval, usec, TIMER_TVAL, wm); +} + +/* Test masking/unmasking a timer using the timer mask (not the IRQ mask). */ +static void test_timer_control_mask_then_unmask(enum arch_timer timer) +{ + reset_timer_state(timer, DEF_CNT); + set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK); + + /* Unmask the timer, and then get an IRQ. */ + local_irq_disable(); + timer_set_ctl(timer, CTL_ENABLE); + /* This method re-enables IRQs to handle the one we're looking for. */ + wait_for_non_spurious_irq(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* Check that timer control masks actually mask a timer being fired. */ +static void test_timer_control_masks(enum arch_timer timer) +{ + reset_timer_state(timer, DEF_CNT); + + /* Local IRQs are not masked at this point. */ + + set_tval_irq(timer, -1, CTL_ENABLE | CTL_IMASK); + + /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */ + sleep_poll(timer, TIMEOUT_NO_IRQ_US); + + assert_irqs_handled(0); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_fire_a_timer_multiple_times(enum arch_timer timer, + irq_wait_method_t wm, int num) +{ + int i; + + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + set_tval_irq(timer, 0, CTL_ENABLE); + + for (i = 1; i <= num; i++) { + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + /* The IRQ handler masked and disabled the timer. + * Enable and unmmask it again. + */ + timer_set_ctl(timer, CTL_ENABLE); + + assert_irqs_handled(i); + } + + local_irq_enable(); +} + +static void test_timers_fired_multiple_times(enum arch_timer timer) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) + test_fire_a_timer_multiple_times(timer, irq_wait_method[i], 10); +} + +/* + * Set a timer for tval=delta_1_ms then reprogram it to + * tval=delta_2_ms. Check that we get the timer fired. There is no + * timeout for the wait: we use the wfi instruction. + */ +static void test_reprogramming_timer(enum arch_timer timer, irq_wait_method_t wm, + int32_t delta_1_ms, int32_t delta_2_ms) +{ + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + /* Program the timer to DEF_CNT + delta_1_ms. */ + set_tval_irq(timer, msec_to_cycles(delta_1_ms), CTL_ENABLE); + + /* Reprogram the timer to DEF_CNT + delta_2_ms. */ + timer_set_tval(timer, msec_to_cycles(delta_2_ms)); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + /* The IRQ should arrive at DEF_CNT + delta_2_ms (or after). */ + GUEST_ASSERT(timer_get_cntct(timer) >= + DEF_CNT + msec_to_cycles(delta_2_ms)); + + local_irq_enable(); + assert_irqs_handled(1); +}; + +static void test_reprogram_timers(enum arch_timer timer) +{ + int i; + uint64_t base_wait = test_args.wait_ms; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + /* + * Ensure reprogramming works whether going from a + * longer time to a shorter or vice versa. + */ + test_reprogramming_timer(timer, irq_wait_method[i], 2 * base_wait, + base_wait); + test_reprogramming_timer(timer, irq_wait_method[i], base_wait, + 2 * base_wait); + } +} + +static void test_basic_functionality(enum arch_timer timer) +{ + int32_t tval = (int32_t) msec_to_cycles(test_args.wait_ms); + uint64_t cval = DEF_CNT + msec_to_cycles(test_args.wait_ms); + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + } +} + +/* + * This test checks basic timer behavior without actually firing timers, things + * like: the relationship between cval and tval, tval down-counting. + */ +static void timers_sanity_checks(enum arch_timer timer, bool use_sched) +{ + reset_timer_state(timer, DEF_CNT); + + local_irq_disable(); + + /* cval in the past */ + timer_set_cval(timer, + timer_get_cntct(timer) - + msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_tval(timer) < 0); + + /* tval in the past */ + timer_set_tval(timer, -1); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_cval(timer) < timer_get_cntct(timer)); + + /* tval larger than TVAL_MAX. This requires programming with + * timer_set_cval instead so the value is expressible + */ + timer_set_cval(timer, + timer_get_cntct(timer) + TVAL_MAX + + msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_tval(timer) <= 0); + + /* + * tval larger than 2 * TVAL_MAX. + * Twice the TVAL_MAX completely loops around the TVAL. + */ + timer_set_cval(timer, + timer_get_cntct(timer) + 2ULL * TVAL_MAX + + msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_tval(timer) <= + msec_to_cycles(test_args.wait_ms)); + + /* negative tval that rollovers from 0. */ + set_counter(timer, msec_to_cycles(1)); + timer_set_tval(timer, -1 * msec_to_cycles(test_args.wait_ms)); + if (use_sched) + userspace_migrate_vcpu(); + GUEST_ASSERT(timer_get_cval(timer) >= (CVAL_MAX - msec_to_cycles(test_args.wait_ms))); + + /* tval should keep down-counting from 0 to -1. */ + timer_set_tval(timer, 0); + sleep_poll(timer, 1); + GUEST_ASSERT(timer_get_tval(timer) < 0); + + local_irq_enable(); + + /* Mask and disable any pending timer. */ + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_timers_sanity_checks(enum arch_timer timer) +{ + timers_sanity_checks(timer, false); + /* Check how KVM saves/restores these edge-case values. */ + timers_sanity_checks(timer, true); +} + +static void test_set_cnt_after_tval_max(enum arch_timer timer, irq_wait_method_t wm) +{ + local_irq_disable(); + reset_timer_state(timer, DEF_CNT); + + set_cval_irq(timer, + (uint64_t) TVAL_MAX + + msec_to_cycles(test_args.wait_ms) / 2, CTL_ENABLE); + + set_counter(timer, TVAL_MAX); + + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* Test timers set for: cval = now + TVAL_MAX + wait_ms / 2 */ +static void test_timers_above_tval_max(enum arch_timer timer) +{ + uint64_t cval; + int i; + + /* + * Test that the system is not implementing cval in terms of + * tval. If that was the case, setting a cval to "cval = now + * + TVAL_MAX + wait_ms" would wrap to "cval = now + + * wait_ms", and the timer would fire immediately. Test that it + * doesn't. + */ + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + reset_timer_state(timer, DEF_CNT); + cval = timer_get_cntct(timer) + TVAL_MAX + + msec_to_cycles(test_args.wait_ms); + test_cval_no_irq(timer, cval, + msecs_to_usecs(test_args.wait_ms) + + TIMEOUT_NO_IRQ_US, sleep_method[i]); + } + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + /* Get the IRQ by moving the counter forward. */ + test_set_cnt_after_tval_max(timer, irq_wait_method[i]); + } +} + +/* + * Template function to be used by the test_move_counter_ahead_* tests. It + * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and + * then waits for an IRQ. + */ +static void test_set_cnt_after_xval(enum arch_timer timer, uint64_t cnt_1, + uint64_t xval, uint64_t cnt_2, + irq_wait_method_t wm, enum timer_view tv) +{ + local_irq_disable(); + + set_counter(timer, cnt_1); + timer_set_ctl(timer, CTL_IMASK); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + set_counter(timer, cnt_2); + /* This method re-enables IRQs to handle the one we're looking for. */ + wm(); + + assert_irqs_handled(1); + local_irq_enable(); +} + +/* + * Template function to be used by the test_move_counter_ahead_* tests. It + * sets the counter to cnt_1, the [c|t]val, the counter to cnt_2, and + * then waits for an IRQ. + */ +static void test_set_cnt_after_xval_no_irq(enum arch_timer timer, + uint64_t cnt_1, uint64_t xval, + uint64_t cnt_2, + sleep_method_t guest_sleep, + enum timer_view tv) +{ + local_irq_disable(); + + set_counter(timer, cnt_1); + timer_set_ctl(timer, CTL_IMASK); + + set_xval_irq(timer, xval, CTL_ENABLE, tv); + set_counter(timer, cnt_2); + guest_sleep(timer, TIMEOUT_NO_IRQ_US); + + local_irq_enable(); + isb(); + + /* Assume no IRQ after waiting TIMEOUT_NO_IRQ_US microseconds */ + assert_irqs_handled(0); + timer_set_ctl(timer, CTL_IMASK); +} + +static void test_set_cnt_after_tval(enum arch_timer timer, uint64_t cnt_1, + int32_t tval, uint64_t cnt_2, + irq_wait_method_t wm) +{ + test_set_cnt_after_xval(timer, cnt_1, tval, cnt_2, wm, TIMER_TVAL); +} + +static void test_set_cnt_after_cval(enum arch_timer timer, uint64_t cnt_1, + uint64_t cval, uint64_t cnt_2, + irq_wait_method_t wm) +{ + test_set_cnt_after_xval(timer, cnt_1, cval, cnt_2, wm, TIMER_CVAL); +} + +static void test_set_cnt_after_tval_no_irq(enum arch_timer timer, + uint64_t cnt_1, int32_t tval, + uint64_t cnt_2, sleep_method_t wm) +{ + test_set_cnt_after_xval_no_irq(timer, cnt_1, tval, cnt_2, wm, + TIMER_TVAL); +} + +static void test_set_cnt_after_cval_no_irq(enum arch_timer timer, + uint64_t cnt_1, uint64_t cval, + uint64_t cnt_2, sleep_method_t wm) +{ + test_set_cnt_after_xval_no_irq(timer, cnt_1, cval, cnt_2, wm, + TIMER_CVAL); +} + +/* Set a timer and then move the counter ahead of it. */ +static void test_move_counters_ahead_of_timers(enum arch_timer timer) +{ + int i; + int32_t tval; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_set_cnt_after_cval(timer, 0, DEF_CNT, DEF_CNT + 1, wm); + test_set_cnt_after_cval(timer, CVAL_MAX, 1, 2, wm); + + /* Move counter ahead of negative tval. */ + test_set_cnt_after_tval(timer, 0, -1, DEF_CNT + 1, wm); + test_set_cnt_after_tval(timer, 0, -1, TVAL_MAX, wm); + tval = TVAL_MAX; + test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1, + wm); + } + + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm); + } +} + +/* + * Program a timer, mask it, and then change the tval or counter to cancel it. + * Unmask it and check that nothing fires. + */ +static void test_move_counters_behind_timers(enum arch_timer timer) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + test_set_cnt_after_cval_no_irq(timer, DEF_CNT, DEF_CNT - 1, 0, + sm); + test_set_cnt_after_tval_no_irq(timer, DEF_CNT, -1, 0, sm); + } +} + +static void test_timers_in_the_past(enum arch_timer timer) +{ + int32_t tval = -1 * (int32_t) msec_to_cycles(test_args.wait_ms); + uint64_t cval; + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + /* set a timer wait_ms the past. */ + cval = DEF_CNT - msec_to_cycles(test_args.wait_ms); + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + + /* Set a timer to counter=0 (in the past) */ + test_timer_cval(timer, 0, wm, true, DEF_CNT); + + /* Set a time for tval=0 (now) */ + test_timer_tval(timer, 0, wm, true, DEF_CNT); + + /* Set a timer to as far in the past as possible */ + test_timer_tval(timer, TVAL_MIN, wm, true, DEF_CNT); + } + + /* + * Set the counter to wait_ms, and a tval to -wait_ms. There should be no + * IRQ as that tval means cval=CVAL_MAX-wait_ms. + */ + for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { + sleep_method_t sm = sleep_method[i]; + + set_counter(timer, msec_to_cycles(test_args.wait_ms)); + test_tval_no_irq(timer, tval, TIMEOUT_NO_IRQ_US, sm); + } +} + +static void test_long_timer_delays(enum arch_timer timer) +{ + int32_t tval = (int32_t) msec_to_cycles(test_args.long_wait_ms); + uint64_t cval = DEF_CNT + msec_to_cycles(test_args.long_wait_ms); + int i; + + for (i = 0; i < ARRAY_SIZE(irq_wait_method); i++) { + irq_wait_method_t wm = irq_wait_method[i]; + + test_timer_cval(timer, cval, wm, true, DEF_CNT); + test_timer_tval(timer, tval, wm, true, DEF_CNT); + } +} + +static void guest_run_iteration(enum arch_timer timer) +{ + test_basic_functionality(timer); + test_timers_sanity_checks(timer); + + test_timers_above_tval_max(timer); + test_timers_in_the_past(timer); + + test_move_counters_ahead_of_timers(timer); + test_move_counters_behind_timers(timer); + test_reprogram_timers(timer); + + test_timers_fired_multiple_times(timer); + + test_timer_control_mask_then_unmask(timer); + test_timer_control_masks(timer); +} + +static void guest_code(enum arch_timer timer) +{ + int i; + + local_irq_disable(); + + gic_init(GIC_V3, 1); + + timer_set_ctl(VIRTUAL, CTL_IMASK); + timer_set_ctl(PHYSICAL, CTL_IMASK); + + gic_irq_enable(vtimer_irq); + gic_irq_enable(ptimer_irq); + local_irq_enable(); + + for (i = 0; i < test_args.iterations; i++) { + GUEST_SYNC(i); + guest_run_iteration(timer); + } + + test_long_timer_delays(timer); + GUEST_DONE(); +} + +static uint32_t next_pcpu(void) +{ + uint32_t max = get_nprocs(); + uint32_t cur = sched_getcpu(); + uint32_t next = cur; + cpu_set_t cpuset; + + TEST_ASSERT(max > 1, "Need at least two physical cpus"); + + sched_getaffinity(0, sizeof(cpuset), &cpuset); + + do { + next = (next + 1) % CPU_SETSIZE; + } while (!CPU_ISSET(next, &cpuset)); + + return next; +} + +static void migrate_self(uint32_t new_pcpu) +{ + int ret; + cpu_set_t cpuset; + pthread_t thread; + + thread = pthread_self(); + + CPU_ZERO(&cpuset); + CPU_SET(new_pcpu, &cpuset); + + pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu); + + ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); + + TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n", + new_pcpu, ret); +} + +static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt, + enum arch_timer timer) +{ + if (timer == PHYSICAL) + vcpu_set_reg(vcpu, KVM_REG_ARM_PTIMER_CNT, cnt); + else + vcpu_set_reg(vcpu, KVM_REG_ARM_TIMER_CNT, cnt); +} + +static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) +{ + enum sync_cmd cmd = uc->args[1]; + uint64_t val = uc->args[2]; + enum arch_timer timer = uc->args[3]; + + switch (cmd) { + case SET_COUNTER_VALUE: + kvm_set_cntxct(vcpu, val, timer); + break; + case USERSPACE_USLEEP: + usleep(val); + break; + case USERSPACE_SCHED_YIELD: + sched_yield(); + break; + case USERSPACE_MIGRATE_SELF: + migrate_self(next_pcpu()); + break; + default: + break; + } +} + +static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + /* Start on CPU 0 */ + migrate_self(0); + + while (true) { + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + handle_sync(vcpu, &uc); + break; + case UCALL_DONE: + goto out; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + goto out; + default: + TEST_FAIL("Unexpected guest exit\n"); + } + } + + out: + return; +} + +static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, + KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq); + vcpu_device_attr_get(vcpu, KVM_ARM_VCPU_TIMER_CTRL, + KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq); + + sync_global_to_guest(vm, ptimer_irq); + sync_global_to_guest(vm, vtimer_irq); + + pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); +} + +static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, + enum arch_timer timer) +{ + *vm = vm_create_with_one_vcpu(vcpu, guest_code); + TEST_ASSERT(*vm, "Failed to create the test VM\n"); + + vm_init_descriptor_tables(*vm); + vm_install_exception_handler(*vm, VECTOR_IRQ_CURRENT, + guest_irq_handler); + + vcpu_init_descriptor_tables(*vcpu); + vcpu_args_set(*vcpu, 1, timer); + + test_init_timer_irq(*vm, *vcpu); + vgic_v3_setup(*vm, 1, 64); + sync_global_to_guest(*vm, test_args); +} + +static void test_print_help(char *name) +{ + pr_info("Usage: %s [-h] [-b] [-i iterations] [-l long_wait_ms] [-p] [-v]\n" + , name); + pr_info("\t-i: Number of iterations (default: %u)\n", + NR_TEST_ITERS_DEF); + pr_info("\t-b: Test both physical and virtual timers (default: true)\n"); + pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n", + LONG_WAIT_TEST_MS); + pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n", + WAIT_TEST_MS); + pr_info("\t-p: Test physical timer (default: true)\n"); + pr_info("\t-v: Test virtual timer (default: true)\n"); + pr_info("\t-h: Print this help message\n"); +} + +static bool parse_args(int argc, char *argv[]) +{ + int opt; + + while ((opt = getopt(argc, argv, "bhi:l:pvw:")) != -1) { + switch (opt) { + case 'b': + test_args.test_physical = true; + test_args.test_virtual = true; + break; + case 'i': + test_args.iterations = + atoi_positive("Number of iterations", optarg); + break; + case 'l': + test_args.long_wait_ms = + atoi_positive("Long wait time", optarg); + break; + case 'p': + test_args.test_physical = true; + test_args.test_virtual = false; + break; + case 'v': + test_args.test_virtual = true; + test_args.test_physical = false; + break; + case 'w': + test_args.wait_ms = atoi_positive("Wait time", optarg); + break; + case 'h': + default: + goto err; + } + } + + return true; + + err: + test_print_help(argv[0]); + return false; +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + if (!parse_args(argc, argv)) + exit(KSFT_SKIP); + + if (test_args.test_virtual) { + test_vm_create(&vm, &vcpu, VIRTUAL); + test_run(vm, vcpu); + kvm_vm_free(vm); + } + + if (test_args.test_physical) { + test_vm_create(&vm, &vcpu, PHYSICAL); + test_run(vm, vcpu); + kvm_vm_free(vm); + } + + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 709d7d721760..d43fb3f49050 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -32,13 +32,25 @@ static struct feature_id_reg feat_id_regs[] = { { ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 4, + 8, 1 }, { ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 4, + 8, + 1 + }, + { + ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + 16, + 1 + }, + { + ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ + ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ + 16, 1 } }; @@ -468,6 +480,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */ ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ + ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */ ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */ ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */ @@ -475,6 +488,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */ ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */ ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */ + ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */ ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */ ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */ diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c new file mode 100644 index 000000000000..943d65fc6b0b --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check that, on a GICv3 system, not configuring GICv3 correctly +// results in all of the sysregs generating an UNDEF exception. + +#include <test_util.h> +#include <kvm_util.h> +#include <processor.h> + +static volatile bool handled; + +#define __check_sr_read(r) \ + ({ \ + uint64_t val; \ + \ + handled = false; \ + dsb(sy); \ + val = read_sysreg_s(SYS_ ## r); \ + val; \ + }) + +#define __check_sr_write(r) \ + do { \ + handled = false; \ + dsb(sy); \ + write_sysreg_s(0, SYS_ ## r); \ + isb(); \ + } while(0) + +/* Fatal checks */ +#define check_sr_read(r) \ + do { \ + __check_sr_read(r); \ + __GUEST_ASSERT(handled, #r " no read trap"); \ + } while(0) + +#define check_sr_write(r) \ + do { \ + __check_sr_write(r); \ + __GUEST_ASSERT(handled, #r " no write trap"); \ + } while(0) + +#define check_sr_rw(r) \ + do { \ + check_sr_read(r); \ + check_sr_write(r); \ + } while(0) + +static void guest_code(void) +{ + uint64_t val; + + /* + * Check that we advertise that ID_AA64PFR0_EL1.GIC == 0, having + * hidden the feature at runtime without any other userspace action. + */ + __GUEST_ASSERT(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), + read_sysreg(id_aa64pfr0_el1)) == 0, + "GICv3 wrongly advertised"); + + /* + * Access all GICv3 registers, and fail if we don't get an UNDEF. + * Note that we happily access all the APxRn registers without + * checking their existance, as all we want to see is a failure. + */ + check_sr_rw(ICC_PMR_EL1); + check_sr_read(ICC_IAR0_EL1); + check_sr_write(ICC_EOIR0_EL1); + check_sr_rw(ICC_HPPIR0_EL1); + check_sr_rw(ICC_BPR0_EL1); + check_sr_rw(ICC_AP0R0_EL1); + check_sr_rw(ICC_AP0R1_EL1); + check_sr_rw(ICC_AP0R2_EL1); + check_sr_rw(ICC_AP0R3_EL1); + check_sr_rw(ICC_AP1R0_EL1); + check_sr_rw(ICC_AP1R1_EL1); + check_sr_rw(ICC_AP1R2_EL1); + check_sr_rw(ICC_AP1R3_EL1); + check_sr_write(ICC_DIR_EL1); + check_sr_read(ICC_RPR_EL1); + check_sr_write(ICC_SGI1R_EL1); + check_sr_write(ICC_ASGI1R_EL1); + check_sr_write(ICC_SGI0R_EL1); + check_sr_read(ICC_IAR1_EL1); + check_sr_write(ICC_EOIR1_EL1); + check_sr_rw(ICC_HPPIR1_EL1); + check_sr_rw(ICC_BPR1_EL1); + check_sr_rw(ICC_CTLR_EL1); + check_sr_rw(ICC_IGRPEN0_EL1); + check_sr_rw(ICC_IGRPEN1_EL1); + + /* + * ICC_SRE_EL1 may not be trappable, as ICC_SRE_EL2.Enable can + * be RAO/WI. Engage in non-fatal accesses, starting with a + * write of 0 to try and disable SRE, and let's see if it + * sticks. + */ + __check_sr_write(ICC_SRE_EL1); + if (!handled) + GUEST_PRINTF("ICC_SRE_EL1 write not trapping (OK)\n"); + + val = __check_sr_read(ICC_SRE_EL1); + if (!handled) { + __GUEST_ASSERT((val & BIT(0)), + "ICC_SRE_EL1 not trapped but ICC_SRE_EL1.SRE not set\n"); + GUEST_PRINTF("ICC_SRE_EL1 read not trapping (OK)\n"); + } + + GUEST_DONE(); +} + +static void guest_undef_handler(struct ex_regs *regs) +{ + /* Success, we've gracefully exploded! */ + handled = true; + regs->pc += 4; +} + +static void test_run_vcpu(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + do { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + printf("%s", uc.buffer); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unknown ucall %lu", uc.cmd); + } + } while (uc.cmd != UCALL_DONE); +} + +static void test_guest_no_gicv3(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* Create a VM without a GICv3 */ + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); + + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, + ESR_EC_UNKNOWN, guest_undef_handler); + + test_run_vcpu(vcpu); + + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + uint64_t pfr0; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &pfr0); + __TEST_REQUIRE(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_GIC), pfr0), + "GICv3 not supported."); + kvm_vm_free(vm); + + test_guest_no_gicv3(); + + return 0; +} diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c index d20981663831..2a3fe7914b72 100644 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c @@ -126,6 +126,7 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0), + REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0), diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c index a51dbd2a5f84..f4ac28d53747 100644 --- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c +++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c @@ -269,13 +269,12 @@ static void guest_inject(struct test_args *args, KVM_INJECT_MULTI(cmd, first_intid, num); while (irq_handled < num) { - asm volatile("wfi\n" - "msr daifclr, #2\n" - /* handle IRQ */ - "msr daifset, #2\n" - : : : "memory"); + wfi(); + local_irq_enable(); + isb(); /* handle IRQ */ + local_irq_disable(); } - asm volatile("msr daifclr, #2" : : : "memory"); + local_irq_enable(); GUEST_ASSERT_EQ(irq_handled, num); for (i = first_intid; i < num + first_intid; i++) diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h index b3e97525cb55..bf461de34785 100644 --- a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h +++ b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h @@ -79,7 +79,7 @@ static inline uint64_t timer_get_cval(enum arch_timer timer) return 0; } -static inline void timer_set_tval(enum arch_timer timer, uint32_t tval) +static inline void timer_set_tval(enum arch_timer timer, int32_t tval) { switch (timer) { case VIRTUAL: @@ -95,6 +95,22 @@ static inline void timer_set_tval(enum arch_timer timer, uint32_t tval) isb(); } +static inline int32_t timer_get_tval(enum arch_timer timer) +{ + isb(); + switch (timer) { + case VIRTUAL: + return read_sysreg(cntv_tval_el0); + case PHYSICAL: + return read_sysreg(cntp_tval_el0); + default: + GUEST_FAIL("Could not get timer %d\n", timer); + } + + /* We should not reach here */ + return 0; +} + static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl) { switch (timer) { diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index 9b20a355d81a..de977d131082 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -243,4 +243,7 @@ void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5, uint64_t arg6, struct arm_smccc_res *res); +/* Execute a Wait For Interrupt instruction. */ +void wfi(void); + #endif /* SELFTEST_KVM_PROCESSOR_H */ diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index 0ac7cc89f38c..fe4dc3693112 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -639,3 +639,9 @@ void vm_vaddr_populate_bitmap(struct kvm_vm *vm) sparsebit_set_num(vm->vpages_valid, 0, (1ULL << vm->va_bits) >> vm->page_shift); } + +/* Helper to call wfi instruction. */ +void wfi(void) +{ + asm volatile("wfi"); +} diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index f92c2fb23fcd..8e34f7fa44e9 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -961,10 +961,10 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB); KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC); KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX); KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS); -KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA), -KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB), -KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD), -KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF), +KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA); +KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB); +KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD); +KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF); KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP); KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA); KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index 69849acd95b0..618cd2442390 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -184,6 +184,33 @@ static void test_apic_id(void) kvm_vm_free(vm); } +static void test_x2apic_id(void) +{ + struct kvm_lapic_state lapic = {}; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int i; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); + + /* + * Try stuffing a modified x2APIC ID, KVM should ignore the value and + * always return the vCPU's default/readonly x2APIC ID. + */ + for (i = 0; i <= 0xff; i++) { + *(u32 *)(lapic.regs + APIC_ID) = i << 24; + *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED; + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic); + + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic); + TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24, + "x2APIC ID should be fully readonly"); + } + + kvm_vm_free(vm); +} + int main(int argc, char *argv[]) { struct xapic_vcpu x = { @@ -211,4 +238,5 @@ int main(int argc, char *argv[]) kvm_vm_free(vm); test_apic_id(); + test_x2apic_id(); } diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh index 65c9c058458d..bd13257bfdfe 100755 --- a/tools/testing/selftests/livepatch/test-livepatch.sh +++ b/tools/testing/selftests/livepatch/test-livepatch.sh @@ -139,11 +139,8 @@ load_lp $MOD_REPLACE replace=1 grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg -mods=(/sys/kernel/livepatch/*) -nmods=${#mods[@]} -if [ "$nmods" -ne 1 ]; then - die "Expecting only one moduled listed, found $nmods" -fi +loop_until 'mods=(/sys/kernel/livepatch/*); nmods=${#mods[@]}; [[ "$nmods" -eq 1 ]]' || + die "Expecting only one moduled listed, found $nmods" # These modules were disabled by the atomic replace for mod in $MOD_LIVEPATCH3 $MOD_LIVEPATCH2 $MOD_LIVEPATCH1; do diff --git a/tools/testing/selftests/lsm/lsm_list_modules_test.c b/tools/testing/selftests/lsm/lsm_list_modules_test.c index 06d24d4679a6..1cc8a977c711 100644 --- a/tools/testing/selftests/lsm/lsm_list_modules_test.c +++ b/tools/testing/selftests/lsm/lsm_list_modules_test.c @@ -128,6 +128,9 @@ TEST(correct_lsm_list_modules) case LSM_ID_EVM: name = "evm"; break; + case LSM_ID_IPE: + name = "ipe"; + break; default: name = "INVALID"; break; diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 901e0d07765b..5f2ca591c956 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -53,7 +53,9 @@ TEST_GEN_FILES += madv_populate TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_populate +ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64)) TEST_GEN_FILES += memfd_secret +endif TEST_GEN_FILES += migration TEST_GEN_FILES += mkdirty TEST_GEN_FILES += mlock-random-test @@ -104,13 +106,13 @@ TEST_GEN_FILES += $(BINARIES_64) endif else -ifneq (,$(findstring $(ARCH),powerpc)) +ifneq (,$(filter $(ARCH),arm64 powerpc)) TEST_GEN_FILES += protection_keys endif endif -ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64)) +ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390)) TEST_GEN_FILES += va_high_addr_switch TEST_GEN_FILES += virtual_address_range TEST_GEN_FILES += write_to_hugetlbfs diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index e140558e6f53..2c3a0eb6b22d 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -89,9 +89,10 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, int fd, ret = -1; int compaction_index = 0; char nr_hugepages[20] = {0}; - char init_nr_hugepages[20] = {0}; + char init_nr_hugepages[24] = {0}; - sprintf(init_nr_hugepages, "%lu", initial_nr_hugepages); + snprintf(init_nr_hugepages, sizeof(init_nr_hugepages), + "%lu", initial_nr_hugepages); /* We want to test with 80% of available memory. Else, OOM killer comes in to play */ diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 1b03bcfaefdf..5a3a9bcba640 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -22,8 +22,10 @@ #define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */ #define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */ +#ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) #define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) +#endif #define SIZE_MB(m) ((size_t)m * (1024 * 1024)) #define SIZE_KB(k) ((size_t)k * 1024) diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c index a818f010de47..bfcea5cf9a48 100644 --- a/tools/testing/selftests/mm/mseal_test.c +++ b/tools/testing/selftests/mm/mseal_test.c @@ -81,17 +81,6 @@ static int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, return sret; } -static void *sys_mmap(void *addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long fd, unsigned long offset) -{ - void *sret; - - errno = 0; - sret = (void *) syscall(__NR_mmap, addr, len, prot, - flags, fd, offset); - return sret; -} - static int sys_munmap(void *ptr, size_t size) { int sret; @@ -172,7 +161,7 @@ static void setup_single_address(int size, void **ptrOut) { void *ptr; - ptr = sys_mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ptr = mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); *ptrOut = ptr; } @@ -181,7 +170,7 @@ static void setup_single_address_rw(int size, void **ptrOut) void *ptr; unsigned long mapflags = MAP_ANONYMOUS | MAP_PRIVATE; - ptr = sys_mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0); + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0); *ptrOut = ptr; } @@ -205,7 +194,7 @@ bool seal_support(void) void *ptr; unsigned long page_size = getpagesize(); - ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (ptr == (void *) -1) return false; @@ -481,8 +470,8 @@ static void test_seal_zero_address(void) int prot; /* use mmap to change protection. */ - ptr = sys_mmap(0, size, PROT_NONE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + ptr = mmap(0, size, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); FAIL_TEST_IF_FALSE(ptr == 0); size = get_vma_size(ptr, &prot); @@ -1209,8 +1198,8 @@ static void test_seal_mmap_overwrite_prot(bool seal) } /* use mmap to change protection. */ - ret2 = sys_mmap(ptr, size, PROT_NONE, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + ret2 = mmap(ptr, size, PROT_NONE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); @@ -1240,8 +1229,8 @@ static void test_seal_mmap_expand(bool seal) } /* use mmap to expand. */ - ret2 = sys_mmap(ptr, size, PROT_READ, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + ret2 = mmap(ptr, size, PROT_READ, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); @@ -1268,8 +1257,8 @@ static void test_seal_mmap_shrink(bool seal) } /* use mmap to shrink. */ - ret2 = sys_mmap(ptr, 8 * page_size, PROT_READ, - MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); + ret2 = mmap(ptr, 8 * page_size, PROT_READ, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0); if (seal) { FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED); FAIL_TEST_IF_FALSE(errno == EPERM); @@ -1650,7 +1639,7 @@ static void test_seal_discard_ro_anon_on_filebacked(bool seal) ret = fallocate(fd, 0, 0, size); FAIL_TEST_IF_FALSE(!ret); - ptr = sys_mmap(NULL, size, PROT_READ, mapflags, fd, 0); + ptr = mmap(NULL, size, PROT_READ, mapflags, fd, 0); FAIL_TEST_IF_FALSE(ptr != MAP_FAILED); if (seal) { @@ -1680,7 +1669,7 @@ static void test_seal_discard_ro_anon_on_shared(bool seal) int ret; unsigned long mapflags = MAP_ANONYMOUS | MAP_SHARED; - ptr = sys_mmap(NULL, size, PROT_READ, mapflags, -1, 0); + ptr = mmap(NULL, size, PROT_READ, mapflags, -1, 0); FAIL_TEST_IF_FALSE(ptr != (void *)-1); if (seal) { diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h new file mode 100644 index 000000000000..580e1b0bb38e --- /dev/null +++ b/tools/testing/selftests/mm/pkey-arm64.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2023 Arm Ltd. + */ + +#ifndef _PKEYS_ARM64_H +#define _PKEYS_ARM64_H + +#include "vm_util.h" +/* for signal frame parsing */ +#include "../arm64/signal/testcases/testcases.h" + +#ifndef SYS_mprotect_key +# define SYS_mprotect_key 288 +#endif +#ifndef SYS_pkey_alloc +# define SYS_pkey_alloc 289 +# define SYS_pkey_free 290 +#endif +#define MCONTEXT_IP(mc) mc.pc +#define MCONTEXT_TRAPNO(mc) -1 + +#define PKEY_MASK 0xf + +#define POE_NONE 0x0 +#define POE_X 0x2 +#define POE_RX 0x3 +#define POE_RWX 0x7 + +#define NR_PKEYS 8 +#define NR_RESERVED_PKEYS 1 /* pkey-0 */ + +#define PKEY_ALLOW_ALL 0x77777777 + +#define PKEY_BITS_PER_PKEY 4 +#define PAGE_SIZE sysconf(_SC_PAGESIZE) +#undef HPAGE_SIZE +#define HPAGE_SIZE default_huge_page_size() + +/* 4-byte instructions * 16384 = 64K page */ +#define __page_o_noops() asm(".rept 16384 ; nop; .endr") + +static inline u64 __read_pkey_reg(void) +{ + u64 pkey_reg = 0; + + // POR_EL0 + asm volatile("mrs %0, S3_3_c10_c2_4" : "=r" (pkey_reg)); + + return pkey_reg; +} + +static inline void __write_pkey_reg(u64 pkey_reg) +{ + u64 por = pkey_reg; + + dprintf4("%s() changing %016llx to %016llx\n", + __func__, __read_pkey_reg(), pkey_reg); + + // POR_EL0 + asm volatile("msr S3_3_c10_c2_4, %0\nisb" :: "r" (por) :); + + dprintf4("%s() pkey register after changing %016llx to %016llx\n", + __func__, __read_pkey_reg(), pkey_reg); +} + +static inline int cpu_has_pkeys(void) +{ + /* No simple way to determine this */ + return 1; +} + +static inline u32 pkey_bit_position(int pkey) +{ + return pkey * PKEY_BITS_PER_PKEY; +} + +static inline int get_arch_reserved_keys(void) +{ + return NR_RESERVED_PKEYS; +} + +void expect_fault_on_read_execonly_key(void *p1, int pkey) +{ +} + +void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) +{ + return PTR_ERR_ENOTSUP; +} + +#define set_pkey_bits set_pkey_bits +static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) +{ + u32 shift = pkey_bit_position(pkey); + u64 new_val = POE_RWX; + + /* mask out bits from pkey in old value */ + reg &= ~((u64)PKEY_MASK << shift); + + if (flags & PKEY_DISABLE_ACCESS) + new_val = POE_X; + else if (flags & PKEY_DISABLE_WRITE) + new_val = POE_RX; + + /* OR in new bits for pkey */ + reg |= new_val << shift; + + return reg; +} + +#define get_pkey_bits get_pkey_bits +static inline u64 get_pkey_bits(u64 reg, int pkey) +{ + u32 shift = pkey_bit_position(pkey); + /* + * shift down the relevant bits to the lowest four, then + * mask off all the other higher bits + */ + u32 perm = (reg >> shift) & PKEY_MASK; + + if (perm == POE_X) + return PKEY_DISABLE_ACCESS; + if (perm == POE_RX) + return PKEY_DISABLE_WRITE; + return 0; +} + +static void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey) +{ + struct _aarch64_ctx *ctx = GET_UC_RESV_HEAD(uctxt); + struct poe_context *poe_ctx = + (struct poe_context *) get_header(ctx, POE_MAGIC, + sizeof(uctxt->uc_mcontext), NULL); + if (poe_ctx) + poe_ctx->por_el0 = pkey; +} + +#endif /* _PKEYS_ARM64_H */ diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index 1af3156a9db8..15608350fc01 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -91,12 +91,17 @@ void record_pkey_malloc(void *ptr, long size, int prot); #include "pkey-x86.h" #elif defined(__powerpc64__) /* arch */ #include "pkey-powerpc.h" +#elif defined(__aarch64__) /* arch */ +#include "pkey-arm64.h" #else /* arch */ #error Architecture not supported #endif /* arch */ +#ifndef PKEY_MASK #define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) +#endif +#ifndef set_pkey_bits static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) { u32 shift = pkey_bit_position(pkey); @@ -106,7 +111,9 @@ static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) reg |= (flags & PKEY_MASK) << shift; return reg; } +#endif +#ifndef get_pkey_bits static inline u64 get_pkey_bits(u64 reg, int pkey) { u32 shift = pkey_bit_position(pkey); @@ -116,6 +123,7 @@ static inline u64 get_pkey_bits(u64 reg, int pkey) */ return ((reg >> shift) & PKEY_MASK); } +#endif extern u64 shadow_pkey_reg; diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h index ae5df26104e5..3d0c0bdae5bc 100644 --- a/tools/testing/selftests/mm/pkey-powerpc.h +++ b/tools/testing/selftests/mm/pkey-powerpc.h @@ -8,7 +8,10 @@ # define SYS_pkey_free 385 #endif #define REG_IP_IDX PT_NIP +#define MCONTEXT_IP(mc) mc.gp_regs[REG_IP_IDX] +#define MCONTEXT_TRAPNO(mc) mc.gp_regs[REG_TRAPNO] #define REG_TRAPNO PT_TRAP +#define MCONTEXT_FPREGS #define gregs gp_regs #define fpregs fp_regs #define si_pkey_offset 0x20 diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h index 814758e109c0..5f28e26a2511 100644 --- a/tools/testing/selftests/mm/pkey-x86.h +++ b/tools/testing/selftests/mm/pkey-x86.h @@ -15,6 +15,10 @@ #endif +#define MCONTEXT_IP(mc) mc.gregs[REG_IP_IDX] +#define MCONTEXT_TRAPNO(mc) mc.gregs[REG_TRAPNO] +#define MCONTEXT_FPREGS + #ifndef PKEY_DISABLE_ACCESS # define PKEY_DISABLE_ACCESS 0x1 #endif diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c index eaa6d1fc5328..0789981b72b9 100644 --- a/tools/testing/selftests/mm/protection_keys.c +++ b/tools/testing/selftests/mm/protection_keys.c @@ -147,7 +147,7 @@ void abort_hooks(void) * will then fault, which makes sure that the fault code handles * execute-only memory properly. */ -#ifdef __powerpc64__ +#if defined(__powerpc64__) || defined(__aarch64__) /* This way, both 4K and 64K alignment are maintained */ __attribute__((__aligned__(65536))) #else @@ -212,7 +212,6 @@ void pkey_disable_set(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights; - u64 orig_pkey_reg = read_pkey_reg(); dprintf1("START->%s(%d, 0x%x)\n", __func__, pkey, flags); @@ -242,8 +241,6 @@ void pkey_disable_set(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__, pkey, read_pkey_reg()); - if (flags) - pkey_assert(read_pkey_reg() >= orig_pkey_reg); dprintf1("END<---%s(%d, 0x%x)\n", __func__, pkey, flags); } @@ -253,7 +250,6 @@ void pkey_disable_clear(int pkey, int flags) unsigned long syscall_flags = 0; int ret; int pkey_rights = hw_pkey_get(pkey, syscall_flags); - u64 orig_pkey_reg = read_pkey_reg(); pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); @@ -273,8 +269,6 @@ void pkey_disable_clear(int pkey, int flags) dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__, pkey, read_pkey_reg()); - if (flags) - assert(read_pkey_reg() <= orig_pkey_reg); } void pkey_write_allow(int pkey) @@ -314,7 +308,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) ucontext_t *uctxt = vucontext; int trapno; unsigned long ip; +#ifdef MCONTEXT_FPREGS char *fpregs; +#endif #if defined(__i386__) || defined(__x86_64__) /* arch */ u32 *pkey_reg_ptr; int pkey_reg_offset; @@ -328,9 +324,11 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg); - trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; - ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; + trapno = MCONTEXT_TRAPNO(uctxt->uc_mcontext); + ip = MCONTEXT_IP(uctxt->uc_mcontext); +#ifdef MCONTEXT_FPREGS fpregs = (char *) uctxt->uc_mcontext.fpregs; +#endif dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n", __func__, trapno, ip, si_code_str(si->si_code), @@ -359,7 +357,9 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) #endif /* arch */ dprintf1("siginfo: %p\n", si); +#ifdef MCONTEXT_FPREGS dprintf1(" fpregs: %p\n", fpregs); +#endif if ((si->si_code == SEGV_MAPERR) || (si->si_code == SEGV_ACCERR) || @@ -389,6 +389,8 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) #elif defined(__powerpc64__) /* arch */ /* restore access and let the faulting instruction continue */ pkey_access_allow(siginfo_pkey); +#elif defined(__aarch64__) + aarch64_write_signal_pkey(uctxt, PKEY_ALLOW_ALL); #endif /* arch */ pkey_faults++; dprintf1("<<<<==================================================\n"); @@ -902,7 +904,9 @@ void expected_pkey_fault(int pkey) * test program continue. We now have to restore it. */ if (__read_pkey_reg() != 0) -#else /* arch */ +#elif defined(__aarch64__) + if (__read_pkey_reg() != PKEY_ALLOW_ALL) +#else if (__read_pkey_reg() != shadow_pkey_reg) #endif /* arch */ pkey_assert(0); @@ -1492,6 +1496,11 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) lots_o_noops_around_write(&scratch); do_not_expect_pkey_fault("executing on PROT_EXEC memory"); expect_fault_on_read_execonly_key(p1, pkey); + + // Reset back to PROT_EXEC | PROT_READ for architectures that support + // non-PKEY execute-only permissions. + ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC | PROT_READ, (u64)pkey); + pkey_assert(!ret); } void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) @@ -1665,6 +1674,84 @@ void test_ptrace_modifies_pkru(int *ptr, u16 pkey) } #endif +#if defined(__aarch64__) +void test_ptrace_modifies_pkru(int *ptr, u16 pkey) +{ + pid_t child; + int status, ret; + struct iovec iov; + u64 trace_pkey; + /* Just a random pkey value.. */ + u64 new_pkey = (POE_X << PKEY_BITS_PER_PKEY * 2) | + (POE_NONE << PKEY_BITS_PER_PKEY) | + POE_RWX; + + child = fork(); + pkey_assert(child >= 0); + dprintf3("[%d] fork() ret: %d\n", getpid(), child); + if (!child) { + ptrace(PTRACE_TRACEME, 0, 0, 0); + + /* Stop and allow the tracer to modify PKRU directly */ + raise(SIGSTOP); + + /* + * need __read_pkey_reg() version so we do not do shadow_pkey_reg + * checking + */ + if (__read_pkey_reg() != new_pkey) + exit(1); + + raise(SIGSTOP); + + exit(0); + } + + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + iov.iov_base = &trace_pkey; + iov.iov_len = 8; + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + pkey_assert(trace_pkey == read_pkey_reg()); + + trace_pkey = new_pkey; + + ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + + /* Test that the modification is visible in ptrace before any execution */ + memset(&trace_pkey, 0, sizeof(trace_pkey)); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + pkey_assert(trace_pkey == new_pkey); + + /* Execute the tracee */ + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + + /* Test that the tracee saw the PKRU value change */ + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP); + + /* Test that the modification is visible in ptrace after execution */ + memset(&trace_pkey, 0, sizeof(trace_pkey)); + ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov); + pkey_assert(ret == 0); + pkey_assert(trace_pkey == new_pkey); + + ret = ptrace(PTRACE_CONT, child, 0, 0); + pkey_assert(ret == 0); + pkey_assert(child == waitpid(child, &status, 0)); + dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status); + pkey_assert(WIFEXITED(status)); + pkey_assert(WEXITSTATUS(status) == 0); +} +#endif + void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; @@ -1700,7 +1787,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_pkey_syscalls_bad_args, test_pkey_alloc_exhaust, test_pkey_alloc_free_attach_pkey0, -#if defined(__i386__) || defined(__x86_64__) +#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) test_ptrace_modifies_pkru, #endif }; diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 03ac4f2e1cce..36045edb10de 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -374,8 +374,11 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +if [ -x ./memfd_secret ] +then (echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix CATEGORY="memfd_secret" run_test ./memfd_secret +fi # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 CATEGORY="ksm" run_test ./ksm_tests -H -s 100 diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c index 7aa1366063e4..d9f8ba8d5050 100644 --- a/tools/testing/selftests/mm/seal_elf.c +++ b/tools/testing/selftests/mm/seal_elf.c @@ -30,17 +30,6 @@ static int sys_mseal(void *start, size_t len) return sret; } -static void *sys_mmap(void *addr, unsigned long len, unsigned long prot, - unsigned long flags, unsigned long fd, unsigned long offset) -{ - void *sret; - - errno = 0; - sret = (void *) syscall(__NR_mmap, addr, len, prot, - flags, fd, offset); - return sret; -} - static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot) { int sret; @@ -56,7 +45,7 @@ static bool seal_support(void) void *ptr; unsigned long page_size = getpagesize(); - ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + ptr = mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); if (ptr == (void *) -1) return false; diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 666ab7d9390b..1c04c780db66 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -17,6 +17,7 @@ ipv6_flowlabel ipv6_flowlabel_mgr log.txt msg_zerocopy +ncdevmem nettest psock_fanout psock_snd @@ -34,6 +35,7 @@ scm_pidfd scm_rights sk_bind_sendto_listen sk_connect_zero_addr +sk_so_peek_off socket so_incoming_cpu so_netns_cookie diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 8eaffd7a641c..649f1fe0dc46 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -56,7 +56,7 @@ TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh TEST_PROGS += netns-sysctl.sh -TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh +TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite @@ -80,12 +80,14 @@ TEST_PROGS += io_uring_zerocopy_tx.sh TEST_GEN_FILES += bind_bhash TEST_GEN_PROGS += sk_bind_sendto_listen TEST_GEN_PROGS += sk_connect_zero_addr +TEST_GEN_PROGS += sk_so_peek_off TEST_PROGS += test_ingress_egress_chaining.sh TEST_GEN_PROGS += so_incoming_cpu TEST_PROGS += sctp_vrf.sh TEST_GEN_FILES += sctp_hello TEST_GEN_FILES += ip_local_port_range -TEST_GEN_FILES += bind_wildcard +TEST_GEN_PROGS += bind_wildcard +TEST_GEN_PROGS += bind_timewait TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh @@ -95,6 +97,11 @@ TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh TEST_PROGS += bpf_offload.py +# YNL files, must be before "include ..lib.mk" +EXTRA_CLEAN += $(OUTPUT)/libynl.a +YNL_GEN_FILES := ncdevmem +TEST_GEN_FILES += $(YNL_GEN_FILES) + TEST_FILES := settings TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh @@ -104,6 +111,10 @@ TEST_INCLUDES := forwarding/lib.sh include ../lib.mk +# YNL build +YNL_GENS := netdev +include ynl.mk + $(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 16d0c172eaeb..3ed3882a93b8 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -209,7 +209,7 @@ static void __sendpair(struct __test_metadata *_metadata, static void __recvpair(struct __test_metadata *_metadata, FIXTURE_DATA(msg_oob) *self, - const void *expected_buf, int expected_len, + const char *expected_buf, int expected_len, int buf_len, int flags) { int i, ret[2], recv_errno[2], expected_errno = 0; @@ -525,6 +525,29 @@ TEST_F(msg_oob, ex_oob_drop_2) } } +TEST_F(msg_oob, ex_oob_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("", -EAGAIN, 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); +} + TEST_F(msg_oob, ex_oob_ahead_break) { sendpair("hello", 5, MSG_OOB); diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 386ebd829df5..899dbad0104b 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -4304,14 +4304,7 @@ elif [ "$TESTS" = "ipv6" ]; then TESTS="$TESTS_IPV6" fi -# nettest can be run from PATH or from same directory as this selftest -if ! which nettest >/dev/null; then - PATH=$PWD:$PATH - if ! which nettest >/dev/null; then - echo "'nettest' command not found; skipping tests" - exit $ksft_skip - fi -fi +check_gen_prog "nettest" declare -i nfail=0 declare -i nsuccess=0 diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index ac0b2c6a5761..77c83d9508d3 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -78,7 +78,12 @@ log_test() else ret=1 nfail=$((nfail+1)) - printf "TEST: %-60s [FAIL]\n" "${msg}" + if [[ $rc -eq $ksft_skip ]]; then + printf "TEST: %-60s [SKIP]\n" "${msg}" + else + printf "TEST: %-60s [FAIL]\n" "${msg}" + fi + if [ "$VERBOSE" = "1" ]; then echo " rc=$rc, expected $expected" fi @@ -923,6 +928,29 @@ ipv6_grp_fcnal() ipv6_grp_refs log_test $? 0 "Nexthop group replace refcounts" + + # + # 16-bit weights. + # + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1" + run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1" + run_cmd "$IP nexthop add id 66 dev veth1" + + run_cmd "$IP nexthop add id 103 group 62,1000" + if [[ $? == 0 ]]; then + local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535" + run_cmd "$IP nexthop replace $GRP" + check_nexthop "id 103" "$GRP" + rc=$? + else + rc=$ksft_skip + fi + + $IP nexthop flush >/dev/null 2>&1 + + log_test $rc 0 "16-bit weights" } ipv6_res_grp_fcnal() @@ -987,6 +1015,31 @@ ipv6_res_grp_fcnal() check_nexthop_bucket "list id 102" \ "id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62" log_test $? 0 "Nexthop buckets updated after replace - nECMP" + + # + # 16-bit weights. + # + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1" + run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1" + run_cmd "$IP nexthop add id 66 dev veth1" + + run_cmd "$IP nexthop add id 103 group 62,1000 type resilient buckets 32" + if [[ $? == 0 ]]; then + local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535 $(: + )type resilient buckets 32 idle_timer 0 $(: + )unbalanced_timer 0" + run_cmd "$IP nexthop replace $GRP" + check_nexthop "id 103" "$GRP unbalanced_time 0" + rc=$? + else + rc=$ksft_skip + fi + + $IP nexthop flush >/dev/null 2>&1 + + log_test $rc 0 "16-bit weights" } ipv6_fcnal_runtime() diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 7c01f58a20de..1d58b3b87465 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -35,18 +35,13 @@ log_test() local expected=$2 local msg="$3" - $IP rule show | grep -q l3mdev - if [ $? -eq 0 ]; then - msg="$msg (VRF)" - fi - if [ ${rc} -eq ${expected} ]; then nsuccess=$((nsuccess+1)) - printf "\n TEST: %-60s [ OK ]\n" "${msg}" + printf " TEST: %-60s [ OK ]\n" "${msg}" else ret=1 nfail=$((nfail+1)) - printf "\n TEST: %-60s [FAIL]\n" "${msg}" + printf " TEST: %-60s [FAIL]\n" "${msg}" if [ "${PAUSE_ON_FAIL}" = "yes" ]; then echo echo "hit enter to continue, 'q' to quit" @@ -56,39 +51,6 @@ log_test() fi } -log_section() -{ - echo - echo "######################################################################" - echo "TEST SECTION: $*" - echo "######################################################################" -} - -check_nettest() -{ - if which nettest > /dev/null 2>&1; then - return 0 - fi - - # Add the selftest directory to PATH if not already done - if [ "${SELFTEST_PATH}" = "" ]; then - SELFTEST_PATH="$(dirname $0)" - PATH="${PATH}:${SELFTEST_PATH}" - - # Now retry with the new path - if which nettest > /dev/null 2>&1; then - return 0 - fi - - if [ "${ret}" -eq 0 ]; then - ret="${ksft_skip}" - fi - echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')" - fi - - return 1 -} - setup() { set -e @@ -187,12 +149,17 @@ fib_rule6_test_match_n_redirect() { local match="$1" local getmatch="$2" - local description="$3" + local getnomatch="$3" + local description="$4" + local nomatch_description="$5" $IP -6 rule add $match table $RTABLE $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE" log_test $? 0 "rule6 check: $description" + $IP -6 route get $GW_IP6 $getnomatch 2>&1 | grep -q "table $RTABLE" + log_test $? 1 "rule6 check: $nomatch_description" + fib_rule6_del_by_pref "$match" log_test $? 0 "rule6 del by pref: $description" } @@ -213,18 +180,27 @@ fib_rule6_test_reject() fib_rule6_test() { + local ext_name=$1; shift + local getnomatch local getmatch local match local cnt + echo + echo "IPv6 FIB rule tests $ext_name" + # setup the fib rule redirect route $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink match="oif $DEV" - fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table" + getnomatch="oif lo" + fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "oif redirect to table" "oif no redirect to table" match="from $SRC_IP6 iif $DEV" - fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table" + getnomatch="from $SRC_IP6 iif lo" + fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "iif redirect to table" "iif no redirect to table" # Reject dsfield (tos) options which have ECN bits set for cnt in $(seq 1 3); do @@ -238,44 +214,89 @@ fib_rule6_test() # Using option 'tos' instead of 'dsfield' as old iproute2 # versions don't support 'dsfield' in ip rule show. getmatch="tos $cnt" + getnomatch="tos 0x20" fib_rule6_test_match_n_redirect "$match" "$getmatch" \ - "$getmatch redirect to table" + "$getnomatch" "$getmatch redirect to table" \ + "$getnomatch no redirect to table" + done + + # Re-test TOS matching, but with input routes since they are handled + # differently from output routes. + match="tos 0x10" + for cnt in "0x10" "0x11" "0x12" "0x13"; do + getmatch="tos $cnt" + getnomatch="tos 0x20" + fib_rule6_test_match_n_redirect "$match" \ + "from $SRC_IP6 iif $DEV $getmatch" \ + "from $SRC_IP6 iif $DEV $getnomatch" \ + "iif $getmatch redirect to table" \ + "iif $getnomatch no redirect to table" done match="fwmark 0x64" getmatch="mark 0x64" - fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + getnomatch="mark 0x63" + fib_rule6_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \ + "fwmark redirect to table" "fwmark no redirect to table" fib_check_iproute_support "uidrange" "uid" if [ $? -eq 0 ]; then match="uidrange 100-100" getmatch="uid 100" - fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + getnomatch="uid 101" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "uid redirect to table" \ + "uid no redirect to table" fi fib_check_iproute_support "sport" "sport" if [ $? -eq 0 ]; then match="sport 666 dport 777" - fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + getnomatch="sport 667 dport 778" + fib_rule6_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "sport and dport redirect to table" \ + "sport and dport no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto tcp" - fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match" + getnomatch="ipproto udp" + fib_rule6_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto tcp match" "ipproto udp no match" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto ipv6-icmp" - fib_rule6_test_match_n_redirect "$match" "$match" "ipproto ipv6-icmp match" + getnomatch="ipproto tcp" + fib_rule6_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto ipv6-icmp match" \ + "ipproto ipv6-tcp no match" + fi + + fib_check_iproute_support "dscp" "tos" + if [ $? -eq 0 ]; then + match="dscp 0x3f" + getmatch="tos 0xfc" + getnomatch="tos 0xf4" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp redirect to table" \ + "dscp no redirect to table" + + match="dscp 0x3f" + getmatch="from $SRC_IP6 iif $DEV tos 0xfc" + getnomatch="from $SRC_IP6 iif $DEV tos 0xf4" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp redirect to table" \ + "iif dscp no redirect to table" fi } fib_rule6_vrf_test() { setup_vrf - fib_rule6_test + fib_rule6_test "- with VRF" cleanup_vrf } @@ -285,10 +306,8 @@ fib_rule6_connect_test() { local dsfield - if ! check_nettest; then - echo "SKIP: Could not run test without nettest tool" - return - fi + echo + echo "IPv6 FIB rule connect tests" setup_peer $IP -6 rule add dsfield 0x04 table $RTABLE_PEER @@ -306,7 +325,45 @@ fib_rule6_connect_test() log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})" done + # Check that UDP and TCP connections fail when using a DS Field that + # does not match the previously configured FIB rule. + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D \ + -Q 0x20 -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dsfield udp no connect (dsfield 0x20)" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0x20 \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dsfield tcp no connect (dsfield 0x20)" + $IP -6 rule del dsfield 0x04 table $RTABLE_PEER + + ip rule help 2>&1 | grep -q dscp + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 iprule too old, missing dscp match" + cleanup_peer + return + fi + + $IP -6 rule add dscp 0x3f table $RTABLE_PEER + + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xfc \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dscp udp connect" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xfc \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dscp tcp connect" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xf4 \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dscp udp no connect" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xf4 \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dscp tcp no connect" + + $IP -6 rule del dscp 0x3f table $RTABLE_PEER + cleanup_peer } @@ -326,12 +383,17 @@ fib_rule4_test_match_n_redirect() { local match="$1" local getmatch="$2" - local description="$3" + local getnomatch="$3" + local description="$4" + local nomatch_description="$5" $IP rule add $match table $RTABLE $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE" log_test $? 0 "rule4 check: $description" + $IP route get $GW_IP4 $getnomatch 2>&1 | grep -q "table $RTABLE" + log_test $? 1 "rule4 check: $nomatch_description" + fib_rule4_del_by_pref "$match" log_test $? 0 "rule4 del by pref: $description" } @@ -352,23 +414,31 @@ fib_rule4_test_reject() fib_rule4_test() { + local ext_name=$1; shift + local getnomatch local getmatch local match local cnt + echo + echo "IPv4 FIB rule tests $ext_name" + # setup the fib rule redirect route $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink match="oif $DEV" - fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table" + getnomatch="oif lo" + fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "oif redirect to table" "oif no redirect to table" - # need enable forwarding and disable rp_filter temporarily as all the - # addresses are in the same subnet and egress device == ingress device. + # Enable forwarding and disable rp_filter as all the addresses are in + # the same subnet and egress device == ingress device. ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1 ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0 match="from $SRC_IP iif $DEV" - fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table" - ip netns exec $testns sysctl -qw net.ipv4.ip_forward=0 + getnomatch="from $SRC_IP iif lo" + fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "iif redirect to table" "iif no redirect to table" # Reject dsfield (tos) options which have ECN bits set for cnt in $(seq 1 3); do @@ -382,44 +452,90 @@ fib_rule4_test() # Using option 'tos' instead of 'dsfield' as old iproute2 # versions don't support 'dsfield' in ip rule show. getmatch="tos $cnt" + getnomatch="tos 0x20" fib_rule4_test_match_n_redirect "$match" "$getmatch" \ - "$getmatch redirect to table" + "$getnomatch" "$getmatch redirect to table" \ + "$getnomatch no redirect to table" + done + + # Re-test TOS matching, but with input routes since they are handled + # differently from output routes. + match="tos 0x10" + for cnt in "0x10" "0x11" "0x12" "0x13"; do + getmatch="tos $cnt" + getnomatch="tos 0x20" + fib_rule4_test_match_n_redirect "$match" \ + "from $SRC_IP iif $DEV $getmatch" \ + "from $SRC_IP iif $DEV $getnomatch" \ + "iif $getmatch redirect to table" \ + "iif $getnomatch no redirect to table" done match="fwmark 0x64" getmatch="mark 0x64" - fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + getnomatch="mark 0x63" + fib_rule4_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \ + "fwmark redirect to table" "fwmark no redirect to table" fib_check_iproute_support "uidrange" "uid" if [ $? -eq 0 ]; then match="uidrange 100-100" getmatch="uid 100" - fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + getnomatch="uid 101" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "uid redirect to table" \ + "uid no redirect to table" fi fib_check_iproute_support "sport" "sport" if [ $? -eq 0 ]; then match="sport 666 dport 777" - fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + getnomatch="sport 667 dport 778" + fib_rule4_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "sport and dport redirect to table" \ + "sport and dport no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto tcp" - fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match" + getnomatch="ipproto udp" + fib_rule4_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto tcp match" \ + "ipproto udp no match" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto icmp" - fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match" + getnomatch="ipproto tcp" + fib_rule4_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto icmp match" \ + "ipproto tcp no match" + fi + + fib_check_iproute_support "dscp" "tos" + if [ $? -eq 0 ]; then + match="dscp 0x3f" + getmatch="tos 0xfc" + getnomatch="tos 0xf4" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp redirect to table" \ + "dscp no redirect to table" + + match="dscp 0x3f" + getmatch="from $SRC_IP iif $DEV tos 0xfc" + getnomatch="from $SRC_IP iif $DEV tos 0xf4" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp redirect to table" \ + "iif dscp no redirect to table" fi } fib_rule4_vrf_test() { setup_vrf - fib_rule4_test + fib_rule4_test "- with VRF" cleanup_vrf } @@ -429,10 +545,8 @@ fib_rule4_connect_test() { local dsfield - if ! check_nettest; then - echo "SKIP: Could not run test without nettest tool" - return - fi + echo + echo "IPv4 FIB rule connect tests" setup_peer $IP -4 rule add dsfield 0x04 table $RTABLE_PEER @@ -450,16 +564,46 @@ fib_rule4_connect_test() log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})" done + # Check that UDP and TCP connections fail when using a DS Field that + # does not match the previously configured FIB rule. + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0x20 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dsfield udp no connect (dsfield 0x20)" + + nettest -q -B -t 5 -N $testns -O $peerns -Q 0x20 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dsfield tcp no connect (dsfield 0x20)" + $IP -4 rule del dsfield 0x04 table $RTABLE_PEER - cleanup_peer -} -run_fibrule_tests() -{ - log_section "IPv4 fib rule" - fib_rule4_test - log_section "IPv6 fib rule" - fib_rule6_test + ip rule help 2>&1 | grep -q dscp + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 iprule too old, missing dscp match" + cleanup_peer + return + fi + + $IP -4 rule add dscp 0x3f table $RTABLE_PEER + + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xfc \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dscp udp connect" + + nettest -q -B -t 5 -N $testns -O $peerns -Q 0xfc \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dscp tcp connect" + + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xf4 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dscp udp no connect" + + nettest -q -B -t 5 -N $testns -O $peerns -Q 0xf4 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dscp tcp no connect" + + $IP -4 rule del dscp 0x3f table $RTABLE_PEER + + cleanup_peer } ################################################################################ # usage @@ -495,6 +639,8 @@ if [ ! -x "$(command -v ip)" ]; then exit $ksft_skip fi +check_gen_prog "nettest" + # start clean cleanup &> /dev/null setup diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README index 7fdb6a9ca543..a652429bfd53 100644 --- a/tools/testing/selftests/net/forwarding/README +++ b/tools/testing/selftests/net/forwarding/README @@ -6,7 +6,7 @@ to easily create and test complex environments. Unfortunately, these namespaces can not be used with actual switching ASICs, as their ports can not be migrated to other network namespaces -(NETIF_F_NETNS_LOCAL) and most of them probably do not support the +(dev->netns_local) and most of them probably do not support the L1-separation provided by namespaces. However, a similar kind of flexibility can be achieved by using VRFs and diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 64bd00fe9a4f..90f8a244ea90 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -142,6 +142,58 @@ extern_learn() bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null } +other_tpid() +{ + local mac=de:ad:be:ef:13:37 + + # Test that packets with TPID 802.1ad VID 3 + TPID 802.1Q VID 5 are + # classified as untagged by a bridge with vlan_protocol 802.1Q, and + # are processed in the PVID of the ingress port (here 1). Not VID 3, + # and not VID 5. + RET=0 + + tc qdisc add dev $h2 clsact + tc filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + ip link set $h2 promisc on + ethtool -K $h2 rx-vlan-filter off rx-vlan-stag-filter off + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + # Match on 'self' addresses as well, for those drivers which + # do not push their learned addresses to the bridge software + # database + bridge -j fdb show $swp1 | \ + jq -e ".[] | select(.mac == \"$(mac_get $h1)\") | select(.vlan == 1)" &> /dev/null + check_err $? "FDB entry was not learned when it should" + + log_test "FDB entry in PVID for VLAN-tagged with other TPID" + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was not forwarded when it should" + log_test "Reception of VLAN with other TPID as untagged" + + bridge vlan del dev $swp1 vid 1 + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was forwarded when should not" + log_test "Reception of VLAN with other TPID as untagged (no PVID)" + + bridge vlan add dev $swp1 vid 1 pvid untagged + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh index 1783c10215e5..7d531f7091e6 100755 --- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh @@ -224,10 +224,10 @@ send_dst_ipv6() send_flowlabel() { # Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec v$h1 \ - $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1 - done + ip vrf exec v$h1 sh -c \ + "for _ in {1..16384}; do \ + $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + done" } send_src_udp6() diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh index 9788bd0f6e8b..dda11a4a9450 100755 --- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh @@ -319,10 +319,10 @@ send_dst_ipv6() send_flowlabel() { # Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec v$h1 \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 - done + ip vrf exec v$h1 sh -c \ + "for _ in {1..16384}; do \ + $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + done" } send_src_udp6() diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh index 2ab9eaaa5532..e28b4a079e52 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh @@ -321,10 +321,10 @@ send_dst_ipv6() send_flowlabel() { # Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec v$h1 \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 - done + ip vrf exec v$h1 sh -c \ + "for _ in {1..16384}; do \ + $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \ + done" } send_src_udp6() diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index ff96bb7535ff..c992e385159c 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -500,6 +500,11 @@ check_err_fail() fi } +xfail() +{ + FAIL_TO_XFAIL=yes "$@" +} + xfail_on_slow() { if [[ $KSFT_MACHINE_SLOW = yes ]]; then @@ -509,6 +514,13 @@ xfail_on_slow() fi } +omit_on_slow() +{ + if [[ $KSFT_MACHINE_SLOW != yes ]]; then + "$@" + fi +} + xfail_on_veth() { local dev=$1; shift @@ -1113,6 +1125,39 @@ mac_get() ip -j link show dev $if_name | jq -r '.[]["address"]' } +ether_addr_to_u64() +{ + local addr="$1" + local order="$((1 << 40))" + local val=0 + local byte + + addr="${addr//:/ }" + + for byte in $addr; do + byte="0x$byte" + val=$((val + order * byte)) + order=$((order >> 8)) + done + + printf "0x%x" $val +} + +u64_to_ether_addr() +{ + local val=$1 + local byte + local i + + for ((i = 40; i >= 0; i -= 8)); do + byte=$(((val & (0xff << i)) >> i)) + printf "%02x" $byte + if [ $i -ne 0 ]; then + printf ":" + fi + done +} + ipv6_lladdr_get() { local if_name=$1 @@ -2229,3 +2274,22 @@ absval() echo $((v > 0 ? v : -v)) } + +has_unicast_flt() +{ + local dev=$1; shift + local mac_addr=$(mac_get $dev) + local tmp=$(ether_addr_to_u64 $mac_addr) + local promisc + + ip link set $dev up + ip link add link $dev name macvlan-tmp type macvlan mode private + ip link set macvlan-tmp address $(u64_to_ether_addr $((tmp + 1))) + ip link set macvlan-tmp up + + promisc=$(ip -j -d link show dev $dev | jq -r '.[].promiscuity') + + ip link del macvlan-tmp + + [[ $promisc == 1 ]] && echo "no" || echo "yes" +} diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 4b364cdf3ef0..c35548767756 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -1,7 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="standalone bridge" +ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \ + vlan_over_vlan_unaware_bridged_port vlan_over_vlan_aware_bridged_port \ + vlan_over_vlan_unaware_bridge vlan_over_vlan_aware_bridge" NUM_NETIFS=2 PING_COUNT=1 REQUIRE_MTOOLS=yes @@ -37,9 +39,68 @@ UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05" UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06" UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07" -NON_IP_MC="01:02:03:04:05:06" -NON_IP_PKT="00:04 48:45:4c:4f" -BC="ff:ff:ff:ff:ff:ff" +PTP_1588_L2_SYNC=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00" +PTP_1588_L2_FOLLOW_UP=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c5 f1 17 97 ed f0" +PTP_1588_L2_PDELAY_REQ=" \ +01:80:c2:00:00:0e 00:00:de:ad:be:ef 88:f7 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 06 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00" +PTP_1588_IPV4_SYNC=" \ +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9a 40 00 01 11 cb 88 c0 00 02 01 e0 00 \ +01 81 01 3f 01 3f 00 34 a3 c8 00 02 00 2c 00 00 \ +02 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" +PTP_1588_IPV4_FOLLOW_UP=" +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9b 40 00 01 11 cb 87 c0 00 02 01 e0 00 \ +01 81 01 40 01 40 00 34 a3 c8 08 02 00 2c 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 02 00 00 00 66 83 \ +c6 0f 1d 9a 61 87" +PTP_1588_IPV4_PDELAY_REQ=" \ +01:00:5e:00:00:6b 00:00:de:ad:be:ef 08:00 45 00 \ +00 52 35 a9 40 00 01 11 a1 85 c0 00 02 01 e0 00 \ +00 6b 01 3f 01 3f 00 3e a2 bc 02 02 00 36 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 01 05 7f 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" +PTP_1588_IPV6_SYNC=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 06 \ +7c 2f 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 3f 01 3f 00 36 2e 92 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00" +PTP_1588_IPV6_FOLLOW_UP=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 0a \ +00 bc 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 40 01 40 00 36 2e 92 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c6 2a 32 09 bd 74 00 00" +PTP_1588_IPV6_PDELAY_REQ=" \ +33:33:00:00:00:6b 00:00:de:ad:be:ef 86:dd 60 0c \ +5c fd 00 40 11 01 fe 80 00 00 00 00 00 00 3c 37 \ +63 ff fe cf 17 0e ff 02 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 6b 01 3f 01 3f 00 40 b4 54 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 01 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" # Disable promisc to ensure we don't receive unknown MAC DA packets export TCPDUMP_EXTRA_FLAGS="-pl" @@ -47,13 +108,15 @@ export TCPDUMP_EXTRA_FLAGS="-pl" h1=${NETIFS[p1]} h2=${NETIFS[p2]} -send_non_ip() +send_raw() { - local if_name=$1 - local smac=$2 - local dmac=$3 + local if_name=$1; shift + local pkt="$1"; shift + local smac=$(mac_get $if_name) + + pkt="${pkt/00:00:de:ad:be:ef/$smac}" - $MZ -q $if_name "$dmac $smac $NON_IP_PKT" + $MZ -q $if_name "$pkt" } send_uc_ipv4() @@ -68,10 +131,11 @@ send_uc_ipv4() check_rcv() { - local if_name=$1 - local type=$2 - local pattern=$3 - local should_receive=$4 + local if_name=$1; shift + local type=$1; shift + local pattern=$1; shift + local should_receive=$1; shift + local test_name="$1"; shift local should_fail= [ $should_receive = true ] && should_fail=0 || should_fail=1 @@ -81,7 +145,7 @@ check_rcv() check_err_fail "$should_fail" "$?" "reception" - log_test "$if_name: $type" + log_test "$test_name: $type" } mc_route_prepare() @@ -104,44 +168,78 @@ mc_route_destroy() run_test() { - local rcv_if_name=$1 - local smac=$(mac_get $h1) + local send_if_name=$1; shift + local rcv_if_name=$1; shift + local skip_ptp=$1; shift + local no_unicast_flt=$1; shift + local test_name="$1"; shift + local smac=$(mac_get $send_if_name) local rcv_dmac=$(mac_get $rcv_if_name) + local should_receive tcpdump_start $rcv_if_name - mc_route_prepare $h1 + mc_route_prepare $send_if_name mc_route_prepare $rcv_if_name - send_uc_ipv4 $h1 $rcv_dmac - send_uc_ipv4 $h1 $MACVLAN_ADDR - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1 + send_uc_ipv4 $send_if_name $rcv_dmac + send_uc_ipv4 $send_if_name $MACVLAN_ADDR + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR1 ip link set dev $rcv_if_name promisc on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR2 ip link set dev $rcv_if_name promisc off mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR - mc_send $h1 $JOINED_IPV4_MC_ADDR + mc_send $send_if_name $JOINED_IPV4_MC_ADDR mc_leave mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR - mc_send $h1 $JOINED_IPV6_MC_ADDR + mc_send $send_if_name $JOINED_IPV6_MC_ADDR mc_leave - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR1 ip link set dev $rcv_if_name allmulticast on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR3 ip link set dev $rcv_if_name allmulticast off mc_route_destroy $rcv_if_name - mc_route_destroy $h1 + mc_route_destroy $send_if_name + + if [ $skip_ptp = false ]; then + ip maddress add 01:1b:19:00:00:00 dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_SYNC" + send_raw $send_if_name "$PTP_1588_L2_FOLLOW_UP" + ip maddress del 01:1b:19:00:00:00 dev $rcv_if_name + + ip maddress add 01:80:c2:00:00:0e dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_PDELAY_REQ" + ip maddress del 01:80:c2:00:00:0e dev $rcv_if_name + + mc_join $rcv_if_name 224.0.1.129 + send_raw $send_if_name "$PTP_1588_IPV4_SYNC" + send_raw $send_if_name "$PTP_1588_IPV4_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name 224.0.0.107 + send_raw $send_if_name "$PTP_1588_IPV4_PDELAY_REQ" + mc_leave + + mc_join $rcv_if_name ff0e::181 + send_raw $send_if_name "$PTP_1588_IPV6_SYNC" + send_raw $send_if_name "$PTP_1588_IPV6_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name ff02::6b + send_raw $send_if_name "$PTP_1588_IPV6_PDELAY_REQ" + mc_leave + fi sleep 1 @@ -149,61 +247,99 @@ run_test() check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \ "$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \ "$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - xfail_on_veth $h1 \ - check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ - "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ - false + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false + check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ + "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ + $should_receive "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \ "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - xfail_on_veth $h1 \ - check_rcv $rcv_if_name \ - "Unicast IPv4 to unknown MAC address, allmulti" \ - "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ - false + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false + check_rcv $rcv_if_name \ + "Unicast IPv4 to unknown MAC address, allmulti" \ + "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ + $should_receive "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to joined group" \ "$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - xfail_on_veth $h1 \ + xfail \ check_rcv $rcv_if_name \ "Multicast IPv4 to unknown group" \ "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to joined group" \ "$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" - xfail_on_veth $h1 \ + xfail \ check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \ "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" + + if [ $skip_ptp = false ]; then + check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + fi tcpdump_cleanup $rcv_if_name } @@ -228,62 +364,217 @@ h2_destroy() simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 } +h1_vlan_create() +{ + simple_if_init $h1 + vlan_create $h1 100 v$h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_vlan_destroy() +{ + vlan_destroy $h1 100 + simple_if_fini $h1 +} + +h2_vlan_create() +{ + simple_if_init $h2 + vlan_create $h2 100 v$h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_vlan_destroy() +{ + vlan_destroy $h2 100 + simple_if_fini $h2 +} + bridge_create() { - ip link add br0 type bridge + local vlan_filtering=$1 + + ip link add br0 type bridge vlan_filtering $vlan_filtering ip link set br0 address $BRIDGE_ADDR ip link set br0 up ip link set $h2 master br0 ip link set $h2 up - - simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 } bridge_destroy() { - simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 - ip link del br0 } -standalone() +macvlan_create() { - h1_create - h2_create + local lower=$1 - ip link add link $h2 name macvlan0 type macvlan mode private + ip link add link $lower name macvlan0 type macvlan mode private ip link set macvlan0 address $MACVLAN_ADDR ip link set macvlan0 up +} - run_test $h2 - +macvlan_destroy() +{ ip link del macvlan0 +} + +standalone() +{ + local no_unicast_flt=true + local skip_ptp=false + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + + h1_create + h2_create + macvlan_create $h2 + + run_test $h1 $h2 $skip_ptp $no_unicast_flt "$h2" + + macvlan_destroy h2_destroy h1_destroy } -bridge() +test_bridge() { + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=true + h1_create - bridge_create + bridge_create $vlan_filtering + simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 + macvlan_create br0 - ip link add link br0 name macvlan0 type macvlan mode private - ip link set macvlan0 address $MACVLAN_ADDR - ip link set macvlan0 up + run_test $h1 br0 $skip_ptp $no_unicast_flt \ + "vlan_filtering=$vlan_filtering bridge" - run_test br0 + macvlan_destroy + simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 + bridge_destroy + h1_destroy +} - ip link del macvlan0 +vlan_unaware_bridge() +{ + test_bridge 0 +} + +vlan_aware_bridge() +{ + test_bridge 1 +} + +test_vlan() +{ + local no_unicast_flt=true + local skip_ptp=false + + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + + h1_vlan_create + h2_vlan_create + macvlan_create $h2.100 + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt "VLAN upper" + + macvlan_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_bridged_port() +{ + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=false + + # br_manage_promisc() will not force a single vlan_filtering port to + # promiscuous mode, so we should still expect unicast filtering to take + # place if the device can do it. + if [ $(has_unicast_flt $h2) = yes ] && [ $vlan_filtering = 1 ]; then + no_unicast_flt=false + fi + + h1_vlan_create + h2_vlan_create + bridge_create $vlan_filtering + macvlan_create $h2.100 + + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridged port" + + macvlan_destroy bridge_destroy - h1_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridged_port() +{ + vlan_over_bridged_port 0 +} + +vlan_over_vlan_aware_bridged_port() +{ + vlan_over_bridged_port 1 +} + +vlan_over_bridge() +{ + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=true + + h1_vlan_create + bridge_create $vlan_filtering + simple_if_init br0 + vlan_create br0 100 vbr0 $H2_IPV4/24 $H2_IPV6/64 + macvlan_create br0.100 + + if [ $vlan_filtering = 1 ]; then + bridge vlan add dev $h2 vid 100 master + bridge vlan add dev br0 vid 100 self + fi + + run_test $h1.100 br0.100 $skip_ptp $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridge" + + if [ $vlan_filtering = 1 ]; then + bridge vlan del dev br0 vid 100 self + bridge vlan del dev $h2 vid 100 master + fi + + macvlan_destroy + vlan_destroy br0 100 + simple_if_fini br0 + bridge_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridge() +{ + vlan_over_bridge 0 +} + +vlan_over_vlan_aware_bridge() +{ + vlan_over_bridge 1 } cleanup() { pre_cleanup + + ip link set $h2 down + ip link set $h1 down + vrf_cleanup } diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh index af3b398d13f0..9e677aa64a06 100755 --- a/tools/testing/selftests/net/forwarding/no_forwarding.sh +++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh @@ -233,6 +233,9 @@ cleanup() { pre_cleanup + ip link set dev $swp2 down + ip link set dev $swp1 down + h2_destroy h1_destroy diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index 2ba44247c60a..a7d8399c8d4f 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -40,6 +40,7 @@ ALL_TESTS=" ping_ipv4 ping_ipv6 multipath_test + multipath16_test ping_ipv4_blackhole ping_ipv6_blackhole nh_stats_test_v4 @@ -226,9 +227,11 @@ routing_nh_obj() multipath4_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -242,7 +245,8 @@ multipath4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -258,9 +262,11 @@ multipath4_test() multipath6_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -275,7 +281,8 @@ multipath6_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -313,6 +320,23 @@ multipath_test() multipath6_test "Weighted MP 11:45" 11 45 } +multipath16_test() +{ + check_nhgw16 104 || return + + log_info "Running 16-bit IPv4 multipath tests" + multipath4_test "65535:65535" 65535 65535 + multipath4_test "128:512" 128 512 + omit_on_slow \ + multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 + + log_info "Running 16-bit IPv6 multipath tests" + multipath6_test "65535:65535" 65535 65535 + multipath6_test "128:512" 128 512 + omit_on_slow \ + multipath6_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 +} + ping_ipv4_blackhole() { RET=0 diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh index 2903294d8bca..507b2852dabe 100644 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh @@ -117,3 +117,16 @@ __nh_stats_test_v6() $MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2 sysctl_restore net.ipv6.fib_multipath_hash_policy } + +check_nhgw16() +{ + local nhid=$1; shift + + ip nexthop replace id 9999 group "$nhid,65535" &>/dev/null + if (( $? )); then + log_test_skip "16-bit multipath tests" \ + "iproute2 or the kernel do not support 16-bit next hop weights" + return 1 + fi + ip nexthop del id 9999 ||: +} diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh index cd9e346436fc..88ddae05b39d 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh @@ -40,6 +40,7 @@ ALL_TESTS=" ping_ipv4 ping_ipv6 multipath_test + multipath16_test nh_stats_test_v4 nh_stats_test_v6 " @@ -228,9 +229,11 @@ routing_nh_obj() multipath4_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -243,7 +246,8 @@ multipath4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -258,9 +262,11 @@ multipath4_test() multipath6_l4_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -273,7 +279,8 @@ multipath6_l4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -371,6 +378,41 @@ multipath_test() ip nexthop replace id 106 group 104,1/105,1 type resilient } +multipath16_test() +{ + check_nhgw16 104 || return + + log_info "Running 16-bit IPv4 multipath tests" + ip nexthop replace id 103 group 101/102 type resilient idle_timer 0 + + ip nexthop replace id 103 group 101,65535/102,65535 type resilient + multipath4_test "65535:65535" 65535 65535 + + ip nexthop replace id 103 group 101,128/102,512 type resilient + multipath4_test "128:512" 128 512 + + ip nexthop replace id 103 group 101,255/102,65535 type resilient + omit_on_slow \ + multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 + + ip nexthop replace id 103 group 101,1/102,1 type resilient + + log_info "Running 16-bit IPv6 L4 hash multipath tests" + ip nexthop replace id 106 group 104/105 type resilient idle_timer 0 + + ip nexthop replace id 106 group 104,65535/105,65535 type resilient + multipath6_l4_test "65535:65535" 65535 65535 + + ip nexthop replace id 106 group 104,128/105,512 type resilient + multipath6_l4_test "128:512" 128 512 + + ip nexthop replace id 106 group 104,255/105,65535 type resilient + omit_on_slow \ + multipath6_l4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 + + ip nexthop replace id 106 group 104,1/105,1 type resilient +} + nh_stats_test_v4() { __nh_stats_test_v4 resilient diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index e2be354167a1..46f365b557b7 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -180,6 +180,7 @@ multipath4_test() ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -217,6 +218,7 @@ multipath6_test() $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index 589629636502..ea89e558672d 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -4,7 +4,8 @@ ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \ mirred_egress_mirror_test matchall_mirred_egress_mirror_test \ gact_trap_test mirred_egress_to_ingress_test \ - mirred_egress_to_ingress_tcp_test" + mirred_egress_to_ingress_tcp_test \ + ingress_2nd_vlan_push egress_2nd_vlan_push" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -244,6 +245,49 @@ mirred_egress_to_ingress_tcp_test() log_test "mirred_egress_to_ingress_tcp ($tcflags)" } +ingress_2nd_vlan_push() +{ + tc filter add dev $swp1 ingress pref 20 chain 0 handle 20 flower \ + $tcflags num_of_vlans 1 \ + action vlan push id 100 protocol 0x8100 action goto chain 5 + tc filter add dev $swp1 ingress pref 30 chain 5 handle 30 flower \ + $tcflags num_of_vlans 2 \ + cvlan_ethtype 0x800 action pass + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -Q 10 -q + + tc_check_packets "dev $swp1 ingress" 30 1 + check_err $? "No double-vlan packets received" + + tc filter del dev $swp1 ingress pref 20 chain 0 handle 20 flower + tc filter del dev $swp1 ingress pref 30 chain 5 handle 30 flower + + log_test "ingress_2nd_vlan_push ($tcflags)" +} + +egress_2nd_vlan_push() +{ + tc filter add dev $h1 egress pref 20 chain 0 handle 20 flower \ + $tcflags num_of_vlans 0 \ + action vlan push id 10 protocol 0x8100 \ + pipe action vlan push id 100 protocol 0x8100 action goto chain 5 + tc filter add dev $h1 egress pref 30 chain 5 handle 30 flower \ + $tcflags num_of_vlans 2 \ + cvlan_ethtype 0x800 action pass + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h1 egress" 30 1 + check_err $? "No double-vlan packets received" + + tc filter del dev $h1 egress pref 20 chain 0 handle 20 flower + tc filter del dev $h1 egress pref 30 chain 5 handle 30 flower + + log_test "egress_2nd_vlan_push ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index d0219032f773..be8707bfb46e 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -125,6 +125,21 @@ slowwait_for_counter() slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@" } +# Check for existence of tools which are built as part of selftests +# but may also already exist in $PATH +check_gen_prog() +{ + local prog_name=$1; shift + + if ! which $prog_name >/dev/null 2>/dev/null; then + PATH=$PWD:$PATH + if ! which $prog_name >/dev/null; then + echo "'$prog_name' command not found; skipping tests" + exit $ksft_skip + fi + fi +} + remove_ns_list() { local item=$1 @@ -146,6 +161,7 @@ cleanup_ns() for ns in "$@"; do [ -z "${ns}" ] && continue + ip netns pids "${ns}" 2> /dev/null | xargs -r kill || true ip netns delete "${ns}" &> /dev/null || true if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then echo "Warn: Failed to remove namespace $ns" diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c index b9f3fc3c3426..e0a34e5e8dd5 100644 --- a/tools/testing/selftests/net/lib/csum.c +++ b/tools/testing/selftests/net/lib/csum.c @@ -654,10 +654,16 @@ static int recv_verify_packet_ipv4(void *nh, int len) { struct iphdr *iph = nh; uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + uint16_t ip_len; if (len < sizeof(*iph) || iph->protocol != proto) return -1; + ip_len = ntohs(iph->tot_len); + if (ip_len > len || ip_len < sizeof(*iph)) + return -1; + + len = ip_len; iph_addr_p = &iph->saddr; if (proto == IPPROTO_TCP) return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph)); @@ -669,16 +675,22 @@ static int recv_verify_packet_ipv6(void *nh, int len) { struct ipv6hdr *ip6h = nh; uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + uint16_t ip_len; if (len < sizeof(*ip6h) || ip6h->nexthdr != proto) return -1; + ip_len = ntohs(ip6h->payload_len); + if (ip_len > len - sizeof(*ip6h)) + return -1; + + len = ip_len; iph_addr_p = &ip6h->saddr; if (proto == IPPROTO_TCP) - return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h)); + return recv_verify_packet_tcp(ip6h + 1, len); else - return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h)); + return recv_verify_packet_udp(ip6h + 1, len); } /* return whether auxdata includes TP_STATUS_CSUM_VALID */ diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index f26c20df9db4..477ae76de93d 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 import builtins +import functools import inspect import sys import time @@ -10,6 +11,7 @@ from .utils import global_defer_queue KSFT_RESULT = None KSFT_RESULT_ALL = True +KSFT_DISRUPTIVE = True class KsftFailEx(Exception): @@ -32,8 +34,18 @@ def _fail(*args): global KSFT_RESULT KSFT_RESULT = False - frame = inspect.stack()[2] - ksft_pr("At " + frame.filename + " line " + str(frame.lineno) + ":") + stack = inspect.stack() + started = False + for frame in reversed(stack[2:]): + # Start printing from the test case function + if not started: + if frame.function == 'ksft_run': + started = True + continue + + ksft_pr("Check| At " + frame.filename + ", line " + str(frame.lineno) + + ", in " + frame.function + ":") + ksft_pr("Check| " + frame.code_context[0].strip()) ksft_pr(*args) @@ -43,6 +55,12 @@ def ksft_eq(a, b, comment=""): _fail("Check failed", a, "!=", b, comment) +def ksft_ne(a, b, comment=""): + global KSFT_RESULT + if a == b: + _fail("Check failed", a, "==", b, comment) + + def ksft_true(a, comment=""): if not a: _fail("Check failed", a, "does not eval to True", comment) @@ -127,6 +145,44 @@ def ksft_flush_defer(): KSFT_RESULT = False +def ksft_disruptive(func): + """ + Decorator that marks the test as disruptive (e.g. the test + that can down the interface). Disruptive tests can be skipped + by passing DISRUPTIVE=False environment variable. + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not KSFT_DISRUPTIVE: + raise KsftSkipEx(f"marked as disruptive") + return func(*args, **kwargs) + return wrapper + + +def ksft_setup(env): + """ + Setup test framework global state from the environment. + """ + + def get_bool(env, name): + value = env.get(name, "").lower() + if value in ["yes", "true"]: + return True + if value in ["no", "false"]: + return False + try: + return bool(int(value)) + except: + raise Exception(f"failed to parse {name}") + + if "DISRUPTIVE" in env: + global KSFT_DISRUPTIVE + KSFT_DISRUPTIVE = get_bool(env, "DISRUPTIVE") + + return env + + def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases = cases or [] diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 7b936a926859..5d796622e730 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -11,6 +11,8 @@ TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq TEST_FILES := mptcp_lib.sh settings +TEST_INCLUDES := ../lib.sh ../net_helper.sh + EXTRA_CLEAN := *.pcap include ../../lib.mk diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 776d43a6922d..2bd0c1eb70c5 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -284,7 +284,7 @@ echo "b" | \ ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \ 127.0.0.1 >/dev/null & wait_connected $ns 10000 -chk_msk_nr 2 "after MPC handshake " +chk_msk_nr 2 "after MPC handshake" chk_last_time_info 10000 chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index d2043ec3bf6d..4209b9569039 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1115,11 +1115,11 @@ again: return 1; } - if (--cfg_repeat > 0) { - if (cfg_input) - close(fd); + if (cfg_input) + close(fd); + + if (--cfg_repeat > 0) goto again; - } return 0; } diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index b77fb7065bfb..57325d57e4c6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -345,9 +345,11 @@ do_transfer() local addr_port addr_port=$(printf "%s:%d" ${connect_addr} ${port}) - local result_msg - result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})" - mptcp_lib_print_title "${result_msg}" + local pretty_title + pretty_title="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})" + mptcp_lib_print_title "${pretty_title}" + + local tap_title="${connector_ns:0:3} ${cl_proto} -> ${listener_ns:0:3} (${addr_port}) ${srv_proto}" if $capture; then local capuser @@ -431,7 +433,6 @@ do_transfer() local duration duration=$((stop-start)) - result_msg+=" # time=${duration}ms" printf "(duration %05sms) " "${duration}" if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" @@ -444,7 +445,7 @@ do_transfer() echo cat "$capout" - mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" + mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}" return 1 fi @@ -544,12 +545,12 @@ do_transfer() if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then mptcp_lib_pr_ok "${extra:1}" - mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}" + mptcp_lib_result_pass "${TEST_GROUP}: ${tap_title}" else if [ -n "${extra}" ]; then mptcp_lib_print_warn "${extra:1}" fi - mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" + mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}" fi cat "$capout" @@ -848,6 +849,8 @@ stop_if_error() make_file "$cin" "client" make_file "$sin" "server" +mptcp_lib_subtests_last_ts_reset + check_mptcp_disabled stop_if_error "The kernel configuration is not valid for MPTCP" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 108aeeb84ef1..e8d0a01b4144 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -61,6 +61,16 @@ unset sflags unset fastclose unset fullmesh unset speed +unset join_csum_ns1 +unset join_csum_ns2 +unset join_fail_nr +unset join_rst_nr +unset join_infi_nr +unset join_corrupted_pkts +unset join_syn_tx +unset join_create_err +unset join_bind_err +unset join_connect_err # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || # (ip6 && (ip6[74] & 0xf0) == 0x30)'" @@ -196,6 +206,22 @@ print_skip() mptcp_lib_pr_skip "${@}" } +# $1: check name; $2: rc +print_results() +{ + local check="${1}" + local rc=${2} + + print_check "${check}" + if [ ${rc} = ${KSFT_PASS} ]; then + print_ok + elif [ ${rc} = ${KSFT_SKIP} ]; then + print_skip + else + fail_test "see above" + fi +} + # [ $1: fail msg ] mark_as_skipped() { @@ -337,7 +363,7 @@ reset_with_checksum() local ns1_enable=$1 local ns2_enable=$2 - reset "checksum test ${1} ${2}" || return 1 + reset "checksum test ${ns1_enable} ${ns2_enable}" || return 1 ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable @@ -420,12 +446,17 @@ reset_with_fail() fi } +start_events() +{ + mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid + mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid +} + reset_with_events() { reset "${1}" || return 1 - mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid - mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid + start_events } reset_with_tcp_filter() @@ -436,9 +467,10 @@ reset_with_tcp_filter() local ns="${!1}" local src="${2}" local target="${3}" + local chain="${4:-INPUT}" if ! ip netns exec "${ns}" ${iptables} \ - -A INPUT \ + -A "${chain}" \ -s "${src}" \ -p tcp \ -j "${target}"; then @@ -661,7 +693,7 @@ pm_nl_check_endpoint() done if [ -z "${id}" ]; then - test_fail "bad test - missing endpoint id" + fail_test "bad test - missing endpoint id" return fi @@ -833,7 +865,7 @@ chk_cestab_nr() local cestab=$2 local count - print_check "cestab $cestab" + print_check "currently established: $cestab" count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab") if [ -z "$count" ]; then print_skip @@ -1109,28 +1141,29 @@ chk_csum_nr() csum_ns2=${csum_ns2:1} fi - print_check "sum" + print_check "checksum server" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr") - if [ "$count" != "$csum_ns1" ]; then + if [ -n "$count" ] && [ "$count" != "$csum_ns1" ]; then extra_msg+=" ns1=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } || - { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then + { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then fail_test "got $count data checksum error[s] expected $csum_ns1" else print_ok fi - print_check "csum" + + print_check "checksum client" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr") - if [ "$count" != "$csum_ns2" ]; then + if [ -n "$count" ] && [ "$count" != "$csum_ns2" ]; then extra_msg+=" ns2=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } || - { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then + { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then fail_test "got $count data checksum error[s] expected $csum_ns2" else print_ok @@ -1147,6 +1180,8 @@ chk_fail_nr() local count local ns_tx=$ns1 local ns_rx=$ns2 + local tx="server" + local rx="client" local extra_msg="" local allow_tx_lost=0 local allow_rx_lost=0 @@ -1154,7 +1189,8 @@ chk_fail_nr() if [[ $ns_invert = "invert" ]]; then ns_tx=$ns2 ns_rx=$ns1 - extra_msg="invert" + tx="client" + rx="server" fi if [[ "${fail_tx}" = "-"* ]]; then @@ -1166,29 +1202,29 @@ chk_fail_nr() fail_rx=${fail_rx:1} fi - print_check "ftx" + print_check "fail tx ${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx") - if [ "$count" != "$fail_tx" ]; then - extra_msg+=",tx=$count" + if [ -n "$count" ] && [ "$count" != "$fail_tx" ]; then + extra_msg+=" tx=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } || - { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then + { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then fail_test "got $count MP_FAIL[s] TX expected $fail_tx" else print_ok fi - print_check "failrx" + print_check "fail rx ${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx") - if [ "$count" != "$fail_rx" ]; then - extra_msg+=",rx=$count" + if [ -n "$count" ] && [ "$count" != "$fail_rx" ]; then + extra_msg+=" rx=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } || - { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then + { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then fail_test "got $count MP_FAIL[s] RX expected $fail_rx" else print_ok @@ -1205,37 +1241,35 @@ chk_fclose_nr() local count local ns_tx=$ns2 local ns_rx=$ns1 - local extra_msg="" + local tx="client" + local rx="server" if [[ $ns_invert = "invert" ]]; then ns_tx=$ns1 ns_rx=$ns2 - extra_msg="invert" + tx="server" + rx="client" fi - print_check "ctx" + print_check "fast close tx ${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_tx" ]; then - extra_msg+=",tx=$count" fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx" else print_ok fi - print_check "fclzrx" + print_check "fast close rx ${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_rx" ]; then - extra_msg+=",rx=$count" fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx" else print_ok fi - - print_info "$extra_msg" } chk_rst_nr() @@ -1246,15 +1280,17 @@ chk_rst_nr() local count local ns_tx=$ns1 local ns_rx=$ns2 - local extra_msg="" + local tx="server" + local rx="client" if [[ $ns_invert = "invert" ]]; then ns_tx=$ns2 ns_rx=$ns1 - extra_msg="invert" + tx="client" + rx="server" fi - print_check "rtx" + print_check "reset tx ${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx") if [ -z "$count" ]; then print_skip @@ -1266,7 +1302,7 @@ chk_rst_nr() print_ok fi - print_check "rstrx" + print_check "reset rx ${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx") if [ -z "$count" ]; then print_skip @@ -1277,8 +1313,6 @@ chk_rst_nr() else print_ok fi - - print_info "$extra_msg" } chk_infi_nr() @@ -1287,7 +1321,7 @@ chk_infi_nr() local infi_rx=$2 local count - print_check "itx" + print_check "infi tx client" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx") if [ -z "$count" ]; then print_skip @@ -1297,7 +1331,7 @@ chk_infi_nr() print_ok fi - print_check "infirx" + print_check "infi rx server" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx") if [ -z "$count" ]; then print_skip @@ -1308,17 +1342,66 @@ chk_infi_nr() fi } +chk_join_tx_nr() +{ + local syn_tx=${join_syn_tx:-0} + local create=${join_create_err:-0} + local bind=${join_bind_err:-0} + local connect=${join_connect_err:-0} + local rc=${KSFT_PASS} + local count + + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTx") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$syn_tx" ]; then + rc=${KSFT_FAIL} + print_check "syn tx" + fail_test "got $count JOIN[s] syn tx expected $syn_tx" + fi + + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxCreatSkErr") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$create" ]; then + rc=${KSFT_FAIL} + print_check "syn tx create socket error" + fail_test "got $count JOIN[s] syn tx create socket error expected $create" + fi + + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxBindErr") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$bind" ]; then + rc=${KSFT_FAIL} + print_check "syn tx bind error" + fail_test "got $count JOIN[s] syn tx bind error expected $bind" + fi + + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxConnectErr") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$connect" ]; then + rc=${KSFT_FAIL} + print_check "syn tx connect error" + fail_test "got $count JOIN[s] syn tx connect error expected $connect" + fi + + print_results "join Tx" ${rc} +} + chk_join_nr() { local syn_nr=$1 local syn_ack_nr=$2 local ack_nr=$3 - local csum_ns1=${4:-0} - local csum_ns2=${5:-0} - local fail_nr=${6:-0} - local rst_nr=${7:-0} - local infi_nr=${8:-0} - local corrupted_pkts=${9:-0} + local csum_ns1=${join_csum_ns1:-0} + local csum_ns2=${join_csum_ns2:-0} + local fail_nr=${join_fail_nr:-0} + local rst_nr=${join_rst_nr:-0} + local infi_nr=${join_infi_nr:-0} + local corrupted_pkts=${join_corrupted_pkts:-0} + local rc=${KSFT_PASS} local count local with_cookie @@ -1326,43 +1409,44 @@ chk_join_nr() print_info "${corrupted_pkts} corrupted pkts" fi - print_check "syn" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx") if [ -z "$count" ]; then - print_skip + rc=${KSFT_SKIP} elif [ "$count" != "$syn_nr" ]; then - fail_test "got $count JOIN[s] syn expected $syn_nr" - else - print_ok + rc=${KSFT_FAIL} + print_check "syn rx" + fail_test "got $count JOIN[s] syn rx expected $syn_nr" fi - print_check "synack" with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies) count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") if [ -z "$count" ]; then - print_skip + rc=${KSFT_SKIP} elif [ "$count" != "$syn_ack_nr" ]; then # simult connections exceeding the limit with cookie enabled could go up to # synack validation as the conn limit can be enforced reliably only after # the subflow creation - if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then - print_ok - else - fail_test "got $count JOIN[s] synack expected $syn_ack_nr" + if [ "$with_cookie" != 2 ] || [ "$count" -le "$syn_ack_nr" ] || [ "$count" -gt "$syn_nr" ]; then + rc=${KSFT_FAIL} + print_check "synack rx" + fail_test "got $count JOIN[s] synack rx expected $syn_ack_nr" fi - else - print_ok fi - print_check "ack" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") if [ -z "$count" ]; then - print_skip + rc=${KSFT_SKIP} elif [ "$count" != "$ack_nr" ]; then - fail_test "got $count JOIN[s] ack expected $ack_nr" - else - print_ok + rc=${KSFT_FAIL} + print_check "ack rx" + fail_test "got $count JOIN[s] ack rx expected $ack_nr" fi + + print_results "join Rx" ${rc} + + join_syn_tx="${join_syn_tx:-${syn_nr}}" \ + chk_join_tx_nr + if $validate_checksum; then chk_csum_nr $csum_ns1 $csum_ns2 chk_fail_nr $fail_nr $fail_nr @@ -1415,18 +1499,30 @@ chk_add_nr() local add_nr=$1 local echo_nr=$2 local port_nr=${3:-0} - local syn_nr=${4:-$port_nr} - local syn_ack_nr=${5:-$port_nr} - local ack_nr=${6:-$port_nr} - local mis_syn_nr=${7:-0} - local mis_ack_nr=${8:-0} + local ns_invert=${4:-""} + local syn_nr=$port_nr + local syn_ack_nr=$port_nr + local ack_nr=$port_nr + local mis_syn_nr=0 + local mis_ack_nr=0 + local ns_tx=$ns1 + local ns_rx=$ns2 + local tx="" + local rx="" local count local timeout - timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns2 + ns_rx=$ns1 + tx=" client" + rx=" server" + fi + + timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout) - print_check "add" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr") + print_check "add addr rx${rx}" + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip # if the test configured a short timeout tolerate greater then expected @@ -1437,8 +1533,8 @@ chk_add_nr() print_ok fi - print_check "echo" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd") + print_check "add addr echo rx${tx}" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtEchoAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_nr" ]; then @@ -1448,8 +1544,8 @@ chk_add_nr() fi if [ $port_nr -gt 0 ]; then - print_check "pt" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd") + print_check "add addr rx with port${rx}" + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtPortAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$port_nr" ]; then @@ -1458,8 +1554,8 @@ chk_add_nr() print_ok fi - print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + print_check "syn rx port${tx}" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ -1469,8 +1565,8 @@ chk_add_nr() print_ok fi - print_check "synack" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + print_check "synack rx port${rx}" + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPJoinPortSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1480,8 +1576,8 @@ chk_add_nr() print_ok fi - print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + print_check "ack rx port${tx}" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$ack_nr" ]; then @@ -1491,8 +1587,8 @@ chk_add_nr() print_ok fi - print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + print_check "syn rx port mismatch${tx}" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_syn_nr" ]; then @@ -1502,8 +1598,8 @@ chk_add_nr() print_ok fi - print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + print_check "ack rx port mismatch${tx}" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_ack_nr" ]; then @@ -1524,7 +1620,7 @@ chk_add_tx_nr() timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) - print_check "add TX" + print_check "add addr tx" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx") if [ -z "$count" ]; then print_skip @@ -1536,7 +1632,7 @@ chk_add_tx_nr() print_ok fi - print_check "echo TX" + print_check "add addr echo tx" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx") if [ -z "$count" ]; then print_skip @@ -1556,6 +1652,8 @@ chk_rm_nr() local count local addr_ns=$ns1 local subflow_ns=$ns2 + local addr="server" + local subflow="client" local extra_msg="" shift 2 @@ -1565,16 +1663,14 @@ chk_rm_nr() shift done - if [ -z $invert ]; then - addr_ns=$ns1 - subflow_ns=$ns2 - elif [ $invert = "true" ]; then + if [ "$invert" = "true" ]; then addr_ns=$ns2 subflow_ns=$ns1 - extra_msg="invert" + addr="client" + subflow="server" fi - print_check "rm" + print_check "rm addr rx ${addr}" count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr") if [ -z "$count" ]; then print_skip @@ -1584,7 +1680,7 @@ chk_rm_nr() print_ok fi - print_check "rmsf" + print_check "rm subflow ${subflow}" count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow") if [ -z "$count" ]; then print_skip @@ -1598,7 +1694,7 @@ chk_rm_nr() count=$((count + cnt)) if [ "$count" != "$rm_subflow_nr" ]; then suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" - extra_msg+=" simult" + extra_msg="simult" fi if [ $count -ge "$rm_subflow_nr" ] && \ [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then @@ -1619,7 +1715,7 @@ chk_rm_tx_nr() { local rm_addr_tx_nr=$1 - print_check "rm TX" + print_check "rm addr tx client" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx") if [ -z "$count" ]; then print_skip @@ -1634,9 +1730,11 @@ chk_prio_nr() { local mp_prio_nr_tx=$1 local mp_prio_nr_rx=$2 + local mpj_syn=$3 + local mpj_syn_ack=$4 local count - print_check "ptx" + print_check "mp_prio tx server" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx") if [ -z "$count" ]; then print_skip @@ -1646,7 +1744,7 @@ chk_prio_nr() print_ok fi - print_check "prx" + print_check "mp_prio rx client" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx") if [ -z "$count" ]; then print_skip @@ -1655,6 +1753,26 @@ chk_prio_nr() else print_ok fi + + print_check "syn backup" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynBackupRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mpj_syn" ]; then + fail_test "got $count JOIN[s] syn with Backup expected $mpj_syn" + else + print_ok + fi + + print_check "synack backup" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckBackupRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mpj_syn_ack" ]; then + fail_test "got $count JOIN[s] synack with Backup expected $mpj_syn_ack" + else + print_ok + fi } chk_subflow_nr() @@ -1869,9 +1987,11 @@ subflows_error_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.12.2 flags subflow speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 + join_bind_err=1 \ + chk_join_nr 0 0 0 fi # multiple subflows, with subflow creation error @@ -1883,7 +2003,8 @@ subflows_error_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 fi # multiple subflows, with subflow timeout on MPJ @@ -1895,7 +2016,8 @@ subflows_error_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 fi # multiple subflows, check that the endpoint corresponding to @@ -1916,7 +2038,8 @@ subflows_error_tests() # additional subflow could be created only if the PM select # the later endpoint, skipping the already used one - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 fi } @@ -1955,6 +2078,21 @@ signal_address_tests() chk_add_nr 1 1 fi + # uncommon: subflow and signal flags on the same endpoint + # or because the user wrongly picked both, but still expects the client + # to create additional subflows + if reset "subflow and signal together"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags signal,subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 0 invert # only initiated by ns2 + chk_add_nr 0 0 0 # none initiated by ns1 + chk_rst_nr 0 0 invert # no RST sent by the client + chk_rst_nr 0 0 # no RST sent by the server + fi + # accept and use add_addr with additional subflows if reset "multiple subflows and signal"; then pm_nl_set_limits $ns1 0 3 @@ -1987,7 +2125,8 @@ signal_address_tests() pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=3 \ + chk_join_nr 1 1 1 chk_add_nr 3 3 fi @@ -2155,7 +2294,8 @@ add_addr_timeout_tests() pm_nl_set_limits $ns2 2 2 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 chk_add_nr 8 0 fi } @@ -2255,7 +2395,8 @@ remove_tests() pm_nl_set_limits $ns2 2 2 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 join_connect_err=1 \ + chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert chk_rst_nr 0 0 @@ -2320,7 +2461,8 @@ remove_tests() pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=3 \ + chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert chk_rst_nr 0 0 @@ -2612,33 +2754,46 @@ backup_tests() sflags=nobackup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 fi # single address, backup if reset "single address, backup" && continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup + pm_nl_set_limits $ns2 1 1 + sflags=nobackup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_prio_nr 1 0 0 1 + fi + + # single address, switch to backup + if reset "single address, switch to backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_set_limits $ns2 1 1 sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 - chk_prio_nr 1 1 + chk_prio_nr 1 1 0 0 fi # single address with port, backup if reset "single address with port, backup" && continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup port 10100 pm_nl_set_limits $ns2 1 1 - sflags=backup speed=slow \ + sflags=nobackup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 - chk_prio_nr 1 1 + chk_prio_nr 1 0 0 1 fi if reset "mpc backup" && @@ -2647,17 +2802,26 @@ backup_tests() speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 0 1 + chk_prio_nr 0 1 0 0 fi if reset "mpc backup both sides" && continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then - pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + + # 10.0.2.2 (non-backup) -> 10.0.1.1 (backup) + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + # 10.0.1.2 (backup) -> 10.0.2.1 (non-backup) + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + ip -net "$ns2" route add 10.0.2.1 via 10.0.1.1 dev ns2eth1 # force this path + speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - chk_prio_nr 1 1 + chk_join_nr 2 2 2 + chk_prio_nr 1 1 1 1 fi if reset "mpc switch to backup" && @@ -2666,7 +2830,7 @@ backup_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 0 1 + chk_prio_nr 0 1 0 0 fi if reset "mpc switch to backup both sides" && @@ -2676,7 +2840,7 @@ backup_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 1 1 + chk_prio_nr 1 1 0 0 fi } @@ -2868,37 +3032,16 @@ syncookies_tests() checksum_tests() { - # checksum test 0 0 - if reset_with_checksum 0 0; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi - - # checksum test 1 1 - if reset_with_checksum 1 1; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi - - # checksum test 0 1 - if reset_with_checksum 0 1; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi - - # checksum test 1 0 - if reset_with_checksum 1 0; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi + local checksum_enable + for checksum_enable in "0 0" "1 1" "0 1" "1 0"; do + # checksum test 0 0, 1 1, 0 1, 1 0 + if reset_with_checksum ${checksum_enable}; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + done } deny_join_id0_tests() @@ -2987,6 +3130,9 @@ fullmesh_tests() pm_nl_set_limits $ns1 1 3 pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh + fi fullmesh=1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -3053,7 +3199,7 @@ fullmesh_tests() addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 chk_rm_nr 0 1 fi @@ -3066,7 +3212,7 @@ fullmesh_tests() sflags=nobackup,nofullmesh speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 chk_rm_nr 0 1 fi } @@ -3086,7 +3232,8 @@ fastclose_tests() MPTCP_LIB_SUBTEST_FLAKY=1 test_linkfail=1024 fastclose=server \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 0 0 0 1 + join_rst_nr=1 \ + chk_join_nr 0 0 0 chk_fclose_nr 1 1 invert chk_rst_nr 1 1 fi @@ -3105,7 +3252,10 @@ fail_tests() MPTCP_LIB_SUBTEST_FLAKY=1 test_linkfail=128 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)" + join_csum_ns1=+1 join_csum_ns2=+0 \ + join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \ + join_corrupted_pkts="$(pedit_action_pkts)" \ + chk_join_nr 0 0 0 chk_fail_nr 1 -1 invert fi @@ -3118,7 +3268,10 @@ fail_tests() pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow test_linkfail=1024 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)" + join_csum_ns1=1 join_csum_ns2=0 \ + join_fail_nr=1 join_rst_nr=1 join_infi_nr=0 \ + join_corrupted_pkts="$(pedit_action_pkts)" \ + chk_join_nr 1 1 1 fi } @@ -3260,6 +3413,36 @@ userspace_pm_chk_get_addr() fi } +# $1: ns ; $2: event type ; $3: count +chk_evt_nr() +{ + local ns=${1} + local evt_name="${2}" + local exp="${3}" + + local evts="${evts_ns1}" + local evt="${!evt_name}" + local count + + evt_name="${evt_name:16}" # without MPTCP_LIB_EVENT_ + [ "${ns}" == "ns2" ] && evts="${evts_ns2}" + + print_check "event ${ns} ${evt_name} (${exp})" + + if [[ "${evt_name}" = "LISTENER_"* ]] && + ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then + print_skip "event not supported" + return + fi + + count=$(grep -cw "type:${evt}" "${evts}") + if [ "${count}" != "${exp}" ]; then + fail_test "got ${count} events, expected ${exp}" + else + print_ok + fi +} + userspace_tests() { # userspace pm type prevents add_addr @@ -3318,7 +3501,7 @@ userspace_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 0 - chk_prio_nr 0 0 + chk_prio_nr 0 0 0 0 fi # userspace pm type prevents rm_addr @@ -3340,8 +3523,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 2 2 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 userspace_pm_add_addr $ns1 10.0.2.1 10 @@ -3356,14 +3539,12 @@ userspace_tests() "signal" userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1" userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1" - userspace_pm_rm_addr $ns1 10 userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns1}" \ - "id 20 flags signal 10.0.3.1" "after rm_addr 10" + "id 20 flags signal 10.0.3.1" "after rm_sf 10" userspace_pm_rm_addr $ns1 20 - userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20" - chk_rm_nr 2 2 invert + chk_rm_nr 1 1 invert chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids @@ -3375,8 +3556,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 userspace_pm_add_sf $ns2 10.0.3.2 20 @@ -3387,12 +3568,11 @@ userspace_tests() "id 20 flags subflow 10.0.3.2" \ "subflow" userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2" - userspace_pm_rm_addr $ns2 20 userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns2}" \ "" \ - "after rm_addr 20" - chk_rm_nr 1 1 + "after rm_sf 20" + chk_rm_nr 0 1 chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids @@ -3404,8 +3584,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 chk_mptcp_info subflows 0 subflows 0 @@ -3425,8 +3605,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 userspace_pm_add_sf $ns2 10.0.3.2 20 @@ -3449,8 +3629,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 userspace_pm_add_addr $ns1 10.0.2.1 10 @@ -3480,8 +3660,8 @@ endpoint_tests() pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - speed=slow \ - run_tests $ns1 $ns2 10.0.1.1 & + { speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 @@ -3500,31 +3680,185 @@ endpoint_tests() mptcp_lib_kill_wait $tests_pid fi - if reset "delete and re-add" && + if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then - pm_nl_set_limits $ns1 1 1 - pm_nl_set_limits $ns2 1 1 + start_events + pm_nl_set_limits $ns1 0 3 + pm_nl_set_limits $ns2 0 3 + pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - test_linkfail=4 speed=20 \ - run_tests $ns1 $ns2 10.0.1.1 & + { test_linkfail=4 speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 pm_nl_check_endpoint "creation" \ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2 - chk_subflow_nr "before delete" 2 + chk_subflow_nr "before delete id 2" 2 chk_mptcp_info subflows 1 subflows 1 pm_nl_del_endpoint $ns2 2 10.0.2.2 sleep 0.5 - chk_subflow_nr "after delete" 1 + chk_subflow_nr "after delete id 2" 1 chk_mptcp_info subflows 0 subflows 0 - pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow wait_mpj $ns2 - chk_subflow_nr "after re-add" 2 + chk_subflow_nr "after re-add id 2" 2 chk_mptcp_info subflows 1 subflows 1 + + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_attempt_fail $ns2 + chk_subflow_nr "after new reject" 2 + chk_mptcp_info subflows 1 subflows 1 + + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_del_endpoint $ns2 3 10.0.3.2 + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after no reject" 3 + chk_mptcp_info subflows 2 subflows 2 + + local i + for i in $(seq 3); do + pm_nl_del_endpoint $ns2 1 10.0.1.2 + sleep 0.5 + chk_subflow_nr "after delete id 0 ($i)" 2 + chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf + + pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after re-add id 0 ($i)" 3 + chk_mptcp_info subflows 3 subflows 3 + done + + mptcp_lib_kill_wait $tests_pid + + kill_events_pids + chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0 + chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 4 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 4 + + chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 0 + chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 0 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 5 # one has been closed before estab + + join_syn_tx=7 \ + chk_join_nr 6 6 6 + chk_rm_nr 4 4 + fi + + # remove and re-add + if reset_with_events "delete re-add signal" && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 3 + pm_nl_set_limits $ns2 3 3 + pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal + pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal + { test_linkfail=4 speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null + local tests_pid=$! + + wait_mpj $ns2 + pm_nl_check_endpoint "creation" \ + $ns1 10.0.2.1 id 1 flags signal + chk_subflow_nr "before delete" 2 + chk_mptcp_info subflows 1 subflows 1 + + pm_nl_del_endpoint $ns1 1 10.0.2.1 + pm_nl_del_endpoint $ns1 2 224.0.0.1 + sleep 0.5 + chk_subflow_nr "after delete" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-add" 3 + chk_mptcp_info subflows 2 subflows 2 + + pm_nl_del_endpoint $ns1 42 10.0.1.1 + sleep 0.5 + chk_subflow_nr "after delete ID 0" 2 + chk_mptcp_info subflows 2 subflows 2 + + pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-add ID 0" 3 + chk_mptcp_info subflows 3 subflows 3 + + pm_nl_del_endpoint $ns1 99 10.0.1.1 + sleep 0.5 + chk_subflow_nr "after re-delete ID 0" 2 + chk_mptcp_info subflows 2 subflows 2 + + pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-re-add ID 0" 3 + chk_mptcp_info subflows 3 subflows 3 + mptcp_lib_kill_wait $tests_pid + + kill_events_pids + chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0 + chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 0 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 3 + + chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 6 + chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 4 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 3 + + join_connect_err=1 \ + chk_join_nr 5 5 5 + chk_add_nr 6 6 + chk_rm_nr 4 3 invert + fi + + # flush and re-add + if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + { test_linkfail=4 speed=20 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null + local tests_pid=$! + + wait_attempt_fail $ns2 + chk_subflow_nr "before flush" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_flush_endpoint $ns2 + pm_nl_flush_endpoint $ns1 + wait_rm_addr $ns2 0 + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal + wait_mpj $ns2 mptcp_lib_kill_wait $tests_pid + + join_syn_tx=3 join_connect_err=1 \ + chk_join_nr 2 2 2 + chk_add_nr 2 2 + chk_rm_nr 1 0 invert fi } @@ -3627,9 +3961,11 @@ if [ ${#tests[@]} -eq 0 ]; then tests=("${all_tests_names[@]}") fi +mptcp_lib_subtests_last_ts_reset for subtests in "${tests[@]}"; do "${subtests}" done +append_prev_results if [ ${ret} -ne 0 ]; then echo @@ -3640,7 +3976,6 @@ if [ ${ret} -ne 0 ]; then echo fi -append_prev_results mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 438280e68434..975d4d4c862a 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -12,10 +12,14 @@ readonly KSFT_SKIP=4 readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}" # These variables are used in some selftests, read-only +declare -rx MPTCP_LIB_EVENT_CREATED=1 # MPTCP_EVENT_CREATED +declare -rx MPTCP_LIB_EVENT_ESTABLISHED=2 # MPTCP_EVENT_ESTABLISHED +declare -rx MPTCP_LIB_EVENT_CLOSED=3 # MPTCP_EVENT_CLOSED declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED declare -rx MPTCP_LIB_EVENT_REMOVED=7 # MPTCP_EVENT_REMOVED declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED +declare -rx MPTCP_LIB_EVENT_SUB_PRIORITY=13 # MPTCP_EVENT_SUB_PRIORITY declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16 # MPTCP_EVENT_LISTENER_CLOSED @@ -25,6 +29,7 @@ declare -rx MPTCP_LIB_AF_INET6=10 MPTCP_LIB_SUBTESTS=() MPTCP_LIB_SUBTESTS_DUPLICATED=0 MPTCP_LIB_SUBTEST_FLAKY=0 +MPTCP_LIB_SUBTESTS_LAST_TS_MS= MPTCP_LIB_TEST_COUNTER=0 MPTCP_LIB_TEST_FORMAT="%02u %-50s" MPTCP_LIB_IP_MPTCP=0 @@ -201,6 +206,11 @@ mptcp_lib_kversion_ge() { mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}" } +mptcp_lib_subtests_last_ts_reset() { + MPTCP_LIB_SUBTESTS_LAST_TS_MS="$(date +%s%3N)" +} +mptcp_lib_subtests_last_ts_reset + __mptcp_lib_result_check_duplicated() { local subtest @@ -215,13 +225,22 @@ __mptcp_lib_result_check_duplicated() { __mptcp_lib_result_add() { local result="${1}" + local time="time=" + local ts_prev_ms shift local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1)) __mptcp_lib_result_check_duplicated "${*}" - MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}") + # not to add two '#' + [[ "${*}" != *"#"* ]] && time="# ${time}" + + ts_prev_ms="${MPTCP_LIB_SUBTESTS_LAST_TS_MS}" + mptcp_lib_subtests_last_ts_reset + time+="$((MPTCP_LIB_SUBTESTS_LAST_TS_MS - ts_prev_ms))ms" + + MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*} ${time}") } # $1: test name diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 68899a303a1a..5e8d5b83e2d0 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -349,6 +349,7 @@ init make_file "$cin" "client" 1 make_file "$sin" "server" 1 trap cleanup EXIT +mptcp_lib_subtests_last_ts_reset run_tests $ns1 $ns2 10.0.1.1 run_tests $ns1 $ns2 dead:beef:1::1 diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 2757378b1b13..2e6648a2b2c0 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -137,6 +137,8 @@ check() fi } +mptcp_lib_subtests_last_ts_reset + check "show_endpoints" "" "defaults addr list" default_limits="$(get_limits)" diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 7ad5a59adff2..994a556f46c1 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -19,12 +19,6 @@ #include "linux/mptcp.h" -#ifndef MPTCP_PM_NAME -#define MPTCP_PM_NAME "mptcp_pm" -#endif -#ifndef MPTCP_PM_EVENTS -#define MPTCP_PM_EVENTS "mptcp_pm_events" -#endif #ifndef IPPROTO_MPTCP #define IPPROTO_MPTCP 262 #endif @@ -116,7 +110,7 @@ static int capture_events(int fd, int event_group) if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &event_group, sizeof(event_group)) < 0) - error(1, errno, "could not join the " MPTCP_PM_EVENTS " mcast group"); + error(1, errno, "could not join the " MPTCP_PM_EV_GRP_NAME " mcast group"); do { FD_ZERO(&rfds); @@ -288,7 +282,7 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh, int *pm_family, if (grp->rta_type == CTRL_ATTR_MCAST_GRP_ID) *events_mcast_grp = *(__u32 *)RTA_DATA(grp); else if (grp->rta_type == CTRL_ATTR_MCAST_GRP_NAME && - !strcmp(RTA_DATA(grp), MPTCP_PM_EVENTS)) + !strcmp(RTA_DATA(grp), MPTCP_PM_EV_GRP_NAME)) got_events_grp = 1; grp = RTA_NEXT(grp, grp_len); diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index f74e1c3c126d..8fa77c8e9b65 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -286,6 +286,7 @@ while getopts "bcdhi" option;do done setup +mptcp_lib_subtests_last_ts_reset run_test 10 10 0 0 "balanced bwidth" run_test 10 10 1 25 "balanced bwidth with unbalanced delay" diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 9cb05978269d..3651f73451cf 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -150,6 +150,7 @@ mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid server_evts=$(mktemp) mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid sleep 0.5 +mptcp_lib_subtests_last_ts_reset print_title "Init" print_test "Created network namespaces ns1, ns2" diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c new file mode 100644 index 000000000000..64d6805381c5 --- /dev/null +++ b/tools/testing/selftests/net/ncdevmem.c @@ -0,0 +1,570 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __EXPORTED_HEADERS__ + +#include <linux/uio.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#define __iovec_defined +#include <fcntl.h> +#include <malloc.h> +#include <error.h> + +#include <arpa/inet.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> + +#include <linux/memfd.h> +#include <linux/dma-buf.h> +#include <linux/udmabuf.h> +#include <libmnl/libmnl.h> +#include <linux/types.h> +#include <linux/netlink.h> +#include <linux/genetlink.h> +#include <linux/netdev.h> +#include <time.h> +#include <net/if.h> + +#include "netdev-user.h" +#include <ynl.h> + +#define PAGE_SHIFT 12 +#define TEST_PREFIX "ncdevmem" +#define NUM_PAGES 16000 + +#ifndef MSG_SOCK_DEVMEM +#define MSG_SOCK_DEVMEM 0x2000000 +#endif + +/* + * tcpdevmem netcat. Works similarly to netcat but does device memory TCP + * instead of regular TCP. Uses udmabuf to mock a dmabuf provider. + * + * Usage: + * + * On server: + * ncdevmem -s <server IP> -c <client IP> -f eth1 -l -p 5201 -v 7 + * + * On client: + * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \ + * tr \\n \\0 | \ + * head -c 5G | \ + * nc <server IP> 5201 -p 5201 + * + * Note this is compatible with regular netcat. i.e. the sender or receiver can + * be replaced with regular netcat to test the RX or TX path in isolation. + */ + +static char *server_ip = "192.168.1.4"; +static char *client_ip = "192.168.1.2"; +static char *port = "5201"; +static size_t do_validation; +static int start_queue = 8; +static int num_queues = 8; +static char *ifname = "eth1"; +static unsigned int ifindex; +static unsigned int dmabuf_id; + +void print_bytes(void *ptr, size_t size) +{ + unsigned char *p = ptr; + int i; + + for (i = 0; i < size; i++) + printf("%02hhX ", p[i]); + printf("\n"); +} + +void print_nonzero_bytes(void *ptr, size_t size) +{ + unsigned char *p = ptr; + unsigned int i; + + for (i = 0; i < size; i++) + putchar(p[i]); + printf("\n"); +} + +void validate_buffer(void *line, size_t size) +{ + static unsigned char seed = 1; + unsigned char *ptr = line; + int errors = 0; + size_t i; + + for (i = 0; i < size; i++) { + if (ptr[i] != seed) { + fprintf(stderr, + "Failed validation: expected=%u, actual=%u, index=%lu\n", + seed, ptr[i], i); + errors++; + if (errors > 20) + error(1, 0, "validation failed."); + } + seed++; + if (seed == do_validation) + seed = 0; + } + + fprintf(stdout, "Validated buffer\n"); +} + +#define run_command(cmd, ...) \ + ({ \ + char command[256]; \ + memset(command, 0, sizeof(command)); \ + snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \ + printf("Running: %s\n", command); \ + system(command); \ + }) + +static int reset_flow_steering(void) +{ + int ret = 0; + + ret = run_command("sudo ethtool -K %s ntuple off", ifname); + if (ret) + return ret; + + return run_command("sudo ethtool -K %s ntuple on", ifname); +} + +static int configure_headersplit(bool on) +{ + return run_command("sudo ethtool -G %s tcp-data-split %s", ifname, + on ? "on" : "off"); +} + +static int configure_rss(void) +{ + return run_command("sudo ethtool -X %s equal %d", ifname, start_queue); +} + +static int configure_channels(unsigned int rx, unsigned int tx) +{ + return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx); +} + +static int configure_flow_steering(void) +{ + return run_command("sudo ethtool -N %s flow-type tcp4 src-ip %s dst-ip %s src-port %s dst-port %s queue %d", + ifname, client_ip, server_ip, port, port, start_queue); +} + +static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, + struct netdev_queue_id *queues, + unsigned int n_queue_index, struct ynl_sock **ys) +{ + struct netdev_bind_rx_req *req = NULL; + struct netdev_bind_rx_rsp *rsp = NULL; + struct ynl_error yerr; + + *ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!*ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = netdev_bind_rx_req_alloc(); + netdev_bind_rx_req_set_ifindex(req, ifindex); + netdev_bind_rx_req_set_fd(req, dmabuf_fd); + __netdev_bind_rx_req_set_queues(req, queues, n_queue_index); + + rsp = netdev_bind_rx(*ys, req); + if (!rsp) { + perror("netdev_bind_rx"); + goto err_close; + } + + if (!rsp->_present.id) { + perror("id not present"); + goto err_close; + } + + printf("got dmabuf id=%d\n", rsp->id); + dmabuf_id = rsp->id; + + netdev_bind_rx_req_free(req); + netdev_bind_rx_rsp_free(rsp); + + return 0; + +err_close: + fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg); + netdev_bind_rx_req_free(req); + ynl_sock_destroy(*ys); + return -1; +} + +static void create_udmabuf(int *devfd, int *memfd, int *buf, size_t dmabuf_size) +{ + struct udmabuf_create create; + int ret; + + *devfd = open("/dev/udmabuf", O_RDWR); + if (*devfd < 0) { + error(70, 0, + "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", + TEST_PREFIX); + } + + *memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); + if (*memfd < 0) + error(70, 0, "%s: [skip,no-memfd]\n", TEST_PREFIX); + + /* Required for udmabuf */ + ret = fcntl(*memfd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) + error(73, 0, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + + ret = ftruncate(*memfd, dmabuf_size); + if (ret == -1) + error(74, 0, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + + memset(&create, 0, sizeof(create)); + + create.memfd = *memfd; + create.offset = 0; + create.size = dmabuf_size; + *buf = ioctl(*devfd, UDMABUF_CREATE, &create); + if (*buf < 0) + error(75, 0, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX); +} + +int do_server(void) +{ + char ctrl_data[sizeof(int) * 20000]; + struct netdev_queue_id *queues; + size_t non_page_aligned_frags = 0; + struct sockaddr_in client_addr; + struct sockaddr_in server_sin; + size_t page_aligned_frags = 0; + int devfd, memfd, buf, ret; + size_t total_received = 0; + socklen_t client_addr_len; + bool is_devmem = false; + char *buf_mem = NULL; + struct ynl_sock *ys; + size_t dmabuf_size; + char iobuf[819200]; + char buffer[256]; + int socket_fd; + int client_fd; + size_t i = 0; + int opt = 1; + + dmabuf_size = getpagesize() * NUM_PAGES; + + create_udmabuf(&devfd, &memfd, &buf, dmabuf_size); + + if (reset_flow_steering()) + error(1, 0, "Failed to reset flow steering\n"); + + /* Configure RSS to divert all traffic from our devmem queues */ + if (configure_rss()) + error(1, 0, "Failed to configure rss\n"); + + /* Flow steer our devmem flows to start_queue */ + if (configure_flow_steering()) + error(1, 0, "Failed to configure flow steering\n"); + + sleep(1); + + queues = malloc(sizeof(*queues) * num_queues); + + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + error(1, 0, "Failed to bind\n"); + + buf_mem = mmap(NULL, dmabuf_size, PROT_READ | PROT_WRITE, MAP_SHARED, + buf, 0); + if (buf_mem == MAP_FAILED) + error(1, 0, "mmap()"); + + server_sin.sin_family = AF_INET; + server_sin.sin_port = htons(atoi(port)); + + ret = inet_pton(server_sin.sin_family, server_ip, &server_sin.sin_addr); + if (socket < 0) + error(79, 0, "%s: [FAIL, create socket]\n", TEST_PREFIX); + + socket_fd = socket(server_sin.sin_family, SOCK_STREAM, 0); + if (socket < 0) + error(errno, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); + + ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt)); + if (ret) + error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); + + ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &opt, + sizeof(opt)); + if (ret) + error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); + + printf("binding to address %s:%d\n", server_ip, + ntohs(server_sin.sin_port)); + + ret = bind(socket_fd, &server_sin, sizeof(server_sin)); + if (ret) + error(errno, errno, "%s: [FAIL, bind]\n", TEST_PREFIX); + + ret = listen(socket_fd, 1); + if (ret) + error(errno, errno, "%s: [FAIL, listen]\n", TEST_PREFIX); + + client_addr_len = sizeof(client_addr); + + inet_ntop(server_sin.sin_family, &server_sin.sin_addr, buffer, + sizeof(buffer)); + printf("Waiting or connection on %s:%d\n", buffer, + ntohs(server_sin.sin_port)); + client_fd = accept(socket_fd, &client_addr, &client_addr_len); + + inet_ntop(client_addr.sin_family, &client_addr.sin_addr, buffer, + sizeof(buffer)); + printf("Got connection from %s:%d\n", buffer, + ntohs(client_addr.sin_port)); + + while (1) { + struct iovec iov = { .iov_base = iobuf, + .iov_len = sizeof(iobuf) }; + struct dmabuf_cmsg *dmabuf_cmsg = NULL; + struct dma_buf_sync sync = { 0 }; + struct cmsghdr *cm = NULL; + struct msghdr msg = { 0 }; + struct dmabuf_token token; + ssize_t ret; + + is_devmem = false; + printf("\n\n"); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = ctrl_data; + msg.msg_controllen = sizeof(ctrl_data); + ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM); + printf("recvmsg ret=%ld\n", ret); + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) + continue; + if (ret < 0) { + perror("recvmsg"); + continue; + } + if (ret == 0) { + printf("client exited\n"); + goto cleanup; + } + + i++; + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { + if (cm->cmsg_level != SOL_SOCKET || + (cm->cmsg_type != SCM_DEVMEM_DMABUF && + cm->cmsg_type != SCM_DEVMEM_LINEAR)) { + fprintf(stdout, "skipping non-devmem cmsg\n"); + continue; + } + + dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm); + is_devmem = true; + + if (cm->cmsg_type == SCM_DEVMEM_LINEAR) { + /* TODO: process data copied from skb's linear + * buffer. + */ + fprintf(stdout, + "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n", + dmabuf_cmsg->frag_size); + + continue; + } + + token.token_start = dmabuf_cmsg->frag_token; + token.token_count = 1; + + total_received += dmabuf_cmsg->frag_size; + printf("received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n", + dmabuf_cmsg->frag_offset >> PAGE_SHIFT, + dmabuf_cmsg->frag_offset % getpagesize(), + dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size, + dmabuf_cmsg->frag_token, total_received, + dmabuf_cmsg->dmabuf_id); + + if (dmabuf_cmsg->dmabuf_id != dmabuf_id) + error(1, 0, + "received on wrong dmabuf_id: flow steering error\n"); + + if (dmabuf_cmsg->frag_size % getpagesize()) + non_page_aligned_frags++; + else + page_aligned_frags++; + + sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_START; + ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync); + + if (do_validation) + validate_buffer( + ((unsigned char *)buf_mem) + + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size); + else + print_nonzero_bytes( + ((unsigned char *)buf_mem) + + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size); + + sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_END; + ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync); + + ret = setsockopt(client_fd, SOL_SOCKET, + SO_DEVMEM_DONTNEED, &token, + sizeof(token)); + if (ret != 1) + error(1, 0, + "SO_DEVMEM_DONTNEED not enough tokens"); + } + if (!is_devmem) + error(1, 0, "flow steering error\n"); + + printf("total_received=%lu\n", total_received); + } + + fprintf(stdout, "%s: ok\n", TEST_PREFIX); + + fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", + page_aligned_frags, non_page_aligned_frags); + + fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", + page_aligned_frags, non_page_aligned_frags); + +cleanup: + + munmap(buf_mem, dmabuf_size); + close(client_fd); + close(socket_fd); + close(buf); + close(memfd); + close(devfd); + ynl_sock_destroy(ys); + + return 0; +} + +void run_devmem_tests(void) +{ + struct netdev_queue_id *queues; + int devfd, memfd, buf; + struct ynl_sock *ys; + size_t dmabuf_size; + size_t i = 0; + + dmabuf_size = getpagesize() * NUM_PAGES; + + create_udmabuf(&devfd, &memfd, &buf, dmabuf_size); + + /* Configure RSS to divert all traffic from our devmem queues */ + if (configure_rss()) + error(1, 0, "rss error\n"); + + queues = calloc(num_queues, sizeof(*queues)); + + if (configure_headersplit(1)) + error(1, 0, "Failed to configure header split\n"); + + if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + error(1, 0, "Binding empty queues array should have failed\n"); + + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + if (configure_headersplit(0)) + error(1, 0, "Failed to configure header split\n"); + + if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + error(1, 0, "Configure dmabuf with header split off should have failed\n"); + + if (configure_headersplit(1)) + error(1, 0, "Failed to configure header split\n"); + + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) + error(1, 0, "Failed to bind\n"); + + /* Deactivating a bound queue should not be legal */ + if (!configure_channels(num_queues, num_queues - 1)) + error(1, 0, "Deactivating a bound queue should be illegal.\n"); + + /* Closing the netlink socket does an implicit unbind */ + ynl_sock_destroy(ys); +} + +int main(int argc, char *argv[]) +{ + int is_server = 0, opt; + + while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) { + switch (opt) { + case 'l': + is_server = 1; + break; + case 's': + server_ip = optarg; + break; + case 'c': + client_ip = optarg; + break; + case 'p': + port = optarg; + break; + case 'v': + do_validation = atoll(optarg); + break; + case 'q': + num_queues = atoi(optarg); + break; + case 't': + start_queue = atoi(optarg); + break; + case 'f': + ifname = optarg; + break; + case '?': + printf("unknown option: %c\n", optopt); + break; + } + } + + ifindex = if_nametoindex(ifname); + + for (; optind < argc; optind++) + printf("extra arguments: %s\n", argv[optind]); + + run_devmem_tests(); + + if (is_server) + return do_server(); + + return 0; +} diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh index e3afcb424710..438f7b2acc5f 100755 --- a/tools/testing/selftests/net/netdevice.sh +++ b/tools/testing/selftests/net/netdevice.sh @@ -67,8 +67,12 @@ kci_net_setup() return $ksft_skip fi - # TODO what ipaddr to set ? DHCP ? - echo "SKIP: $netdev: set IP address" + if [ "$veth_created" ]; then + echo "XFAIL: $netdev: set IP address unsupported for veth*" + else + # TODO what ipaddr to set ? DHCP ? + echo "SKIP: $netdev: set IP address" + fi return $ksft_skip } @@ -86,7 +90,7 @@ kci_netdev_ethtool_test() ret=$? if [ $ret -ne 0 ];then if [ $ret -eq "$1" ];then - echo "SKIP: $netdev: ethtool $2 not supported" + echo "XFAIL: $netdev: ethtool $2 not supported" return $ksft_skip else echo "FAIL: $netdev: ethtool $2" @@ -124,11 +128,45 @@ kci_netdev_ethtool() return 1 fi echo "PASS: $netdev: ethtool list features" - #TODO for each non fixed features, try to turn them on/off + + while read -r FEATURE VALUE FIXED; do + [ "$FEATURE" != "Features" ] || continue # Skip "Features" + [ "$FIXED" != "[fixed]" ] || continue # Skip fixed features + feature="${FEATURE%:*}" + + ethtool --offload "$netdev" "$feature" off + if [ $? -eq 0 ]; then + echo "PASS: $netdev: Turned off feature: $feature" + else + echo "FAIL: $netdev: Failed to turn off feature:" \ + "$feature" + fi + + ethtool --offload "$netdev" "$feature" on + if [ $? -eq 0 ]; then + echo "PASS: $netdev: Turned on feature: $feature" + else + echo "FAIL: $netdev: Failed to turn on feature:" \ + "$feature" + fi + + #restore the feature to its initial state + ethtool --offload "$netdev" "$feature" "$VALUE" + if [ $? -eq 0 ]; then + echo "PASS: $netdev: Restore feature $feature" \ + "to initial state $VALUE" + else + echo "FAIL: $netdev: Failed to restore feature" \ + "$feature to initial state $VALUE" + fi + + done < "$TMP_ETHTOOL_FEATURES" + rm "$TMP_ETHTOOL_FEATURES" kci_netdev_ethtool_test 74 'dump' "ethtool -d $netdev" kci_netdev_ethtool_test 94 'stats' "ethtool -S $netdev" + return 0 } @@ -196,10 +234,24 @@ if [ ! -e "$TMP_LIST_NETDEV" ];then fi ip link show |grep '^[0-9]' | grep -oE '[[:space:]].*eth[0-9]*:|[[:space:]].*enp[0-9]s[0-9]:' | cut -d\ -f2 | cut -d: -f1> "$TMP_LIST_NETDEV" + +if [ ! -s "$TMP_LIST_NETDEV" ]; then + echo "No valid network device found, creating veth pair" + ip link add veth0 type veth peer name veth1 + echo "veth0" > "$TMP_LIST_NETDEV" + veth_created=1 +fi + while read netdev do kci_test_netdev "$netdev" done < "$TMP_LIST_NETDEV" +#clean up veth interface pair if it was created +if [ "$veth_created" ]; then + ip link delete veth0 + echo "Removed veth pair" +fi + rm "$TMP_LIST_NETDEV" exit 0 diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index 47945b2b3f92..d13fb5ea3e89 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -7,6 +7,7 @@ MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) TEST_PROGS := br_netfilter.sh bridge_brouter.sh +TEST_PROGS += br_netfilter_queue.sh TEST_PROGS += conntrack_icmp_related.sh TEST_PROGS += conntrack_ipip_mtu.sh TEST_PROGS += conntrack_tcp_unreplied.sh diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh new file mode 100755 index 000000000000..6a764d70ab06 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +source lib.sh + +checktool "nft --version" "run test without nft tool" + +cleanup() { + cleanup_all_ns +} + +setup_ns c1 c2 c3 sender + +trap cleanup EXIT + +nf_queue_wait() +{ + grep -q "^ *$1 " "/proc/self/net/netfilter/nfnetlink_queue" +} + +port_add() { + ns="$1" + dev="$2" + a="$3" + + ip link add name "$dev" type veth peer name "$dev" netns "$ns" + + ip -net "$ns" addr add 192.168.1."$a"/24 dev "$dev" + ip -net "$ns" link set "$dev" up + + ip link set "$dev" master br0 + ip link set "$dev" up +} + +[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } + +ip link add br0 type bridge +ip addr add 192.168.1.254/24 dev br0 + +port_add "$c1" "c1" 1 +port_add "$c2" "c2" 2 +port_add "$c3" "c3" 3 +port_add "$sender" "sender" 253 + +ip link set br0 up + +modprobe -q br_netfilter + +sysctl net.bridge.bridge-nf-call-iptables=1 || exit 1 + +ip netns exec "$sender" ping -I sender -c1 192.168.1.1 || exit 1 +ip netns exec "$sender" ping -I sender -c1 192.168.1.2 || exit 2 +ip netns exec "$sender" ping -I sender -c1 192.168.1.3 || exit 3 + +nft -f /dev/stdin <<EOF +table ip filter { + chain forward { + type filter hook forward priority 0; policy accept; + ct state new counter + ip protocol icmp counter queue num 0 bypass + } +} +EOF +./nf_queue -t 5 > /dev/null & + +busywait 5000 nf_queue_wait + +for i in $(seq 1 5); do conntrack -F > /dev/null 2> /dev/null; sleep 0.1 ; done & +ip netns exec "$sender" ping -I sender -f -c 50 -b 192.168.1.255 + +read t < /proc/sys/kernel/tainted +if [ "$t" -eq 0 ];then + echo PASS: kernel not tainted +else + echo ERROR: kernel is tainted + exit 1 +fi + +exit 0 diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 63ef80ef47a4..b2dd4db45215 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -87,3 +87,5 @@ CONFIG_XFRM_USER=m CONFIG_XFRM_STATISTICS=y CONFIG_NET_PKTGEN=m CONFIG_TUN=m +CONFIG_INET_DIAG=m +CONFIG_SCTP_DIAG=m diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index c61d23a8c88d..d66e3c4dfec6 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -8,7 +8,7 @@ source lib.sh ret=0 -timeout=2 +timeout=5 cleanup() { @@ -25,6 +25,9 @@ cleanup() } checktool "nft --version" "test without nft tool" +checktool "socat -h" "run test without socat" + +modprobe -q sctp trap cleanup EXIT @@ -36,7 +39,9 @@ TMPFILE2=$(mktemp) TMPFILE3=$(mktemp) TMPINPUT=$(mktemp) -dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT" +COUNT=200 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=25 +dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT" if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then echo "SKIP: No virtual ethernet pair device support in kernel" @@ -250,45 +255,49 @@ listener_ready() test_tcp_forward() { - ip netns exec "$nsrouter" ./nf_queue -q 2 -t "$timeout" & + ip netns exec "$nsrouter" ./nf_queue -q 2 & local nfqpid=$! timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null & local rpid=$! busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2 ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain" + kill "$nfqpid" } test_tcp_localhost() { - dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT" timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null & local rpid=$! - ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" & + ip netns exec "$nsrouter" ./nf_queue -q 3 & local nfqpid=$! busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3 ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null wait "$rpid" && echo "PASS: tcp via loopback" - wait 2>/dev/null + kill "$nfqpid" } test_tcp_localhost_connectclose() { - ip netns exec "$nsrouter" ./connect_close -p 23456 -t "$timeout" & - ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" & + ip netns exec "$nsrouter" ./nf_queue -q 3 & + local nfqpid=$! busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3 + timeout 10 ip netns exec "$nsrouter" ./connect_close -p 23456 -t 3 + + kill "$nfqpid" wait && echo "PASS: tcp via loopback with connect/close" - wait 2>/dev/null } test_tcp_localhost_requeue() @@ -353,7 +362,7 @@ table inet filter { } } EOF - ip netns exec "$ns1" ./nf_queue -q 1 -t "$timeout" & + ip netns exec "$ns1" ./nf_queue -q 1 & local nfqpid=$! busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1 @@ -363,6 +372,7 @@ EOF for n in output post; do for d in tvrf eth0; do if ! ip netns exec "$ns1" nft list chain inet filter "$n" | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"; then + kill "$nfqpid" echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2 ip netns exec "$ns1" nft list ruleset ret=1 @@ -371,8 +381,96 @@ EOF done done - wait "$nfqpid" && echo "PASS: icmp+nfqueue via vrf" - wait 2>/dev/null + kill "$nfqpid" + echo "PASS: icmp+nfqueue via vrf" +} + +sctp_listener_ready() +{ + ss -S -N "$1" -ln -o "sport = :12345" | grep -q 12345 +} + +check_output_files() +{ + local f1="$1" + local f2="$2" + local err="$3" + + if ! cmp "$f1" "$f2" ; then + echo "FAIL: $err: input and output file differ" 1>&2 + echo -n " Input file" 1>&2 + ls -l "$f1" 1>&2 + echo -n "Output file" 1>&2 + ls -l "$f2" 1>&2 + ret=1 + fi +} + +test_sctp_forward() +{ + ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet sctpq { + chain forward { + type filter hook forward priority 0; policy accept; + sctp dport 12345 queue num 10 + } +} +EOF + timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" & + local rpid=$! + + busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2" + + ip netns exec "$nsrouter" ./nf_queue -q 10 -G & + local nfqpid=$! + + ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null + + if ! ip netns exec "$nsrouter" nft delete table inet sctpq; then + echo "FAIL: Could not delete sctpq table" + exit 1 + fi + + wait "$rpid" && echo "PASS: sctp and nfqueue in forward chain" + kill "$nfqpid" + + check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward" +} + +test_sctp_output() +{ + ip netns exec "$ns1" nft -f /dev/stdin <<EOF +table inet sctpq { + chain output { + type filter hook output priority 0; policy accept; + sctp dport 12345 queue num 11 + } +} +EOF + # reduce test file size, software segmentation causes sk wmem increase. + dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT" + + timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" & + local rpid=$! + + busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2" + + ip netns exec "$ns1" ./nf_queue -q 11 & + local nfqpid=$! + + ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null + + if ! ip netns exec "$ns1" nft delete table inet sctpq; then + echo "FAIL: Could not delete sctpq table" + exit 1 + fi + + # must wait before checking completeness of output file. + wait "$rpid" && echo "PASS: sctp and nfqueue in output chain with GSO" + kill "$nfqpid" + + check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output" } test_queue_removal() @@ -388,7 +486,7 @@ table ip filter { } } EOF - ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 -t "$timeout" & + ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 & local nfqpid=$! busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0 @@ -443,11 +541,16 @@ test_queue 10 # same. We queue to a second program as well. load_ruleset "filter2" 20 test_queue 20 +ip netns exec "$ns1" nft flush ruleset test_tcp_forward test_tcp_localhost test_tcp_localhost_connectclose test_tcp_localhost_requeue +test_sctp_forward +test_sctp_output + +# should be last, adds vrf device in ns1 and changes routes test_icmp_vrf test_queue_removal diff --git a/tools/testing/selftests/net/packetdrill/Makefile b/tools/testing/selftests/net/packetdrill/Makefile new file mode 100644 index 000000000000..31cfb666ba8b --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_INCLUDES := ksft_runner.sh \ + defaults.sh \ + set_sysctls.py \ + ../../kselftest/ktap_helpers.sh + +TEST_PROGS := $(wildcard *.pkt) + +include ../../lib.mk diff --git a/tools/testing/selftests/net/packetdrill/config b/tools/testing/selftests/net/packetdrill/config new file mode 100644 index 000000000000..0237ed98f3c0 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/config @@ -0,0 +1,11 @@ +CONFIG_IPV6=y +CONFIG_HZ_1000=y +CONFIG_HZ=1000 +CONFIG_NET_NS=y +CONFIG_NET_SCH_FIFO=y +CONFIG_NET_SCH_FQ=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYN_COOKIES=y +CONFIG_TCP_CONG_CUBIC=y +CONFIG_TCP_MD5SIG=y +CONFIG_TUN=y diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh new file mode 100755 index 000000000000..1095a7b22f44 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/defaults.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Set standard production config values that relate to TCP behavior. + +# Flush old cached data (fastopen cookies). +ip tcp_metrics flush all > /dev/null 2>&1 + +# TCP min, default, and max receive and send buffer sizes. +sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))" +sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304" + +# TCP timestamps. +sysctl -q net.ipv4.tcp_timestamps=1 + +# TCP SYN(ACK) retry thresholds +sysctl -q net.ipv4.tcp_syn_retries=5 +sysctl -q net.ipv4.tcp_synack_retries=5 + +# TCP Forward RTO-Recovery, RFC 5682. +sysctl -q net.ipv4.tcp_frto=2 + +# TCP Selective Acknowledgements (SACK) +sysctl -q net.ipv4.tcp_sack=1 + +# TCP Duplicate Selective Acknowledgements (DSACK) +sysctl -q net.ipv4.tcp_dsack=1 + +# TCP FACK (Forward Acknowldgement) +sysctl -q net.ipv4.tcp_fack=0 + +# TCP reordering degree ("dupthresh" threshold for entering Fast Recovery). +sysctl -q net.ipv4.tcp_reordering=3 + +# TCP congestion control. +sysctl -q net.ipv4.tcp_congestion_control=cubic + +# TCP slow start after idle. +sysctl -q net.ipv4.tcp_slow_start_after_idle=0 + +# TCP RACK and TLP. +sysctl -q net.ipv4.tcp_early_retrans=4 net.ipv4.tcp_recovery=1 + +# TCP method for deciding when to defer sending to accumulate big TSO packets. +sysctl -q net.ipv4.tcp_tso_win_divisor=3 + +# TCP Explicit Congestion Notification (ECN) +sysctl -q net.ipv4.tcp_ecn=0 + +sysctl -q net.ipv4.tcp_pacing_ss_ratio=200 +sysctl -q net.ipv4.tcp_pacing_ca_ratio=120 +sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1 + +sysctl -q net.ipv4.tcp_fastopen=0x70403 +sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 + +sysctl -q net.ipv4.tcp_syncookies=1 + +# Override the default qdisc on the tun device. +# Many tests fail with timing errors if the default +# is FQ and that paces their flows. +tc qdisc add dev tun0 root pfifo + diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh new file mode 100755 index 000000000000..7478c0c0c9aa --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh" + +readonly ipv4_args=('--ip_version=ipv4 ' + '--local_ip=192.168.0.1 ' + '--gateway_ip=192.168.0.1 ' + '--netmask_ip=255.255.0.0 ' + '--remote_ip=192.0.2.1 ' + '-D CMSG_LEVEL_IP=SOL_IP ' + '-D CMSG_TYPE_RECVERR=IP_RECVERR ') + +readonly ipv6_args=('--ip_version=ipv6 ' + '--mtu=1520 ' + '--local_ip=fd3d:0a0b:17d6::1 ' + '--gateway_ip=fd3d:0a0b:17d6:8888::1 ' + '--remote_ip=fd3d:fa7b:d17d::1 ' + '-D CMSG_LEVEL_IP=SOL_IPV6 ' + '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ') + +if [ $# -ne 1 ]; then + ktap_exit_fail_msg "usage: $0 <script>" + exit "$KSFT_FAIL" +fi +script="$1" + +if [ -z "$(which packetdrill)" ]; then + ktap_skip_all "packetdrill not found in PATH" + exit "$KSFT_SKIP" +fi + +ktap_print_header +ktap_set_plan 2 + +unshare -n packetdrill ${ipv4_args[@]} $(basename $script) > /dev/null \ + && ktap_test_pass "ipv4" || ktap_test_fail "ipv4" +unshare -n packetdrill ${ipv6_args[@]} $(basename $script) > /dev/null \ + && ktap_test_pass "ipv6" || ktap_test_fail "ipv6" + +ktap_finished diff --git a/tools/testing/selftests/net/packetdrill/set_sysctls.py b/tools/testing/selftests/net/packetdrill/set_sysctls.py new file mode 100755 index 000000000000..5ddf456ae973 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/set_sysctls.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Sets sysctl values and writes a file that restores them. + +The arguments are of the form "<proc-file>=<val>" separated by spaces. +The program first reads the current value of the proc-file and creates +a shell script named "/tmp/sysctl_restore_${PACKETDRILL_PID}.sh" which +restores the values when executed. It then sets the new values. + +PACKETDRILL_PID is set by packetdrill to the pid of itself, so a .pkt +file could restore sysctls by running `/tmp/sysctl_restore_${PPID}.sh` +at the end. +""" + +import os +import subprocess +import sys + +filename = '/tmp/sysctl_restore_%s.sh' % os.environ['PACKETDRILL_PID'] + +# Open file for restoring sysctl values +restore_file = open(filename, 'w') +print('#!/bin/bash', file=restore_file) + +for a in sys.argv[1:]: + sysctl = a.split('=') + # sysctl[0] contains the proc-file name, sysctl[1] the new value + + # read current value and add restore command to file + cur_val = subprocess.check_output(['cat', sysctl[0]], universal_newlines=True) + print('echo "%s" > %s' % (cur_val.strip(), sysctl[0]), file=restore_file) + + # set new value + cmd = 'echo "%s" > %s' % (sysctl[1], sysctl[0]) + os.system(cmd) + +os.system('chmod u+x %s' % filename) diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt new file mode 100644 index 000000000000..df49c67645ac --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_INQ and TCP_CM_INQ on the client side. +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Connect to the server and enable TCP_INQ. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 setsockopt(3, SOL_TCP, TCP_INQ, [1], 4) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 5792 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 7> + +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700> + +// Now we have 10K of data ready on the socket. + +0 < . 1:10001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 10001 <nop,nop,TS val 200 ecr 700> + +// We read 1K and we should have 9K ready to read. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 1000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=9000}]}, 0) = 1000 +// We read 9K and we should have no further data ready to read. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 9000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=0}]}, 0) = 9000 + +// Server sends more data and closes the connections. + +0 < F. 10001:20001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 20002 <nop,nop,TS val 200 ecr 700> + +// We read 10K and we should have one "fake" byte because the connection is +// closed. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 10000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=1}]}, 0) = 10000 +// Now, receive EOF. + +0 read(3, ..., 2000) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt new file mode 100644 index 000000000000..04a5e2590c62 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_INQ and TCP_CM_INQ on the server side. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + +// Accept the connection and enable TCP_INQ. + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_TCP, TCP_INQ, [1], 4) = 0 + +// Now we have 10K of data ready on the socket. + +0 < . 1:10001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 10001 + +// We read 2K and we should have 8K ready to read. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 2000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=8000}]}, 0) = 2000 +// We read 8K and we should have no further data ready to read. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 8000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=0}]}, 0) = 8000 +// Client sends more data and closes the connections. + +0 < F. 10001:20001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 20002 + +// We read 10K and we should have one "fake" byte because the connection is +// closed. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 10000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=1}]}, 0) = 10000 +// Now, receive error. + +0 read(3, ..., 2000) = -1 ENOTCONN (Transport endpoint is not connected) diff --git a/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt new file mode 100644 index 000000000000..25dfef95d3f8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test what happens when client does not provide MD5 on SYN, +// but then does on the ACK that completes the three-way handshake. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// Ooh, weird: client provides MD5 option on the ACK: + +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop> + +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop> + +// The TCP listener refcount should be 2, but on buggy kernels it can be 0: + +0 `grep " 0A " /proc/net/tcp /proc/net/tcp6 | grep ":1F90"` + +// Now here comes the legit ACK: + +.01 < . 1:1(0) ack 1 win 514 + +// Make sure the connection is OK: + +0 accept(3, ..., ...) = 4 + + +.01 write(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt new file mode 100644 index 000000000000..795c476d222d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow. +// In this variant, the receiver ACKs every packet. + +// Set up config. To keep things simple, disable the +// mechanism that defers sending in order to send bigger TSO packets. +`./defaults.sh +sysctl -q net.ipv4.tcp_tso_win_divisor=100` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 30000) = 30000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 1001 win 257 + +0 > P. 10001:12001(2000) ack 1 + + +0 < . 1:1(0) ack 2001 win 257 + +0 > P. 12001:14001(2000) ack 1 + ++.005 < . 1:1(0) ack 3001 win 257 + +0 > P. 14001:16001(2000) ack 1 + + +0 < . 1:1(0) ack 4001 win 257 + +0 > P. 16001:18001(2000) ack 1 + ++.005 < . 1:1(0) ack 5001 win 257 + +0 > P. 18001:20001(2000) ack 1 + + +0 < . 1:1(0) ack 6001 win 257 + +0 > P. 20001:22001(2000) ack 1 + ++.005 < . 1:1(0) ack 7001 win 257 + +0 > P. 22001:24001(2000) ack 1 + + +0 < . 1:1(0) ack 8001 win 257 + +0 > P. 24001:26001(2000) ack 1 + ++.005 < . 1:1(0) ack 9001 win 257 + +0 > P. 26001:28001(2000) ack 1 + + +0 < . 1:1(0) ack 10001 win 257 + +0 > P. 28001:30001(2000) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt new file mode 100644 index 000000000000..9212ae1fd0f2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when an outstanding flight of packets is +// less than the current cwnd, and not big enough to bump up cwnd. +// +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +// Only send 5 packets. + +0 write(4, ..., 5000) = 5000 + +0 > P. 1:5001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 5001 win 257 + +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt new file mode 100644 index 000000000000..416c901ddf51 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when an outstanding flight of packets is +// less than the current cwnd, but still big enough that in slow +// start we want to increase our cwnd a little. +// +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +// Only send 6 packets. + +0 write(4, ..., 6000) = 6000 + +0 > P. 1:6001(6000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 6001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt new file mode 100644 index 000000000000..a894b7d4559c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow. +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Set up config. To keep things simple, disable the +// mechanism that defers sending in order to send bigger TSO packets. +`./defaults.sh +sysctl -q net.ipv4.tcp_tso_win_divisor=100` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 30000) = 30000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2001 win 257 + +0 > P. 10001:14001(4000) ack 1 + ++.005 < . 1:1(0) ack 4001 win 257 + +0 > P. 14001:18001(4000) ack 1 + ++.005 < . 1:1(0) ack 6001 win 257 + +0 > P. 18001:22001(4000) ack 1 + ++.005 < . 1:1(0) ack 8001 win 257 + +0 > P. 22001:26001(4000) ack 1 + ++.005 < . 1:1(0) ack 10001 win 257 + +0 > P. 26001:30001(4000) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt new file mode 100644 index 000000000000..065fae9e9abd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow. +// In this variant, the receiver sends one ACK per 4 packets. + +// Set up config. To keep things simple, disable the +// mechanism that defers sending in order to send bigger TSO packets. +`./defaults.sh +sysctl -q net.ipv4.tcp_tso_win_divisor=100` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 30000) = 30000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +.11 < . 1:1(0) ack 4001 win 257 + +0 > P. 10001:18001(8000) ack 1 + + +.01 < . 1:1(0) ack 8001 win 257 + +0 > P. 18001:26001(8000) ack 1 + ++.005 < . 1:1(0) ack 10001 win 257 + +0 > P. 26001:30001(4000) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt new file mode 100644 index 000000000000..11b213be1138 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start after idle +// This test expects tso size to be at least initial cwnd * mss + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \ + /proc/sys/net/ipv4/tcp_min_tso_segs=10` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 511 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 26000) = 26000 + +0 > P. 1:5001(5000) ack 1 + +0 > P. 5001:10001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +.1 < . 1:1(0) ack 10001 win 511 + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% + +0 > P. 10001:20001(10000) ack 1 + +0 > P. 20001:26001(6000) ack 1 + + +.1 < . 1:1(0) ack 26001 win 511 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + + +2 write(4, ..., 20000) = 20000 +// If slow start after idle works properly, we should send 5 MSS here (cwnd/2) + +0 > P. 26001:31001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt new file mode 100644 index 000000000000..577ed8c8852c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start after window update +// This test expects tso size to be at least initial cwnd * mss + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \ + /proc/sys/net/ipv4/tcp_min_tso_segs=10` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 511 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 26000) = 26000 + +0 > P. 1:5001(5000) ack 1 + +0 > P. 5001:10001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +.1 < . 1:1(0) ack 10001 win 511 + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% + +0 > P. 10001:20001(10000) ack 1 + +0 > P. 20001:26001(6000) ack 1 + + +.1 < . 1:1(0) ack 26001 win 0 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + + +0 write(4, ..., 20000) = 20000 +// 1st win0 probe ++.3~+.310 > . 26000:26000(0) ack 1 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + +// 2nd win0 probe ++.6~+.620 > . 26000:26000(0) ack 1 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + +// 3rd win0 probe ++1.2~+1.240 > . 26000:26000(0) ack 1 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + + +.9 < . 1:1(0) ack 26001 win 511 + +0 > P. 26001:31001(5000) ack 1 + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt new file mode 100644 index 000000000000..869f32c35a2a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when application-limited: in this case, +// with IW10, if we don't fully use our cwnd but instead +// send just 9 packets, then cwnd should grow to twice that +// value, or 18 packets. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 9000) = 9000 + +0 > P. 1:9001(9000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 6001 win 257 + +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 8001 win 257 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 9001 win 257 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt new file mode 100644 index 000000000000..0f77b7955db6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when application-limited: in this case, +// with IW10, if we send exactly 10 packets then cwnd should grow to 20. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 6001 win 257 + +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 8001 win 257 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 10001 win 257 + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt new file mode 100644 index 000000000000..7e9c83d617c2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow, even if TSQ triggers. +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Note we use FQ/pacing to check if TCP Small Queues is not hurting + +`./defaults.sh +tc qdisc replace dev tun0 root fq +sysctl -q net/ipv4/tcp_pacing_ss_ratio=200 +sysctl -e -q net.ipv4.tcp_min_tso_segs=2` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 500 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 40000) = 40000 +// This might change if we cook the initial packet with 10 MSS. + +0 > P. 1:2921(2920) ack 1 + +0 > P. 2921:5841(2920) ack 1 + +0 > P. 5841:8761(2920) ack 1 + +0 > P. 8761:11681(2920) ack 1 + +0 > P. 11681:14601(2920) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2921 win 500 + +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }% + +// Note: after this commit : "net_sched: sch_fq: account for schedule/timers drifts" +// FQ notices that this packet missed the 'time to send next packet' computed +// when prior packet (11681:14601(2920)) was sent. +// So FQ will allow following packet to be sent a bit earlier (quantum/2) +// (FQ commit allows an application/cwnd limited flow to get at most quantum/2 extra credit) + +0 > P. 14601:17521(2920) ack 1 + ++.003 < . 1:1(0) ack 5841 win 500 + +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }% + ++.001 > P. 17521:20441(2920) ack 1 + ++.001 < . 1:1(0) ack 8761 win 500 + +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }% + +// remaining packets are delivered at a constant rate. ++.007 > P. 20441:23361(2920) ack 1 + ++.002 < . 1:1(0) ack 11681 win 500 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% ++.001 < . 1:1(0) ack 14601 win 500 + ++.004 > P. 23361:26281(2920) ack 1 + ++.007 > P. 26281:29201(2920) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, 'cwnd=%d' % tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt new file mode 100644 index 000000000000..a82c8899d36b --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +// basic zerocopy test: +// +// send a packet with MSG_ZEROCOPY and receive the notification ID +// repeat and verify IDs are consecutive + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=1, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt new file mode 100644 index 000000000000..c01915e7f4a1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +// batch zerocopy test: +// +// send multiple packets, then read one range of all notifications. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_MARK, [666], 4) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt new file mode 100644 index 000000000000..6509882932e9 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// Minimal client-side zerocopy test + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0...0 connect(4, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8> + +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > . 1:1(0) ack 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt new file mode 100644 index 000000000000..2cd78755cb2a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +// send with MSG_ZEROCOPY on a non-established socket +// +// verify that a send in state TCP_CLOSE correctly aborts the zerocopy +// operation, specifically it does not increment the zerocopy counter. +// +// First send on a closed socket and wait for (absent) notification. +// Then connect and send and verify that notification nr. is zero. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = -1 EPIPE (Broken pipe) + + +0.1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable) + + +0...0 connect(4, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8> + +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > . 1:1(0) ack 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt new file mode 100644 index 000000000000..7671c20e01cf --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +// epoll zerocopy test: +// +// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but +// it is not level-triggered either. +// +// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR +// is correctly fired only once, when EPOLLET is set. send another packet with +// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 epoll_create(1) = 5 + +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, {events=EPOLLOUT|EPOLLET, fd=4}) = 0 + +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + +// receive only one EPOLLERR for the two sends above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 8001:12001(4000) ack 1 + +0 < . 1:1(0) ack 12001 win 257 + +// receive only one EPOLLERR for the third send above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=2}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt new file mode 100644 index 000000000000..fadc480fdb7f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +// epoll zerocopy test: +// +// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but +// it is not level-triggered either. this tests verify that the same behavior is +// maintained when we have EPOLLEXCLUSIVE. +// +// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR +// is correctly fired only once, when EPOLLET is set. send another packet with +// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 epoll_create(1) = 5 + +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, + {events=EPOLLOUT|EPOLLET|EPOLLEXCLUSIVE, fd=4}) = 0 + +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + +// receive only one EPOLLERR for the two sends above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 8001:12001(4000) ack 1 + +0 < . 1:1(0) ack 12001 win 257 + +// receive only one EPOLLERR for the third send above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=2}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt new file mode 100644 index 000000000000..5bfa0d1d2f4a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// epoll zerocopy test: +// +// This is a test to confirm that EPOLLERR is only fired once for an FD when +// EPOLLONESHOT is set. +// +// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR +// is correctly fired only once, when EPOLLONESHOT is set. send another packet +// with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and +// confirm that EPOLLERR is correctly set. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 epoll_create(1) = 5 + +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, + {events=EPOLLOUT|EPOLLET|EPOLLONESHOT, fd=4}) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + +// receive only one EPOLLERR for the two sends above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 8001:12001(4000) ack 1 + +0 < . 1:1(0) ack 12001 win 257 + +// receive no EPOLLERR for the third send above. + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + +// rearm the FD and verify the EPOLLERR is fired again. + +0 epoll_ctl(5, EPOLL_CTL_MOD, 4, {events=EPOLLOUT|EPOLLONESHOT, fd=4}) = 0 + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=2}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt new file mode 100644 index 000000000000..4a73bbf46961 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +// Fastopen client zerocopy test: +// +// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the +// kernel returns the notification ID. +// +// Fastopen requires a stored cookie. Create two sockets. The first +// one will have no data in the initial send. On return 0 the +// zerocopy notification counter is not incremented. Verify this too. + +`./defaults.sh` + +// Send a FastOpen request, no cookie yet so no data in SYN + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 sendto(3, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 1000 ecr 0,nop,wscale 8,FO,nop,nop> + +.01 < S. 123:123(0) ack 1 win 14600 <mss 940,TS val 2000 ecr 1000,sackOK,nop,wscale 6, FO abcd1234,nop,nop> + +0 > . 1:1(0) ack 1 <nop,nop,TS val 1001 ecr 2000> + +// Read from error queue: no zerocopy notification + +1 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable) + + +.01 close(3) = 0 + +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1002 ecr 2000> + +.01 < F. 1:1(0) ack 2 win 92 <nop,nop,TS val 2001 ecr 1002> + +0 > . 2:2(0) ack 2 <nop,nop,TS val 1003 ecr 2001> + +// Send another Fastopen request, now SYN will have data + +.07 `sysctl -q net.ipv4.tcp_timestamps=0` + +.1 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5 + +0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(5, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 sendto(5, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = 500 + +0 > S 0:500(500) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO abcd1234,nop,nop> + +.05 < S. 5678:5678(0) ack 501 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6> + +0 > . 501:501(0) ack 1 + +// Read from error queue: now has first zerocopy notification + +0.5 recvmsg(5, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt new file mode 100644 index 000000000000..36086c5877ce --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +// Fastopen server zerocopy test: +// +// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the +// kernel returns the notification ID. + +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207` + +// Set up a TFO server listening socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +.1 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [2], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +// Client sends a SYN with data. + +.1 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK> + +// Server accepts and replies with data. ++.005 accept(3, ..., ...) = 4 + +0 read(4, ..., 1024) = 1000 + +0 sendto(4, ..., 1000, MSG_ZEROCOPY, ..., ...) = 1000 + +0 > P. 1:1001(1000) ack 1001 + +.05 < . 1001:1001(0) ack 1001 win 32792 + +// Read from error queue: now has first zerocopy notification + +0.1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt new file mode 100644 index 000000000000..672f817faca0 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +// tcp_MAX_SKB_FRAGS test +// +// Verify that sending an iovec of tcp_MAX_SKB_FRAGS + 1 elements will +// 1) fit in a single packet without zerocopy +// 2) spill over into a second packet with zerocopy, +// because each iovec element becomes a frag +// 3) the PSH bit is set on an skb when it runs out of fragments + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + + // Each pinned zerocopy page is fully accounted to skb->truesize. + // This test generates a worst case packet with each frag storing + // one byte, but increasing truesize with a page (64KB on PPC). + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [2000000], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + // send an iov of 18 elements: just becomes a linear skb + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}], + msg_flags=0}, 0) = 18 + + +0 > P. 1:19(18) ack 1 + +0 < . 1:1(0) ack 19 win 257 + + // send a zerocopy iov of 18 elements: + +1 sendmsg(4, {msg_name(...)=..., + msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}], + msg_flags=0}, MSG_ZEROCOPY) = 18 + + // verify that it is split in one skb of 17 frags + 1 of 1 frag + // verify that both have the PSH bit set + +0 > P. 19:36(17) ack 1 + +0 < . 1:1(0) ack 36 win 257 + + +0 > P. 36:37(1) ack 1 + +0 < . 1:1(0) ack 37 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + + // send a zerocopy iov of 64 elements: + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(64)=[{..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}], + msg_flags=0}, MSG_ZEROCOPY) = 64 + + // verify that it is split in skbs with 17 frags + +0 > P. 37:54(17) ack 1 + +0 < . 1:1(0) ack 54 win 257 + + +0 > P. 54:71(17) ack 1 + +0 < . 1:1(0) ack 71 win 257 + + +0 > P. 71:88(17) ack 1 + +0 < . 1:1(0) ack 88 win 257 + + +0 > P. 88:101(13) ack 1 + +0 < . 1:1(0) ack 101 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=1, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt new file mode 100644 index 000000000000..a9a1ac0aea4f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +// small packet zerocopy test: +// +// verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy +// packets of all sizes, including the smallest payload, 1B. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + // send 1B + +0 send(4, ..., 1, MSG_ZEROCOPY) = 1 + +0 > P. 1:2(1) ack 1 + +0 < . 1:1(0) ack 2 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + + // send 1B again + +0 send(4, ..., 1, MSG_ZEROCOPY) = 1 + +0 > P. 2:3(1) ack 1 + +0 < . 1:1(0) ack 3 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=1, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 5175c0c83a23..569bce8b6383 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -681,13 +681,7 @@ setup_xfrm() { } setup_nettest_xfrm() { - if ! which nettest >/dev/null; then - PATH=$PWD:$PATH - if ! which nettest >/dev/null; then - echo "'nettest' command not found; skipping tests" - return 1 - fi - fi + check_gen_prog "nettest" [ ${1} -eq 6 ] && proto="-6" || proto="" port=${2} @@ -1447,7 +1441,7 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { size=$(du -sb $tmpoutfile) size=${size%%/tmp/*} - [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1 + [ $size -ne 1048576 ] && err "File size $size mismatches expected value in locally bridged vxlan test" && return 1 done rm -f "$tmpoutfile" diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 1a736f700be4..4f31e92ebd96 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -165,9 +165,9 @@ static void sock_fanout_set_ebpf(int fd) attr.insns = (unsigned long) prog; attr.insn_cnt = ARRAY_SIZE(prog); attr.license = (unsigned long) "GPL"; - attr.log_buf = (unsigned long) log_buf, - attr.log_size = sizeof(log_buf), - attr.log_level = 1, + attr.log_buf = (unsigned long) log_buf; + attr.log_size = sizeof(log_buf); + attr.log_level = 1; pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); if (pfd < 0) { diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile new file mode 100644 index 000000000000..da9714bc7aad --- /dev/null +++ b/tools/testing/selftests/net/rds/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 + +all: + @echo mk_build_dir="$(shell pwd)" > include.sh + +TEST_PROGS := run.sh \ + include.sh \ + test.py + +EXTRA_CLEAN := /tmp/rds_logs + +include ../../lib.mk diff --git a/tools/testing/selftests/net/rds/README.txt b/tools/testing/selftests/net/rds/README.txt new file mode 100644 index 000000000000..cbde2951ab13 --- /dev/null +++ b/tools/testing/selftests/net/rds/README.txt @@ -0,0 +1,41 @@ +RDS self-tests +============== + +These scripts provide a coverage test for RDS-TCP by creating two +network namespaces and running rds packets between them. A loopback +network is provisioned with optional probability of packet loss or +corruption. A workload of 50000 hashes, each 64 characters in size, +are passed over an RDS socket on this test network. A passing test means +the RDS-TCP stack was able to recover properly. The provided config.sh +can be used to compile the kernel with the necessary gcov options. The +kernel may optionally be configured to omit the coverage report as well. + +USAGE: + run.sh [-d logdir] [-l packet_loss] [-c packet_corruption] + [-u packet_duplcate] + +OPTIONS: + -d Log directory. Defaults to tools/testing/selftests/net/rds/rds_logs + + -l Simulates a percentage of packet loss + + -c Simulates a percentage of packet corruption + + -u Simulates a percentage of packet duplication. + +EXAMPLE: + + # Create a suitable gcov enabled .config + tools/testing/selftests/net/rds/config.sh -g + + # Alternatly create a gcov disabled .config + tools/testing/selftests/net/rds/config.sh + + # build the kernel + vng --build --config tools/testing/selftests/net/config + + # launch the tests in a VM + vng -v --rwdir ./ --run . --user root --cpus 4 -- \ + "export PYTHONPATH=tools/testing/selftests/net/; tools/testing/selftests/net/rds/run.sh" + +An HTML coverage report will be output in tools/testing/selftests/net/rds/rds_logs/coverage/. diff --git a/tools/testing/selftests/net/rds/config.sh b/tools/testing/selftests/net/rds/config.sh new file mode 100755 index 000000000000..791c8dbe1095 --- /dev/null +++ b/tools/testing/selftests/net/rds/config.sh @@ -0,0 +1,53 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -u +set -x + +unset KBUILD_OUTPUT + +GENERATE_GCOV_REPORT=0 +while getopts "g" opt; do + case ${opt} in + g) + GENERATE_GCOV_REPORT=1 + ;; + :) + echo "USAGE: config.sh [-g]" + exit 1 + ;; + ?) + echo "Invalid option: -${OPTARG}." + exit 1 + ;; + esac +done + +CONF_FILE="tools/testing/selftests/net/config" + +# no modules +scripts/config --file "$CONF_FILE" --disable CONFIG_MODULES + +# enable RDS +scripts/config --file "$CONF_FILE" --enable CONFIG_RDS +scripts/config --file "$CONF_FILE" --enable CONFIG_RDS_TCP + +if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then + # instrument RDS and only RDS + scripts/config --file "$CONF_FILE" --enable CONFIG_GCOV_KERNEL + scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL + scripts/config --file "$CONF_FILE" --enable GCOV_PROFILE_RDS +else + scripts/config --file "$CONF_FILE" --disable CONFIG_GCOV_KERNEL + scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL + scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_RDS +fi + +# need network namespaces to run tests with veth network interfaces +scripts/config --file "$CONF_FILE" --enable CONFIG_NET_NS +scripts/config --file "$CONF_FILE" --enable CONFIG_VETH + +# simulate packet loss +scripts/config --file "$CONF_FILE" --enable CONFIG_NET_SCH_NETEM + diff --git a/tools/testing/selftests/net/rds/run.sh b/tools/testing/selftests/net/rds/run.sh new file mode 100755 index 000000000000..8aee244f582a --- /dev/null +++ b/tools/testing/selftests/net/rds/run.sh @@ -0,0 +1,224 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -u + +unset KBUILD_OUTPUT + +current_dir="$(realpath "$(dirname "$0")")" +build_dir="$current_dir" + +build_include="$current_dir/include.sh" +if test -f "$build_include"; then + # this include will define "$mk_build_dir" as the location the test was + # built. We will need this if the tests are installed in a location + # other than the kernel source + + source "$build_include" + build_dir="$mk_build_dir" +fi + +# This test requires kernel source and the *.gcda data therein +# Locate the top level of the kernel source, and the net/rds +# subfolder with the appropriate *.gcno object files +ksrc_dir="$(realpath "$build_dir"/../../../../../)" +kconfig="$ksrc_dir/.config" +obj_dir="$ksrc_dir/net/rds" + +GCOV_CMD=gcov + +#check to see if the host has the required packages to generate a gcov report +check_gcov_env() +{ + if ! which "$GCOV_CMD" > /dev/null 2>&1; then + echo "Warning: Could not find gcov. " + GENERATE_GCOV_REPORT=0 + return + fi + + # the gcov version must match the gcc version + GCC_VER=$(gcc -dumpfullversion) + GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| awk 'BEGIN {FS="-"}{print $1}') + if [ "$GCOV_VER" != "$GCC_VER" ]; then + #attempt to find a matching gcov version + GCOV_CMD=gcov-$(gcc -dumpversion) + + if ! which "$GCOV_CMD" > /dev/null 2>&1; then + echo "Warning: Could not find an appropriate gcov installation. \ + gcov version must match gcc version" + GENERATE_GCOV_REPORT=0 + return + fi + + #recheck version number of found gcov executable + GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| \ + awk 'BEGIN {FS="-"}{print $1}') + if [ "$GCOV_VER" != "$GCC_VER" ]; then + echo "Warning: Could not find an appropriate gcov installation. \ + gcov version must match gcc version" + GENERATE_GCOV_REPORT=0 + else + echo "Warning: Mismatched gcc and gcov detected. Using $GCOV_CMD" + fi + fi +} + +# Check to see if the kconfig has the required configs to generate a coverage report +check_gcov_conf() +{ + if ! grep -x "CONFIG_GCOV_PROFILE_RDS=y" "$kconfig" > /dev/null 2>&1; then + echo "INFO: CONFIG_GCOV_PROFILE_RDS should be enabled for coverage reports" + GENERATE_GCOV_REPORT=0 + fi + if ! grep -x "CONFIG_GCOV_KERNEL=y" "$kconfig" > /dev/null 2>&1; then + echo "INFO: CONFIG_GCOV_KERNEL should be enabled for coverage reports" + GENERATE_GCOV_REPORT=0 + fi + if grep -x "CONFIG_GCOV_PROFILE_ALL=y" "$kconfig" > /dev/null 2>&1; then + echo "INFO: CONFIG_GCOV_PROFILE_ALL should be disabled for coverage reports" + GENERATE_GCOV_REPORT=0 + fi + + if [ "$GENERATE_GCOV_REPORT" -eq 0 ]; then + echo "To enable gcov reports, please run "\ + "\"tools/testing/selftests/net/rds/config.sh -g\" and rebuild the kernel" + else + # if we have the required kernel configs, proceed to check the environment to + # ensure we have the required gcov packages + check_gcov_env + fi +} + +# Kselftest framework requirement - SKIP code is 4. +check_conf_enabled() { + if ! grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then + echo "selftests: [SKIP] This test requires $1 enabled" + echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel" + exit 4 + fi +} +check_conf_disabled() { + if grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then + echo "selftests: [SKIP] This test requires $1 disabled" + echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel" + exit 4 + fi +} +check_conf() { + check_conf_enabled CONFIG_NET_SCH_NETEM + check_conf_enabled CONFIG_VETH + check_conf_enabled CONFIG_NET_NS + check_conf_enabled CONFIG_RDS_TCP + check_conf_enabled CONFIG_RDS + check_conf_disabled CONFIG_MODULES +} + +check_env() +{ + if ! test -d "$obj_dir"; then + echo "selftests: [SKIP] This test requires a kernel source tree" + exit 4 + fi + if ! test -e "$kconfig"; then + echo "selftests: [SKIP] This test requires a configured kernel source tree" + exit 4 + fi + if ! which strace > /dev/null 2>&1; then + echo "selftests: [SKIP] Could not run test without strace" + exit 4 + fi + if ! which tcpdump > /dev/null 2>&1; then + echo "selftests: [SKIP] Could not run test without tcpdump" + exit 4 + fi + + if ! which python3 > /dev/null 2>&1; then + echo "selftests: [SKIP] Could not run test without python3" + exit 4 + fi + + python_major=$(python3 -c "import sys; print(sys.version_info[0])") + python_minor=$(python3 -c "import sys; print(sys.version_info[1])") + if [[ python_major -lt 3 || ( python_major -eq 3 && python_minor -lt 9 ) ]] ; then + echo "selftests: [SKIP] Could not run test without at least python3.9" + python3 -V + exit 4 + fi +} + +LOG_DIR="$current_dir"/rds_logs +PLOSS=0 +PCORRUPT=0 +PDUP=0 +GENERATE_GCOV_REPORT=1 +while getopts "d:l:c:u:" opt; do + case ${opt} in + d) + LOG_DIR=${OPTARG} + ;; + l) + PLOSS=${OPTARG} + ;; + c) + PCORRUPT=${OPTARG} + ;; + u) + PDUP=${OPTARG} + ;; + :) + echo "USAGE: run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]" \ + "[-u packet_duplcate] [-g]" + exit 1 + ;; + ?) + echo "Invalid option: -${OPTARG}." + exit 1 + ;; + esac +done + + +check_env +check_conf +check_gcov_conf + + +rm -fr "$LOG_DIR" +TRACE_FILE="${LOG_DIR}/rds-strace.txt" +COVR_DIR="${LOG_DIR}/coverage/" +mkdir -p "$LOG_DIR" +mkdir -p "$COVR_DIR" + +set +e +echo running RDS tests... +echo Traces will be logged to "$TRACE_FILE" +rm -f "$TRACE_FILE" +strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" --timeout 400 -d "$LOG_DIR" \ + -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP" + +test_rc=$? +dmesg > "${LOG_DIR}/dmesg.out" + +if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then + echo saving coverage data... + (set +x; cd /sys/kernel/debug/gcov; find ./* -name '*.gcda' | \ + while read -r f + do + cat < "/sys/kernel/debug/gcov/$f" > "/$f" + done) + + echo running gcovr... + gcovr -s --html-details --gcov-executable "$GCOV_CMD" --gcov-ignore-parse-errors \ + -o "${COVR_DIR}/gcovr" "${ksrc_dir}/net/rds/" +else + echo "Coverage report will be skipped" +fi + +if [ "$test_rc" -eq 0 ]; then + echo "PASS: Test completed successfully" +else + echo "FAIL: Test failed" +fi + +exit "$test_rc" diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py new file mode 100644 index 000000000000..e6bb109bcead --- /dev/null +++ b/tools/testing/selftests/net/rds/test.py @@ -0,0 +1,262 @@ +#! /usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import argparse +import ctypes +import errno +import hashlib +import os +import select +import signal +import socket +import subprocess +import sys +import atexit +from pwd import getpwuid +from os import stat +from lib.py import ip + + +libc = ctypes.cdll.LoadLibrary('libc.so.6') +setns = libc.setns + +net0 = 'net0' +net1 = 'net1' + +veth0 = 'veth0' +veth1 = 'veth1' + +# Helper function for creating a socket inside a network namespace. +# We need this because otherwise RDS will detect that the two TCP +# sockets are on the same interface and use the loop transport instead +# of the TCP transport. +def netns_socket(netns, *args): + u0, u1 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) + + child = os.fork() + if child == 0: + # change network namespace + with open(f'/var/run/netns/{netns}') as f: + try: + ret = setns(f.fileno(), 0) + except IOError as e: + print(e.errno) + print(e) + + # create socket in target namespace + s = socket.socket(*args) + + # send resulting socket to parent + socket.send_fds(u0, [], [s.fileno()]) + + sys.exit(0) + + # receive socket from child + _, s, _, _ = socket.recv_fds(u1, 0, 1) + os.waitpid(child, 0) + u0.close() + u1.close() + return socket.fromfd(s[0], *args) + +def signal_handler(sig, frame): + print('Test timed out') + sys.exit(1) + +#Parse out command line arguments. We take an optional +# timeout parameter and an optional log output folder +parser = argparse.ArgumentParser(description="init script args", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("-d", "--logdir", action="store", + help="directory to store logs", default="/tmp") +parser.add_argument('--timeout', help="timeout to terminate hung test", + type=int, default=0) +parser.add_argument('-l', '--loss', help="Simulate tcp packet loss", + type=int, default=0) +parser.add_argument('-c', '--corruption', help="Simulate tcp packet corruption", + type=int, default=0) +parser.add_argument('-u', '--duplicate', help="Simulate tcp packet duplication", + type=int, default=0) +args = parser.parse_args() +logdir=args.logdir +packet_loss=str(args.loss)+'%' +packet_corruption=str(args.corruption)+'%' +packet_duplicate=str(args.duplicate)+'%' + +ip(f"netns add {net0}") +ip(f"netns add {net1}") +ip(f"link add type veth") + +addrs = [ + # we technically don't need different port numbers, but this will + # help identify traffic in the network analyzer + ('10.0.0.1', 10000), + ('10.0.0.2', 20000), +] + +# move interfaces to separate namespaces so they can no longer be +# bound directly; this prevents rds from switching over from the tcp +# transport to the loop transport. +ip(f"link set {veth0} netns {net0} up") +ip(f"link set {veth1} netns {net1} up") + + + +# add addresses +ip(f"-n {net0} addr add {addrs[0][0]}/32 dev {veth0}") +ip(f"-n {net1} addr add {addrs[1][0]}/32 dev {veth1}") + +# add routes +ip(f"-n {net0} route add {addrs[1][0]}/32 dev {veth0}") +ip(f"-n {net1} route add {addrs[0][0]}/32 dev {veth1}") + +# sanity check that our two interfaces/addresses are correctly set up +# and communicating by doing a single ping +ip(f"netns exec {net0} ping -c 1 {addrs[1][0]}") + +# Start a packet capture on each network +for net in [net0, net1]: + tcpdump_pid = os.fork() + if tcpdump_pid == 0: + pcap = logdir+'/'+net+'.pcap' + subprocess.check_call(['touch', pcap]) + user = getpwuid(stat(pcap).st_uid).pw_name + ip(f"netns exec {net} /usr/sbin/tcpdump -Z {user} -i any -w {pcap}") + sys.exit(0) + +# simulate packet loss, duplication and corruption +for net, iface in [(net0, veth0), (net1, veth1)]: + ip(f"netns exec {net} /usr/sbin/tc qdisc add dev {iface} root netem \ + corrupt {packet_corruption} loss {packet_loss} duplicate \ + {packet_duplicate}") + +# add a timeout +if args.timeout > 0: + signal.alarm(args.timeout) + signal.signal(signal.SIGALRM, signal_handler) + +sockets = [ + netns_socket(net0, socket.AF_RDS, socket.SOCK_SEQPACKET), + netns_socket(net1, socket.AF_RDS, socket.SOCK_SEQPACKET), +] + +for s, addr in zip(sockets, addrs): + s.bind(addr) + s.setblocking(0) + +fileno_to_socket = { + s.fileno(): s for s in sockets +} + +addr_to_socket = { + addr: s for addr, s in zip(addrs, sockets) +} + +socket_to_addr = { + s: addr for addr, s in zip(addrs, sockets) +} + +send_hashes = {} +recv_hashes = {} + +ep = select.epoll() + +for s in sockets: + ep.register(s, select.EPOLLRDNORM) + +n = 50000 +nr_send = 0 +nr_recv = 0 + +while nr_send < n: + # Send as much as we can without blocking + print("sending...", nr_send, nr_recv) + while nr_send < n: + send_data = hashlib.sha256( + f'packet {nr_send}'.encode('utf-8')).hexdigest().encode('utf-8') + + # pseudo-random send/receive pattern + sender = sockets[nr_send % 2] + receiver = sockets[1 - (nr_send % 3) % 2] + + try: + sender.sendto(send_data, socket_to_addr[receiver]) + send_hashes.setdefault((sender.fileno(), receiver.fileno()), + hashlib.sha256()).update(f'<{send_data}>'.encode('utf-8')) + nr_send = nr_send + 1 + except BlockingIOError as e: + break + except OSError as e: + if e.errno in [errno.ENOBUFS, errno.ECONNRESET, errno.EPIPE]: + break + raise + + # Receive as much as we can without blocking + print("receiving...", nr_send, nr_recv) + while nr_recv < nr_send: + for fileno, eventmask in ep.poll(): + receiver = fileno_to_socket[fileno] + + if eventmask & select.EPOLLRDNORM: + while True: + try: + recv_data, address = receiver.recvfrom(1024) + sender = addr_to_socket[address] + recv_hashes.setdefault((sender.fileno(), + receiver.fileno()), hashlib.sha256()).update( + f'<{recv_data}>'.encode('utf-8')) + nr_recv = nr_recv + 1 + except BlockingIOError as e: + break + + # exercise net/rds/tcp.c:rds_tcp_sysctl_reset() + for net in [net0, net1]: + ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_rcvbuf=10000") + ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_sndbuf=10000") + +print("done", nr_send, nr_recv) + +# the Python socket module doesn't know these +RDS_INFO_FIRST = 10000 +RDS_INFO_LAST = 10017 + +nr_success = 0 +nr_error = 0 + +for s in sockets: + for optname in range(RDS_INFO_FIRST, RDS_INFO_LAST + 1): + # Sigh, the Python socket module doesn't allow us to pass + # buffer lengths greater than 1024 for some reason. RDS + # wants multiple pages. + try: + s.getsockopt(socket.SOL_RDS, optname, 1024) + nr_success = nr_success + 1 + except OSError as e: + nr_error = nr_error + 1 + if e.errno == errno.ENOSPC: + # ignore + pass + +print(f"getsockopt(): {nr_success}/{nr_error}") + +print("Stopping network packet captures") +subprocess.check_call(['killall', '-q', 'tcpdump']) + +# We're done sending and receiving stuff, now let's check if what +# we received is what we sent. +for (sender, receiver), send_hash in send_hashes.items(): + recv_hash = recv_hashes.get((sender, receiver)) + + if recv_hash is None: + print("FAIL: No data received") + sys.exit(1) + + if send_hash.hexdigest() != recv_hash.hexdigest(): + print("FAIL: Send/recv mismatch") + print("hash expected:", send_hash.hexdigest()) + print("hash received:", recv_hash.hexdigest()) + sys.exit(1) + + print(f"{sender}/{receiver}: ok") + +print("Success") +sys.exit(0) diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c index 9eb42570294d..16ac4df55fdb 100644 --- a/tools/testing/selftests/net/rxtimestamp.c +++ b/tools/testing/selftests/net/rxtimestamp.c @@ -57,6 +57,8 @@ static struct sof_flag sof_flags[] = { SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE), SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE), SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE), + SOF_FLAG(SOF_TIMESTAMPING_OPT_RX_FILTER), + SOF_FLAG(SOF_TIMESTAMPING_RAW_HARDWARE), }; static struct socket_type socket_types[] = { @@ -98,6 +100,22 @@ static struct test_case test_cases[] = { {} }, { + { .so_timestamping = SOF_TIMESTAMPING_RAW_HARDWARE + | SOF_TIMESTAMPING_OPT_RX_FILTER }, + {} + }, + { + { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE + | SOF_TIMESTAMPING_OPT_RX_FILTER }, + {} + }, + { + { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE + | SOF_TIMESTAMPING_RX_SOFTWARE + | SOF_TIMESTAMPING_OPT_RX_FILTER }, + { .swtstamp = true } + }, + { { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE }, { .swtstamp = true } diff --git a/tools/testing/selftests/net/sk_so_peek_off.c b/tools/testing/selftests/net/sk_so_peek_off.c new file mode 100644 index 000000000000..d87dd8d8d491 --- /dev/null +++ b/tools/testing/selftests/net/sk_so_peek_off.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include "../kselftest.h" + +static char *afstr(int af, int proto) +{ + if (proto == IPPROTO_TCP) + return af == AF_INET ? "TCP/IPv4" : "TCP/IPv6"; + else + return af == AF_INET ? "UDP/IPv4" : "UDP/IPv6"; +} + +int sk_peek_offset_probe(sa_family_t af, int proto) +{ + int type = (proto == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM); + int optv = 0; + int ret = 0; + int s; + + s = socket(af, type, proto); + if (s < 0) { + ksft_perror("Temporary TCP socket creation failed"); + } else { + if (!setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &optv, sizeof(int))) + ret = 1; + else + printf("%s does not support SO_PEEK_OFF\n", afstr(af, proto)); + close(s); + } + return ret; +} + +static void sk_peek_offset_set(int s, int offset) +{ + if (setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, sizeof(offset))) + ksft_perror("Failed to set SO_PEEK_OFF value\n"); +} + +static int sk_peek_offset_get(int s) +{ + int offset; + socklen_t len = sizeof(offset); + + if (getsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, &len)) + ksft_perror("Failed to get SO_PEEK_OFF value\n"); + return offset; +} + +static int sk_peek_offset_test(sa_family_t af, int proto) +{ + int type = (proto == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM); + union { + struct sockaddr sa; + struct sockaddr_in a4; + struct sockaddr_in6 a6; + } a; + int res = 0; + int s[2] = {0, 0}; + int recv_sock = 0; + int offset = 0; + ssize_t len; + char buf[2]; + + memset(&a, 0, sizeof(a)); + a.sa.sa_family = af; + + s[0] = recv_sock = socket(af, type, proto); + s[1] = socket(af, type, proto); + + if (s[0] < 0 || s[1] < 0) { + ksft_perror("Temporary socket creation failed\n"); + goto out; + } + if (bind(s[0], &a.sa, sizeof(a)) < 0) { + ksft_perror("Temporary socket bind() failed\n"); + goto out; + } + if (getsockname(s[0], &a.sa, &((socklen_t) { sizeof(a) })) < 0) { + ksft_perror("Temporary socket getsockname() failed\n"); + goto out; + } + if (proto == IPPROTO_TCP && listen(s[0], 0) < 0) { + ksft_perror("Temporary socket listen() failed\n"); + goto out; + } + if (connect(s[1], &a.sa, sizeof(a)) < 0) { + ksft_perror("Temporary socket connect() failed\n"); + goto out; + } + if (proto == IPPROTO_TCP) { + recv_sock = accept(s[0], NULL, NULL); + if (recv_sock <= 0) { + ksft_perror("Temporary socket accept() failed\n"); + goto out; + } + } + + /* Some basic tests of getting/setting offset */ + offset = sk_peek_offset_get(recv_sock); + if (offset != -1) { + ksft_perror("Initial value of socket offset not -1\n"); + goto out; + } + sk_peek_offset_set(recv_sock, 0); + offset = sk_peek_offset_get(recv_sock); + if (offset != 0) { + ksft_perror("Failed to set socket offset to 0\n"); + goto out; + } + + /* Transfer a message */ + if (send(s[1], (char *)("ab"), 2, 0) != 2) { + ksft_perror("Temporary probe socket send() failed\n"); + goto out; + } + /* Read first byte */ + len = recv(recv_sock, buf, 1, MSG_PEEK); + if (len != 1 || buf[0] != 'a') { + ksft_perror("Failed to read first byte of message\n"); + goto out; + } + offset = sk_peek_offset_get(recv_sock); + if (offset != 1) { + ksft_perror("Offset not forwarded correctly at first byte\n"); + goto out; + } + /* Try to read beyond last byte */ + len = recv(recv_sock, buf, 2, MSG_PEEK); + if (len != 1 || buf[0] != 'b') { + ksft_perror("Failed to read last byte of message\n"); + goto out; + } + offset = sk_peek_offset_get(recv_sock); + if (offset != 2) { + ksft_perror("Offset not forwarded correctly at last byte\n"); + goto out; + } + /* Flush message */ + len = recv(recv_sock, buf, 2, MSG_TRUNC); + if (len != 2) { + ksft_perror("Failed to flush message\n"); + goto out; + } + offset = sk_peek_offset_get(recv_sock); + if (offset != 0) { + ksft_perror("Offset not reverted correctly after flush\n"); + goto out; + } + + printf("%s with MSG_PEEK_OFF works correctly\n", afstr(af, proto)); + res = 1; +out: + if (proto == IPPROTO_TCP && recv_sock >= 0) + close(recv_sock); + if (s[1] >= 0) + close(s[1]); + if (s[0] >= 0) + close(s[0]); + return res; +} + +static int do_test(int proto) +{ + int res4, res6; + + res4 = sk_peek_offset_probe(AF_INET, proto); + res6 = sk_peek_offset_probe(AF_INET6, proto); + + if (!res4 && !res6) + return KSFT_SKIP; + + if (res4) + res4 = sk_peek_offset_test(AF_INET, proto); + + if (res6) + res6 = sk_peek_offset_test(AF_INET6, proto); + + if (!res4 || !res6) + return KSFT_FAIL; + + return KSFT_PASS; +} + +int main(void) +{ + int restcp, resudp; + + restcp = do_test(IPPROTO_TCP); + resudp = do_test(IPPROTO_UDP); + if (restcp == KSFT_FAIL || resudp == KSFT_FAIL) + return KSFT_FAIL; + + return KSFT_PASS; +} diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index bd88b90b902b..5b0205c70c39 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -31,7 +31,8 @@ CFLAGS += $(KHDR_INCLUDES) CFLAGS += -iquote ./lib/ -I ../../../../include/ # Library -LIBSRC := kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c +LIBSRC := ftrace.c ftrace-tcp.c kconfig.c netlink.c +LIBSRC += proc.c repair.c setup.c sock.c utils.c LIBOBJ := $(LIBSRC:%.c=$(LIBDIR)/%.o) EXTRA_CLEAN += $(LIBOBJ) $(LIB) diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c index a1e6e007c291..6736484996a3 100644 --- a/tools/testing/selftests/net/tcp_ao/bench-lookups.c +++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c @@ -355,6 +355,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(30, server_fn, client_fn); + test_init(31, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config index d3277a9de987..3605e38711cb 100644 --- a/tools/testing/selftests/net/tcp_ao/config +++ b/tools/testing/selftests/net/tcp_ao/config @@ -7,4 +7,5 @@ CONFIG_NET_L3_MASTER_DEV=y CONFIG_NET_VRF=y CONFIG_TCP_AO=y CONFIG_TCP_MD5SIG=y +CONFIG_TRACEPOINTS=y CONFIG_VETH=m diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c index 185a2f6e5ff3..d418162d335f 100644 --- a/tools/testing/selftests/net/tcp_ao/connect-deny.c +++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c @@ -71,10 +71,12 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, } } + synchronize_threads(); /* before counter checks */ if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2)) test_error("test_get_tcp_ao_counters()"); close(lsk); + if (pwd) test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); @@ -84,10 +86,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, after_cnt = netstat_get_one(cnt_name, NULL); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } @@ -180,6 +182,7 @@ static void try_connect(const char *tst_name, unsigned int port, timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + synchronize_threads(); /* before counter checks */ if (ret < 0) { if (fault(KEYREJECT) && ret == -EKEYREJECTED) { test_ok("%s: connect() was prevented", tst_name); @@ -212,30 +215,44 @@ out: static void *client_fn(void *arg) { - union tcp_addr wrong_addr, network_addr; + union tcp_addr wrong_addr, network_addr, addr_any = {}; unsigned int port = test_server_port; if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) test_error("Can't convert ip address %s", TEST_WRONG_IP); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Non-AO server + AO client", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server + Non-AO client", port++, NULL, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Wrong password", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_sk_expect(TCP_AO_SYNACK_NO_KEY, this_ip_dest, addr_any, + port, 0, 100, 100); try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_expect(TCP_AO_WRONG_MACLEN, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Different maclen", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); @@ -259,6 +276,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(21, server_fn, client_fn); + test_init(22, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c index 81653b47f303..f1d8d29e393f 100644 --- a/tools/testing/selftests/net/tcp_ao/connect.c +++ b/tools/testing/selftests/net/tcp_ao/connect.c @@ -67,14 +67,14 @@ static void *client_fn(void *arg) netstat_free(ns_after); if (nr_packets > (after_aogood - before_aogood)) { - test_fail("TCPAOGood counter mismatch: %zu > (%zu - %zu)", + test_fail("TCPAOGood counter mismatch: %zu > (%" PRIu64 " - %" PRIu64 ")", nr_packets, after_aogood, before_aogood); return NULL; } if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD)) return NULL; - test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %" PRIu64, + test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %zu", before_aogood, ao1.ao_info_pkt_good, ao1.key_cnts[0].pkt_good, after_aogood, ao2.ao_info_pkt_good, @@ -85,6 +85,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(1, server_fn, client_fn); + test_init(2, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c index d69bcba3c929..a1614f0d8c44 100644 --- a/tools/testing/selftests/net/tcp_ao/icmps-discard.c +++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c @@ -444,6 +444,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(3, server_fn, client_fn); + test_init(4, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c index 24e62120b792..d4385b52c10b 100644 --- a/tools/testing/selftests/net/tcp_ao/key-management.c +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -965,7 +965,7 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys, synchronize_threads(); /* 5: counters */ } -static void try_unmatched_keys(int sk, int *rnext_index) +static void try_unmatched_keys(int sk, int *rnext_index, unsigned int port) { struct test_key *key; unsigned int i = 0; @@ -1013,6 +1013,9 @@ static void try_unmatched_keys(int sk, int *rnext_index) test_error("all keys on server match the client"); if (test_set_key(sk, -1, key->server_keyid)) test_error("Can't change the current key"); + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, + -1, key->server_keyid, -1); if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) test_fail("verify failed"); *rnext_index = i; @@ -1054,6 +1057,10 @@ static void check_current_back(const char *tst_name, unsigned int port, return; if (test_set_key(sk, collection.keys[rotate_to_index].client_keyid, -1)) test_error("Can't change the current key"); + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, + collection.keys[rotate_to_index].client_keyid, + collection.keys[current_index].client_keyid, -1); if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) test_fail("verify failed"); /* There is a race here: between setting the current_key with @@ -1085,6 +1092,11 @@ static void roll_over_keys(const char *tst_name, unsigned int port, for (i = rnext_index + 1; rotations > 0; i++, rotations--) { if (i >= collection.nr_keys) i = 0; + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, + this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, + i == 0 ? -1 : collection.keys[i - 1].server_keyid, + collection.keys[i].server_keyid, -1); if (test_set_key(sk, -1, collection.keys[i].server_keyid)) test_error("Can't change the Rnext key"); if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { @@ -1124,7 +1136,7 @@ static void try_client_match(const char *tst_name, unsigned int port, rnext_index, msg_len, nr_packets); if (sk < 0) return; - try_unmatched_keys(sk, &rnext_index); + try_unmatched_keys(sk, &rnext_index, port); end_client(tst_name, sk, nr_keys, current_index, rnext_index, NULL); } @@ -1181,6 +1193,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(120, server_fn, client_fn); + test_init(121, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h index fbc7f6111815..db44e77428dd 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/aolib.h +++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h @@ -37,17 +37,58 @@ extern void __test_xfail(const char *buf); extern void __test_error(const char *buf); extern void __test_skip(const char *buf); -__attribute__((__format__(__printf__, 2, 3))) -static inline void __test_print(void (*fn)(const char *), const char *fmt, ...) +static inline char *test_snprintf(const char *fmt, va_list vargs) { -#define TEST_MSG_BUFFER_SIZE 4096 - char buf[TEST_MSG_BUFFER_SIZE]; - va_list arg; - - va_start(arg, fmt); - vsnprintf(buf, sizeof(buf), fmt, arg); - va_end(arg); - fn(buf); + char *ret = NULL; + size_t size = 0; + va_list tmp; + int n = 0; + + va_copy(tmp, vargs); + n = vsnprintf(ret, size, fmt, tmp); + if (n < 0) + return NULL; + + size = n + 1; + ret = malloc(size); + if (!ret) + return NULL; + + n = vsnprintf(ret, size, fmt, vargs); + if (n < 0 || n > size - 1) { + free(ret); + return NULL; + } + return ret; +} + +static __printf(1, 2) inline char *test_sprintf(const char *fmt, ...) +{ + va_list vargs; + char *ret; + + va_start(vargs, fmt); + ret = test_snprintf(fmt, vargs); + va_end(vargs); + + return ret; +} + +static __printf(2, 3) inline void __test_print(void (*fn)(const char *), + const char *fmt, ...) +{ + va_list vargs; + char *msg; + + va_start(vargs, fmt); + msg = test_snprintf(fmt, vargs); + va_end(vargs); + + if (!msg) + return; + + fn(msg); + free(msg); } #define test_print(fmt, ...) \ @@ -103,6 +144,7 @@ enum test_needs_kconfig { KCONFIG_TCP_AO, /* required */ KCONFIG_TCP_MD5, /* optional, for TCP-MD5 features */ KCONFIG_NET_VRF, /* optional, for L3/VRF testing */ + KCONFIG_FTRACE, /* optional, for tracepoints checks */ __KCONFIG_LAST__ }; extern bool kernel_config_has(enum test_needs_kconfig k); @@ -142,6 +184,8 @@ static inline void test_init2(unsigned int ntests, __test_init(ntests, family, prefix, taddr1, taddr2, peer1, peer2); } extern void test_add_destructor(void (*d)(void)); +extern void test_init_ftrace(int nsfd1, int nsfd2); +extern int test_setup_tracing(void); /* To adjust optmem socket limit, approximately estimate a number, * that is bigger than sizeof(struct tcp_ao_key). @@ -216,12 +260,17 @@ static inline void test_init(unsigned int ntests, } extern void synchronize_threads(void); extern void switch_ns(int fd); +extern int switch_save_ns(int fd); +extern void switch_close_ns(int fd); extern __thread union tcp_addr this_ip_addr; extern __thread union tcp_addr this_ip_dest; extern int test_family; extern void randomize_buffer(void *buf, size_t buflen); +extern __printf(3, 4) int test_echo(const char *fname, bool append, + const char *fmt, ...); + extern int open_netns(void); extern int unshare_open_netns(void); extern const char veth_name[]; @@ -602,4 +651,115 @@ static inline int test_add_repaired_key(int sk, return test_verify_socket_key(sk, &tmp); } +#define DEFAULT_FTRACE_BUFFER_KB 10000 +#define DEFAULT_TRACER_LINES_ARR 200 +struct test_ftracer; +extern uint64_t ns_cookie1, ns_cookie2; + +enum ftracer_op { + FTRACER_LINE_DISCARD = 0, + FTRACER_LINE_PRESERVE, + FTRACER_EXIT, +}; + +extern struct test_ftracer *create_ftracer(const char *name, + enum ftracer_op (*process_line)(const char *line), + void (*destructor)(struct test_ftracer *tracer), + bool (*expecting_more)(void), + size_t lines_buf_sz, size_t buffer_size_kb); +extern int setup_trace_event(struct test_ftracer *tracer, + const char *event, const char *filter); +extern void destroy_ftracer(struct test_ftracer *tracer); +extern const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer); +extern const char **tracer_get_savedlines(struct test_ftracer *tracer); + +enum trace_events { + /* TCP_HASH_EVENT */ + TCP_HASH_BAD_HEADER = 0, + TCP_HASH_MD5_REQUIRED, + TCP_HASH_MD5_UNEXPECTED, + TCP_HASH_MD5_MISMATCH, + TCP_HASH_AO_REQUIRED, + /* TCP_AO_EVENT */ + TCP_AO_HANDSHAKE_FAILURE, + TCP_AO_WRONG_MACLEN, + TCP_AO_MISMATCH, + TCP_AO_KEY_NOT_FOUND, + TCP_AO_RNEXT_REQUEST, + /* TCP_AO_EVENT_SK */ + TCP_AO_SYNACK_NO_KEY, + /* TCP_AO_EVENT_SNE */ + TCP_AO_SND_SNE_UPDATE, + TCP_AO_RCV_SNE_UPDATE, + __MAX_TRACE_EVENTS +}; + +extern int __trace_event_expect(enum trace_events type, int family, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack, + int keyid, int rnext, int maclen, int sne); + +static inline void trace_hash_event_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, L3index, + fin, syn, rst, psh, ack, + -1, -1, -1, -1); + if (err) + test_error("Couldn't add a trace event: %d", err); +} + +static inline void trace_ao_event_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack, + int keyid, int rnext, int maclen) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, L3index, + fin, syn, rst, psh, ack, + keyid, rnext, maclen, -1); + if (err) + test_error("Couldn't add a trace event: %d", err); +} + +static inline void trace_ao_event_sk_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, + int keyid, int rnext) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, -1, + -1, -1, -1, -1, -1, + keyid, rnext, -1, -1); + if (err) + test_error("Couldn't add a trace event: %d", err); +} + +static inline void trace_ao_event_sne_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int sne) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, -1, + -1, -1, -1, -1, -1, + -1, -1, -1, sne); + if (err) + test_error("Couldn't add a trace event: %d", err); +} + +extern int setup_aolib_ftracer(void); + #endif /* _AOLIB_H_ */ diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c new file mode 100644 index 000000000000..24380c68fec6 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c @@ -0,0 +1,559 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <inttypes.h> +#include <pthread.h> +#include "aolib.h" + +static const char *trace_event_names[__MAX_TRACE_EVENTS] = { + /* TCP_HASH_EVENT */ + "tcp_hash_bad_header", + "tcp_hash_md5_required", + "tcp_hash_md5_unexpected", + "tcp_hash_md5_mismatch", + "tcp_hash_ao_required", + /* TCP_AO_EVENT */ + "tcp_ao_handshake_failure", + "tcp_ao_wrong_maclen", + "tcp_ao_mismatch", + "tcp_ao_key_not_found", + "tcp_ao_rnext_request", + /* TCP_AO_EVENT_SK */ + "tcp_ao_synack_no_key", + /* TCP_AO_EVENT_SNE */ + "tcp_ao_snd_sne_update", + "tcp_ao_rcv_sne_update" +}; + +struct expected_trace_point { + /* required */ + enum trace_events type; + int family; + union tcp_addr src; + union tcp_addr dst; + + /* optional */ + int src_port; + int dst_port; + int L3index; + + int fin; + int syn; + int rst; + int psh; + int ack; + + int keyid; + int rnext; + int maclen; + int sne; + + size_t matched; +}; + +static struct expected_trace_point *exp_tps; +static size_t exp_tps_nr; +static size_t exp_tps_size; +static pthread_mutex_t exp_tps_mutex = PTHREAD_MUTEX_INITIALIZER; + +int __trace_event_expect(enum trace_events type, int family, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack, + int keyid, int rnext, int maclen, int sne) +{ + struct expected_trace_point new_tp = { + .type = type, + .family = family, + .src = src, + .dst = dst, + .src_port = src_port, + .dst_port = dst_port, + .L3index = L3index, + .fin = fin, + .syn = syn, + .rst = rst, + .psh = psh, + .ack = ack, + .keyid = keyid, + .rnext = rnext, + .maclen = maclen, + .sne = sne, + .matched = 0, + }; + int ret = 0; + + if (!kernel_config_has(KCONFIG_FTRACE)) + return 0; + + pthread_mutex_lock(&exp_tps_mutex); + if (exp_tps_nr == exp_tps_size) { + struct expected_trace_point *tmp; + + if (exp_tps_size == 0) + exp_tps_size = 10; + else + exp_tps_size = exp_tps_size * 1.6; + + tmp = reallocarray(exp_tps, exp_tps_size, sizeof(exp_tps[0])); + if (!tmp) { + ret = -ENOMEM; + goto out; + } + exp_tps = tmp; + } + exp_tps[exp_tps_nr] = new_tp; + exp_tps_nr++; +out: + pthread_mutex_unlock(&exp_tps_mutex); + return ret; +} + +static void free_expected_events(void) +{ + /* We're from the process destructor - not taking the mutex */ + exp_tps_size = 0; + exp_tps = NULL; + free(exp_tps); +} + +struct trace_point { + int family; + union tcp_addr src; + union tcp_addr dst; + unsigned int src_port; + unsigned int dst_port; + int L3index; + unsigned int fin:1, + syn:1, + rst:1, + psh:1, + ack:1; + + unsigned int keyid; + unsigned int rnext; + unsigned int maclen; + + unsigned int sne; +}; + +static bool lookup_expected_event(int event_type, struct trace_point *e) +{ + size_t i; + + pthread_mutex_lock(&exp_tps_mutex); + for (i = 0; i < exp_tps_nr; i++) { + struct expected_trace_point *p = &exp_tps[i]; + size_t sk_size; + + if (p->type != event_type) + continue; + if (p->family != e->family) + continue; + if (p->family == AF_INET) + sk_size = sizeof(p->src.a4); + else + sk_size = sizeof(p->src.a6); + if (memcmp(&p->src, &e->src, sk_size)) + continue; + if (memcmp(&p->dst, &e->dst, sk_size)) + continue; + if (p->src_port >= 0 && p->src_port != e->src_port) + continue; + if (p->dst_port >= 0 && p->dst_port != e->dst_port) + continue; + if (p->L3index >= 0 && p->L3index != e->L3index) + continue; + + if (p->fin >= 0 && p->fin != e->fin) + continue; + if (p->syn >= 0 && p->syn != e->syn) + continue; + if (p->rst >= 0 && p->rst != e->rst) + continue; + if (p->psh >= 0 && p->psh != e->psh) + continue; + if (p->ack >= 0 && p->ack != e->ack) + continue; + + if (p->keyid >= 0 && p->keyid != e->keyid) + continue; + if (p->rnext >= 0 && p->rnext != e->rnext) + continue; + if (p->maclen >= 0 && p->maclen != e->maclen) + continue; + if (p->sne >= 0 && p->sne != e->sne) + continue; + p->matched++; + pthread_mutex_unlock(&exp_tps_mutex); + return true; + } + pthread_mutex_unlock(&exp_tps_mutex); + return false; +} + +static int check_event_type(const char *line) +{ + size_t i; + + /* + * This should have been a set or hashmap, but it's a selftest, + * so... KISS. + */ + for (i = 0; i < __MAX_TRACE_EVENTS; i++) { + if (!strncmp(trace_event_names[i], line, strlen(trace_event_names[i]))) + return i; + } + return -1; +} + +static bool event_has_flags(enum trace_events event) +{ + switch (event) { + case TCP_HASH_BAD_HEADER: + case TCP_HASH_MD5_REQUIRED: + case TCP_HASH_MD5_UNEXPECTED: + case TCP_HASH_MD5_MISMATCH: + case TCP_HASH_AO_REQUIRED: + case TCP_AO_HANDSHAKE_FAILURE: + case TCP_AO_WRONG_MACLEN: + case TCP_AO_MISMATCH: + case TCP_AO_KEY_NOT_FOUND: + case TCP_AO_RNEXT_REQUEST: + return true; + default: + return false; + } +} + +static int tracer_ip_split(int family, char *src, char **addr, char **port) +{ + char *p; + + if (family == AF_INET) { + /* fomat is <addr>:port, i.e.: 10.0.254.1:7015 */ + *addr = src; + p = strchr(src, ':'); + if (!p) { + test_print("Couldn't parse trace event addr:port %s", src); + return -EINVAL; + } + *p++ = '\0'; + *port = p; + return 0; + } + if (family != AF_INET6) + return -EAFNOSUPPORT; + + /* format is [<addr>]:port, i.e.: [2001:db8:254::1]:7013 */ + *addr = strchr(src, '['); + p = strchr(src, ']'); + + if (!p || !*addr) { + test_print("Couldn't parse trace event [addr]:port %s", src); + return -EINVAL; + } + + *addr = *addr + 1; /* '[' */ + *p++ = '\0'; /* ']' */ + if (*p != ':') { + test_print("Couldn't parse trace event :port %s", p); + return -EINVAL; + } + *p++ = '\0'; /* ':' */ + *port = p; + return 0; +} + +static int tracer_scan_address(int family, char *src, + union tcp_addr *dst, unsigned int *port) +{ + char *addr, *port_str; + int ret; + + ret = tracer_ip_split(family, src, &addr, &port_str); + if (ret) + return ret; + + if (inet_pton(family, addr, dst) != 1) { + test_print("Couldn't parse trace event addr %s", addr); + return -EINVAL; + } + errno = 0; + *port = (unsigned int)strtoul(port_str, NULL, 10); + if (errno != 0) { + test_print("Couldn't parse trace event port %s", port_str); + return -errno; + } + return 0; +} + +static int tracer_scan_event(const char *line, enum trace_events event, + struct trace_point *out) +{ + char *src = NULL, *dst = NULL, *family = NULL; + char fin, syn, rst, psh, ack; + int nr_matched, ret = 0; + uint64_t netns_cookie; + + switch (event) { + case TCP_HASH_BAD_HEADER: + case TCP_HASH_MD5_REQUIRED: + case TCP_HASH_MD5_UNEXPECTED: + case TCP_HASH_MD5_MISMATCH: + case TCP_HASH_AO_REQUIRED: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c]", + &netns_cookie, &family, + &src, &dst, &out->L3index, + &fin, &syn, &rst, &psh, &ack); + if (nr_matched != 10) + test_print("Couldn't parse trace event, matched = %d/10", + nr_matched); + break; + } + case TCP_AO_HANDSHAKE_FAILURE: + case TCP_AO_WRONG_MACLEN: + case TCP_AO_MISMATCH: + case TCP_AO_KEY_NOT_FOUND: + case TCP_AO_RNEXT_REQUEST: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u", + &netns_cookie, &family, + &src, &dst, &out->L3index, + &fin, &syn, &rst, &psh, &ack, + &out->keyid, &out->rnext, &out->maclen); + if (nr_matched != 13) + test_print("Couldn't parse trace event, matched = %d/13", + nr_matched); + break; + } + case TCP_AO_SYNACK_NO_KEY: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms keyid=%u rnext=%u", + &netns_cookie, &family, + &src, &dst, &out->keyid, &out->rnext); + if (nr_matched != 6) + test_print("Couldn't parse trace event, matched = %d/6", + nr_matched); + break; + } + case TCP_AO_SND_SNE_UPDATE: + case TCP_AO_RCV_SNE_UPDATE: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms sne=%u", + &netns_cookie, &family, + &src, &dst, &out->sne); + if (nr_matched != 5) + test_print("Couldn't parse trace event, matched = %d/5", + nr_matched); + break; + } + default: + return -1; + } + + if (family) { + if (!strcmp(family, "AF_INET")) { + out->family = AF_INET; + } else if (!strcmp(family, "AF_INET6")) { + out->family = AF_INET6; + } else { + test_print("Couldn't parse trace event family %s", family); + ret = -EINVAL; + goto out_free; + } + } + + if (event_has_flags(event)) { + out->fin = (fin == 'F'); + out->syn = (syn == 'S'); + out->rst = (rst == 'R'); + out->psh = (psh == 'P'); + out->ack = (ack == '.'); + + if ((fin != 'F' && fin != ' ') || + (syn != 'S' && syn != ' ') || + (rst != 'R' && rst != ' ') || + (psh != 'P' && psh != ' ') || + (ack != '.' && ack != ' ')) { + test_print("Couldn't parse trace event flags %c%c%c%c%c", + fin, syn, rst, psh, ack); + ret = -EINVAL; + goto out_free; + } + } + + if (src && tracer_scan_address(out->family, src, &out->src, &out->src_port)) { + ret = -EINVAL; + goto out_free; + } + + if (dst && tracer_scan_address(out->family, dst, &out->dst, &out->dst_port)) { + ret = -EINVAL; + goto out_free; + } + + if (netns_cookie != ns_cookie1 && netns_cookie != ns_cookie2) { + test_print("Net namespace filter for trace event didn't work: %" PRIu64 " != %" PRIu64 " OR %" PRIu64, + netns_cookie, ns_cookie1, ns_cookie2); + ret = -EINVAL; + } + +out_free: + free(src); + free(dst); + free(family); + return ret; +} + +static enum ftracer_op aolib_tracer_process_event(const char *line) +{ + int event_type = check_event_type(line); + struct trace_point tmp = {}; + + if (event_type < 0) + return FTRACER_LINE_PRESERVE; + + if (tracer_scan_event(line, event_type, &tmp)) + return FTRACER_LINE_PRESERVE; + + return lookup_expected_event(event_type, &tmp) ? + FTRACER_LINE_DISCARD : FTRACER_LINE_PRESERVE; +} + +static void dump_trace_event(struct expected_trace_point *e) +{ + char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; + + if (!inet_ntop(e->family, &e->src, src, INET6_ADDRSTRLEN)) + test_error("inet_ntop()"); + if (!inet_ntop(e->family, &e->dst, dst, INET6_ADDRSTRLEN)) + test_error("inet_ntop()"); + test_print("trace event filter %s [%s:%d => %s:%d, L3index %d, flags: %s%s%s%s%s, keyid: %d, rnext: %d, maclen: %d, sne: %d] = %zu", + trace_event_names[e->type], + src, e->src_port, dst, e->dst_port, e->L3index, + (e->fin > 0) ? "F" : (e->fin == 0) ? "!F" : "", + (e->syn > 0) ? "S" : (e->syn == 0) ? "!S" : "", + (e->rst > 0) ? "R" : (e->rst == 0) ? "!R" : "", + (e->psh > 0) ? "P" : (e->psh == 0) ? "!P" : "", + (e->ack > 0) ? "." : (e->ack == 0) ? "!." : "", + e->keyid, e->rnext, e->maclen, e->sne, e->matched); +} + +static void print_match_stats(bool unexpected_events) +{ + size_t matches_per_type[__MAX_TRACE_EVENTS] = {}; + bool expected_but_none = false; + size_t i, total_matched = 0; + char *stat_line = NULL; + + for (i = 0; i < exp_tps_nr; i++) { + struct expected_trace_point *e = &exp_tps[i]; + + total_matched += e->matched; + matches_per_type[e->type] += e->matched; + if (!e->matched) + expected_but_none = true; + } + for (i = 0; i < __MAX_TRACE_EVENTS; i++) { + if (!matches_per_type[i]) + continue; + stat_line = test_sprintf("%s%s[%zu] ", stat_line ?: "", + trace_event_names[i], + matches_per_type[i]); + if (!stat_line) + test_error("test_sprintf()"); + } + + if (unexpected_events || expected_but_none) { + for (i = 0; i < exp_tps_nr; i++) + dump_trace_event(&exp_tps[i]); + } + + if (unexpected_events) + return; + + if (expected_but_none) + test_fail("Some trace events were expected, but didn't occur"); + else if (total_matched) + test_ok("Trace events matched expectations: %zu %s", + total_matched, stat_line); + else + test_ok("No unexpected trace events during the test run"); +} + +#define dump_events(fmt, ...) \ + __test_print(__test_msg, fmt, ##__VA_ARGS__) +static void check_free_events(struct test_ftracer *tracer) +{ + const char **lines; + size_t nr; + + if (!kernel_config_has(KCONFIG_FTRACE)) { + test_skip("kernel config doesn't have ftrace - no checks"); + return; + } + + nr = tracer_get_savedlines_nr(tracer); + lines = tracer_get_savedlines(tracer); + print_match_stats(!!nr); + if (!nr) + return; + + errno = 0; + test_xfail("Trace events [%zu] were not expected:", nr); + while (nr) + dump_events("\t%s", lines[--nr]); +} + +static int setup_tcp_trace_events(struct test_ftracer *tracer) +{ + char *filter; + size_t i; + int ret; + + filter = test_sprintf("net_cookie == %zu || net_cookie == %zu", + ns_cookie1, ns_cookie2); + if (!filter) + return -ENOMEM; + + for (i = 0; i < __MAX_TRACE_EVENTS; i++) { + char *event_name = test_sprintf("tcp/%s", trace_event_names[i]); + + if (!event_name) { + ret = -ENOMEM; + break; + } + ret = setup_trace_event(tracer, event_name, filter); + free(event_name); + if (ret) + break; + } + + free(filter); + return ret; +} + +static void aolib_tracer_destroy(struct test_ftracer *tracer) +{ + check_free_events(tracer); + free_expected_events(); +} + +static bool aolib_tracer_expecting_more(void) +{ + size_t i; + + for (i = 0; i < exp_tps_nr; i++) + if (!exp_tps[i].matched) + return true; + return false; +} + +int setup_aolib_ftracer(void) +{ + struct test_ftracer *f; + + f = create_ftracer("aolib", aolib_tracer_process_event, + aolib_tracer_destroy, aolib_tracer_expecting_more, + DEFAULT_FTRACE_BUFFER_KB, DEFAULT_TRACER_LINES_ARR); + if (!f) + return -1; + + return setup_tcp_trace_events(f); +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c new file mode 100644 index 000000000000..e4d0b173bc94 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c @@ -0,0 +1,543 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <inttypes.h> +#include <pthread.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mount.h> +#include <sys/time.h> +#include <unistd.h> +#include "../../../../../include/linux/kernel.h" +#include "aolib.h" + +static char ftrace_path[] = "ksft-ftrace-XXXXXX"; +static bool ftrace_mounted; +uint64_t ns_cookie1, ns_cookie2; + +struct test_ftracer { + pthread_t tracer_thread; + int error; + char *instance_path; + FILE *trace_pipe; + + enum ftracer_op (*process_line)(const char *line); + void (*destructor)(struct test_ftracer *tracer); + bool (*expecting_more)(void); + + char **saved_lines; + size_t saved_lines_size; + size_t next_line_ind; + + pthread_cond_t met_all_expected; + pthread_mutex_t met_all_expected_lock; + + struct test_ftracer *next; +}; + +static struct test_ftracer *ftracers; +static pthread_mutex_t ftracers_lock = PTHREAD_MUTEX_INITIALIZER; + +static int mount_ftrace(void) +{ + if (!mkdtemp(ftrace_path)) + test_error("Can't create temp dir"); + + if (mount("tracefs", ftrace_path, "tracefs", 0, "rw")) + return -errno; + + ftrace_mounted = true; + + return 0; +} + +static void unmount_ftrace(void) +{ + if (ftrace_mounted && umount(ftrace_path)) + test_print("Failed on cleanup: can't unmount tracefs: %m"); + + if (rmdir(ftrace_path)) + test_error("Failed on cleanup: can't remove ftrace dir %s", + ftrace_path); +} + +struct opts_list_t { + char *opt_name; + struct opts_list_t *next; +}; + +static int disable_trace_options(const char *ftrace_path) +{ + struct opts_list_t *opts_list = NULL; + char *fopts, *line = NULL; + size_t buf_len = 0; + ssize_t line_len; + int ret = 0; + FILE *opts; + + fopts = test_sprintf("%s/%s", ftrace_path, "trace_options"); + if (!fopts) + return -ENOMEM; + + opts = fopen(fopts, "r+"); + if (!opts) { + ret = -errno; + goto out_free; + } + + while ((line_len = getline(&line, &buf_len, opts)) != -1) { + struct opts_list_t *tmp; + + if (!strncmp(line, "no", 2)) + continue; + + tmp = malloc(sizeof(*tmp)); + if (!tmp) { + ret = -ENOMEM; + goto out_free_opts_list; + } + tmp->next = opts_list; + tmp->opt_name = test_sprintf("no%s", line); + if (!tmp->opt_name) { + ret = -ENOMEM; + free(tmp); + goto out_free_opts_list; + } + opts_list = tmp; + } + + while (opts_list) { + struct opts_list_t *tmp = opts_list; + + fseek(opts, 0, SEEK_SET); + fwrite(tmp->opt_name, 1, strlen(tmp->opt_name), opts); + + opts_list = opts_list->next; + free(tmp->opt_name); + free(tmp); + } + +out_free_opts_list: + while (opts_list) { + struct opts_list_t *tmp = opts_list; + + opts_list = opts_list->next; + free(tmp->opt_name); + free(tmp); + } + free(line); + fclose(opts); +out_free: + free(fopts); + return ret; +} + +static int setup_buffer_size(const char *ftrace_path, size_t sz) +{ + char *fbuf_size = test_sprintf("%s/buffer_size_kb", ftrace_path); + int ret; + + if (!fbuf_size) + return -1; + + ret = test_echo(fbuf_size, 0, "%zu", sz); + free(fbuf_size); + return ret; +} + +static int setup_ftrace_instance(struct test_ftracer *tracer, const char *name) +{ + char *tmp; + + tmp = test_sprintf("%s/instances/ksft-%s-XXXXXX", ftrace_path, name); + if (!tmp) + return -ENOMEM; + + tracer->instance_path = mkdtemp(tmp); + if (!tracer->instance_path) { + free(tmp); + return -errno; + } + + return 0; +} + +static void remove_ftrace_instance(struct test_ftracer *tracer) +{ + if (rmdir(tracer->instance_path)) + test_print("Failed on cleanup: can't remove ftrace instance %s", + tracer->instance_path); + free(tracer->instance_path); +} + +static void tracer_cleanup(void *arg) +{ + struct test_ftracer *tracer = arg; + + fclose(tracer->trace_pipe); +} + +static void tracer_set_error(struct test_ftracer *tracer, int error) +{ + if (!tracer->error) + tracer->error = error; +} + +const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer) +{ + return tracer->next_line_ind; +} + +const char **tracer_get_savedlines(struct test_ftracer *tracer) +{ + return (const char **)tracer->saved_lines; +} + +static void *tracer_thread_func(void *arg) +{ + struct test_ftracer *tracer = arg; + + pthread_cleanup_push(tracer_cleanup, arg); + + while (tracer->next_line_ind < tracer->saved_lines_size) { + char **lp = &tracer->saved_lines[tracer->next_line_ind]; + enum ftracer_op op; + size_t buf_len = 0; + ssize_t line_len; + + line_len = getline(lp, &buf_len, tracer->trace_pipe); + if (line_len == -1) + break; + + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); + op = tracer->process_line(*lp); + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + + if (tracer->expecting_more) { + pthread_mutex_lock(&tracer->met_all_expected_lock); + if (!tracer->expecting_more()) + pthread_cond_signal(&tracer->met_all_expected); + pthread_mutex_unlock(&tracer->met_all_expected_lock); + } + + if (op == FTRACER_LINE_DISCARD) + continue; + if (op == FTRACER_EXIT) + break; + if (op != FTRACER_LINE_PRESERVE) + test_error("unexpected tracer command %d", op); + + tracer->next_line_ind++; + buf_len = 0; + } + test_print("too many lines in ftracer buffer %zu, exiting tracer", + tracer->next_line_ind); + + pthread_cleanup_pop(1); + return NULL; +} + +static int setup_trace_thread(struct test_ftracer *tracer) +{ + int ret = 0; + char *path; + + path = test_sprintf("%s/trace_pipe", tracer->instance_path); + if (!path) + return -ENOMEM; + + tracer->trace_pipe = fopen(path, "r"); + if (!tracer->trace_pipe) { + ret = -errno; + goto out_free; + } + + if (pthread_create(&tracer->tracer_thread, NULL, + tracer_thread_func, (void *)tracer)) { + ret = -errno; + fclose(tracer->trace_pipe); + } + +out_free: + free(path); + return ret; +} + +static void stop_trace_thread(struct test_ftracer *tracer) +{ + void *res; + + if (pthread_cancel(tracer->tracer_thread)) { + test_print("Can't stop tracer pthread: %m"); + tracer_set_error(tracer, -errno); + } + if (pthread_join(tracer->tracer_thread, &res)) { + test_print("Can't join tracer pthread: %m"); + tracer_set_error(tracer, -errno); + } + if (res != PTHREAD_CANCELED) { + test_print("Tracer thread wasn't canceled"); + tracer_set_error(tracer, -errno); + } + if (tracer->error) + test_fail("tracer errored by %s", strerror(tracer->error)); +} + +static void final_wait_for_events(struct test_ftracer *tracer, + unsigned timeout_sec) +{ + struct timespec timeout; + struct timeval now; + int ret = 0; + + if (!tracer->expecting_more) + return; + + pthread_mutex_lock(&tracer->met_all_expected_lock); + gettimeofday(&now, NULL); + timeout.tv_sec = now.tv_sec + timeout_sec; + timeout.tv_nsec = now.tv_usec * 1000; + + while (tracer->expecting_more() && ret != ETIMEDOUT) + ret = pthread_cond_timedwait(&tracer->met_all_expected, + &tracer->met_all_expected_lock, &timeout); + pthread_mutex_unlock(&tracer->met_all_expected_lock); +} + +int setup_trace_event(struct test_ftracer *tracer, + const char *event, const char *filter) +{ + char *enable_path, *filter_path, *instance = tracer->instance_path; + int ret; + + enable_path = test_sprintf("%s/events/%s/enable", instance, event); + if (!enable_path) + return -ENOMEM; + + filter_path = test_sprintf("%s/events/%s/filter", instance, event); + if (!filter_path) { + ret = -ENOMEM; + goto out_free; + } + + ret = test_echo(filter_path, 0, "%s", filter); + if (!ret) + ret = test_echo(enable_path, 0, "1"); + +out_free: + free(filter_path); + free(enable_path); + return ret; +} + +struct test_ftracer *create_ftracer(const char *name, + enum ftracer_op (*process_line)(const char *line), + void (*destructor)(struct test_ftracer *tracer), + bool (*expecting_more)(void), + size_t lines_buf_sz, size_t buffer_size_kb) +{ + struct test_ftracer *tracer; + int err; + + /* XXX: separate __create_ftracer() helper and do here + * if (!kernel_config_has(KCONFIG_FTRACE)) + * return NULL; + */ + + tracer = malloc(sizeof(*tracer)); + if (!tracer) { + test_print("malloc()"); + return NULL; + } + + memset(tracer, 0, sizeof(*tracer)); + + err = setup_ftrace_instance(tracer, name); + if (err) { + test_print("setup_ftrace_instance(): %d", err); + goto err_free; + } + + err = disable_trace_options(tracer->instance_path); + if (err) { + test_print("disable_trace_options(): %d", err); + goto err_remove; + } + + err = setup_buffer_size(tracer->instance_path, buffer_size_kb); + if (err) { + test_print("disable_trace_options(): %d", err); + goto err_remove; + } + + tracer->saved_lines = calloc(lines_buf_sz, sizeof(tracer->saved_lines[0])); + if (!tracer->saved_lines) { + test_print("calloc()"); + goto err_remove; + } + tracer->saved_lines_size = lines_buf_sz; + + tracer->process_line = process_line; + tracer->destructor = destructor; + tracer->expecting_more = expecting_more; + + err = pthread_cond_init(&tracer->met_all_expected, NULL); + if (err) { + test_print("pthread_cond_init(): %d", err); + goto err_free_lines; + } + + err = pthread_mutex_init(&tracer->met_all_expected_lock, NULL); + if (err) { + test_print("pthread_mutex_init(): %d", err); + goto err_cond_destroy; + } + + err = setup_trace_thread(tracer); + if (err) { + test_print("setup_trace_thread(): %d", err); + goto err_mutex_destroy; + } + + pthread_mutex_lock(&ftracers_lock); + tracer->next = ftracers; + ftracers = tracer; + pthread_mutex_unlock(&ftracers_lock); + + return tracer; + +err_mutex_destroy: + pthread_mutex_destroy(&tracer->met_all_expected_lock); +err_cond_destroy: + pthread_cond_destroy(&tracer->met_all_expected); +err_free_lines: + free(tracer->saved_lines); +err_remove: + remove_ftrace_instance(tracer); +err_free: + free(tracer); + return NULL; +} + +static void __destroy_ftracer(struct test_ftracer *tracer) +{ + size_t i; + + final_wait_for_events(tracer, TEST_TIMEOUT_SEC); + stop_trace_thread(tracer); + remove_ftrace_instance(tracer); + if (tracer->destructor) + tracer->destructor(tracer); + for (i = 0; i < tracer->saved_lines_size; i++) + free(tracer->saved_lines[i]); + pthread_cond_destroy(&tracer->met_all_expected); + pthread_mutex_destroy(&tracer->met_all_expected_lock); + free(tracer); +} + +void destroy_ftracer(struct test_ftracer *tracer) +{ + pthread_mutex_lock(&ftracers_lock); + if (tracer == ftracers) { + ftracers = tracer->next; + } else { + struct test_ftracer *f = ftracers; + + while (f->next != tracer) { + if (!f->next) + test_error("tracers list corruption or double free %p", tracer); + f = f->next; + } + f->next = tracer->next; + } + tracer->next = NULL; + pthread_mutex_unlock(&ftracers_lock); + __destroy_ftracer(tracer); +} + +static void destroy_all_ftracers(void) +{ + struct test_ftracer *f; + + pthread_mutex_lock(&ftracers_lock); + f = ftracers; + ftracers = NULL; + pthread_mutex_unlock(&ftracers_lock); + + while (f) { + struct test_ftracer *n = f->next; + + f->next = NULL; + __destroy_ftracer(f); + f = n; + } +} + +static void test_unset_tracing(void) +{ + destroy_all_ftracers(); + unmount_ftrace(); +} + +int test_setup_tracing(void) +{ + /* + * Just a basic protection - this should be called only once from + * lib/kconfig. Not thread safe, which is fine as it's early, before + * threads are created. + */ + static int already_set; + int err; + + if (already_set) + return -1; + + /* Needs net-namespace cookies for filters */ + if (ns_cookie1 == ns_cookie2) { + test_print("net-namespace cookies: %" PRIu64 " == %" PRIu64 ", can't set up tracing", + ns_cookie1, ns_cookie2); + return -1; + } + + already_set = 1; + + test_add_destructor(test_unset_tracing); + + err = mount_ftrace(); + if (err) { + test_print("failed to mount_ftrace(): %d", err); + return err; + } + + return setup_aolib_ftracer(); +} + +static int get_ns_cookie(int nsfd, uint64_t *out) +{ + int old_ns = switch_save_ns(nsfd); + socklen_t size = sizeof(*out); + int sk; + + sk = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) { + test_print("socket(): %m"); + return -errno; + } + + if (getsockopt(sk, SOL_SOCKET, SO_NETNS_COOKIE, out, &size)) { + test_print("getsockopt(SO_NETNS_COOKIE): %m"); + close(sk); + return -errno; + } + + close(sk); + switch_close_ns(old_ns); + return 0; +} + +void test_init_ftrace(int nsfd1, int nsfd2) +{ + get_ns_cookie(nsfd1, &ns_cookie1); + get_ns_cookie(nsfd2, &ns_cookie2); + /* Populate kernel config state */ + kernel_config_has(KCONFIG_FTRACE); +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c index f279ffc3843b..9f1c175846f8 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c +++ b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c @@ -6,7 +6,7 @@ #include "aolib.h" struct kconfig_t { - int _errno; /* the returned error if not supported */ + int _error; /* negative errno if not supported */ int (*check_kconfig)(int *error); }; @@ -62,7 +62,7 @@ static int has_tcp_ao(int *err) memcpy(&tmp.addr, &addr, sizeof(addr)); *err = 0; if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) { - *err = errno; + *err = -errno; if (errno != ENOPROTOOPT) ret = -errno; } @@ -87,7 +87,7 @@ static int has_tcp_md5(int *err) */ *err = 0; if (test_set_md5(sk, addr_any, 0, -1, DEFAULT_TEST_PASSWORD)) { - *err = errno; + *err = -errno; if (errno != ENOPROTOOPT && errno == ENOMEM) { test_print("setsockopt(TCP_MD5SIG_EXT): %m"); ret = -errno; @@ -116,13 +116,21 @@ static int has_vrfs(int *err) return ret; } +static int has_ftrace(int *err) +{ + *err = test_setup_tracing(); + return 0; +} + +#define KCONFIG_UNKNOWN 1 static pthread_mutex_t kconfig_lock = PTHREAD_MUTEX_INITIALIZER; static struct kconfig_t kconfig[__KCONFIG_LAST__] = { - { -1, has_net_ns }, - { -1, has_veth }, - { -1, has_tcp_ao }, - { -1, has_tcp_md5 }, - { -1, has_vrfs }, + { KCONFIG_UNKNOWN, has_net_ns }, + { KCONFIG_UNKNOWN, has_veth }, + { KCONFIG_UNKNOWN, has_tcp_ao }, + { KCONFIG_UNKNOWN, has_tcp_md5 }, + { KCONFIG_UNKNOWN, has_vrfs }, + { KCONFIG_UNKNOWN, has_ftrace }, }; const char *tests_skip_reason[__KCONFIG_LAST__] = { @@ -131,6 +139,7 @@ const char *tests_skip_reason[__KCONFIG_LAST__] = { "Tests require TCP-AO support (CONFIG_TCP_AO)", "setsockopt(TCP_MD5SIG_EXT) is not supported (CONFIG_TCP_MD5)", "VRFs are not supported (CONFIG_NET_VRF)", + "Ftrace points are not supported (CONFIG_TRACEPOINTS)", }; bool kernel_config_has(enum test_needs_kconfig k) @@ -138,11 +147,11 @@ bool kernel_config_has(enum test_needs_kconfig k) bool ret; pthread_mutex_lock(&kconfig_lock); - if (kconfig[k]._errno == -1) { - if (kconfig[k].check_kconfig(&kconfig[k]._errno)) + if (kconfig[k]._error == KCONFIG_UNKNOWN) { + if (kconfig[k].check_kconfig(&kconfig[k]._error)) test_error("Failed to initialize kconfig %u", k); } - ret = kconfig[k]._errno == 0; + ret = kconfig[k]._error == 0; pthread_mutex_unlock(&kconfig_lock); return ret; } diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c index e408b9243b2c..a27cc03c9fbd 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/setup.c +++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c @@ -111,7 +111,7 @@ static void sig_int(int signo) int open_netns(void) { - const char *netns_path = "/proc/self/ns/net"; + const char *netns_path = "/proc/thread-self/ns/net"; int fd; fd = open(netns_path, O_RDONLY); @@ -142,6 +142,13 @@ int switch_save_ns(int new_ns) return ret; } +void switch_close_ns(int fd) +{ + if (setns(fd, CLONE_NEWNET)) + test_error("setns()"); + close(fd); +} + static int nsfd_outside = -1; static int nsfd_parent = -1; static int nsfd_child = -1; @@ -243,9 +250,9 @@ void __test_init(unsigned int ntests, int family, unsigned int prefix, test_print("rand seed %u", (unsigned int)seed); srand(seed); - ksft_print_header(); init_namespaces(); + test_init_ftrace(nsfd_parent, nsfd_child); if (add_veth(veth_name, nsfd_parent, nsfd_child)) test_error("Failed to add veth"); @@ -296,7 +303,7 @@ static bool is_optmem_namespaced(void) int old_ns = switch_save_ns(nsfd_child); optmem_ns = !access(optmem_file, F_OK); - switch_ns(old_ns); + switch_close_ns(old_ns); } return !!optmem_ns; } @@ -317,7 +324,7 @@ size_t test_get_optmem(void) test_error("can't read from %s", optmem_file); fclose(foptmem); if (!is_optmem_namespaced()) - switch_ns(old_ns); + switch_close_ns(old_ns); return ret; } @@ -339,7 +346,7 @@ static void __test_set_optmem(size_t new, size_t *old) test_error("can't write %zu to %s", new, optmem_file); fclose(foptmem); if (!is_optmem_namespaced()) - switch_ns(old_ns); + switch_close_ns(old_ns); } static void test_revert_optmem(void) diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c index 15aeb0963058..0ffda966c677 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/sock.c +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -379,7 +379,6 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) key_dump[0].nkeys = nr_keys; key_dump[0].get_all = 1; - key_dump[0].get_all = 1; err = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, key_dump, &key_dump_sz); if (err) { diff --git a/tools/testing/selftests/net/tcp_ao/lib/utils.c b/tools/testing/selftests/net/tcp_ao/lib/utils.c index 372daca525f5..bdf5522c9213 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/utils.c +++ b/tools/testing/selftests/net/tcp_ao/lib/utils.c @@ -21,6 +21,32 @@ void randomize_buffer(void *buf, size_t buflen) } } +__printf(3, 4) int test_echo(const char *fname, bool append, + const char *fmt, ...) +{ + size_t len, written; + va_list vargs; + char *msg; + FILE *f; + + f = fopen(fname, append ? "a" : "w"); + if (!f) + return -errno; + + va_start(vargs, fmt); + msg = test_snprintf(fmt, vargs); + va_end(vargs); + if (!msg) { + fclose(f); + return -1; + } + len = strlen(msg); + written = fwrite(msg, 1, len, f); + fclose(f); + free(msg); + return written == len ? 0 : -1; +} + const struct sockaddr_in6 addr_any6 = { .sin6_family = AF_INET6, }; diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c index 8fdc808df325..ecc6f1e3a414 100644 --- a/tools/testing/selftests/net/tcp_ao/restore.c +++ b/tools/testing/selftests/net/tcp_ao/restore.c @@ -64,6 +64,7 @@ static void try_server_run(const char *tst_name, unsigned int port, else test_ok("%s: server alive", tst_name); } + synchronize_threads(); /* 3: counters checks */ if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); after_cnt = netstat_get_one(cnt_name, NULL); @@ -71,10 +72,10 @@ static void try_server_run(const char *tst_name, unsigned int port, test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } @@ -82,7 +83,7 @@ static void try_server_run(const char *tst_name, unsigned int port, * Before close() as that will send FIN and move the peer in TCP_CLOSE * and that will prevent reading AO counters from the peer's socket. */ - synchronize_threads(); /* 3: verified => closed */ + synchronize_threads(); /* 4: verified => closed */ out: close(sk); } @@ -176,6 +177,7 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port, else test_ok("%s: post-migrate connection is alive", tst_name); } + synchronize_threads(); /* 3: counters checks */ if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); after_cnt = netstat_get_one(cnt_name, NULL); @@ -183,13 +185,13 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port, test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } - synchronize_threads(); /* 3: verified => closed */ + synchronize_threads(); /* 4: verified => closed */ close(sk); } @@ -206,22 +208,36 @@ static void *client_fn(void *arg) test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); ao_img.snt_isn += 1; + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); test_sk_restore("TCP-AO with wrong send ISN", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); ao_img.rcv_isn += 1; + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); test_sk_restore("TCP-AO with wrong receive ISN", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); ao_img.snd_sne += 1; + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); + /* not expecting server => client mismatches as only snd sne is broken */ test_sk_restore("TCP-AO with wrong send SEQ ext number", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); ao_img.rcv_sne += 1; + /* not expecting client => server mismatches as only rcv sne is broken */ + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_NS_GOOD | TEST_CNT_BAD); @@ -231,6 +247,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(20, server_fn, client_fn); + test_init(21, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c index a2fe88d35ac0..6364facaa63e 100644 --- a/tools/testing/selftests/net/tcp_ao/rst.c +++ b/tools/testing/selftests/net/tcp_ao/rst.c @@ -455,6 +455,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(14, server_fn, client_fn); + test_init(15, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c index a5698b0a3718..3ecd2b58de6a 100644 --- a/tools/testing/selftests/net/tcp_ao/self-connect.c +++ b/tools/testing/selftests/net/tcp_ao/self-connect.c @@ -87,7 +87,7 @@ static void tcp_self_connect(const char *tst, unsigned int port, netstat_free(ns_after); if (after_aogood <= before_aogood) { - test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64, tst, after_aogood, before_aogood); close(sk); return; @@ -148,7 +148,7 @@ static void tcp_self_connect(const char *tst, unsigned int port, netstat_free(ns_after); close(sk); if (after_aogood <= before_aogood) { - test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64, tst, after_aogood, before_aogood); return; } @@ -163,17 +163,26 @@ static void *client_fn(void *arg) setup_lo_intf("lo"); tcp_self_connect("self-connect(same keyids)", port++, false, false); + + /* expecting rnext to change based on the first segment RNext != Current */ + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr, + port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1); tcp_self_connect("self-connect(different keyids)", port++, true, false); tcp_self_connect("self-connect(restore)", port, false, true); - port += 2; + port += 2; /* restore test restores over different port */ + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr, + port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1); + /* intentionally on restore they are added to the socket in different order */ + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr, + port + 1, port + 1, 0, -1, -1, -1, -1, -1, 5, 7, -1); tcp_self_connect("self-connect(restore, different keyids)", port, true, true); - port += 2; + port += 2; /* restore test restores over different port */ return NULL; } int main(int argc, char *argv[]) { - test_init(4, client_fn, NULL); + test_init(5, client_fn, NULL); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c index ad4e77d6823e..8901a6785dc8 100644 --- a/tools/testing/selftests/net/tcp_ao/seq-ext.c +++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c @@ -116,7 +116,15 @@ static void *server_fn(void *arg) sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, client_new_port, &ao1); - synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ + trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_addr, + this_ip_dest, test_server_port + 1, client_new_port, 1); + trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_dest, + this_ip_addr, client_new_port, test_server_port + 1, 1); + trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_addr, + this_ip_dest, test_server_port + 1, client_new_port, 1); + trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_dest, + this_ip_addr, client_new_port, test_server_port + 1, 1); + synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); if (bytes != quota) { if (bytes > 0) @@ -127,6 +135,7 @@ static void *server_fn(void *arg) test_ok("server alive"); } + synchronize_threads(); /* 6: verify counters after SEQ-number rollover */ if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); after_good = netstat_get_one("TCPAOGood", NULL); @@ -134,15 +143,15 @@ static void *server_fn(void *arg) test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); if (after_good <= before_good) { - test_fail("TCPAOGood counter did not increase: %zu <= %zu", + test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64, after_good, before_good); } else { - test_ok("TCPAOGood counter increased %zu => %zu", + test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64, before_good, after_good); } after_bad = netstat_get_one("TCPAOBad", NULL); if (after_bad) - test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad); else test_ok("TCPAOBad counter didn't increase"); test_enable_repair(sk); @@ -206,12 +215,13 @@ static void *client_fn(void *arg) sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, test_server_port + 1, &ao1); - synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ + synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */ if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) test_fail("post-migrate verify failed"); else test_ok("post-migrate connection alive"); + synchronize_threads(); /* 5: verify counters after SEQ-number rollover */ if (test_get_tcp_ao_counters(sk, &ao2)) test_error("test_get_tcp_ao_counters()"); after_good = netstat_get_one("TCPAOGood", NULL); @@ -219,15 +229,15 @@ static void *client_fn(void *arg) test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); if (after_good <= before_good) { - test_fail("TCPAOGood counter did not increase: %zu <= %zu", + test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64, after_good, before_good); } else { - test_ok("TCPAOGood counter increased %zu => %zu", + test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64, before_good, after_good); } after_bad = netstat_get_one("TCPAOBad", NULL); if (after_bad) - test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad); else test_ok("TCPAOBad counter didn't increase"); @@ -240,6 +250,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(7, server_fn, client_fn); + test_init(8, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c index 517930f9721b..084db4ecdff6 100644 --- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c +++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c @@ -30,8 +30,8 @@ static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info, #define __cmp_ao(member) \ do { \ if (info->member != tmp.member) { \ - test_fail("%s: getsockopt(): " __stringify(member) " %zu != %zu", \ - tst, (size_t)info->member, (size_t)tmp.member); \ + test_fail("%s: getsockopt(): " __stringify(member) " %" PRIu64 " != %" PRIu64, \ + tst, (uint64_t)info->member, (uint64_t)tmp.member); \ return; \ } \ } while(0) @@ -830,6 +830,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(120, client_fn, NULL); + test_init(121, client_fn, NULL); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c index 6b59a652159f..f779e5892bc1 100644 --- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c +++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c @@ -70,6 +70,7 @@ static void try_accept(const char *tst_name, unsigned int port, timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; err = test_wait_fd(lsk, timeout, 0); + synchronize_threads(); /* connect()/accept() timeouts */ if (err == -ETIMEDOUT) { if (!fault(TIMEOUT)) test_fail("timed out for accept()"); @@ -100,10 +101,10 @@ static void try_accept(const char *tst_name, unsigned int port, after_cnt = netstat_get_one(cnt_name, NULL); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } if (ao_addr) @@ -283,6 +284,7 @@ static void try_connect(const char *tst_name, unsigned int port, timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + synchronize_threads(); /* connect()/accept() timeouts */ if (ret < 0) { if (fault(KEYREJECT) && ret == -EKEYREJECTED) test_ok("%s: connect() was prevented", tst_name); @@ -451,6 +453,7 @@ static void try_to_add(const char *tst_name, unsigned int port, timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + synchronize_threads(); /* connect()/accept() timeouts */ if (ret <= 0) { test_error("%s: connect() returned %d", tst_name, ret); goto out; @@ -671,24 +674,38 @@ static void *client_fn(void *arg) try_connect("AO server (INADDR_ANY): AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server (INADDR_ANY): MD5 client", port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server (INADDR_ANY): unsigned client", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr); try_connect("AO server (AO_REQUIRED): AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &client2); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("MD5 server (INADDR_ANY): AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); try_connect("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("MD5 server (INADDR_ANY): no sign client", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("no sign server: AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("no sign server: MD5 client", port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); try_connect("no sign server: no sign client", port++, NULL, 0, @@ -696,25 +713,37 @@ static void *client_fn(void *arg) try_connect("AO+MD5 server: AO client (matching)", port++, NULL, 0, &addr_any, 0, 100, 100, 0, 0, 1, &client2); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("AO+MD5 server: AO client (misconfig, matching MD5)", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, client3, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("AO+MD5 server: AO client (misconfig, non-matching)", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client3); try_connect("AO+MD5 server: MD5 client (matching)", port++, &addr_any, 0, NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client2, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: MD5 client (misconfig, matching AO)", port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client2); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client3, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: MD5 client (misconfig, non-matching)", port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client3); try_connect("AO+MD5 server: no sign client (unmatched)", port++, NULL, 0, NULL, 0, 100, 100, 0, 0, 1, &client3); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: no sign client (misconfig, matching AO)", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client2); + trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: no sign client (misconfig, matching MD5)", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); @@ -736,6 +765,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(72, server_fn, client_fn); + test_init(73, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index ec60a16c9307..d626f22f9550 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -356,8 +356,12 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) } } - if (batch > 1) + if (batch > 1) { fprintf(stderr, "batched %d timestamps\n", batch); + } else if (!batch) { + fprintf(stderr, "Failed to report timestamps\n"); + test_failed = true; + } } static int recv_errmsg(int fd) diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 11a1ebda564f..d5ffd8c9172e 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -7,8 +7,6 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.bpf.o" - # set global exit status, but never reset nonzero one. check_err() { @@ -38,7 +36,7 @@ cfg_veth() { ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp + ip netns exec "${PEER_NS}" ethtool -K veth1 gro on } run_one() { @@ -46,17 +44,19 @@ run_one() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} & + local PID1=$! wait_local_port_listen ${PEER_NS} 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -73,6 +73,7 @@ run_one_nat() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 if [[ ${tx_args} = *-4* ]]; then ipt_cmd=iptables @@ -93,16 +94,17 @@ run_one_nat() { # ... so that GRO will match the UDP_GRO enabled socket, but packets # will land on the 'plain' one ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & - pid=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} && \ - echo "ok" || \ - echo "failed"& + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - kill -INT $pid - wait $(jobs -p) + check_err $? + kill -INT ${PID1} + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -111,20 +113,26 @@ run_one_2sock() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 12345 udp ./udpgso_bench_tx ${tx_args} -p 12345 + check_err $? wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -196,11 +204,6 @@ run_all() { return $ret } -if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Run 'make' first" - exit -1 -fi - if [[ $# -eq 0 ]]; then run_all elif [[ $1 == "__subprocess" ]]; then diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 3e74cfa1a2bf..3f2fca02fec5 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -67,6 +67,7 @@ struct testcase { int gso_len; /* mss after applying gso */ int r_num_mss; /* recv(): number of calls of full mss */ int r_len_last; /* recv(): size of last non-mss dgram, if any */ + bool v6_ext_hdr; /* send() dgrams with IPv6 extension headers */ }; const struct in6_addr addr6 = { @@ -77,6 +78,8 @@ const struct in_addr addr4 = { __constant_htonl(0x0a000001), /* 10.0.0.1 */ }; +static const char ipv6_hopopts_pad1[8] = { 0 }; + struct testcase testcases_v4[] = { { /* no GSO: send a single byte */ @@ -256,6 +259,13 @@ struct testcase testcases_v6[] = { .r_num_mss = 2, }, { + /* send 2 1B segments with extension headers */ + .tlen = 2, + .gso_len = 1, + .r_num_mss = 2, + .v6_ext_hdr = true, + }, + { /* send 2B + 2B + 1B segments */ .tlen = 5, .gso_len = 2, @@ -396,11 +406,18 @@ static void run_one(struct testcase *test, int fdt, int fdr, int i, ret, val, mss; bool sent; - fprintf(stderr, "ipv%d tx:%d gso:%d %s\n", + fprintf(stderr, "ipv%d tx:%d gso:%d %s%s\n", addr->sa_family == AF_INET ? 4 : 6, test->tlen, test->gso_len, + test->v6_ext_hdr ? "ext-hdr " : "", test->tfail ? "(fail)" : ""); + if (test->v6_ext_hdr) { + if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, + ipv6_hopopts_pad1, sizeof(ipv6_hopopts_pad1))) + error(1, errno, "setsockopt ipv6 hopopts"); + } + val = test->gso_len; if (cfg_do_setsockopt) { if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val))) @@ -412,6 +429,12 @@ static void run_one(struct testcase *test, int fdt, int fdr, error(1, 0, "send succeeded while expecting failure"); if (!sent && !test->tfail) error(1, 0, "send failed while expecting success"); + + if (test->v6_ext_hdr) { + if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, NULL, 0)) + error(1, errno, "setsockopt ipv6 hopopts clear"); + } + if (!sent) return; diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh index f52aa5f7da52..3e751234ccfe 100755 --- a/tools/testing/selftests/net/unicast_extensions.sh +++ b/tools/testing/selftests/net/unicast_extensions.sh @@ -30,14 +30,7 @@ source lib.sh -# nettest can be run from PATH or from same directory as this selftest -if ! which nettest >/dev/null; then - PATH=$PWD:$PATH - if ! which nettest >/dev/null; then - echo "'nettest' command not found; skipping tests" - exit $ksft_skip - fi -fi +check_gen_prog "nettest" result=0 diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index 152171fb1fc8..e9c2f71da207 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -59,7 +59,6 @@ # while it is forwarded between different vrfs. source lib.sh -PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH VERBOSE=0 PAUSE_ON_FAIL=no DEFAULT_TTYPE=sym @@ -636,6 +635,8 @@ EOF # Some systems don't have a ping6 binary anymore command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping) +check_gen_prog "nettest" + TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_local ipv4_tcp_local ipv4_udp_local ipv4_ping_ttl_asym ipv4_traceroute_asym" TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_local ipv6_tcp_local ipv6_udp_local diff --git a/tools/testing/selftests/net/xfrm_policy_add_speed.sh b/tools/testing/selftests/net/xfrm_policy_add_speed.sh new file mode 100755 index 000000000000..2fab29d3cb91 --- /dev/null +++ b/tools/testing/selftests/net/xfrm_policy_add_speed.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +source lib.sh + +timeout=4m +ret=0 +tmp=$(mktemp) +cleanup() { + cleanup_all_ns + rm -f "$tmp" +} + +trap cleanup EXIT + +maxpolicies=100000 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && maxpolicies=10000 + +do_dummies4() { + local dir="$1" + local max="$2" + + local policies + local pfx + pfx=30 + policies=0 + + ip netns exec "$ns" ip xfrm policy flush + + for i in $(seq 1 100);do + local s + local d + for j in $(seq 1 255);do + s=$((i+0)) + d=$((i+100)) + + for a in $(seq 1 8 255); do + policies=$((policies+1)) + [ "$policies" -gt "$max" ] && return + echo xfrm policy add src 10.$s.$j.0/30 dst 10.$d.$j.$a/$pfx dir $dir action block + done + for a in $(seq 1 8 255); do + policies=$((policies+1)) + [ "$policies" -gt "$max" ] && return + echo xfrm policy add src 10.$s.$j.$a/30 dst 10.$d.$j.0/$pfx dir $dir action block + done + done + done +} + +setup_ns ns + +do_bench() +{ + local max="$1" + + start=$(date +%s%3N) + do_dummies4 "out" "$max" > "$tmp" + if ! timeout "$timeout" ip netns exec "$ns" ip -batch "$tmp";then + echo "WARNING: policy insertion cancelled after $timeout" + ret=1 + fi + stop=$(date +%s%3N) + + result=$((stop-start)) + + policies=$(wc -l < "$tmp") + printf "Inserted %-06s policies in $result ms\n" $policies + + have=$(ip netns exec "$ns" ip xfrm policy show | grep "action block" | wc -l) + if [ "$have" -ne "$policies" ]; then + echo "WARNING: mismatch, have $have policies, expected $policies" + ret=1 + fi +} + +p=100 +while [ $p -le "$maxpolicies" ]; do + do_bench "$p" + p="${p}0" +done + +exit $ret diff --git a/tools/testing/selftests/riscv/mm/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/mmap_bottomup.c index 7f7d3eb8b9c9..f9ccae50349b 100644 --- a/tools/testing/selftests/riscv/mm/mmap_bottomup.c +++ b/tools/testing/selftests/riscv/mm/mmap_bottomup.c @@ -7,8 +7,6 @@ TEST(infinite_rlimit) { EXPECT_EQ(BOTTOM_UP, memory_layout()); - - TEST_MMAPS; } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/mmap_default.c b/tools/testing/selftests/riscv/mm/mmap_default.c index 2ba3ec990006..3f53b6ecc326 100644 --- a/tools/testing/selftests/riscv/mm/mmap_default.c +++ b/tools/testing/selftests/riscv/mm/mmap_default.c @@ -7,8 +7,6 @@ TEST(default_rlimit) { EXPECT_EQ(TOP_DOWN, memory_layout()); - - TEST_MMAPS; } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/riscv/mm/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h index 3b29ca3bb3d4..75918d15919f 100644 --- a/tools/testing/selftests/riscv/mm/mmap_test.h +++ b/tools/testing/selftests/riscv/mm/mmap_test.h @@ -10,76 +10,9 @@ #define TOP_DOWN 0 #define BOTTOM_UP 1 -#if __riscv_xlen == 64 -uint64_t random_addresses[] = { - 0x19764f0d73b3a9f0, 0x016049584cecef59, 0x3580bdd3562f4acd, - 0x1164219f20b17da0, 0x07d97fcb40ff2373, 0x76ec528921272ee7, - 0x4dd48c38a3de3f70, 0x2e11415055f6997d, 0x14b43334ac476c02, - 0x375a60795aff19f6, 0x47f3051725b8ee1a, 0x4e697cf240494a9f, - 0x456b59b5c2f9e9d1, 0x101724379d63cb96, 0x7fe9ad31619528c1, - 0x2f417247c495c2ea, 0x329a5a5b82943a5e, 0x06d7a9d6adcd3827, - 0x327b0b9ee37f62d5, 0x17c7b1851dfd9b76, 0x006ebb6456ec2cd9, - 0x00836cd14146a134, 0x00e5c4dcde7126db, 0x004c29feadf75753, - 0x00d8b20149ed930c, 0x00d71574c269387a, 0x0006ebe4a82acb7a, - 0x0016135df51f471b, 0x00758bdb55455160, 0x00d0bdd949b13b32, - 0x00ecea01e7c5f54b, 0x00e37b071b9948b1, 0x0011fdd00ff57ab3, - 0x00e407294b52f5ea, 0x00567748c200ed20, 0x000d073084651046, - 0x00ac896f4365463c, 0x00eb0d49a0b26216, 0x0066a2564a982a31, - 0x002e0d20237784ae, 0x0000554ff8a77a76, 0x00006ce07a54c012, - 0x000009570516d799, 0x00000954ca15b84d, 0x0000684f0d453379, - 0x00002ae5816302b5, 0x0000042403fb54bf, 0x00004bad7392bf30, - 0x00003e73bfa4b5e3, 0x00005442c29978e0, 0x00002803f11286b6, - 0x000073875d745fc6, 0x00007cede9cb8240, 0x000027df84cc6a4f, - 0x00006d7e0e74242a, 0x00004afd0b836e02, 0x000047d0e837cd82, - 0x00003b42405efeda, 0x00001531bafa4c95, 0x00007172cae34ac4, -}; -#else -uint32_t random_addresses[] = { - 0x8dc302e0, 0x929ab1e0, 0xb47683ba, 0xea519c73, 0xa19f1c90, 0xc49ba213, - 0x8f57c625, 0xadfe5137, 0x874d4d95, 0xaa20f09d, 0xcf21ebfc, 0xda7737f1, - 0xcedf392a, 0x83026c14, 0xccedca52, 0xc6ccf826, 0xe0cd9415, 0x997472ca, - 0xa21a44c1, 0xe82196f5, 0xa23fd66b, 0xc28d5590, 0xd009cdce, 0xcf0be646, - 0x8fc8c7ff, 0xe2a85984, 0xa3d3236b, 0x89a0619d, 0xc03db924, 0xb5d4cc1b, - 0xb96ee04c, 0xd191da48, 0xb432a000, 0xaa2bebbc, 0xa2fcb289, 0xb0cca89b, - 0xb0c18d6a, 0x88f58deb, 0xa4d42d1c, 0xe4d74e86, 0x99902b09, 0x8f786d31, - 0xbec5e381, 0x9a727e65, 0xa9a65040, 0xa880d789, 0x8f1b335e, 0xfc821c1e, - 0x97e34be4, 0xbbef84ed, 0xf447d197, 0xfd7ceee2, 0xe632348d, 0xee4590f4, - 0x958992a5, 0xd57e05d6, 0xfd240970, 0xc5b0dcff, 0xd96da2c2, 0xa7ae041d, -}; -#endif - -// Only works on 64 bit -#if __riscv_xlen == 64 #define PROT (PROT_READ | PROT_WRITE) #define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS) -/* mmap must return a value that doesn't use more bits than the hint address. */ -static inline unsigned long get_max_value(unsigned long input) -{ - unsigned long max_bit = (1UL << (((sizeof(unsigned long) * 8) - 1 - - __builtin_clzl(input)))); - - return max_bit + (max_bit - 1); -} - -#define TEST_MMAPS \ - ({ \ - void *mmap_addr; \ - for (int i = 0; i < ARRAY_SIZE(random_addresses); i++) { \ - mmap_addr = mmap((void *)random_addresses[i], \ - 5 * sizeof(int), PROT, FLAGS, 0, 0); \ - EXPECT_NE(MAP_FAILED, mmap_addr); \ - EXPECT_GE((void *)get_max_value(random_addresses[i]), \ - mmap_addr); \ - mmap_addr = mmap((void *)random_addresses[i], \ - 5 * sizeof(int), PROT, FLAGS, 0, 0); \ - EXPECT_NE(MAP_FAILED, mmap_addr); \ - EXPECT_GE((void *)get_max_value(random_addresses[i]), \ - mmap_addr); \ - } \ - }) -#endif /* __riscv_xlen == 64 */ - static inline int memory_layout(void) { void *value1 = mmap(NULL, sizeof(int), PROT, FLAGS, 0, 0); diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index e3f97f90d8db..8c3a73461475 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -60,7 +60,9 @@ #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) #endif +#ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#endif #ifndef PR_SET_PTRACER # define PR_SET_PTRACER 0x59616d61 diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index ee349187636f..4f255cec0c22 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -143,7 +143,6 @@ class PluginMgr: except Exception as ee: print('exception {} in call to pre_case for {} plugin'. format(ee, pgn_inst.__class__)) - print('test_ordinal is {}'.format(test_ordinal)) print('testid is {}'.format(caseinfo['id'])) raise diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 07c81c0093c0..16bd49492efa 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -6,10 +6,13 @@ * * Kernel loop code stolen from Steven Rostedt <srostedt@redhat.com> */ - +#define _GNU_SOURCE #include <sys/time.h> +#include <sys/types.h> #include <stdio.h> #include <signal.h> +#include <stdint.h> +#include <string.h> #include <unistd.h> #include <time.h> #include <pthread.h> @@ -18,6 +21,21 @@ #define DELAY 2 #define USECS_PER_SEC 1000000 +#define NSECS_PER_SEC 1000000000 + +static void __fatal_error(const char *test, const char *name, const char *what) +{ + char buf[64]; + + strerror_r(errno, buf, sizeof(buf)); + + if (name && strlen(name)) + ksft_exit_fail_msg("%s %s %s %s\n", test, name, what, buf); + else + ksft_exit_fail_msg("%s %s %s\n", test, what, buf); +} + +#define fatal_error(name, what) __fatal_error(__func__, name, what) static volatile int done; @@ -74,24 +92,13 @@ static int check_diff(struct timeval start, struct timeval end) return 0; } -static int check_itimer(int which) +static void check_itimer(int which, const char *name) { - const char *name; - int err; struct timeval start, end; struct itimerval val = { .it_value.tv_sec = DELAY, }; - if (which == ITIMER_VIRTUAL) - name = "ITIMER_VIRTUAL"; - else if (which == ITIMER_PROF) - name = "ITIMER_PROF"; - else if (which == ITIMER_REAL) - name = "ITIMER_REAL"; - else - return -1; - done = 0; if (which == ITIMER_VIRTUAL) @@ -101,17 +108,11 @@ static int check_itimer(int which) else if (which == ITIMER_REAL) signal(SIGALRM, sig_handler); - err = gettimeofday(&start, NULL); - if (err < 0) { - ksft_perror("Can't call gettimeofday()"); - return -1; - } + if (gettimeofday(&start, NULL) < 0) + fatal_error(name, "gettimeofday()"); - err = setitimer(which, &val, NULL); - if (err < 0) { - ksft_perror("Can't set timer"); - return -1; - } + if (setitimer(which, &val, NULL) < 0) + fatal_error(name, "setitimer()"); if (which == ITIMER_VIRTUAL) user_loop(); @@ -120,68 +121,41 @@ static int check_itimer(int which) else if (which == ITIMER_REAL) idle_loop(); - err = gettimeofday(&end, NULL); - if (err < 0) { - ksft_perror("Can't call gettimeofday()"); - return -1; - } + if (gettimeofday(&end, NULL) < 0) + fatal_error(name, "gettimeofday()"); ksft_test_result(check_diff(start, end) == 0, "%s\n", name); - - return 0; } -static int check_timer_create(int which) +static void check_timer_create(int which, const char *name) { - const char *type; - int err; - timer_t id; struct timeval start, end; struct itimerspec val = { .it_value.tv_sec = DELAY, }; - - if (which == CLOCK_THREAD_CPUTIME_ID) { - type = "thread"; - } else if (which == CLOCK_PROCESS_CPUTIME_ID) { - type = "process"; - } else { - ksft_print_msg("Unknown timer_create() type %d\n", which); - return -1; - } + timer_t id; done = 0; - err = timer_create(which, NULL, &id); - if (err < 0) { - ksft_perror("Can't create timer"); - return -1; - } - signal(SIGALRM, sig_handler); - err = gettimeofday(&start, NULL); - if (err < 0) { - ksft_perror("Can't call gettimeofday()"); - return -1; - } + if (timer_create(which, NULL, &id) < 0) + fatal_error(name, "timer_create()"); - err = timer_settime(id, 0, &val, NULL); - if (err < 0) { - ksft_perror("Can't set timer"); - return -1; - } + if (signal(SIGALRM, sig_handler) == SIG_ERR) + fatal_error(name, "signal()"); + + if (gettimeofday(&start, NULL) < 0) + fatal_error(name, "gettimeofday()"); + + if (timer_settime(id, 0, &val, NULL) < 0) + fatal_error(name, "timer_settime()"); user_loop(); - err = gettimeofday(&end, NULL); - if (err < 0) { - ksft_perror("Can't call gettimeofday()"); - return -1; - } + if (gettimeofday(&end, NULL) < 0) + fatal_error(name, "gettimeofday()"); ksft_test_result(check_diff(start, end) == 0, - "timer_create() per %s\n", type); - - return 0; + "timer_create() per %s\n", name); } static pthread_t ctd_thread; @@ -209,15 +183,14 @@ static void *ctd_thread_func(void *arg) ctd_count = 100; if (timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id)) - return "Can't create timer\n"; + fatal_error(NULL, "timer_create()"); if (timer_settime(id, 0, &val, NULL)) - return "Can't set timer\n"; - + fatal_error(NULL, "timer_settime()"); while (ctd_count > 0 && !ctd_failed) ; if (timer_delete(id)) - return "Can't delete timer\n"; + fatal_error(NULL, "timer_delete()"); return NULL; } @@ -225,19 +198,16 @@ static void *ctd_thread_func(void *arg) /* * Test that only the running thread receives the timer signal. */ -static int check_timer_distribution(void) +static void check_timer_distribution(void) { - const char *errmsg; + if (signal(SIGALRM, ctd_sighandler) == SIG_ERR) + fatal_error(NULL, "signal()"); - signal(SIGALRM, ctd_sighandler); - - errmsg = "Can't create thread\n"; if (pthread_create(&ctd_thread, NULL, ctd_thread_func, NULL)) - goto err; + fatal_error(NULL, "pthread_create()"); - errmsg = "Can't join thread\n"; - if (pthread_join(ctd_thread, (void **)&errmsg) || errmsg) - goto err; + if (pthread_join(ctd_thread, NULL)) + fatal_error(NULL, "pthread_join()"); if (!ctd_failed) ksft_test_result_pass("check signal distribution\n"); @@ -245,31 +215,399 @@ static int check_timer_distribution(void) ksft_test_result_fail("check signal distribution\n"); else ksft_test_result_skip("check signal distribution (old kernel)\n"); - return 0; -err: - ksft_print_msg("%s", errmsg); - return -1; +} + +struct tmrsig { + int signals; + int overruns; +}; + +static void siginfo_handler(int sig, siginfo_t *si, void *uc) +{ + struct tmrsig *tsig = si ? si->si_ptr : NULL; + + if (tsig) { + tsig->signals++; + tsig->overruns += si->si_overrun; + } +} + +static void *ignore_thread(void *arg) +{ + unsigned int *tid = arg; + sigset_t set; + + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_BLOCK)"); + + *tid = gettid(); + sleep(100); + + if (sigprocmask(SIG_UNBLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)"); + return NULL; +} + +static void check_sig_ign(int thread) +{ + struct tmrsig tsig = { }; + struct itimerspec its; + unsigned int tid = 0; + struct sigaction sa; + struct sigevent sev; + pthread_t pthread; + timer_t timerid; + sigset_t set; + + if (thread) { + if (pthread_create(&pthread, NULL, ignore_thread, &tid)) + fatal_error(NULL, "pthread_create()"); + sleep(1); + } + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = siginfo_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGUSR1, &sa, NULL)) + fatal_error(NULL, "sigaction()"); + + /* Block the signal */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_BLOCK)"); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + sev.sigev_value.sival_ptr = &tsig; + if (thread) { + sev.sigev_notify = SIGEV_THREAD_ID; + sev._sigev_un._tid = tid; + } + + if (timer_create(CLOCK_MONOTONIC, &sev, &timerid)) + fatal_error(NULL, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + timer_settime(timerid, 0, &its, NULL); + + sleep(1); + + /* Set the signal to be ignored */ + if (signal(SIGUSR1, SIG_IGN) == SIG_ERR) + fatal_error(NULL, "signal(SIG_IGN)"); + + sleep(1); + + if (thread) { + /* Stop the thread first. No signal should be delivered to it */ + if (pthread_cancel(pthread)) + fatal_error(NULL, "pthread_cancel()"); + if (pthread_join(pthread, NULL)) + fatal_error(NULL, "pthread_join()"); + } + + /* Restore the handler */ + if (sigaction(SIGUSR1, &sa, NULL)) + fatal_error(NULL, "sigaction()"); + + sleep(1); + + /* Unblock it, which should deliver the signal in the !thread case*/ + if (sigprocmask(SIG_UNBLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)"); + + if (timer_delete(timerid)) + fatal_error(NULL, "timer_delete()"); + + if (!thread) { + ksft_test_result(tsig.signals == 1 && tsig.overruns == 29, + "check_sig_ign SIGEV_SIGNAL\n"); + } else { + ksft_test_result(tsig.signals == 0 && tsig.overruns == 0, + "check_sig_ign SIGEV_THREAD_ID\n"); + } +} + +static void check_rearm(void) +{ + struct tmrsig tsig = { }; + struct itimerspec its; + struct sigaction sa; + struct sigevent sev; + timer_t timerid; + sigset_t set; + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = siginfo_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGUSR1, &sa, NULL)) + fatal_error(NULL, "sigaction()"); + + /* Block the signal */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_BLOCK)"); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + sev.sigev_value.sival_ptr = &tsig; + if (timer_create(CLOCK_MONOTONIC, &sev, &timerid)) + fatal_error(NULL, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + if (timer_settime(timerid, 0, &its, NULL)) + fatal_error(NULL, "timer_settime()"); + + sleep(1); + + /* Reprogram the timer to single shot */ + its.it_value.tv_sec = 10; + its.it_value.tv_nsec = 0; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 0; + if (timer_settime(timerid, 0, &its, NULL)) + fatal_error(NULL, "timer_settime()"); + + /* Unblock it, which should not deliver a signal */ + if (sigprocmask(SIG_UNBLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)"); + + if (timer_delete(timerid)) + fatal_error(NULL, "timer_delete()"); + + ksft_test_result(!tsig.signals, "check_rearm\n"); +} + +static void check_delete(void) +{ + struct tmrsig tsig = { }; + struct itimerspec its; + struct sigaction sa; + struct sigevent sev; + timer_t timerid; + sigset_t set; + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = siginfo_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGUSR1, &sa, NULL)) + fatal_error(NULL, "sigaction()"); + + /* Block the signal */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_BLOCK)"); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + sev.sigev_value.sival_ptr = &tsig; + if (timer_create(CLOCK_MONOTONIC, &sev, &timerid)) + fatal_error(NULL, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + if (timer_settime(timerid, 0, &its, NULL)) + fatal_error(NULL, "timer_settime()"); + + sleep(1); + + if (timer_delete(timerid)) + fatal_error(NULL, "timer_delete()"); + + /* Unblock it, which should not deliver a signal */ + if (sigprocmask(SIG_UNBLOCK, &set, NULL)) + fatal_error(NULL, "sigprocmask(SIG_UNBLOCK)"); + + ksft_test_result(!tsig.signals, "check_delete\n"); +} + +static inline int64_t calcdiff_ns(struct timespec t1, struct timespec t2) +{ + int64_t diff; + + diff = NSECS_PER_SEC * (int64_t)((int) t1.tv_sec - (int) t2.tv_sec); + diff += ((int) t1.tv_nsec - (int) t2.tv_nsec); + return diff; +} + +static void check_sigev_none(int which, const char *name) +{ + struct timespec start, now; + struct itimerspec its; + struct sigevent sev; + timer_t timerid; + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_NONE; + + if (timer_create(which, &sev, &timerid)) + fatal_error(name, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + timer_settime(timerid, 0, &its, NULL); + + if (clock_gettime(which, &start)) + fatal_error(name, "clock_gettime()"); + + do { + if (clock_gettime(which, &now)) + fatal_error(name, "clock_gettime()"); + } while (calcdiff_ns(now, start) < NSECS_PER_SEC); + + if (timer_gettime(timerid, &its)) + fatal_error(name, "timer_gettime()"); + + if (timer_delete(timerid)) + fatal_error(name, "timer_delete()"); + + ksft_test_result(its.it_value.tv_sec || its.it_value.tv_nsec, + "check_sigev_none %s\n", name); +} + +static void check_gettime(int which, const char *name) +{ + struct itimerspec its, prev; + struct timespec start, now; + struct sigevent sev; + timer_t timerid; + int wraps = 0; + sigset_t set; + + /* Block the signal */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(name, "sigprocmask(SIG_BLOCK)"); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + + if (timer_create(which, &sev, &timerid)) + fatal_error(name, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + if (timer_settime(timerid, 0, &its, NULL)) + fatal_error(name, "timer_settime()"); + + if (timer_gettime(timerid, &prev)) + fatal_error(name, "timer_gettime()"); + + if (clock_gettime(which, &start)) + fatal_error(name, "clock_gettime()"); + + do { + if (clock_gettime(which, &now)) + fatal_error(name, "clock_gettime()"); + if (timer_gettime(timerid, &its)) + fatal_error(name, "timer_gettime()"); + if (its.it_value.tv_nsec > prev.it_value.tv_nsec) + wraps++; + prev = its; + + } while (calcdiff_ns(now, start) < NSECS_PER_SEC); + + if (timer_delete(timerid)) + fatal_error(name, "timer_delete()"); + + ksft_test_result(wraps > 1, "check_gettime %s\n", name); +} + +static void check_overrun(int which, const char *name) +{ + struct timespec start, now; + struct tmrsig tsig = { }; + struct itimerspec its; + struct sigaction sa; + struct sigevent sev; + timer_t timerid; + sigset_t set; + + sa.sa_flags = SA_SIGINFO; + sa.sa_sigaction = siginfo_handler; + sigemptyset(&sa.sa_mask); + if (sigaction(SIGUSR1, &sa, NULL)) + fatal_error(name, "sigaction()"); + + /* Block the signal */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + if (sigprocmask(SIG_BLOCK, &set, NULL)) + fatal_error(name, "sigprocmask(SIG_BLOCK)"); + + memset(&sev, 0, sizeof(sev)); + sev.sigev_notify = SIGEV_SIGNAL; + sev.sigev_signo = SIGUSR1; + sev.sigev_value.sival_ptr = &tsig; + if (timer_create(which, &sev, &timerid)) + fatal_error(name, "timer_create()"); + + /* Start the timer to expire in 100ms and 100ms intervals */ + its.it_value.tv_sec = 0; + its.it_value.tv_nsec = 100000000; + its.it_interval.tv_sec = 0; + its.it_interval.tv_nsec = 100000000; + if (timer_settime(timerid, 0, &its, NULL)) + fatal_error(name, "timer_settime()"); + + if (clock_gettime(which, &start)) + fatal_error(name, "clock_gettime()"); + + do { + if (clock_gettime(which, &now)) + fatal_error(name, "clock_gettime()"); + } while (calcdiff_ns(now, start) < NSECS_PER_SEC); + + /* Unblock it, which should deliver a signal */ + if (sigprocmask(SIG_UNBLOCK, &set, NULL)) + fatal_error(name, "sigprocmask(SIG_UNBLOCK)"); + + if (timer_delete(timerid)) + fatal_error(name, "timer_delete()"); + + ksft_test_result(tsig.signals == 1 && tsig.overruns == 9, + "check_overrun %s\n", name); } int main(int argc, char **argv) { ksft_print_header(); - ksft_set_plan(6); + ksft_set_plan(18); ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n"); ksft_print_msg("based timers if other threads run on the CPU...\n"); - if (check_itimer(ITIMER_VIRTUAL) < 0) - ksft_exit_fail(); - - if (check_itimer(ITIMER_PROF) < 0) - ksft_exit_fail(); - - if (check_itimer(ITIMER_REAL) < 0) - ksft_exit_fail(); - - if (check_timer_create(CLOCK_THREAD_CPUTIME_ID) < 0) - ksft_exit_fail(); + check_itimer(ITIMER_VIRTUAL, "ITIMER_VIRTUAL"); + check_itimer(ITIMER_PROF, "ITIMER_PROF"); + check_itimer(ITIMER_REAL, "ITIMER_REAL"); + check_timer_create(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID"); /* * It's unfortunately hard to reliably test a timer expiration @@ -280,11 +618,21 @@ int main(int argc, char **argv) * to ensure true parallelism. So test only one thread until we * find a better solution. */ - if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0) - ksft_exit_fail(); - - if (check_timer_distribution() < 0) - ksft_exit_fail(); + check_timer_create(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); + check_timer_distribution(); + + check_sig_ign(0); + check_sig_ign(1); + check_rearm(); + check_delete(); + check_sigev_none(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); + check_sigev_none(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); + check_gettime(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); + check_gettime(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); + check_gettime(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID"); + check_overrun(CLOCK_MONOTONIC, "CLOCK_MONOTONIC"); + check_overrun(CLOCK_PROCESS_CPUTIME_ID, "CLOCK_PROCESS_CPUTIME_ID"); + check_overrun(CLOCK_THREAD_CPUTIME_ID, "CLOCK_THREAD_CPUTIME_ID"); ksft_finished(); } diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 5c8757a25998..d51249f14e2f 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -77,7 +77,7 @@ all_32: $(BINARIES_32) all_64: $(BINARIES_64) -EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64) +EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64) srso $(BINARIES_32): $(OUTPUT)/%_32: %.c helpers.h $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $< $(EXTRA_FILES) -lrt -ldl -lm diff --git a/tools/testing/selftests/x86/srso.c b/tools/testing/selftests/x86/srso.c new file mode 100644 index 000000000000..394ec8bdeb00 --- /dev/null +++ b/tools/testing/selftests/x86/srso.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/perf_event.h> +#include <cpuid.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include <unistd.h> + +int main(void) +{ + struct perf_event_attr ret_attr, mret_attr; + long long count_rets, count_rets_mispred; + int rrets_fd, mrrets_fd; + unsigned int cpuid1_eax, b, c, d; + + __cpuid(1, cpuid1_eax, b, c, d); + + if (cpuid1_eax < 0x00800f00 || + cpuid1_eax > 0x00afffff) { + fprintf(stderr, "This needs to run on a Zen[1-4] machine (CPUID(1).EAX: 0x%x). Exiting...\n", cpuid1_eax); + exit(EXIT_FAILURE); + } + + memset(&ret_attr, 0, sizeof(struct perf_event_attr)); + memset(&mret_attr, 0, sizeof(struct perf_event_attr)); + + ret_attr.type = mret_attr.type = PERF_TYPE_RAW; + ret_attr.size = mret_attr.size = sizeof(struct perf_event_attr); + ret_attr.config = 0xc8; + mret_attr.config = 0xc9; + ret_attr.disabled = mret_attr.disabled = 1; + ret_attr.exclude_user = mret_attr.exclude_user = 1; + ret_attr.exclude_hv = mret_attr.exclude_hv = 1; + + rrets_fd = syscall(SYS_perf_event_open, &ret_attr, 0, -1, -1, 0); + if (rrets_fd == -1) { + perror("opening retired RETs fd"); + exit(EXIT_FAILURE); + } + + mrrets_fd = syscall(SYS_perf_event_open, &mret_attr, 0, -1, -1, 0); + if (mrrets_fd == -1) { + perror("opening retired mispredicted RETs fd"); + exit(EXIT_FAILURE); + } + + ioctl(rrets_fd, PERF_EVENT_IOC_RESET, 0); + ioctl(mrrets_fd, PERF_EVENT_IOC_RESET, 0); + + ioctl(rrets_fd, PERF_EVENT_IOC_ENABLE, 0); + ioctl(mrrets_fd, PERF_EVENT_IOC_ENABLE, 0); + + printf("Sleeping for 10 seconds\n"); + sleep(10); + + ioctl(rrets_fd, PERF_EVENT_IOC_DISABLE, 0); + ioctl(mrrets_fd, PERF_EVENT_IOC_DISABLE, 0); + + read(rrets_fd, &count_rets, sizeof(long long)); + read(mrrets_fd, &count_rets_mispred, sizeof(long long)); + + printf("RETs: (%lld retired <-> %lld mispredicted)\n", + count_rets, count_rets_mispred); + printf("SRSO Safe-RET mitigation works correctly if both counts are almost equal.\n"); + + return 0; +} diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 554b290fefdc..a3d448a075e3 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -139,7 +139,7 @@ int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_po } /* Connect to <cid, port> and return the file descriptor. */ -static int vsock_connect(unsigned int cid, unsigned int port, int type) +int vsock_connect(unsigned int cid, unsigned int port, int type) { union { struct sockaddr sa; @@ -226,8 +226,8 @@ static int vsock_listen(unsigned int cid, unsigned int port, int type) /* Listen on <cid, port> and return the first incoming connection. The remote * address is stored to clientaddrp. clientaddrp may be NULL. */ -static int vsock_accept(unsigned int cid, unsigned int port, - struct sockaddr_vm *clientaddrp, int type) +int vsock_accept(unsigned int cid, unsigned int port, + struct sockaddr_vm *clientaddrp, int type) { union { struct sockaddr sa; diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index e95e62485959..fff22d4a14c0 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -39,6 +39,9 @@ struct test_case { void init_signals(void); unsigned int parse_cid(const char *str); unsigned int parse_port(const char *str); +int vsock_connect(unsigned int cid, unsigned int port, int type); +int vsock_accept(unsigned int cid, unsigned int port, + struct sockaddr_vm *clientaddrp, int type); int vsock_stream_connect(unsigned int cid, unsigned int port); int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type); diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index f851f8961247..8d38dbf8f41f 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -20,6 +20,8 @@ #include <sys/mman.h> #include <poll.h> #include <signal.h> +#include <sys/ioctl.h> +#include <linux/sockios.h> #include "vsock_test_zerocopy.h" #include "timeout.h" @@ -1238,6 +1240,79 @@ static void test_double_bind_connect_client(const struct test_opts *opts) } } +#define MSG_BUF_IOCTL_LEN 64 +static void test_unsent_bytes_server(const struct test_opts *opts, int type) +{ + unsigned char buf[MSG_BUF_IOCTL_LEN]; + int client_fd; + + client_fd = vsock_accept(VMADDR_CID_ANY, opts->peer_port, NULL, type); + if (client_fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + recv_buf(client_fd, buf, sizeof(buf), 0, sizeof(buf)); + control_writeln("RECEIVED"); + + close(client_fd); +} + +static void test_unsent_bytes_client(const struct test_opts *opts, int type) +{ + unsigned char buf[MSG_BUF_IOCTL_LEN]; + int ret, fd, sock_bytes_unsent; + + fd = vsock_connect(opts->peer_cid, opts->peer_port, type); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < sizeof(buf); i++) + buf[i] = rand() & 0xFF; + + send_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); + control_expectln("RECEIVED"); + + ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); + if (ret < 0) { + if (errno == EOPNOTSUPP) { + fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); + } else { + perror("ioctl"); + exit(EXIT_FAILURE); + } + } else if (ret == 0 && sock_bytes_unsent != 0) { + fprintf(stderr, + "Unexpected 'SIOCOUTQ' value, expected 0, got %i\n", + sock_bytes_unsent); + exit(EXIT_FAILURE); + } + + close(fd); +} + +static void test_stream_unsent_bytes_client(const struct test_opts *opts) +{ + test_unsent_bytes_client(opts, SOCK_STREAM); +} + +static void test_stream_unsent_bytes_server(const struct test_opts *opts) +{ + test_unsent_bytes_server(opts, SOCK_STREAM); +} + +static void test_seqpacket_unsent_bytes_client(const struct test_opts *opts) +{ + test_unsent_bytes_client(opts, SOCK_SEQPACKET); +} + +static void test_seqpacket_unsent_bytes_server(const struct test_opts *opts) +{ + test_unsent_bytes_server(opts, SOCK_SEQPACKET); +} + #define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128) /* This define is the same as in 'include/linux/virtio_vsock.h': * it is used to decide when to send credit update message during @@ -1523,6 +1598,16 @@ static struct test_case test_cases[] = { .run_client = test_stream_rcvlowat_def_cred_upd_client, .run_server = test_stream_cred_upd_on_low_rx_bytes, }, + { + .name = "SOCK_STREAM ioctl(SIOCOUTQ) 0 unsent bytes", + .run_client = test_stream_unsent_bytes_client, + .run_server = test_stream_unsent_bytes_server, + }, + { + .name = "SOCK_SEQPACKET ioctl(SIOCOUTQ) 0 unsent bytes", + .run_client = test_seqpacket_unsent_bytes_client, + .run_server = test_seqpacket_unsent_bytes_server, + }, {}, }; diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index f594a44df840..2f756628613d 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -651,8 +651,10 @@ struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) return NULL; tool->data = osnoise_alloc_top(nr_cpus); - if (!tool->data) - goto out_err; + if (!tool->data) { + osnoise_destroy_tool(tool); + return NULL; + } tool->params = params; @@ -660,11 +662,6 @@ struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) osnoise_top_handler, NULL); return tool; - -out_err: - osnoise_free_top(tool->data); - osnoise_destroy_tool(tool); - return NULL; } static int stop_tracing; |