diff options
Diffstat (limited to 'tools')
1068 files changed, 38748 insertions, 10654 deletions
diff --git a/tools/arch/arm64/include/asm/brk-imm.h b/tools/arch/arm64/include/asm/brk-imm.h new file mode 100644 index 000000000000..beb42c62b6ac --- /dev/null +++ b/tools/arch/arm64/include/asm/brk-imm.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 ARM Ltd. + */ + +#ifndef __ASM_BRK_IMM_H +#define __ASM_BRK_IMM_H + +/* + * #imm16 values used for BRK instruction generation + * 0x004: for installing kprobes + * 0x005: for installing uprobes + * 0x006: for kprobe software single-step + * 0x007: for kretprobe return + * Allowed values for kgdb are 0x400 - 0x7ff + * 0x100: for triggering a fault on purpose (reserved) + * 0x400: for dynamic BRK instruction + * 0x401: for compile time BRK instruction + * 0x800: kernel-mode BUG() and WARN() traps + * 0x9xx: tag-based KASAN trap (allowed values 0x900 - 0x9ff) + * 0x55xx: Undefined Behavior Sanitizer traps ('U' << 8) + * 0x8xxx: Control-Flow Integrity traps + */ +#define KPROBES_BRK_IMM 0x004 +#define UPROBES_BRK_IMM 0x005 +#define KPROBES_BRK_SS_IMM 0x006 +#define KRETPROBES_BRK_IMM 0x007 +#define FAULT_BRK_IMM 0x100 +#define KGDB_DYN_DBG_BRK_IMM 0x400 +#define KGDB_COMPILED_DBG_BRK_IMM 0x401 +#define BUG_BRK_IMM 0x800 +#define KASAN_BRK_IMM 0x900 +#define KASAN_BRK_MASK 0x0ff +#define UBSAN_BRK_IMM 0x5500 +#define UBSAN_BRK_MASK 0x00ff + +#define CFI_BRK_IMM_TARGET GENMASK(4, 0) +#define CFI_BRK_IMM_TYPE GENMASK(9, 5) +#define CFI_BRK_IMM_BASE 0x8000 +#define CFI_BRK_IMM_MASK (CFI_BRK_IMM_TARGET | CFI_BRK_IMM_TYPE) + +#endif diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 5fd7caea4419..488f8e751349 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -94,6 +94,7 @@ #define ARM_CPU_PART_NEOVERSE_V3 0xD84 #define ARM_CPU_PART_CORTEX_X925 0xD85 #define ARM_CPU_PART_CORTEX_A725 0xD87 +#define ARM_CPU_PART_NEOVERSE_N3 0xD8E #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -143,6 +144,7 @@ #define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039 #define AMPERE_CPU_PART_AMPERE1 0xAC3 +#define AMPERE_CPU_PART_AMPERE1A 0xAC4 #define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ @@ -175,6 +177,7 @@ #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) #define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) #define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) +#define MIDR_NEOVERSE_N3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N3) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) @@ -212,6 +215,7 @@ #define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX) #define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) +#define MIDR_AMPERE1A MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1A) #define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ diff --git a/tools/arch/arm64/include/asm/esr.h b/tools/arch/arm64/include/asm/esr.h new file mode 100644 index 000000000000..bd592ca81571 --- /dev/null +++ b/tools/arch/arm64/include/asm/esr.h @@ -0,0 +1,455 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#ifndef __ASM_ESR_H +#define __ASM_ESR_H + +#include <asm/sysreg.h> + +#define ESR_ELx_EC_UNKNOWN UL(0x00) +#define ESR_ELx_EC_WFx UL(0x01) +/* Unallocated EC: 0x02 */ +#define ESR_ELx_EC_CP15_32 UL(0x03) +#define ESR_ELx_EC_CP15_64 UL(0x04) +#define ESR_ELx_EC_CP14_MR UL(0x05) +#define ESR_ELx_EC_CP14_LS UL(0x06) +#define ESR_ELx_EC_FP_ASIMD UL(0x07) +#define ESR_ELx_EC_CP10_ID UL(0x08) /* EL2 only */ +#define ESR_ELx_EC_PAC UL(0x09) /* EL2 and above */ +/* Unallocated EC: 0x0A - 0x0B */ +#define ESR_ELx_EC_CP14_64 UL(0x0C) +#define ESR_ELx_EC_BTI UL(0x0D) +#define ESR_ELx_EC_ILL UL(0x0E) +/* Unallocated EC: 0x0F - 0x10 */ +#define ESR_ELx_EC_SVC32 UL(0x11) +#define ESR_ELx_EC_HVC32 UL(0x12) /* EL2 only */ +#define ESR_ELx_EC_SMC32 UL(0x13) /* EL2 and above */ +/* Unallocated EC: 0x14 */ +#define ESR_ELx_EC_SVC64 UL(0x15) +#define ESR_ELx_EC_HVC64 UL(0x16) /* EL2 and above */ +#define ESR_ELx_EC_SMC64 UL(0x17) /* EL2 and above */ +#define ESR_ELx_EC_SYS64 UL(0x18) +#define ESR_ELx_EC_SVE UL(0x19) +#define ESR_ELx_EC_ERET UL(0x1a) /* EL2 only */ +/* Unallocated EC: 0x1B */ +#define ESR_ELx_EC_FPAC UL(0x1C) /* EL1 and above */ +#define ESR_ELx_EC_SME UL(0x1D) +/* Unallocated EC: 0x1E */ +#define ESR_ELx_EC_IMP_DEF UL(0x1f) /* EL3 only */ +#define ESR_ELx_EC_IABT_LOW UL(0x20) +#define ESR_ELx_EC_IABT_CUR UL(0x21) +#define ESR_ELx_EC_PC_ALIGN UL(0x22) +/* Unallocated EC: 0x23 */ +#define ESR_ELx_EC_DABT_LOW UL(0x24) +#define ESR_ELx_EC_DABT_CUR UL(0x25) +#define ESR_ELx_EC_SP_ALIGN UL(0x26) +#define ESR_ELx_EC_MOPS UL(0x27) +#define ESR_ELx_EC_FP_EXC32 UL(0x28) +/* Unallocated EC: 0x29 - 0x2B */ +#define ESR_ELx_EC_FP_EXC64 UL(0x2C) +/* Unallocated EC: 0x2D - 0x2E */ +#define ESR_ELx_EC_SERROR UL(0x2F) +#define ESR_ELx_EC_BREAKPT_LOW UL(0x30) +#define ESR_ELx_EC_BREAKPT_CUR UL(0x31) +#define ESR_ELx_EC_SOFTSTP_LOW UL(0x32) +#define ESR_ELx_EC_SOFTSTP_CUR UL(0x33) +#define ESR_ELx_EC_WATCHPT_LOW UL(0x34) +#define ESR_ELx_EC_WATCHPT_CUR UL(0x35) +/* Unallocated EC: 0x36 - 0x37 */ +#define ESR_ELx_EC_BKPT32 UL(0x38) +/* Unallocated EC: 0x39 */ +#define ESR_ELx_EC_VECTOR32 UL(0x3A) /* EL2 only */ +/* Unallocated EC: 0x3B */ +#define ESR_ELx_EC_BRK64 UL(0x3C) +/* Unallocated EC: 0x3D - 0x3F */ +#define ESR_ELx_EC_MAX UL(0x3F) + +#define ESR_ELx_EC_SHIFT (26) +#define ESR_ELx_EC_WIDTH (6) +#define ESR_ELx_EC_MASK (UL(0x3F) << ESR_ELx_EC_SHIFT) +#define ESR_ELx_EC(esr) (((esr) & ESR_ELx_EC_MASK) >> ESR_ELx_EC_SHIFT) + +#define ESR_ELx_IL_SHIFT (25) +#define ESR_ELx_IL (UL(1) << ESR_ELx_IL_SHIFT) +#define ESR_ELx_ISS_MASK (GENMASK(24, 0)) +#define ESR_ELx_ISS(esr) ((esr) & ESR_ELx_ISS_MASK) +#define ESR_ELx_ISS2_SHIFT (32) +#define ESR_ELx_ISS2_MASK (GENMASK_ULL(55, 32)) +#define ESR_ELx_ISS2(esr) (((esr) & ESR_ELx_ISS2_MASK) >> ESR_ELx_ISS2_SHIFT) + +/* ISS field definitions shared by different classes */ +#define ESR_ELx_WNR_SHIFT (6) +#define ESR_ELx_WNR (UL(1) << ESR_ELx_WNR_SHIFT) + +/* Asynchronous Error Type */ +#define ESR_ELx_IDS_SHIFT (24) +#define ESR_ELx_IDS (UL(1) << ESR_ELx_IDS_SHIFT) +#define ESR_ELx_AET_SHIFT (10) +#define ESR_ELx_AET (UL(0x7) << ESR_ELx_AET_SHIFT) + +#define ESR_ELx_AET_UC (UL(0) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_UEU (UL(1) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_UEO (UL(2) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_UER (UL(3) << ESR_ELx_AET_SHIFT) +#define ESR_ELx_AET_CE (UL(6) << ESR_ELx_AET_SHIFT) + +/* Shared ISS field definitions for Data/Instruction aborts */ +#define ESR_ELx_SET_SHIFT (11) +#define ESR_ELx_SET_MASK (UL(3) << ESR_ELx_SET_SHIFT) +#define ESR_ELx_FnV_SHIFT (10) +#define ESR_ELx_FnV (UL(1) << ESR_ELx_FnV_SHIFT) +#define ESR_ELx_EA_SHIFT (9) +#define ESR_ELx_EA (UL(1) << ESR_ELx_EA_SHIFT) +#define ESR_ELx_S1PTW_SHIFT (7) +#define ESR_ELx_S1PTW (UL(1) << ESR_ELx_S1PTW_SHIFT) + +/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */ +#define ESR_ELx_FSC (0x3F) +#define ESR_ELx_FSC_TYPE (0x3C) +#define ESR_ELx_FSC_LEVEL (0x03) +#define ESR_ELx_FSC_EXTABT (0x10) +#define ESR_ELx_FSC_MTE (0x11) +#define ESR_ELx_FSC_SERROR (0x11) +#define ESR_ELx_FSC_ACCESS (0x08) +#define ESR_ELx_FSC_FAULT (0x04) +#define ESR_ELx_FSC_PERM (0x0C) +#define ESR_ELx_FSC_SEA_TTW(n) (0x14 + (n)) +#define ESR_ELx_FSC_SECC (0x18) +#define ESR_ELx_FSC_SECC_TTW(n) (0x1c + (n)) + +/* Status codes for individual page table levels */ +#define ESR_ELx_FSC_ACCESS_L(n) (ESR_ELx_FSC_ACCESS + (n)) +#define ESR_ELx_FSC_PERM_L(n) (ESR_ELx_FSC_PERM + (n)) + +#define ESR_ELx_FSC_FAULT_nL (0x2C) +#define ESR_ELx_FSC_FAULT_L(n) (((n) < 0 ? ESR_ELx_FSC_FAULT_nL : \ + ESR_ELx_FSC_FAULT) + (n)) + +/* ISS field definitions for Data Aborts */ +#define ESR_ELx_ISV_SHIFT (24) +#define ESR_ELx_ISV (UL(1) << ESR_ELx_ISV_SHIFT) +#define ESR_ELx_SAS_SHIFT (22) +#define ESR_ELx_SAS (UL(3) << ESR_ELx_SAS_SHIFT) +#define ESR_ELx_SSE_SHIFT (21) +#define ESR_ELx_SSE (UL(1) << ESR_ELx_SSE_SHIFT) +#define ESR_ELx_SRT_SHIFT (16) +#define ESR_ELx_SRT_MASK (UL(0x1F) << ESR_ELx_SRT_SHIFT) +#define ESR_ELx_SF_SHIFT (15) +#define ESR_ELx_SF (UL(1) << ESR_ELx_SF_SHIFT) +#define ESR_ELx_AR_SHIFT (14) +#define ESR_ELx_AR (UL(1) << ESR_ELx_AR_SHIFT) +#define ESR_ELx_CM_SHIFT (8) +#define ESR_ELx_CM (UL(1) << ESR_ELx_CM_SHIFT) + +/* ISS2 field definitions for Data Aborts */ +#define ESR_ELx_TnD_SHIFT (10) +#define ESR_ELx_TnD (UL(1) << ESR_ELx_TnD_SHIFT) +#define ESR_ELx_TagAccess_SHIFT (9) +#define ESR_ELx_TagAccess (UL(1) << ESR_ELx_TagAccess_SHIFT) +#define ESR_ELx_GCS_SHIFT (8) +#define ESR_ELx_GCS (UL(1) << ESR_ELx_GCS_SHIFT) +#define ESR_ELx_Overlay_SHIFT (6) +#define ESR_ELx_Overlay (UL(1) << ESR_ELx_Overlay_SHIFT) +#define ESR_ELx_DirtyBit_SHIFT (5) +#define ESR_ELx_DirtyBit (UL(1) << ESR_ELx_DirtyBit_SHIFT) +#define ESR_ELx_Xs_SHIFT (0) +#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0)) + +/* ISS field definitions for exceptions taken in to Hyp */ +#define ESR_ELx_FSC_ADDRSZ (0x00) +#define ESR_ELx_FSC_ADDRSZ_L(n) (ESR_ELx_FSC_ADDRSZ + (n)) +#define ESR_ELx_CV (UL(1) << 24) +#define ESR_ELx_COND_SHIFT (20) +#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT) +#define ESR_ELx_WFx_ISS_RN (UL(0x1F) << 5) +#define ESR_ELx_WFx_ISS_RV (UL(1) << 2) +#define ESR_ELx_WFx_ISS_TI (UL(3) << 0) +#define ESR_ELx_WFx_ISS_WFxT (UL(2) << 0) +#define ESR_ELx_WFx_ISS_WFI (UL(0) << 0) +#define ESR_ELx_WFx_ISS_WFE (UL(1) << 0) +#define ESR_ELx_xVC_IMM_MASK ((UL(1) << 16) - 1) + +#define DISR_EL1_IDS (UL(1) << 24) +/* + * DISR_EL1 and ESR_ELx share the bottom 13 bits, but the RES0 bits may mean + * different things in the future... + */ +#define DISR_EL1_ESR_MASK (ESR_ELx_AET | ESR_ELx_EA | ESR_ELx_FSC) + +/* ESR value templates for specific events */ +#define ESR_ELx_WFx_MASK (ESR_ELx_EC_MASK | \ + (ESR_ELx_WFx_ISS_TI & ~ESR_ELx_WFx_ISS_WFxT)) +#define ESR_ELx_WFx_WFI_VAL ((ESR_ELx_EC_WFx << ESR_ELx_EC_SHIFT) | \ + ESR_ELx_WFx_ISS_WFI) + +/* BRK instruction trap from AArch64 state */ +#define ESR_ELx_BRK64_ISS_COMMENT_MASK 0xffff + +/* ISS field definitions for System instruction traps */ +#define ESR_ELx_SYS64_ISS_RES0_SHIFT 22 +#define ESR_ELx_SYS64_ISS_RES0_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_RES0_SHIFT) +#define ESR_ELx_SYS64_ISS_DIR_MASK 0x1 +#define ESR_ELx_SYS64_ISS_DIR_READ 0x1 +#define ESR_ELx_SYS64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_SYS64_ISS_RT_SHIFT 5 +#define ESR_ELx_SYS64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_SYS64_ISS_RT_SHIFT) +#define ESR_ELx_SYS64_ISS_CRM_SHIFT 1 +#define ESR_ELx_SYS64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRM_SHIFT) +#define ESR_ELx_SYS64_ISS_CRN_SHIFT 10 +#define ESR_ELx_SYS64_ISS_CRN_MASK (UL(0xf) << ESR_ELx_SYS64_ISS_CRN_SHIFT) +#define ESR_ELx_SYS64_ISS_OP1_SHIFT 14 +#define ESR_ELx_SYS64_ISS_OP1_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP1_SHIFT) +#define ESR_ELx_SYS64_ISS_OP2_SHIFT 17 +#define ESR_ELx_SYS64_ISS_OP2_MASK (UL(0x7) << ESR_ELx_SYS64_ISS_OP2_SHIFT) +#define ESR_ELx_SYS64_ISS_OP0_SHIFT 20 +#define ESR_ELx_SYS64_ISS_OP0_MASK (UL(0x3) << ESR_ELx_SYS64_ISS_OP0_SHIFT) +#define ESR_ELx_SYS64_ISS_SYS_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_OP2_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_CRM_MASK) +#define ESR_ELx_SYS64_ISS_SYS_VAL(op0, op1, op2, crn, crm) \ + (((op0) << ESR_ELx_SYS64_ISS_OP0_SHIFT) | \ + ((op1) << ESR_ELx_SYS64_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_SYS64_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_SYS64_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_SYS64_ISS_CRM_SHIFT)) + +#define ESR_ELx_SYS64_ISS_SYS_OP_MASK (ESR_ELx_SYS64_ISS_SYS_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_RT(esr) \ + (((esr) & ESR_ELx_SYS64_ISS_RT_MASK) >> ESR_ELx_SYS64_ISS_RT_SHIFT) +/* + * User space cache operations have the following sysreg encoding + * in System instructions. + * op0=1, op1=3, op2=1, crn=7, crm={ 5, 10, 11, 12, 13, 14 }, WRITE (L=0) + */ +#define ESR_ELx_SYS64_ISS_CRM_DC_CIVAC 14 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVADP 13 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAP 12 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAU 11 +#define ESR_ELx_SYS64_ISS_CRM_DC_CVAC 10 +#define ESR_ELx_SYS64_ISS_CRM_IC_IVAU 5 + +#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_OP2_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_EL0_CACHE_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(1, 3, 1, 7, 0) | \ + ESR_ELx_SYS64_ISS_DIR_WRITE) +/* + * User space MRS operations which are supported for emulation + * have the following sysreg encoding in System instructions. + * op0 = 3, op1= 0, crn = 0, {crm = 0, 4-7}, READ (L = 1) + */ +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_MASK (ESR_ELx_SYS64_ISS_OP0_MASK | \ + ESR_ELx_SYS64_ISS_OP1_MASK | \ + ESR_ELx_SYS64_ISS_CRN_MASK | \ + ESR_ELx_SYS64_ISS_DIR_MASK) +#define ESR_ELx_SYS64_ISS_SYS_MRS_OP_VAL \ + (ESR_ELx_SYS64_ISS_SYS_VAL(3, 0, 0, 0, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define ESR_ELx_SYS64_ISS_SYS_CTR ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 1, 0, 0) +#define ESR_ELx_SYS64_ISS_SYS_CTR_READ (ESR_ELx_SYS64_ISS_SYS_CTR | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define ESR_ELx_SYS64_ISS_SYS_CNTVCT (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 2, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define ESR_ELx_SYS64_ISS_SYS_CNTVCTSS (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 6, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define ESR_ELx_SYS64_ISS_SYS_CNTFRQ (ESR_ELx_SYS64_ISS_SYS_VAL(3, 3, 0, 14, 0) | \ + ESR_ELx_SYS64_ISS_DIR_READ) + +#define esr_sys64_to_sysreg(e) \ + sys_reg((((e) & ESR_ELx_SYS64_ISS_OP0_MASK) >> \ + ESR_ELx_SYS64_ISS_OP0_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \ + ESR_ELx_SYS64_ISS_OP1_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \ + ESR_ELx_SYS64_ISS_CRN_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \ + ESR_ELx_SYS64_ISS_CRM_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \ + ESR_ELx_SYS64_ISS_OP2_SHIFT)) + +#define esr_cp15_to_sysreg(e) \ + sys_reg(3, \ + (((e) & ESR_ELx_SYS64_ISS_OP1_MASK) >> \ + ESR_ELx_SYS64_ISS_OP1_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRN_MASK) >> \ + ESR_ELx_SYS64_ISS_CRN_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_CRM_MASK) >> \ + ESR_ELx_SYS64_ISS_CRM_SHIFT), \ + (((e) & ESR_ELx_SYS64_ISS_OP2_MASK) >> \ + ESR_ELx_SYS64_ISS_OP2_SHIFT)) + +/* ISS field definitions for ERET/ERETAA/ERETAB trapping */ +#define ESR_ELx_ERET_ISS_ERET 0x2 +#define ESR_ELx_ERET_ISS_ERETA 0x1 + +/* + * ISS field definitions for floating-point exception traps + * (FP_EXC_32/FP_EXC_64). + * + * (The FPEXC_* constants are used instead for common bits.) + */ + +#define ESR_ELx_FP_EXC_TFV (UL(1) << 23) + +/* + * ISS field definitions for CP15 accesses + */ +#define ESR_ELx_CP15_32_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_32_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_32_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_32_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_32_ISS_RT_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_32_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRM_SHIFT) +#define ESR_ELx_CP15_32_ISS_CRN_SHIFT 10 +#define ESR_ELx_CP15_32_ISS_CRN_MASK (UL(0xf) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP1_SHIFT 14 +#define ESR_ELx_CP15_32_ISS_OP1_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_32_ISS_OP2_SHIFT 17 +#define ESR_ELx_CP15_32_ISS_OP2_MASK (UL(0x7) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) + +#define ESR_ELx_CP15_32_ISS_SYS_MASK (ESR_ELx_CP15_32_ISS_OP1_MASK | \ + ESR_ELx_CP15_32_ISS_OP2_MASK | \ + ESR_ELx_CP15_32_ISS_CRN_MASK | \ + ESR_ELx_CP15_32_ISS_CRM_MASK | \ + ESR_ELx_CP15_32_ISS_DIR_MASK) +#define ESR_ELx_CP15_32_ISS_SYS_VAL(op1, op2, crn, crm) \ + (((op1) << ESR_ELx_CP15_32_ISS_OP1_SHIFT) | \ + ((op2) << ESR_ELx_CP15_32_ISS_OP2_SHIFT) | \ + ((crn) << ESR_ELx_CP15_32_ISS_CRN_SHIFT) | \ + ((crm) << ESR_ELx_CP15_32_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_DIR_MASK 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_READ 0x1 +#define ESR_ELx_CP15_64_ISS_DIR_WRITE 0x0 + +#define ESR_ELx_CP15_64_ISS_RT_SHIFT 5 +#define ESR_ELx_CP15_64_ISS_RT_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT_SHIFT) + +#define ESR_ELx_CP15_64_ISS_RT2_SHIFT 10 +#define ESR_ELx_CP15_64_ISS_RT2_MASK (UL(0x1f) << ESR_ELx_CP15_64_ISS_RT2_SHIFT) + +#define ESR_ELx_CP15_64_ISS_OP1_SHIFT 16 +#define ESR_ELx_CP15_64_ISS_OP1_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) +#define ESR_ELx_CP15_64_ISS_CRM_SHIFT 1 +#define ESR_ELx_CP15_64_ISS_CRM_MASK (UL(0xf) << ESR_ELx_CP15_64_ISS_CRM_SHIFT) + +#define ESR_ELx_CP15_64_ISS_SYS_VAL(op1, crm) \ + (((op1) << ESR_ELx_CP15_64_ISS_OP1_SHIFT) | \ + ((crm) << ESR_ELx_CP15_64_ISS_CRM_SHIFT)) + +#define ESR_ELx_CP15_64_ISS_SYS_MASK (ESR_ELx_CP15_64_ISS_OP1_MASK | \ + ESR_ELx_CP15_64_ISS_CRM_MASK | \ + ESR_ELx_CP15_64_ISS_DIR_MASK) + +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCT (ESR_ELx_CP15_64_ISS_SYS_VAL(1, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + +#define ESR_ELx_CP15_64_ISS_SYS_CNTVCTSS (ESR_ELx_CP15_64_ISS_SYS_VAL(9, 14) | \ + ESR_ELx_CP15_64_ISS_DIR_READ) + +#define ESR_ELx_CP15_32_ISS_SYS_CNTFRQ (ESR_ELx_CP15_32_ISS_SYS_VAL(0, 0, 14, 0) |\ + ESR_ELx_CP15_32_ISS_DIR_READ) + +/* + * ISS values for SME traps + */ + +#define ESR_ELx_SME_ISS_SME_DISABLED 0 +#define ESR_ELx_SME_ISS_ILL 1 +#define ESR_ELx_SME_ISS_SM_DISABLED 2 +#define ESR_ELx_SME_ISS_ZA_DISABLED 3 +#define ESR_ELx_SME_ISS_ZT_DISABLED 4 + +/* ISS field definitions for MOPS exceptions */ +#define ESR_ELx_MOPS_ISS_MEM_INST (UL(1) << 24) +#define ESR_ELx_MOPS_ISS_FROM_EPILOGUE (UL(1) << 18) +#define ESR_ELx_MOPS_ISS_WRONG_OPTION (UL(1) << 17) +#define ESR_ELx_MOPS_ISS_OPTION_A (UL(1) << 16) +#define ESR_ELx_MOPS_ISS_DESTREG(esr) (((esr) & (UL(0x1f) << 10)) >> 10) +#define ESR_ELx_MOPS_ISS_SRCREG(esr) (((esr) & (UL(0x1f) << 5)) >> 5) +#define ESR_ELx_MOPS_ISS_SIZEREG(esr) (((esr) & (UL(0x1f) << 0)) >> 0) + +#ifndef __ASSEMBLY__ +#include <asm/types.h> + +static inline unsigned long esr_brk_comment(unsigned long esr) +{ + return esr & ESR_ELx_BRK64_ISS_COMMENT_MASK; +} + +static inline bool esr_is_data_abort(unsigned long esr) +{ + const unsigned long ec = ESR_ELx_EC(esr); + + return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR; +} + +static inline bool esr_is_cfi_brk(unsigned long esr) +{ + return ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 && + (esr_brk_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE; +} + +static inline bool esr_fsc_is_translation_fault(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_FAULT_L(3)) || + (esr == ESR_ELx_FSC_FAULT_L(2)) || + (esr == ESR_ELx_FSC_FAULT_L(1)) || + (esr == ESR_ELx_FSC_FAULT_L(0)) || + (esr == ESR_ELx_FSC_FAULT_L(-1)); +} + +static inline bool esr_fsc_is_permission_fault(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_PERM_L(3)) || + (esr == ESR_ELx_FSC_PERM_L(2)) || + (esr == ESR_ELx_FSC_PERM_L(1)) || + (esr == ESR_ELx_FSC_PERM_L(0)); +} + +static inline bool esr_fsc_is_access_flag_fault(unsigned long esr) +{ + esr = esr & ESR_ELx_FSC; + + return (esr == ESR_ELx_FSC_ACCESS_L(3)) || + (esr == ESR_ELx_FSC_ACCESS_L(2)) || + (esr == ESR_ELx_FSC_ACCESS_L(1)) || + (esr == ESR_ELx_FSC_ACCESS_L(0)); +} + +/* Indicate whether ESR.EC==0x1A is for an ERETAx instruction */ +static inline bool esr_iss_is_eretax(unsigned long esr) +{ + return esr & ESR_ELx_ERET_ISS_ERET; +} + +/* Indicate which key is used for ERETAx (false: A-Key, true: B-Key) */ +static inline bool esr_iss_is_eretab(unsigned long esr) +{ + return esr & ESR_ELx_ERET_ISS_ERETA; +} + +const char *esr_get_class_string(unsigned long esr); +#endif /* __ASSEMBLY */ + +#endif /* __ASM_ESR_H */ diff --git a/tools/arch/arm64/vdso b/tools/arch/arm64/vdso deleted file mode 120000 index 233c7a26f6e5..000000000000 --- a/tools/arch/arm64/vdso +++ /dev/null @@ -1 +0,0 @@ -../../../arch/arm64/kernel/vdso
\ No newline at end of file diff --git a/tools/arch/loongarch/vdso b/tools/arch/loongarch/vdso deleted file mode 120000 index ebda43a82db7..000000000000 --- a/tools/arch/loongarch/vdso +++ /dev/null @@ -1 +0,0 @@ -../../../arch/loongarch/vdso
\ No newline at end of file diff --git a/tools/arch/powerpc/vdso b/tools/arch/powerpc/vdso deleted file mode 120000 index 4e676d1d1cb4..000000000000 --- a/tools/arch/powerpc/vdso +++ /dev/null @@ -1 +0,0 @@ -../../../arch/powerpc/kernel/vdso
\ No newline at end of file diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h index 05eaf6db3ad4..60345dd2cba2 100644 --- a/tools/arch/s390/include/uapi/asm/kvm.h +++ b/tools/arch/s390/include/uapi/asm/kvm.h @@ -469,7 +469,8 @@ struct kvm_s390_vm_cpu_subfunc { __u8 kdsa[16]; /* with MSA9 */ __u8 sortl[32]; /* with STFLE.150 */ __u8 dfltcc[32]; /* with STFLE.151 */ - __u8 reserved[1728]; + __u8 pfcr[16]; /* with STFLE.201 */ + __u8 reserved[1712]; }; #define KVM_S390_VM_CPU_PROCESSOR_UV_FEAT_GUEST 6 diff --git a/tools/arch/s390/vdso b/tools/arch/s390/vdso deleted file mode 120000 index 6cf4c1cebdcd..000000000000 --- a/tools/arch/s390/vdso +++ /dev/null @@ -1 +0,0 @@ -../../../arch/s390/kernel/vdso64
\ No newline at end of file diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index dd4682857c12..23698d0f4bb4 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -472,7 +472,7 @@ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ -#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ +#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ /* * BUG word(s) diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 82c6a4d350e0..3ae84c3b8e6d 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -36,6 +36,20 @@ #define EFER_FFXSR (1<<_EFER_FFXSR) #define EFER_AUTOIBRS (1<<_EFER_AUTOIBRS) +/* + * Architectural memory types that are common to MTRRs, PAT, VMX MSRs, etc. + * Most MSRs support/allow only a subset of memory types, but the values + * themselves are common across all relevant MSRs. + */ +#define X86_MEMTYPE_UC 0ull /* Uncacheable, a.k.a. Strong Uncacheable */ +#define X86_MEMTYPE_WC 1ull /* Write Combining */ +/* RESERVED 2 */ +/* RESERVED 3 */ +#define X86_MEMTYPE_WT 4ull /* Write Through */ +#define X86_MEMTYPE_WP 5ull /* Write Protected */ +#define X86_MEMTYPE_WB 6ull /* Write Back */ +#define X86_MEMTYPE_UC_MINUS 7ull /* Weak Uncacheabled (PAT only) */ + /* FRED MSRs */ #define MSR_IA32_FRED_RSP0 0x1cc /* Level 0 stack pointer */ #define MSR_IA32_FRED_RSP1 0x1cd /* Level 1 stack pointer */ @@ -247,6 +261,8 @@ #define MSR_INTEGRITY_CAPS_ARRAY_BIST BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT) #define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4 #define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT) +#define MSR_INTEGRITY_CAPS_SBAF_BIT 8 +#define MSR_INTEGRITY_CAPS_SBAF BIT(MSR_INTEGRITY_CAPS_SBAF_BIT) #define MSR_INTEGRITY_CAPS_SAF_GEN_MASK GENMASK_ULL(10, 9) #define MSR_LBR_NHM_FROM 0x00000680 @@ -363,6 +379,12 @@ #define MSR_IA32_CR_PAT 0x00000277 +#define PAT_VALUE(p0, p1, p2, p3, p4, p5, p6, p7) \ + ((X86_MEMTYPE_ ## p0) | (X86_MEMTYPE_ ## p1 << 8) | \ + (X86_MEMTYPE_ ## p2 << 16) | (X86_MEMTYPE_ ## p3 << 24) | \ + (X86_MEMTYPE_ ## p4 << 32) | (X86_MEMTYPE_ ## p5 << 40) | \ + (X86_MEMTYPE_ ## p6 << 48) | (X86_MEMTYPE_ ## p7 << 56)) + #define MSR_IA32_DEBUGCTLMSR 0x000001d9 #define MSR_IA32_LASTBRANCHFROMIP 0x000001db #define MSR_IA32_LASTBRANCHTOIP 0x000001dc @@ -1157,15 +1179,6 @@ #define MSR_IA32_VMX_VMFUNC 0x00000491 #define MSR_IA32_VMX_PROCBASED_CTLS3 0x00000492 -/* VMX_BASIC bits and bitmasks */ -#define VMX_BASIC_VMCS_SIZE_SHIFT 32 -#define VMX_BASIC_TRUE_CTLS (1ULL << 55) -#define VMX_BASIC_64 0x0001000000000000LLU -#define VMX_BASIC_MEM_TYPE_SHIFT 50 -#define VMX_BASIC_MEM_TYPE_MASK 0x003c000000000000LLU -#define VMX_BASIC_MEM_TYPE_WB 6LLU -#define VMX_BASIC_INOUT 0x0040000000000000LLU - /* Resctrl MSRs: */ /* - Intel: */ #define MSR_IA32_L3_QOS_CFG 0xc81 @@ -1183,11 +1196,6 @@ #define MSR_IA32_SMBA_BW_BASE 0xc0000280 #define MSR_IA32_EVT_CFG_BASE 0xc0000400 -/* MSR_IA32_VMX_MISC bits */ -#define MSR_IA32_VMX_MISC_INTEL_PT (1ULL << 14) -#define MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS (1ULL << 29) -#define MSR_IA32_VMX_MISC_PREEMPTION_TIMER_SCALE 0x1F - /* AMD-V MSRs */ #define MSR_VM_CR 0xc0010114 #define MSR_VM_IGNNE 0xc0010115 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index bf57a824f722..a8debbf2f702 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -439,6 +439,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) #define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5) #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) +#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 diff --git a/tools/arch/x86/include/uapi/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h index 9de35df1afc3..63182a023e9d 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_32.h +++ b/tools/arch/x86/include/uapi/asm/unistd_32.h @@ -11,6 +11,9 @@ #ifndef __NR_getpgid #define __NR_getpgid 132 #endif +#ifndef __NR_capget +#define __NR_capget 184 +#endif #ifndef __NR_gettid #define __NR_gettid 224 #endif diff --git a/tools/arch/x86/include/uapi/asm/unistd_64.h b/tools/arch/x86/include/uapi/asm/unistd_64.h index d0f2043d7132..77311e8d1b5d 100644 --- a/tools/arch/x86/include/uapi/asm/unistd_64.h +++ b/tools/arch/x86/include/uapi/asm/unistd_64.h @@ -11,6 +11,9 @@ #ifndef __NR_getpgid #define __NR_getpgid 121 #endif +#ifndef __NR_capget +#define __NR_capget 125 +#endif #ifndef __NR_gettid #define __NR_gettid 186 #endif diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index a43b37346a22..ab5cdc3337da 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -13,7 +13,7 @@ #endif #include "../include/asm/inat.h" /* __ignore_sync_check__ */ #include "../include/asm/insn.h" /* __ignore_sync_check__ */ -#include "../include/asm-generic/unaligned.h" /* __ignore_sync_check__ */ +#include "../include/linux/unaligned.h" /* __ignore_sync_check__ */ #include <linux/errno.h> #include <linux/kconfig.h> diff --git a/tools/arch/x86/vdso b/tools/arch/x86/vdso deleted file mode 120000 index 7eb962fd3454..000000000000 --- a/tools/arch/x86/vdso +++ /dev/null @@ -1 +0,0 @@ -../../../arch/x86/entry/vdso/
\ No newline at end of file diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c index a90a5d110f92..1baee9e2aba9 100644 --- a/tools/bpf/bpf_jit_disasm.c +++ b/tools/bpf/bpf_jit_disasm.c @@ -210,7 +210,7 @@ static uint8_t *get_last_jit_image(char *haystack, size_t hlen, return NULL; } if (proglen > 1000000) { - printf("proglen of %d too big, stopping\n", proglen); + printf("proglen of %u too big, stopping\n", proglen); return NULL; } diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index ba927379eb20..a4263dfb5e03 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -147,7 +147,11 @@ ifeq ($(feature-llvm),1) # If LLVM is available, use it for JIT disassembly CFLAGS += -DHAVE_LLVM_SUPPORT LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets - CFLAGS += $(shell $(LLVM_CONFIG) --cflags) + # llvm-config always adds -D_GNU_SOURCE, however, it may already be in CFLAGS + # (e.g. when bpftool build is called from selftests build as selftests + # Makefile includes lib.mk which sets -D_GNU_SOURCE) which would cause + # compilation error due to redefinition. Let's filter it out here. + CFLAGS += $(filter-out -D_GNU_SOURCE,$(shell $(LLVM_CONFIG) --cflags)) LIBS += $(shell $(LLVM_CONFIG) --libs $(LLVM_CONFIG_LIB_COMPONENTS)) ifeq ($(shell $(LLVM_CONFIG) --shared-mode),static) LIBS += $(shell $(LLVM_CONFIG) --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)) diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 7d2af1ff3c8d..d005e4fd6128 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -1,11 +1,15 @@ // SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) /* Copyright (C) 2019 Facebook */ +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif #include <errno.h> #include <fcntl.h> #include <linux/err.h> #include <stdbool.h> #include <stdio.h> +#include <stdlib.h> #include <string.h> #include <unistd.h> #include <linux/btf.h> @@ -21,6 +25,7 @@ #include "main.h" #define KFUNC_DECL_TAG "bpf_kfunc" +#define FASTCALL_DECL_TAG "bpf_fastcall" static const char * const btf_kind_str[NR_BTF_KINDS] = { [BTF_KIND_UNKN] = "UNKNOWN", @@ -284,7 +289,7 @@ static int dump_btf_type(const struct btf *btf, __u32 id, } else { if (btf_kflag(t)) printf("\n\t'%s' val=%lldLL", name, - (unsigned long long)val); + (long long)val); else printf("\n\t'%s' val=%lluULL", name, (unsigned long long)val); @@ -464,19 +469,59 @@ static int dump_btf_raw(const struct btf *btf, return 0; } +struct ptr_array { + __u32 cnt; + __u32 cap; + const void **elems; +}; + +static int ptr_array_push(const void *ptr, struct ptr_array *arr) +{ + __u32 new_cap; + void *tmp; + + if (arr->cnt == arr->cap) { + new_cap = (arr->cap ?: 16) * 2; + tmp = realloc(arr->elems, sizeof(*arr->elems) * new_cap); + if (!tmp) + return -ENOMEM; + arr->elems = tmp; + arr->cap = new_cap; + } + arr->elems[arr->cnt++] = ptr; + return 0; +} + +static void ptr_array_free(struct ptr_array *arr) +{ + free(arr->elems); +} + +static int cmp_kfuncs(const void *pa, const void *pb, void *ctx) +{ + struct btf *btf = ctx; + const struct btf_type *a = *(void **)pa; + const struct btf_type *b = *(void **)pb; + + return strcmp(btf__str_by_offset(btf, a->name_off), + btf__str_by_offset(btf, b->name_off)); +} + static int dump_btf_kfuncs(struct btf_dump *d, const struct btf *btf) { LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts); - int cnt = btf__type_cnt(btf); - int i; + __u32 cnt = btf__type_cnt(btf), i, j; + struct ptr_array fastcalls = {}; + struct ptr_array kfuncs = {}; + int err = 0; printf("\n/* BPF kfuncs */\n"); printf("#ifndef BPF_NO_KFUNC_PROTOTYPES\n"); for (i = 1; i < cnt; i++) { const struct btf_type *t = btf__type_by_id(btf, i); + const struct btf_type *ft; const char *name; - int err; if (!btf_is_decl_tag(t)) continue; @@ -484,27 +529,53 @@ static int dump_btf_kfuncs(struct btf_dump *d, const struct btf *btf) if (btf_decl_tag(t)->component_idx != -1) continue; - name = btf__name_by_offset(btf, t->name_off); - if (strncmp(name, KFUNC_DECL_TAG, sizeof(KFUNC_DECL_TAG))) + ft = btf__type_by_id(btf, t->type); + if (!btf_is_func(ft)) continue; - t = btf__type_by_id(btf, t->type); - if (!btf_is_func(t)) - continue; + name = btf__name_by_offset(btf, t->name_off); + if (strncmp(name, KFUNC_DECL_TAG, sizeof(KFUNC_DECL_TAG)) == 0) { + err = ptr_array_push(ft, &kfuncs); + if (err) + goto out; + } + + if (strncmp(name, FASTCALL_DECL_TAG, sizeof(FASTCALL_DECL_TAG)) == 0) { + err = ptr_array_push(ft, &fastcalls); + if (err) + goto out; + } + } + + /* Sort kfuncs by name for improved vmlinux.h stability */ + qsort_r(kfuncs.elems, kfuncs.cnt, sizeof(*kfuncs.elems), cmp_kfuncs, (void *)btf); + for (i = 0; i < kfuncs.cnt; i++) { + const struct btf_type *t = kfuncs.elems[i]; printf("extern "); + /* Assume small amount of fastcall kfuncs */ + for (j = 0; j < fastcalls.cnt; j++) { + if (fastcalls.elems[j] == t) { + printf("__bpf_fastcall "); + break; + } + } + opts.field_name = btf__name_by_offset(btf, t->name_off); err = btf_dump__emit_type_decl(d, t->type, &opts); if (err) - return err; + goto out; printf(" __weak __ksym;\n"); } printf("#endif\n\n"); - return 0; +out: + ptr_array_free(&fastcalls); + ptr_array_free(&kfuncs); + return err; } static void __printf(2, 0) btf_dump_printf(void *ctx, @@ -718,6 +789,13 @@ static int dump_btf_c(const struct btf *btf, printf("#ifndef __weak\n"); printf("#define __weak __attribute__((weak))\n"); printf("#endif\n\n"); + printf("#ifndef __bpf_fastcall\n"); + printf("#if __has_attribute(bpf_fastcall)\n"); + printf("#define __bpf_fastcall __attribute__((bpf_fastcall))\n"); + printf("#else\n"); + printf("#define __bpf_fastcall\n"); + printf("#endif\n"); + printf("#endif\n\n"); if (root_type_cnt) { for (i = 0; i < root_type_cnt; i++) { diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c index 7b8d9ec89ebd..c032d2c6ab6d 100644 --- a/tools/bpf/bpftool/jit_disasm.c +++ b/tools/bpf/bpftool/jit_disasm.c @@ -80,7 +80,8 @@ symbol_lookup_callback(__maybe_unused void *disasm_info, static int init_context(disasm_ctx_t *ctx, const char *arch, __maybe_unused const char *disassembler_options, - __maybe_unused unsigned char *image, __maybe_unused ssize_t len) + __maybe_unused unsigned char *image, __maybe_unused ssize_t len, + __maybe_unused __u64 func_ksym) { char *triple; @@ -109,12 +110,13 @@ static void destroy_context(disasm_ctx_t *ctx) } static int -disassemble_insn(disasm_ctx_t *ctx, unsigned char *image, ssize_t len, int pc) +disassemble_insn(disasm_ctx_t *ctx, unsigned char *image, ssize_t len, int pc, + __u64 func_ksym) { char buf[256]; int count; - count = LLVMDisasmInstruction(*ctx, image + pc, len - pc, pc, + count = LLVMDisasmInstruction(*ctx, image + pc, len - pc, func_ksym + pc, buf, sizeof(buf)); if (json_output) printf_json(buf); @@ -136,8 +138,21 @@ int disasm_init(void) #ifdef HAVE_LIBBFD_SUPPORT #define DISASM_SPACER "\t" +struct disasm_info { + struct disassemble_info info; + __u64 func_ksym; +}; + +static void disasm_print_addr(bfd_vma addr, struct disassemble_info *info) +{ + struct disasm_info *dinfo = container_of(info, struct disasm_info, info); + + addr += dinfo->func_ksym; + generic_print_address(addr, info); +} + typedef struct { - struct disassemble_info *info; + struct disasm_info *info; disassembler_ftype disassemble; bfd *bfdf; } disasm_ctx_t; @@ -215,7 +230,7 @@ static int fprintf_json_styled(void *out, static int init_context(disasm_ctx_t *ctx, const char *arch, const char *disassembler_options, - unsigned char *image, ssize_t len) + unsigned char *image, ssize_t len, __u64 func_ksym) { struct disassemble_info *info; char tpath[PATH_MAX]; @@ -238,12 +253,13 @@ static int init_context(disasm_ctx_t *ctx, const char *arch, } bfdf = ctx->bfdf; - ctx->info = malloc(sizeof(struct disassemble_info)); + ctx->info = malloc(sizeof(struct disasm_info)); if (!ctx->info) { p_err("mem alloc failed"); goto err_close; } - info = ctx->info; + ctx->info->func_ksym = func_ksym; + info = &ctx->info->info; if (json_output) init_disassemble_info_compat(info, stdout, @@ -272,6 +288,7 @@ static int init_context(disasm_ctx_t *ctx, const char *arch, info->disassembler_options = disassembler_options; info->buffer = image; info->buffer_length = len; + info->print_address_func = disasm_print_addr; disassemble_init_for_target(info); @@ -304,9 +321,10 @@ static void destroy_context(disasm_ctx_t *ctx) static int disassemble_insn(disasm_ctx_t *ctx, __maybe_unused unsigned char *image, - __maybe_unused ssize_t len, int pc) + __maybe_unused ssize_t len, int pc, + __maybe_unused __u64 func_ksym) { - return ctx->disassemble(pc, ctx->info); + return ctx->disassemble(pc, &ctx->info->info); } int disasm_init(void) @@ -331,7 +349,7 @@ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, if (!len) return -1; - if (init_context(&ctx, arch, disassembler_options, image, len)) + if (init_context(&ctx, arch, disassembler_options, image, len, func_ksym)) return -1; if (json_output) @@ -360,7 +378,7 @@ int disasm_print_insn(unsigned char *image, ssize_t len, int opcodes, printf("%4x:" DISASM_SPACER, pc); } - count = disassemble_insn(&ctx, image, len, pc); + count = disassemble_insn(&ctx, image, len, pc, func_ksym); if (json_output) { /* Operand array, was started in fprintf_json. Before diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index 9b898571b49e..23f488cf1740 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -54,6 +54,7 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e) ref = &refs->refs[refs->ref_cnt]; ref->pid = e->pid; memcpy(ref->comm, e->comm, sizeof(ref->comm)); + ref->comm[sizeof(ref->comm) - 1] = '\0'; refs->ref_cnt++; return; @@ -77,6 +78,7 @@ static void add_ref(struct hashmap *map, struct pid_iter_entry *e) ref = &refs->refs[0]; ref->pid = e->pid; memcpy(ref->comm, e->comm, sizeof(ref->comm)); + ref->comm[sizeof(ref->comm) - 1] = '\0'; refs->ref_cnt = 1; refs->has_bpf_cookie = e->has_bpf_cookie; refs->bpf_cookie = e->bpf_cookie; diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c index d54aaa0619df..bd9f960bce3d 100644 --- a/tools/bpf/resolve_btfids/main.c +++ b/tools/bpf/resolve_btfids/main.c @@ -679,8 +679,8 @@ static int sets_patch(struct object *obj) next = rb_first(&obj->sets); while (next) { - struct btf_id_set8 *set8; - struct btf_id_set *set; + struct btf_id_set8 *set8 = NULL; + struct btf_id_set *set = NULL; unsigned long addr, off; struct btf_id *id; diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c index 9a5c1f008fe6..fced54a3adf6 100644 --- a/tools/bpf/runqslower/runqslower.bpf.c +++ b/tools/bpf/runqslower/runqslower.bpf.c @@ -70,7 +70,6 @@ int handle__sched_switch(u64 *ctx) struct task_struct *next = (struct task_struct *)ctx[2]; struct runq_event event = {}; u64 *tsp, delta_us; - long state; u32 pid; /* ivcsw: treat like an enqueue event and store timestamp */ diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index ffd117135094..bca47d136f05 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -30,9 +30,7 @@ endef # FEATURE_TESTS_BASIC := \ backtrace \ - dwarf \ - dwarf_getlocations \ - dwarf_getcfi \ + libdw \ eventfd \ fortify-source \ get_current_dir_name \ @@ -53,6 +51,7 @@ FEATURE_TESTS_BASIC := \ libslang-include-subdir \ libtraceevent \ libtracefs \ + libcpupower \ libcrypto \ libunwind \ pthread-attr-setaffinity-np \ @@ -60,7 +59,6 @@ FEATURE_TESTS_BASIC := \ reallocarray \ stackprotector-all \ timerfd \ - libdw-dwarf-unwind \ zlib \ lzma \ get_cpuid \ @@ -120,8 +118,7 @@ ifeq ($(FEATURE_TESTS),all) endif FEATURE_DISPLAY ?= \ - dwarf \ - dwarf_getlocations \ + libdw \ glibc \ libbfd \ libbfd-buildid \ @@ -133,7 +130,6 @@ FEATURE_DISPLAY ?= \ libpython \ libcrypto \ libunwind \ - libdw-dwarf-unwind \ libcapstone \ llvm-perf \ zlib \ @@ -233,7 +229,7 @@ endef # # generates feature value assignment for name, like: -# $(call feature_assign,dwarf) == feature-dwarf=1 +# $(call feature_assign,libdw) == feature-libdw=1 # feature_assign = feature-$(1)=$(feature-$(1)) diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 5938cf799dc6..043dfd00fce7 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -5,9 +5,7 @@ FILES= \ test-all.bin \ test-backtrace.bin \ test-bionic.bin \ - test-dwarf.bin \ - test-dwarf_getlocations.bin \ - test-dwarf_getcfi.bin \ + test-libdw.bin \ test-eventfd.bin \ test-fortify-source.bin \ test-get_current_dir_name.bin \ @@ -38,6 +36,7 @@ FILES= \ test-libslang.bin \ test-libslang-include-subdir.bin \ test-libtraceevent.bin \ + test-libcpupower.bin \ test-libtracefs.bin \ test-libcrypto.bin \ test-libunwind.bin \ @@ -52,7 +51,6 @@ FILES= \ test-pthread-barrier.bin \ test-stackprotector-all.bin \ test-timerfd.bin \ - test-libdw-dwarf-unwind.bin \ test-libbabeltrace.bin \ test-libcapstone.bin \ test-compile-32.bin \ @@ -168,29 +166,26 @@ $(OUTPUT)test-libopencsd.bin: $(BUILD) # -lopencsd_c_api -lopencsd provided by # $(FEATURE_CHECK_LDFLAGS-libopencsd) -DWARFLIBS := -ldw +DWLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) - DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd + DWLIBS += -lelf -lz -llzma -lbz2 -lzstd - LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw).0.0 LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) # Elfutils merged libebl.a into libdw.a starting from version 0.177, # Link libebl.a only if libdw is older than this version. ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) - DWARFLIBS += -lebl + DWLIBS += -lebl endif -endif - -$(OUTPUT)test-dwarf.bin: - $(BUILD) $(DWARFLIBS) -$(OUTPUT)test-dwarf_getlocations.bin: - $(BUILD) $(DWARFLIBS) + # Must put -ldl after -lebl for dependency + DWARFLIBS += -ldl +endif -$(OUTPUT)test-dwarf_getcfi.bin: - $(BUILD) $(DWARFLIBS) +$(OUTPUT)test-libdw.bin: + $(BUILD) $(DWLIBS) $(OUTPUT)test-libelf-getphdrnum.bin: $(BUILD) -lelf @@ -245,6 +240,9 @@ $(OUTPUT)test-libslang-include-subdir.bin: $(OUTPUT)test-libtraceevent.bin: $(BUILD) -ltraceevent +$(OUTPUT)test-libcpupower.bin: + $(BUILD) -lcpupower + $(OUTPUT)test-libtracefs.bin: $(BUILD) $(shell $(PKG_CONFIG) --cflags libtracefs 2>/dev/null) -ltracefs @@ -314,9 +312,6 @@ $(OUTPUT)test-backtrace.bin: $(OUTPUT)test-timerfd.bin: $(BUILD) -$(OUTPUT)test-libdw-dwarf-unwind.bin: - $(BUILD) # -ldw provided by $(FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind) - $(OUTPUT)test-libbabeltrace.bin: $(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace) diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 6f4bf386a3b5..59ef3d7fe6a4 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -38,12 +38,8 @@ # include "test-glibc.c" #undef main -#define main main_test_dwarf -# include "test-dwarf.c" -#undef main - -#define main main_test_dwarf_getlocations -# include "test-dwarf_getlocations.c" +#define main main_test_libdw +# include "test-libdw.c" #undef main #define main main_test_eventfd @@ -98,10 +94,6 @@ # include "test-stackprotector-all.c" #undef main -#define main main_test_libdw_dwarf_unwind -# include "test-libdw-dwarf-unwind.c" -#undef main - #define main main_test_zlib # include "test-zlib.c" #undef main @@ -187,8 +179,7 @@ int main(int argc, char *argv[]) main_test_get_current_dir_name(); main_test_gettid(); main_test_glibc(); - main_test_dwarf(); - main_test_dwarf_getlocations(); + main_test_libdw(); main_test_eventfd(); main_test_libelf_getphdrnum(); main_test_libelf_gelf_getnote(); @@ -202,7 +193,6 @@ int main(int argc, char *argv[]) main_test_numa_num_possible_cpus(); main_test_timerfd(); main_test_stackprotector_all(); - main_test_libdw_dwarf_unwind(); main_test_zlib(); main_test_pthread_attr_setaffinity_np(); main_test_pthread_barrier(); diff --git a/tools/build/feature/test-dwarf.c b/tools/build/feature/test-dwarf.c deleted file mode 100644 index 8d474bd7371b..000000000000 --- a/tools/build/feature/test-dwarf.c +++ /dev/null @@ -1,11 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <dwarf.h> -#include <elfutils/libdw.h> -#include <elfutils/version.h> - -int main(void) -{ - Dwarf *dbg = dwarf_begin(0, DWARF_C_READ); - - return (long)dbg; -} diff --git a/tools/build/feature/test-dwarf_getcfi.c b/tools/build/feature/test-dwarf_getcfi.c deleted file mode 100644 index 50e7d7cb7bdf..000000000000 --- a/tools/build/feature/test-dwarf_getcfi.c +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include <elfutils/libdw.h> - -int main(void) -{ - Dwarf *dwarf = NULL; - return dwarf_getcfi(dwarf) == NULL; -} diff --git a/tools/build/feature/test-dwarf_getlocations.c b/tools/build/feature/test-dwarf_getlocations.c deleted file mode 100644 index 78fb4a1fa68c..000000000000 --- a/tools/build/feature/test-dwarf_getlocations.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <stdlib.h> -#include <elfutils/libdw.h> - -int main(void) -{ - Dwarf_Addr base, start, end; - Dwarf_Attribute attr; - Dwarf_Op *op; - size_t nops; - ptrdiff_t offset = 0; - return (int)dwarf_getlocations(&attr, offset, &base, &start, &end, &op, &nops); -} diff --git a/tools/build/feature/test-libcpupower.c b/tools/build/feature/test-libcpupower.c new file mode 100644 index 000000000000..a346aa332a71 --- /dev/null +++ b/tools/build/feature/test-libcpupower.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <cpuidle.h> + +int main(void) +{ + int rv = cpuidle_state_count(0); + return rv; +} diff --git a/tools/build/feature/test-libdw-dwarf-unwind.c b/tools/build/feature/test-libdw-dwarf-unwind.c deleted file mode 100644 index ed03d9505609..000000000000 --- a/tools/build/feature/test-libdw-dwarf-unwind.c +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <elfutils/libdwfl.h> - -int main(void) -{ - /* - * This function is guarded via: __nonnull_attribute__ (1, 2). - * Passing '1' as arguments value. This code is never executed, - * only compiled. - */ - dwfl_thread_getframes((void *) 1, (void *) 1, NULL); - return 0; -} diff --git a/tools/build/feature/test-libdw.c b/tools/build/feature/test-libdw.c new file mode 100644 index 000000000000..2fb59479ab77 --- /dev/null +++ b/tools/build/feature/test-libdw.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdlib.h> +#include <dwarf.h> +#include <elfutils/libdw.h> +#include <elfutils/libdwfl.h> +#include <elfutils/version.h> + +int test_libdw(void) +{ + Dwarf *dbg = dwarf_begin(0, DWARF_C_READ); + + return (long)dbg; +} + +int test_libdw_unwind(void) +{ + /* + * This function is guarded via: __nonnull_attribute__ (1, 2). + * Passing '1' as arguments value. This code is never executed, + * only compiled. + */ + dwfl_thread_getframes((void *) 1, (void *) 1, NULL); + return 0; +} + +int test_libdw_getlocations(void) +{ + Dwarf_Addr base, start, end; + Dwarf_Attribute attr; + Dwarf_Op *op; + size_t nops; + ptrdiff_t offset = 0; + + return (int)dwarf_getlocations(&attr, offset, &base, &start, &end, &op, &nops); +} + +int test_libdw_getcfi(void) +{ + Dwarf *dwarf = NULL; + + return dwarf_getcfi(dwarf) == NULL; +} + +int test_elfutils(void) +{ + Dwarf_CFI *cfi = NULL; + + dwarf_cfi_end(cfi); + return 0; +} + +int main(void) +{ + return test_libdw() + test_libdw_unwind() + test_libdw_getlocations() + + test_libdw_getcfi() + test_elfutils(); +} diff --git a/tools/build/feature/test-libtraceevent.c b/tools/build/feature/test-libtraceevent.c index 416b11ffd4b4..804ad80dbbd9 100644 --- a/tools/build/feature/test-libtraceevent.c +++ b/tools/build/feature/test-libtraceevent.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include <traceevent/trace-seq.h> +#include <trace-seq.h> int main(void) { diff --git a/tools/firewire/decode-fcp.c b/tools/firewire/decode-fcp.c index b67ebc88434d..f115a3be8d1e 100644 --- a/tools/firewire/decode-fcp.c +++ b/tools/firewire/decode-fcp.c @@ -160,7 +160,7 @@ decode_avc(struct link_transaction *t) name = info->name; } - printf("av/c %s, subunit_type=%s, subunit_id=%d, opcode=%s", + printf("av/c %s, subunit_type=%s, subunit_id=%u, opcode=%s", ctype_names[frame->ctype], subunit_type_names[frame->subunit_type], frame->subunit_id, name); diff --git a/tools/firewire/nosy-dump.c b/tools/firewire/nosy-dump.c index 156e0356e814..9a906de3a9ef 100644 --- a/tools/firewire/nosy-dump.c +++ b/tools/firewire/nosy-dump.c @@ -771,7 +771,7 @@ print_packet(uint32_t *data, size_t length) if (pp->phy_config.set_root) printf(" set_root_id=%02x", pp->phy_config.root_id); if (pp->phy_config.set_gap_count) - printf(" set_gap_count=%d", pp->phy_config.gap_count); + printf(" set_gap_count=%u", pp->phy_config.gap_count); } break; @@ -781,13 +781,13 @@ print_packet(uint32_t *data, size_t length) case PHY_PACKET_SELF_ID: if (pp->self_id.extended) { - printf("extended self id: phy_id=%02x, seq=%d", + printf("extended self id: phy_id=%02x, seq=%u", pp->ext_self_id.phy_id, pp->ext_self_id.sequence); } else { static const char * const speed_names[] = { "S100", "S200", "S400", "BETA" }; - printf("self id: phy_id=%02x, link %s, gap_count=%d, speed=%s%s%s", + printf("self id: phy_id=%02x, link %s, gap_count=%u speed=%s%s%s", pp->self_id.phy_id, (pp->self_id.link_active ? "active" : "not active"), pp->self_id.gap_count, diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c index 5dee2b98ab60..b70813b0bf8e 100644 --- a/tools/gpio/gpio-event-mon.c +++ b/tools/gpio/gpio-event-mon.c @@ -69,14 +69,14 @@ int monitor_device(const char *device_name, } if (num_lines == 1) { - fprintf(stdout, "Monitoring line %d on %s\n", lines[0], device_name); + fprintf(stdout, "Monitoring line %u on %s\n", lines[0], device_name); fprintf(stdout, "Initial line value: %d\n", gpiotools_test_bit(values.bits, 0)); } else { - fprintf(stdout, "Monitoring lines %d", lines[0]); + fprintf(stdout, "Monitoring lines %u", lines[0]); for (i = 1; i < num_lines - 1; i++) - fprintf(stdout, ", %d", lines[i]); - fprintf(stdout, " and %d on %s\n", lines[i], device_name); + fprintf(stdout, ", %u", lines[i]); + fprintf(stdout, " and %u on %s\n", lines[i], device_name); fprintf(stdout, "Initial line values: %d", gpiotools_test_bit(values.bits, 0)); for (i = 1; i < num_lines - 1; i++) diff --git a/tools/gpio/gpio-sloppy-logic-analyzer.sh b/tools/gpio/gpio-sloppy-logic-analyzer.sh index ed21a110df5e..3ef2278e49f9 100755 --- a/tools/gpio/gpio-sloppy-logic-analyzer.sh +++ b/tools/gpio/gpio-sloppy-logic-analyzer.sh @@ -113,7 +113,7 @@ init_cpu() taskset -p "$newmask" "$p" || continue done 2>/dev/null >/dev/null - # Big hammer! Working with 'rcu_momentary_dyntick_idle()' for a more fine-grained solution + # Big hammer! Working with 'rcu_momentary_eqs()' for a more fine-grained solution # still printed warnings. Same for re-enabling the stall detector after sampling. echo 1 > /sys/module/rcupdate/parameters/rcu_cpu_stall_suppress diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 0eb24d21aac2..60044b608817 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -36,4 +36,19 @@ #define GENMASK_ULL(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l)) +#if !defined(__ASSEMBLY__) +/* + * Missing asm support + * + * __GENMASK_U128() depends on _BIT128() which would not work + * in the asm code, as it shifts an 'unsigned __init128' data + * type instead of direct representation of 128 bit constants + * such as long and unsigned long. The fundamental problem is + * that a 128 bit constant will get silently truncated by the + * gcc compiler. + */ +#define GENMASK_U128(h, l) \ + (GENMASK_INPUT_CHECK(h, l) + __GENMASK_U128(h, l)) +#endif + #endif /* __LINUX_BITS_H */ diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 62e7c901ac28..e20f98e14e81 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _TOOLS_LINUX_COMPILER_H_ -#error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead." +#error "Please do not include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead." #endif /* diff --git a/tools/include/asm-generic/unaligned.h b/tools/include/linux/unaligned.h index cdd2fd078027..395a4464fe73 100644 --- a/tools/include/asm-generic/unaligned.h +++ b/tools/include/linux/unaligned.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_GENERIC_UNALIGNED_H -#define __ASM_GENERIC_UNALIGNED_H +#ifndef __LINUX_UNALIGNED_H +#define __LINUX_UNALIGNED_H /* * This is the most generic implementation of unaligned accesses @@ -9,16 +9,7 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wpacked" #pragma GCC diagnostic ignored "-Wattributes" - -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x; \ -}) - -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ - __pptr->x = (val); \ -} while (0) +#include <vdso/unaligned.h> #define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) #define put_unaligned(val, ptr) __put_unaligned_t(typeof(*(ptr)), (val), (ptr)) @@ -154,4 +145,4 @@ static inline u64 get_unaligned_be48(const void *p) } #pragma GCC diagnostic pop -#endif /* __ASM_GENERIC_UNALIGNED_H */ +#endif /* __LINUX_UNALIGNED_H */ diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h index 2ec13d8b9a2d..f9ab83a219b8 100644 --- a/tools/include/nolibc/arch-s390.h +++ b/tools/include/nolibc/arch-s390.h @@ -10,6 +10,7 @@ #include "compiler.h" #include "crt.h" +#include "std.h" /* Syscalls for s390: * - registers are 64-bit diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h index 9bc6a706a332..fa1f547e7f13 100644 --- a/tools/include/nolibc/compiler.h +++ b/tools/include/nolibc/compiler.h @@ -32,4 +32,10 @@ # define __no_stack_protector __attribute__((__optimize__("-fno-stack-protector"))) #endif /* __nolibc_has_attribute(no_stack_protector) */ +#if __nolibc_has_attribute(fallthrough) +# define __nolibc_fallthrough do { } while (0); __attribute__((fallthrough)) +#else +# define __nolibc_fallthrough do { } while (0) +#endif /* __nolibc_has_attribute(fallthrough) */ + #endif /* _NOLIBC_COMPILER_H */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index c968dbbc4ef8..3892034198dd 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -15,6 +15,7 @@ #include "stdarg.h" #include "stdlib.h" #include "string.h" +#include "compiler.h" #ifndef EOF #define EOF (-1) @@ -264,7 +265,7 @@ int vfprintf(FILE *stream, const char *fmt, va_list args) case 'p': *(out++) = '0'; *(out++) = 'x'; - /* fall through */ + __nolibc_fallthrough; default: /* 'x' and 'p' above */ u64toh_r(v, out); break; diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 6ce1f1ceb432..1ea2c4c33b86 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -79,6 +79,9 @@ #define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */ +#define MADV_GUARD_INSTALL 102 /* fatal signal on access to range */ +#define MADV_GUARD_REMOVE 103 /* unguard range */ + /* compatibility flags */ #define MAP_FILE 0 diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h index 54d9c8bf7c55..281df9139d2b 100644 --- a/tools/include/uapi/asm-generic/socket.h +++ b/tools/include/uapi/asm-generic/socket.h @@ -124,6 +124,8 @@ #define SO_PASSPIDFD 76 #define SO_PEERPIDFD 77 +#define SCM_TS_OPT_ID 78 + #if !defined(__KERNEL__) #if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__)) diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h index 3c2a101986a3..5ee30f882736 100644 --- a/tools/include/uapi/linux/bits.h +++ b/tools/include/uapi/linux/bits.h @@ -12,4 +12,7 @@ (((~_ULL(0)) - (_ULL(1) << (l)) + 1) & \ (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_U128(h, l) \ + ((_BIT128((h)) << 1) - (_BIT128(l))) + #endif /* _UAPI_LINUX_BITS_H */ diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 1fb3cb2636e6..4162afc6b5d0 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1116,11 +1116,15 @@ enum bpf_attach_type { BPF_NETKIT_PRIMARY, BPF_NETKIT_PEER, BPF_TRACE_KPROBE_SESSION, + BPF_TRACE_UPROBE_SESSION, __MAX_BPF_ATTACH_TYPE }; #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE +/* Add BPF_LINK_TYPE(type, name) in bpf_types.h to keep bpf_link_type_strs[] + * in sync with the definitions below. + */ enum bpf_link_type { BPF_LINK_TYPE_UNSPEC = 0, BPF_LINK_TYPE_RAW_TRACEPOINT = 1, @@ -1970,6 +1974,8 @@ union bpf_attr { * program. * Return * The SMP id of the processor running the program. + * Attributes + * __bpf_fastcall * * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description @@ -3101,10 +3107,6 @@ union bpf_attr { * with the **CONFIG_BPF_KPROBE_OVERRIDE** configuration * option, and in this case it only works on functions tagged with * **ALLOW_ERROR_INJECTION** in the kernel code. - * - * Also, the helper is only available for the architectures having - * the CONFIG_FUNCTION_ERROR_INJECTION option. As of this writing, - * x86 architecture is the only one to support this feature. * Return * 0 * @@ -5369,7 +5371,7 @@ union bpf_attr { * Currently, the **flags** must be 0. Currently, nr_loops is * limited to 1 << 23 (~8 million) loops. * - * long (\*callback_fn)(u32 index, void \*ctx); + * long (\*callback_fn)(u64 index, void \*ctx); * * where **index** is the current index in the loop. The index * is zero-indexed. @@ -5519,11 +5521,12 @@ union bpf_attr { * **-EOPNOTSUPP** if the hash calculation failed or **-EINVAL** if * invalid arguments are passed. * - * void *bpf_kptr_xchg(void *map_value, void *ptr) + * void *bpf_kptr_xchg(void *dst, void *ptr) * Description - * Exchange kptr at pointer *map_value* with *ptr*, and return the - * old value. *ptr* can be NULL, otherwise it must be a referenced - * pointer which will be released when this helper is called. + * Exchange kptr at pointer *dst* with *ptr*, and return the old value. + * *dst* can be map value or local kptr. *ptr* can be NULL, otherwise + * it must be a referenced pointer which will be released when this helper + * is called. * Return * The old value of kptr (which can be NULL). The returned pointer * if not NULL, is a reference which must be released using its @@ -6046,11 +6049,6 @@ enum { BPF_F_MARK_ENFORCE = (1ULL << 6), }; -/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ -enum { - BPF_F_INGRESS = (1ULL << 0), -}; - /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ enum { BPF_F_TUNINFO_IPV6 = (1ULL << 0), @@ -6197,10 +6195,12 @@ enum { BPF_F_BPRM_SECUREEXEC = (1ULL << 0), }; -/* Flags for bpf_redirect_map helper */ +/* Flags for bpf_redirect and bpf_redirect_map helpers */ enum { - BPF_F_BROADCAST = (1ULL << 3), - BPF_F_EXCLUDE_INGRESS = (1ULL << 4), + BPF_F_INGRESS = (1ULL << 0), /* used for skb path */ + BPF_F_BROADCAST = (1ULL << 3), /* used for XDP path */ + BPF_F_EXCLUDE_INGRESS = (1ULL << 4), /* used for XDP path */ +#define BPF_F_REDIRECT_FLAGS (BPF_F_INGRESS | BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) }; #define __bpf_md_ptr(type, name) \ diff --git a/tools/include/uapi/linux/const.h b/tools/include/uapi/linux/const.h index a429381e7ca5..e16be0d37746 100644 --- a/tools/include/uapi/linux/const.h +++ b/tools/include/uapi/linux/const.h @@ -28,6 +28,23 @@ #define _BITUL(x) (_UL(1) << (x)) #define _BITULL(x) (_ULL(1) << (x)) +#if !defined(__ASSEMBLY__) +/* + * Missing asm support + * + * __BIT128() would not work in the asm code, as it shifts an + * 'unsigned __init128' data type as direct representation of + * 128 bit constants is not supported in the gcc compiler, as + * they get silently truncated. + * + * TODO: Please revisit this implementation when gcc compiler + * starts representing 128 bit constants directly like long + * and unsigned long etc. Subsequently drop the comment for + * GENMASK_U128() which would then start supporting asm code. + */ +#define _BIT128(x) ((unsigned __int128)(1) << (x)) +#endif + #define __ALIGN_KERNEL(x, a) __ALIGN_KERNEL_MASK(x, (__typeof__(x))(a) - 1) #define __ALIGN_KERNEL_MASK(x, mask) (((x) + (mask)) & ~(mask)) diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index f0d71b2a3f1e..8516c1ccd57a 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -377,6 +377,7 @@ enum { IFLA_GSO_IPV4_MAX_SIZE, IFLA_GRO_IPV4_MAX_SIZE, IFLA_DPLL_PIN, + IFLA_MAX_PACING_OFFLOAD_HORIZON, __IFLA_MAX }; @@ -461,6 +462,286 @@ enum in6_addr_gen_mode { /* Bridge section */ +/** + * DOC: Bridge enum definition + * + * Please *note* that the timer values in the following section are expected + * in clock_t format, which is seconds multiplied by USER_HZ (generally + * defined as 100). + * + * @IFLA_BR_FORWARD_DELAY + * The bridge forwarding delay is the time spent in LISTENING state + * (before moving to LEARNING) and in LEARNING state (before moving + * to FORWARDING). Only relevant if STP is enabled. + * + * The valid values are between (2 * USER_HZ) and (30 * USER_HZ). + * The default value is (15 * USER_HZ). + * + * @IFLA_BR_HELLO_TIME + * The time between hello packets sent by the bridge, when it is a root + * bridge or a designated bridge. Only relevant if STP is enabled. + * + * The valid values are between (1 * USER_HZ) and (10 * USER_HZ). + * The default value is (2 * USER_HZ). + * + * @IFLA_BR_MAX_AGE + * The hello packet timeout is the time until another bridge in the + * spanning tree is assumed to be dead, after reception of its last hello + * message. Only relevant if STP is enabled. + * + * The valid values are between (6 * USER_HZ) and (40 * USER_HZ). + * The default value is (20 * USER_HZ). + * + * @IFLA_BR_AGEING_TIME + * Configure the bridge's FDB entries aging time. It is the time a MAC + * address will be kept in the FDB after a packet has been received from + * that address. After this time has passed, entries are cleaned up. + * Allow values outside the 802.1 standard specification for special cases: + * + * * 0 - entry never ages (all permanent) + * * 1 - entry disappears (no persistence) + * + * The default value is (300 * USER_HZ). + * + * @IFLA_BR_STP_STATE + * Turn spanning tree protocol on (*IFLA_BR_STP_STATE* > 0) or off + * (*IFLA_BR_STP_STATE* == 0) for this bridge. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_PRIORITY + * Set this bridge's spanning tree priority, used during STP root bridge + * election. + * + * The valid values are between 0 and 65535. + * + * @IFLA_BR_VLAN_FILTERING + * Turn VLAN filtering on (*IFLA_BR_VLAN_FILTERING* > 0) or off + * (*IFLA_BR_VLAN_FILTERING* == 0). When disabled, the bridge will not + * consider the VLAN tag when handling packets. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_VLAN_PROTOCOL + * Set the protocol used for VLAN filtering. + * + * The valid values are 0x8100(802.1Q) or 0x88A8(802.1AD). The default value + * is 0x8100(802.1Q). + * + * @IFLA_BR_GROUP_FWD_MASK + * The group forwarding mask. This is the bitmask that is applied to + * decide whether to forward incoming frames destined to link-local + * addresses (of the form 01:80:C2:00:00:0X). + * + * The default value is 0, which means the bridge does not forward any + * link-local frames coming on this port. + * + * @IFLA_BR_ROOT_ID + * The bridge root id, read only. + * + * @IFLA_BR_BRIDGE_ID + * The bridge id, read only. + * + * @IFLA_BR_ROOT_PORT + * The bridge root port, read only. + * + * @IFLA_BR_ROOT_PATH_COST + * The bridge root path cost, read only. + * + * @IFLA_BR_TOPOLOGY_CHANGE + * The bridge topology change, read only. + * + * @IFLA_BR_TOPOLOGY_CHANGE_DETECTED + * The bridge topology change detected, read only. + * + * @IFLA_BR_HELLO_TIMER + * The bridge hello timer, read only. + * + * @IFLA_BR_TCN_TIMER + * The bridge tcn timer, read only. + * + * @IFLA_BR_TOPOLOGY_CHANGE_TIMER + * The bridge topology change timer, read only. + * + * @IFLA_BR_GC_TIMER + * The bridge gc timer, read only. + * + * @IFLA_BR_GROUP_ADDR + * Set the MAC address of the multicast group this bridge uses for STP. + * The address must be a link-local address in standard Ethernet MAC address + * format. It is an address of the form 01:80:C2:00:00:0X, with X in [0, 4..f]. + * + * The default value is 0. + * + * @IFLA_BR_FDB_FLUSH + * Flush bridge's fdb dynamic entries. + * + * @IFLA_BR_MCAST_ROUTER + * Set bridge's multicast router if IGMP snooping is enabled. + * The valid values are: + * + * * 0 - disabled. + * * 1 - automatic (queried). + * * 2 - permanently enabled. + * + * The default value is 1. + * + * @IFLA_BR_MCAST_SNOOPING + * Turn multicast snooping on (*IFLA_BR_MCAST_SNOOPING* > 0) or off + * (*IFLA_BR_MCAST_SNOOPING* == 0). + * + * The default value is 1. + * + * @IFLA_BR_MCAST_QUERY_USE_IFADDR + * If enabled use the bridge's own IP address as source address for IGMP + * queries (*IFLA_BR_MCAST_QUERY_USE_IFADDR* > 0) or the default of 0.0.0.0 + * (*IFLA_BR_MCAST_QUERY_USE_IFADDR* == 0). + * + * The default value is 0 (disabled). + * + * @IFLA_BR_MCAST_QUERIER + * Enable (*IFLA_BR_MULTICAST_QUERIER* > 0) or disable + * (*IFLA_BR_MULTICAST_QUERIER* == 0) IGMP querier, ie sending of multicast + * queries by the bridge. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_MCAST_HASH_ELASTICITY + * Set multicast database hash elasticity, It is the maximum chain length in + * the multicast hash table. This attribute is *deprecated* and the value + * is always 16. + * + * @IFLA_BR_MCAST_HASH_MAX + * Set maximum size of the multicast hash table + * + * The default value is 4096, the value must be a power of 2. + * + * @IFLA_BR_MCAST_LAST_MEMBER_CNT + * The Last Member Query Count is the number of Group-Specific Queries + * sent before the router assumes there are no local members. The Last + * Member Query Count is also the number of Group-and-Source-Specific + * Queries sent before the router assumes there are no listeners for a + * particular source. + * + * The default value is 2. + * + * @IFLA_BR_MCAST_STARTUP_QUERY_CNT + * The Startup Query Count is the number of Queries sent out on startup, + * separated by the Startup Query Interval. + * + * The default value is 2. + * + * @IFLA_BR_MCAST_LAST_MEMBER_INTVL + * The Last Member Query Interval is the Max Response Time inserted into + * Group-Specific Queries sent in response to Leave Group messages, and + * is also the amount of time between Group-Specific Query messages. + * + * The default value is (1 * USER_HZ). + * + * @IFLA_BR_MCAST_MEMBERSHIP_INTVL + * The interval after which the bridge will leave a group, if no membership + * reports for this group are received. + * + * The default value is (260 * USER_HZ). + * + * @IFLA_BR_MCAST_QUERIER_INTVL + * The interval between queries sent by other routers. if no queries are + * seen after this delay has passed, the bridge will start to send its own + * queries (as if *IFLA_BR_MCAST_QUERIER_INTVL* was enabled). + * + * The default value is (255 * USER_HZ). + * + * @IFLA_BR_MCAST_QUERY_INTVL + * The Query Interval is the interval between General Queries sent by + * the Querier. + * + * The default value is (125 * USER_HZ). The minimum value is (1 * USER_HZ). + * + * @IFLA_BR_MCAST_QUERY_RESPONSE_INTVL + * The Max Response Time used to calculate the Max Resp Code inserted + * into the periodic General Queries. + * + * The default value is (10 * USER_HZ). + * + * @IFLA_BR_MCAST_STARTUP_QUERY_INTVL + * The interval between queries in the startup phase. + * + * The default value is (125 * USER_HZ) / 4. The minimum value is (1 * USER_HZ). + * + * @IFLA_BR_NF_CALL_IPTABLES + * Enable (*NF_CALL_IPTABLES* > 0) or disable (*NF_CALL_IPTABLES* == 0) + * iptables hooks on the bridge. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_NF_CALL_IP6TABLES + * Enable (*NF_CALL_IP6TABLES* > 0) or disable (*NF_CALL_IP6TABLES* == 0) + * ip6tables hooks on the bridge. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_NF_CALL_ARPTABLES + * Enable (*NF_CALL_ARPTABLES* > 0) or disable (*NF_CALL_ARPTABLES* == 0) + * arptables hooks on the bridge. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_VLAN_DEFAULT_PVID + * VLAN ID applied to untagged and priority-tagged incoming packets. + * + * The default value is 1. Setting to the special value 0 makes all ports of + * this bridge not have a PVID by default, which means that they will + * not accept VLAN-untagged traffic. + * + * @IFLA_BR_PAD + * Bridge attribute padding type for netlink message. + * + * @IFLA_BR_VLAN_STATS_ENABLED + * Enable (*IFLA_BR_VLAN_STATS_ENABLED* == 1) or disable + * (*IFLA_BR_VLAN_STATS_ENABLED* == 0) per-VLAN stats accounting. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_MCAST_STATS_ENABLED + * Enable (*IFLA_BR_MCAST_STATS_ENABLED* > 0) or disable + * (*IFLA_BR_MCAST_STATS_ENABLED* == 0) multicast (IGMP/MLD) stats + * accounting. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_MCAST_IGMP_VERSION + * Set the IGMP version. + * + * The valid values are 2 and 3. The default value is 2. + * + * @IFLA_BR_MCAST_MLD_VERSION + * Set the MLD version. + * + * The valid values are 1 and 2. The default value is 1. + * + * @IFLA_BR_VLAN_STATS_PER_PORT + * Enable (*IFLA_BR_VLAN_STATS_PER_PORT* == 1) or disable + * (*IFLA_BR_VLAN_STATS_PER_PORT* == 0) per-VLAN per-port stats accounting. + * Can be changed only when there are no port VLANs configured. + * + * The default value is 0 (disabled). + * + * @IFLA_BR_MULTI_BOOLOPT + * The multi_boolopt is used to control new boolean options to avoid adding + * new netlink attributes. You can look at ``enum br_boolopt_id`` for those + * options. + * + * @IFLA_BR_MCAST_QUERIER_STATE + * Bridge mcast querier states, read only. + * + * @IFLA_BR_FDB_N_LEARNED + * The number of dynamically learned FDB entries for the current bridge, + * read only. + * + * @IFLA_BR_FDB_MAX_LEARNED + * Set the number of max dynamically learned FDB entries for the current + * bridge. + */ enum { IFLA_BR_UNSPEC, IFLA_BR_FORWARD_DELAY, @@ -510,6 +791,8 @@ enum { IFLA_BR_VLAN_STATS_PER_PORT, IFLA_BR_MULTI_BOOLOPT, IFLA_BR_MCAST_QUERIER_STATE, + IFLA_BR_FDB_N_LEARNED, + IFLA_BR_FDB_MAX_LEARNED, __IFLA_BR_MAX, }; @@ -520,11 +803,252 @@ struct ifla_bridge_id { __u8 addr[6]; /* ETH_ALEN */ }; +/** + * DOC: Bridge mode enum definition + * + * @BRIDGE_MODE_HAIRPIN + * Controls whether traffic may be sent back out of the port on which it + * was received. This option is also called reflective relay mode, and is + * used to support basic VEPA (Virtual Ethernet Port Aggregator) + * capabilities. By default, this flag is turned off and the bridge will + * not forward traffic back out of the receiving port. + */ enum { BRIDGE_MODE_UNSPEC, BRIDGE_MODE_HAIRPIN, }; +/** + * DOC: Bridge port enum definition + * + * @IFLA_BRPORT_STATE + * The operation state of the port. Here are the valid values. + * + * * 0 - port is in STP *DISABLED* state. Make this port completely + * inactive for STP. This is also called BPDU filter and could be used + * to disable STP on an untrusted port, like a leaf virtual device. + * The traffic forwarding is also stopped on this port. + * * 1 - port is in STP *LISTENING* state. Only valid if STP is enabled + * on the bridge. In this state the port listens for STP BPDUs and + * drops all other traffic frames. + * * 2 - port is in STP *LEARNING* state. Only valid if STP is enabled on + * the bridge. In this state the port will accept traffic only for the + * purpose of updating MAC address tables. + * * 3 - port is in STP *FORWARDING* state. Port is fully active. + * * 4 - port is in STP *BLOCKING* state. Only valid if STP is enabled on + * the bridge. This state is used during the STP election process. + * In this state, port will only process STP BPDUs. + * + * @IFLA_BRPORT_PRIORITY + * The STP port priority. The valid values are between 0 and 255. + * + * @IFLA_BRPORT_COST + * The STP path cost of the port. The valid values are between 1 and 65535. + * + * @IFLA_BRPORT_MODE + * Set the bridge port mode. See *BRIDGE_MODE_HAIRPIN* for more details. + * + * @IFLA_BRPORT_GUARD + * Controls whether STP BPDUs will be processed by the bridge port. By + * default, the flag is turned off to allow BPDU processing. Turning this + * flag on will disable the bridge port if a STP BPDU packet is received. + * + * If the bridge has Spanning Tree enabled, hostile devices on the network + * may send BPDU on a port and cause network failure. Setting *guard on* + * will detect and stop this by disabling the port. The port will be + * restarted if the link is brought down, or removed and reattached. + * + * @IFLA_BRPORT_PROTECT + * Controls whether a given port is allowed to become a root port or not. + * Only used when STP is enabled on the bridge. By default the flag is off. + * + * This feature is also called root port guard. If BPDU is received from a + * leaf (edge) port, it should not be elected as root port. This could + * be used if using STP on a bridge and the downstream bridges are not fully + * trusted; this prevents a hostile guest from rerouting traffic. + * + * @IFLA_BRPORT_FAST_LEAVE + * This flag allows the bridge to immediately stop multicast traffic + * forwarding on a port that receives an IGMP Leave message. It is only used + * when IGMP snooping is enabled on the bridge. By default the flag is off. + * + * @IFLA_BRPORT_LEARNING + * Controls whether a given port will learn *source* MAC addresses from + * received traffic or not. Also controls whether dynamic FDB entries + * (which can also be added by software) will be refreshed by incoming + * traffic. By default this flag is on. + * + * @IFLA_BRPORT_UNICAST_FLOOD + * Controls whether unicast traffic for which there is no FDB entry will + * be flooded towards this port. By default this flag is on. + * + * @IFLA_BRPORT_PROXYARP + * Enable proxy ARP on this port. + * + * @IFLA_BRPORT_LEARNING_SYNC + * Controls whether a given port will sync MAC addresses learned on device + * port to bridge FDB. + * + * @IFLA_BRPORT_PROXYARP_WIFI + * Enable proxy ARP on this port which meets extended requirements by + * IEEE 802.11 and Hotspot 2.0 specifications. + * + * @IFLA_BRPORT_ROOT_ID + * + * @IFLA_BRPORT_BRIDGE_ID + * + * @IFLA_BRPORT_DESIGNATED_PORT + * + * @IFLA_BRPORT_DESIGNATED_COST + * + * @IFLA_BRPORT_ID + * + * @IFLA_BRPORT_NO + * + * @IFLA_BRPORT_TOPOLOGY_CHANGE_ACK + * + * @IFLA_BRPORT_CONFIG_PENDING + * + * @IFLA_BRPORT_MESSAGE_AGE_TIMER + * + * @IFLA_BRPORT_FORWARD_DELAY_TIMER + * + * @IFLA_BRPORT_HOLD_TIMER + * + * @IFLA_BRPORT_FLUSH + * Flush bridge ports' fdb dynamic entries. + * + * @IFLA_BRPORT_MULTICAST_ROUTER + * Configure the port's multicast router presence. A port with + * a multicast router will receive all multicast traffic. + * The valid values are: + * + * * 0 disable multicast routers on this port + * * 1 let the system detect the presence of routers (default) + * * 2 permanently enable multicast traffic forwarding on this port + * * 3 enable multicast routers temporarily on this port, not depending + * on incoming queries. + * + * @IFLA_BRPORT_PAD + * + * @IFLA_BRPORT_MCAST_FLOOD + * Controls whether a given port will flood multicast traffic for which + * there is no MDB entry. By default this flag is on. + * + * @IFLA_BRPORT_MCAST_TO_UCAST + * Controls whether a given port will replicate packets using unicast + * instead of multicast. By default this flag is off. + * + * This is done by copying the packet per host and changing the multicast + * destination MAC to a unicast one accordingly. + * + * *mcast_to_unicast* works on top of the multicast snooping feature of the + * bridge. Which means unicast copies are only delivered to hosts which + * are interested in unicast and signaled this via IGMP/MLD reports previously. + * + * This feature is intended for interface types which have a more reliable + * and/or efficient way to deliver unicast packets than broadcast ones + * (e.g. WiFi). + * + * However, it should only be enabled on interfaces where no IGMPv2/MLDv1 + * report suppression takes place. IGMP/MLD report suppression issue is + * usually overcome by the network daemon (supplicant) enabling AP isolation + * and by that separating all STAs. + * + * Delivery of STA-to-STA IP multicast is made possible again by enabling + * and utilizing the bridge hairpin mode, which considers the incoming port + * as a potential outgoing port, too (see *BRIDGE_MODE_HAIRPIN* option). + * Hairpin mode is performed after multicast snooping, therefore leading + * to only deliver reports to STAs running a multicast router. + * + * @IFLA_BRPORT_VLAN_TUNNEL + * Controls whether vlan to tunnel mapping is enabled on the port. + * By default this flag is off. + * + * @IFLA_BRPORT_BCAST_FLOOD + * Controls flooding of broadcast traffic on the given port. By default + * this flag is on. + * + * @IFLA_BRPORT_GROUP_FWD_MASK + * Set the group forward mask. This is a bitmask that is applied to + * decide whether to forward incoming frames destined to link-local + * addresses. The addresses of the form are 01:80:C2:00:00:0X (defaults + * to 0, which means the bridge does not forward any link-local frames + * coming on this port). + * + * @IFLA_BRPORT_NEIGH_SUPPRESS + * Controls whether neighbor discovery (arp and nd) proxy and suppression + * is enabled on the port. By default this flag is off. + * + * @IFLA_BRPORT_ISOLATED + * Controls whether a given port will be isolated, which means it will be + * able to communicate with non-isolated ports only. By default this + * flag is off. + * + * @IFLA_BRPORT_BACKUP_PORT + * Set a backup port. If the port loses carrier all traffic will be + * redirected to the configured backup port. Set the value to 0 to disable + * it. + * + * @IFLA_BRPORT_MRP_RING_OPEN + * + * @IFLA_BRPORT_MRP_IN_OPEN + * + * @IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT + * The number of per-port EHT hosts limit. The default value is 512. + * Setting to 0 is not allowed. + * + * @IFLA_BRPORT_MCAST_EHT_HOSTS_CNT + * The current number of tracked hosts, read only. + * + * @IFLA_BRPORT_LOCKED + * Controls whether a port will be locked, meaning that hosts behind the + * port will not be able to communicate through the port unless an FDB + * entry with the unit's MAC address is in the FDB. The common use case is + * that hosts are allowed access through authentication with the IEEE 802.1X + * protocol or based on whitelists. By default this flag is off. + * + * Please note that secure 802.1X deployments should always use the + * *BR_BOOLOPT_NO_LL_LEARN* flag, to not permit the bridge to populate its + * FDB based on link-local (EAPOL) traffic received on the port. + * + * @IFLA_BRPORT_MAB + * Controls whether a port will use MAC Authentication Bypass (MAB), a + * technique through which select MAC addresses may be allowed on a locked + * port, without using 802.1X authentication. Packets with an unknown source + * MAC address generates a "locked" FDB entry on the incoming bridge port. + * The common use case is for user space to react to these bridge FDB + * notifications and optionally replace the locked FDB entry with a normal + * one, allowing traffic to pass for whitelisted MAC addresses. + * + * Setting this flag also requires *IFLA_BRPORT_LOCKED* and + * *IFLA_BRPORT_LEARNING*. *IFLA_BRPORT_LOCKED* ensures that unauthorized + * data packets are dropped, and *IFLA_BRPORT_LEARNING* allows the dynamic + * FDB entries installed by user space (as replacements for the locked FDB + * entries) to be refreshed and/or aged out. + * + * @IFLA_BRPORT_MCAST_N_GROUPS + * + * @IFLA_BRPORT_MCAST_MAX_GROUPS + * Sets the maximum number of MDB entries that can be registered for a + * given port. Attempts to register more MDB entries at the port than this + * limit allows will be rejected, whether they are done through netlink + * (e.g. the bridge tool), or IGMP or MLD membership reports. Setting a + * limit of 0 disables the limit. The default value is 0. + * + * @IFLA_BRPORT_NEIGH_VLAN_SUPPRESS + * Controls whether neighbor discovery (arp and nd) proxy and suppression is + * enabled for a given port. By default this flag is off. + * + * Note that this option only takes effect when *IFLA_BRPORT_NEIGH_SUPPRESS* + * is enabled for a given port. + * + * @IFLA_BRPORT_BACKUP_NHID + * The FDB nexthop object ID to attach to packets being redirected to a + * backup port that has VLAN tunnel mapping enabled (via the + * *IFLA_BRPORT_VLAN_TUNNEL* option). Setting a value of 0 (default) has + * the effect of not attaching any ID. + */ enum { IFLA_BRPORT_UNSPEC, IFLA_BRPORT_STATE, /* Spanning tree state */ @@ -769,6 +1293,19 @@ enum netkit_mode { NETKIT_L3, }; +/* NETKIT_SCRUB_NONE leaves clearing skb->{mark,priority} up to + * the BPF program if attached. This also means the latter can + * consume the two fields if they were populated earlier. + * + * NETKIT_SCRUB_DEFAULT zeroes skb->{mark,priority} fields before + * invoking the attached BPF program when the peer device resides + * in a different network namespace. This is the default behavior. + */ +enum netkit_scrub { + NETKIT_SCRUB_NONE, + NETKIT_SCRUB_DEFAULT, +}; + enum { IFLA_NETKIT_UNSPEC, IFLA_NETKIT_PEER_INFO, @@ -776,6 +1313,8 @@ enum { IFLA_NETKIT_POLICY, IFLA_NETKIT_PEER_POLICY, IFLA_NETKIT_MODE, + IFLA_NETKIT_SCRUB, + IFLA_NETKIT_PEER_SCRUB, __IFLA_NETKIT_MAX, }; #define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1) @@ -854,6 +1393,7 @@ enum { IFLA_VXLAN_DF, IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */ IFLA_VXLAN_LOCALBYPASS, + IFLA_VXLAN_LABEL_POLICY, /* IPv6 flow label policy; ifla_vxlan_label_policy */ __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) @@ -871,6 +1411,13 @@ enum ifla_vxlan_df { VXLAN_DF_MAX = __VXLAN_DF_END - 1, }; +enum ifla_vxlan_label_policy { + VXLAN_LABEL_FIXED = 0, + VXLAN_LABEL_INHERIT = 1, + __VXLAN_LABEL_END, + VXLAN_LABEL_MAX = __VXLAN_LABEL_END - 1, +}; + /* GENEVE section */ enum { IFLA_GENEVE_UNSPEC, @@ -935,6 +1482,8 @@ enum { IFLA_GTP_ROLE, IFLA_GTP_CREATE_SOCKETS, IFLA_GTP_RESTART_COUNT, + IFLA_GTP_LOCAL, + IFLA_GTP_LOCAL6, __IFLA_GTP_MAX, }; #define IFLA_GTP_MAX (__IFLA_GTP_MAX - 1) @@ -1240,6 +1789,7 @@ enum { IFLA_HSR_PROTOCOL, /* Indicate different protocol than * HSR. For example PRP. */ + IFLA_HSR_INTERLINK, /* HSR interlink network device */ __IFLA_HSR_MAX, }; @@ -1417,7 +1967,9 @@ enum { enum { IFLA_DSA_UNSPEC, - IFLA_DSA_MASTER, + IFLA_DSA_CONDUIT, + /* Deprecated, use IFLA_DSA_CONDUIT instead */ + IFLA_DSA_MASTER = IFLA_DSA_CONDUIT, __IFLA_DSA_MAX, }; diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index d358add1611c..5d32d53508d9 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -141,7 +141,7 @@ struct in_addr { */ #define IP_PMTUDISC_INTERFACE 4 /* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get - * fragmented if they exeed the interface mtu + * fragmented if they exceed the interface mtu */ #define IP_PMTUDISC_OMIT 5 diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 7c308f04e7a0..e4be227d3ad6 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -122,6 +122,9 @@ enum { NETDEV_A_NAPI_ID, NETDEV_A_NAPI_IRQ, NETDEV_A_NAPI_PID, + NETDEV_A_NAPI_DEFER_HARD_IRQS, + NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, + NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) @@ -199,6 +202,7 @@ enum { NETDEV_CMD_NAPI_GET, NETDEV_CMD_QSTATS_GET, NETDEV_CMD_BIND_RX, + NETDEV_CMD_NAPI_SET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/include/vdso/unaligned.h b/tools/include/vdso/unaligned.h new file mode 100644 index 000000000000..eee3d2a4dbe4 --- /dev/null +++ b/tools/include/vdso/unaligned.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __VDSO_UNALIGNED_H +#define __VDSO_UNALIGNED_H + +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define __put_unaligned_t(type, val, ptr) do { \ + struct { type x; } __packed *__pptr = (typeof(__pptr))(ptr); \ + __pptr->x = (val); \ +} while (0) + +#endif /* __VDSO_UNALIGNED_H */ diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h index d3eb04d1bc89..1731996b2c32 100644 --- a/tools/lib/api/io.h +++ b/tools/lib/api/io.h @@ -189,6 +189,7 @@ static inline ssize_t io__getdelim(struct io *io, char **line_out, size_t *line_ err_out: free(line); *line_out = NULL; + *line_len_out = 0; return -ENOMEM; } diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 1b22f0f37288..857a5f7b413d 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -61,7 +61,8 @@ ifndef VERBOSE endif INCLUDES = -I$(or $(OUTPUT),.) \ - -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi + -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi \ + -I$(srctree)/tools/arch/$(SRCARCH)/include export prefix libdir src obj diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 2a4c71501a17..becdfa701c75 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -776,6 +776,7 @@ int bpf_link_create(int prog_fd, int target_fd, return libbpf_err(-EINVAL); break; case BPF_TRACE_UPROBE_MULTI: + case BPF_TRACE_UPROBE_SESSION: attr.link_create.uprobe_multi.flags = OPTS_GET(opts, uprobe_multi.flags, 0); attr.link_create.uprobe_multi.cnt = OPTS_GET(opts, uprobe_multi.cnt, 0); attr.link_create.uprobe_multi.path = ptr_to_u64(OPTS_GET(opts, uprobe_multi.path, 0)); diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index fdf44403ff36..6ff963a491d9 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -34,6 +34,7 @@ struct bpf_gen { void *data_cur; void *insn_start; void *insn_cur; + bool swapped_endian; ssize_t cleanup_label; __u32 nr_progs; __u32 nr_maps; diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 80bc0242e8dc..686824b8b413 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -185,6 +185,7 @@ enum libbpf_tristate { #define __kptr_untrusted __attribute__((btf_type_tag("kptr_untrusted"))) #define __kptr __attribute__((btf_type_tag("kptr"))) #define __percpu_kptr __attribute__((btf_type_tag("percpu_kptr"))) +#define __uptr __attribute__((btf_type_tag("uptr"))) #if defined (__clang__) #define bpf_ksym_exists(sym) ({ \ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 3c131039c523..12468ae0d573 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -22,6 +22,7 @@ #include "libbpf_internal.h" #include "hashmap.h" #include "strset.h" +#include "str_error.h" #define BTF_MAX_NR_TYPES 0x7fffffffU #define BTF_MAX_STR_OFFSET 0x7fffffffU @@ -1179,7 +1180,7 @@ static struct btf *btf_parse_elf(const char *path, struct btf *base_btf, fd = open(path, O_RDONLY | O_CLOEXEC); if (fd < 0) { err = -errno; - pr_warn("failed to open %s: %s\n", path, strerror(errno)); + pr_warn("failed to open %s: %s\n", path, errstr(err)); return ERR_PTR(err); } @@ -1445,7 +1446,7 @@ retry_load: goto retry_load; err = -errno; - pr_warn("BTF loading error: %d\n", err); + pr_warn("BTF loading error: %s\n", errstr(err)); /* don't print out contents of custom log_buf */ if (!log_buf && buf[0]) pr_warn("-- BEGIN BTF LOAD LOG ---\n%s\n-- END BTF LOAD LOG --\n", buf); @@ -2885,7 +2886,7 @@ int btf__add_decl_tag(struct btf *btf, const char *value, int ref_type_id, return btf_commit_type(btf, sz); } -struct btf_ext_sec_setup_param { +struct btf_ext_sec_info_param { __u32 off; __u32 len; __u32 min_rec_size; @@ -2893,14 +2894,20 @@ struct btf_ext_sec_setup_param { const char *desc; }; -static int btf_ext_setup_info(struct btf_ext *btf_ext, - struct btf_ext_sec_setup_param *ext_sec) +/* + * Parse a single info subsection of the BTF.ext info data: + * - validate subsection structure and elements + * - save info subsection start and sizing details in struct btf_ext + * - endian-independent operation, for calling before byte-swapping + */ +static int btf_ext_parse_sec_info(struct btf_ext *btf_ext, + struct btf_ext_sec_info_param *ext_sec, + bool is_native) { const struct btf_ext_info_sec *sinfo; struct btf_ext_info *ext_info; __u32 info_left, record_size; size_t sec_cnt = 0; - /* The start of the info sec (including the __u32 record_size). */ void *info; if (ext_sec->len == 0) @@ -2912,6 +2919,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } + /* The start of the info sec (including the __u32 record_size). */ info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off; info_left = ext_sec->len; @@ -2927,9 +2935,13 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } - /* The record size needs to meet the minimum standard */ - record_size = *(__u32 *)info; + /* The record size needs to meet either the minimum standard or, when + * handling non-native endianness data, the exact standard so as + * to allow safe byte-swapping. + */ + record_size = is_native ? *(__u32 *)info : bswap_32(*(__u32 *)info); if (record_size < ext_sec->min_rec_size || + (!is_native && record_size != ext_sec->min_rec_size) || record_size & 0x03) { pr_debug("%s section in .BTF.ext has invalid record size %u\n", ext_sec->desc, record_size); @@ -2941,7 +2953,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, /* If no records, return failure now so .BTF.ext won't be used. */ if (!info_left) { - pr_debug("%s section in .BTF.ext has no records", ext_sec->desc); + pr_debug("%s section in .BTF.ext has no records\n", ext_sec->desc); return -EINVAL; } @@ -2956,7 +2968,7 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return -EINVAL; } - num_records = sinfo->num_info; + num_records = is_native ? sinfo->num_info : bswap_32(sinfo->num_info); if (num_records == 0) { pr_debug("%s section has incorrect num_records in .BTF.ext\n", ext_sec->desc); @@ -2984,64 +2996,157 @@ static int btf_ext_setup_info(struct btf_ext *btf_ext, return 0; } -static int btf_ext_setup_func_info(struct btf_ext *btf_ext) +/* Parse all info secs in the BTF.ext info data */ +static int btf_ext_parse_info(struct btf_ext *btf_ext, bool is_native) { - struct btf_ext_sec_setup_param param = { + struct btf_ext_sec_info_param func_info = { .off = btf_ext->hdr->func_info_off, .len = btf_ext->hdr->func_info_len, .min_rec_size = sizeof(struct bpf_func_info_min), .ext_info = &btf_ext->func_info, .desc = "func_info" }; - - return btf_ext_setup_info(btf_ext, ¶m); -} - -static int btf_ext_setup_line_info(struct btf_ext *btf_ext) -{ - struct btf_ext_sec_setup_param param = { + struct btf_ext_sec_info_param line_info = { .off = btf_ext->hdr->line_info_off, .len = btf_ext->hdr->line_info_len, .min_rec_size = sizeof(struct bpf_line_info_min), .ext_info = &btf_ext->line_info, .desc = "line_info", }; - - return btf_ext_setup_info(btf_ext, ¶m); -} - -static int btf_ext_setup_core_relos(struct btf_ext *btf_ext) -{ - struct btf_ext_sec_setup_param param = { + struct btf_ext_sec_info_param core_relo = { .off = btf_ext->hdr->core_relo_off, .len = btf_ext->hdr->core_relo_len, .min_rec_size = sizeof(struct bpf_core_relo), .ext_info = &btf_ext->core_relo_info, .desc = "core_relo", }; + int err; + + err = btf_ext_parse_sec_info(btf_ext, &func_info, is_native); + if (err) + return err; + + err = btf_ext_parse_sec_info(btf_ext, &line_info, is_native); + if (err) + return err; + + if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + return 0; /* skip core relos parsing */ + + err = btf_ext_parse_sec_info(btf_ext, &core_relo, is_native); + if (err) + return err; + + return 0; +} + +/* Swap byte-order of BTF.ext header with any endianness */ +static void btf_ext_bswap_hdr(struct btf_ext_header *h) +{ + bool is_native = h->magic == BTF_MAGIC; + __u32 hdr_len; + + hdr_len = is_native ? h->hdr_len : bswap_32(h->hdr_len); + + h->magic = bswap_16(h->magic); + h->hdr_len = bswap_32(h->hdr_len); + h->func_info_off = bswap_32(h->func_info_off); + h->func_info_len = bswap_32(h->func_info_len); + h->line_info_off = bswap_32(h->line_info_off); + h->line_info_len = bswap_32(h->line_info_len); + + if (hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + return; + + h->core_relo_off = bswap_32(h->core_relo_off); + h->core_relo_len = bswap_32(h->core_relo_len); +} + +/* Swap byte-order of generic info subsection */ +static void btf_ext_bswap_info_sec(void *info, __u32 len, bool is_native, + info_rec_bswap_fn bswap_fn) +{ + struct btf_ext_info_sec *sec; + __u32 info_left, rec_size, *rs; + + if (len == 0) + return; + + rs = info; /* info record size */ + rec_size = is_native ? *rs : bswap_32(*rs); + *rs = bswap_32(*rs); + + sec = info + sizeof(__u32); /* info sec #1 */ + info_left = len - sizeof(__u32); + while (info_left) { + unsigned int sec_hdrlen = sizeof(struct btf_ext_info_sec); + __u32 i, num_recs; + void *p; + + num_recs = is_native ? sec->num_info : bswap_32(sec->num_info); + sec->sec_name_off = bswap_32(sec->sec_name_off); + sec->num_info = bswap_32(sec->num_info); + p = sec->data; /* info rec #1 */ + for (i = 0; i < num_recs; i++, p += rec_size) + bswap_fn(p); + sec = p; + info_left -= sec_hdrlen + (__u64)rec_size * num_recs; + } +} + +/* + * Swap byte-order of all info data in a BTF.ext section + * - requires BTF.ext hdr in native endianness + */ +static void btf_ext_bswap_info(struct btf_ext *btf_ext, void *data) +{ + const bool is_native = btf_ext->swapped_endian; + const struct btf_ext_header *h = data; + void *info; + + /* Swap func_info subsection byte-order */ + info = data + h->hdr_len + h->func_info_off; + btf_ext_bswap_info_sec(info, h->func_info_len, is_native, + (info_rec_bswap_fn)bpf_func_info_bswap); + + /* Swap line_info subsection byte-order */ + info = data + h->hdr_len + h->line_info_off; + btf_ext_bswap_info_sec(info, h->line_info_len, is_native, + (info_rec_bswap_fn)bpf_line_info_bswap); + + /* Swap core_relo subsection byte-order (if present) */ + if (h->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) + return; - return btf_ext_setup_info(btf_ext, ¶m); + info = data + h->hdr_len + h->core_relo_off; + btf_ext_bswap_info_sec(info, h->core_relo_len, is_native, + (info_rec_bswap_fn)bpf_core_relo_bswap); } -static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) +/* Parse hdr data and info sections: check and convert to native endianness */ +static int btf_ext_parse(struct btf_ext *btf_ext) { - const struct btf_ext_header *hdr = (struct btf_ext_header *)data; + __u32 hdr_len, data_size = btf_ext->data_size; + struct btf_ext_header *hdr = btf_ext->hdr; + bool swapped_endian = false; + int err; - if (data_size < offsetofend(struct btf_ext_header, hdr_len) || - data_size < hdr->hdr_len) { - pr_debug("BTF.ext header not found"); + if (data_size < offsetofend(struct btf_ext_header, hdr_len)) { + pr_debug("BTF.ext header too short\n"); return -EINVAL; } + hdr_len = hdr->hdr_len; if (hdr->magic == bswap_16(BTF_MAGIC)) { - pr_warn("BTF.ext in non-native endianness is not supported\n"); - return -ENOTSUP; + swapped_endian = true; + hdr_len = bswap_32(hdr_len); } else if (hdr->magic != BTF_MAGIC) { pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic); return -EINVAL; } - if (hdr->version != BTF_VERSION) { + /* Ensure known version of structs, current BTF_VERSION == 1 */ + if (hdr->version != 1) { pr_debug("Unsupported BTF.ext version:%u\n", hdr->version); return -ENOTSUP; } @@ -3051,11 +3156,39 @@ static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) return -ENOTSUP; } - if (data_size == hdr->hdr_len) { + if (data_size < hdr_len) { + pr_debug("BTF.ext header not found\n"); + return -EINVAL; + } else if (data_size == hdr_len) { pr_debug("BTF.ext has no data\n"); return -EINVAL; } + /* Verify mandatory hdr info details present */ + if (hdr_len < offsetofend(struct btf_ext_header, line_info_len)) { + pr_warn("BTF.ext header missing func_info, line_info\n"); + return -EINVAL; + } + + /* Keep hdr native byte-order in memory for introspection */ + if (swapped_endian) + btf_ext_bswap_hdr(btf_ext->hdr); + + /* Validate info subsections and cache key metadata */ + err = btf_ext_parse_info(btf_ext, !swapped_endian); + if (err) + return err; + + /* Keep infos native byte-order in memory for introspection */ + if (swapped_endian) + btf_ext_bswap_info(btf_ext, btf_ext->data); + + /* + * Set btf_ext->swapped_endian only after all header and info data has + * been swapped, helping bswap functions determine if their data are + * in native byte-order when called. + */ + btf_ext->swapped_endian = swapped_endian; return 0; } @@ -3067,6 +3200,7 @@ void btf_ext__free(struct btf_ext *btf_ext) free(btf_ext->line_info.sec_idxs); free(btf_ext->core_relo_info.sec_idxs); free(btf_ext->data); + free(btf_ext->data_swapped); free(btf_ext); } @@ -3087,29 +3221,7 @@ struct btf_ext *btf_ext__new(const __u8 *data, __u32 size) } memcpy(btf_ext->data, data, size); - err = btf_ext_parse_hdr(btf_ext->data, size); - if (err) - goto done; - - if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) { - err = -EINVAL; - goto done; - } - - err = btf_ext_setup_func_info(btf_ext); - if (err) - goto done; - - err = btf_ext_setup_line_info(btf_ext); - if (err) - goto done; - - if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, core_relo_len)) - goto done; /* skip core relos parsing */ - - err = btf_ext_setup_core_relos(btf_ext); - if (err) - goto done; + err = btf_ext_parse(btf_ext); done: if (err) { @@ -3120,15 +3232,66 @@ done: return btf_ext; } +static void *btf_ext_raw_data(const struct btf_ext *btf_ext_ro, bool swap_endian) +{ + struct btf_ext *btf_ext = (struct btf_ext *)btf_ext_ro; + const __u32 data_sz = btf_ext->data_size; + void *data; + + /* Return native data (always present) or swapped data if present */ + if (!swap_endian) + return btf_ext->data; + else if (btf_ext->data_swapped) + return btf_ext->data_swapped; + + /* Recreate missing swapped data, then cache and return */ + data = calloc(1, data_sz); + if (!data) + return NULL; + memcpy(data, btf_ext->data, data_sz); + + btf_ext_bswap_info(btf_ext, data); + btf_ext_bswap_hdr(data); + btf_ext->data_swapped = data; + return data; +} + const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size) { + void *data; + + data = btf_ext_raw_data(btf_ext, btf_ext->swapped_endian); + if (!data) + return errno = ENOMEM, NULL; + *size = btf_ext->data_size; - return btf_ext->data; + return data; } __attribute__((alias("btf_ext__raw_data"))) const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size); +enum btf_endianness btf_ext__endianness(const struct btf_ext *btf_ext) +{ + if (is_host_big_endian()) + return btf_ext->swapped_endian ? BTF_LITTLE_ENDIAN : BTF_BIG_ENDIAN; + else + return btf_ext->swapped_endian ? BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN; +} + +int btf_ext__set_endianness(struct btf_ext *btf_ext, enum btf_endianness endian) +{ + if (endian != BTF_LITTLE_ENDIAN && endian != BTF_BIG_ENDIAN) + return libbpf_err(-EINVAL); + + btf_ext->swapped_endian = is_host_big_endian() != (endian == BTF_BIG_ENDIAN); + + if (!btf_ext->swapped_endian) { + free(btf_ext->data_swapped); + btf_ext->data_swapped = NULL; + } + return 0; +} struct btf_dedup; @@ -3291,7 +3454,7 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts) d = btf_dedup_new(btf, opts); if (IS_ERR(d)) { - pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); + pr_debug("btf_dedup_new failed: %ld\n", PTR_ERR(d)); return libbpf_err(-EINVAL); } @@ -3302,42 +3465,42 @@ int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts) err = btf_dedup_prep(d); if (err) { - pr_debug("btf_dedup_prep failed:%d\n", err); + pr_debug("btf_dedup_prep failed: %s\n", errstr(err)); goto done; } err = btf_dedup_strings(d); if (err < 0) { - pr_debug("btf_dedup_strings failed:%d\n", err); + pr_debug("btf_dedup_strings failed: %s\n", errstr(err)); goto done; } err = btf_dedup_prim_types(d); if (err < 0) { - pr_debug("btf_dedup_prim_types failed:%d\n", err); + pr_debug("btf_dedup_prim_types failed: %s\n", errstr(err)); goto done; } err = btf_dedup_struct_types(d); if (err < 0) { - pr_debug("btf_dedup_struct_types failed:%d\n", err); + pr_debug("btf_dedup_struct_types failed: %s\n", errstr(err)); goto done; } err = btf_dedup_resolve_fwds(d); if (err < 0) { - pr_debug("btf_dedup_resolve_fwds failed:%d\n", err); + pr_debug("btf_dedup_resolve_fwds failed: %s\n", errstr(err)); goto done; } err = btf_dedup_ref_types(d); if (err < 0) { - pr_debug("btf_dedup_ref_types failed:%d\n", err); + pr_debug("btf_dedup_ref_types failed: %s\n", errstr(err)); goto done; } err = btf_dedup_compact_types(d); if (err < 0) { - pr_debug("btf_dedup_compact_types failed:%d\n", err); + pr_debug("btf_dedup_compact_types failed: %s\n", errstr(err)); goto done; } err = btf_dedup_remap_types(d); if (err < 0) { - pr_debug("btf_dedup_remap_types failed:%d\n", err); + pr_debug("btf_dedup_remap_types failed: %s\n", errstr(err)); goto done; } @@ -3385,7 +3548,7 @@ struct btf_dedup { struct strset *strs_set; }; -static long hash_combine(long h, long value) +static unsigned long hash_combine(unsigned long h, unsigned long value) { return h * 31 + value; } @@ -5056,7 +5219,8 @@ struct btf *btf__load_vmlinux_btf(void) btf = btf__parse(sysfs_btf_path, NULL); if (!btf) { err = -errno; - pr_warn("failed to read kernel BTF from '%s': %d\n", sysfs_btf_path, err); + pr_warn("failed to read kernel BTF from '%s': %s\n", + sysfs_btf_path, errstr(err)); return libbpf_err_ptr(err); } pr_debug("loaded kernel BTF from '%s'\n", sysfs_btf_path); @@ -5073,7 +5237,7 @@ struct btf *btf__load_vmlinux_btf(void) btf = btf__parse(path, NULL); err = libbpf_get_error(btf); - pr_debug("loading kernel BTF '%s': %d\n", path, err); + pr_debug("loading kernel BTF '%s': %s\n", path, errstr(err)); if (err) continue; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 4e349ad79ee6..47ee8f6ac489 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -167,6 +167,9 @@ LIBBPF_API const char *btf__str_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size); LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); LIBBPF_API const void *btf_ext__raw_data(const struct btf_ext *btf_ext, __u32 *size); +LIBBPF_API enum btf_endianness btf_ext__endianness(const struct btf_ext *btf_ext); +LIBBPF_API int btf_ext__set_endianness(struct btf_ext *btf_ext, + enum btf_endianness endian); LIBBPF_API int btf__find_str(struct btf *btf, const char *s); LIBBPF_API int btf__add_str(struct btf *btf, const char *s); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 0a7327541c17..a3fc6908f6c9 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -21,6 +21,7 @@ #include "hashmap.h" #include "libbpf.h" #include "libbpf_internal.h" +#include "str_error.h" static const char PREFIXES[] = "\t\t\t\t\t\t\t\t\t\t\t\t\t"; static const size_t PREFIX_CNT = sizeof(PREFIXES) - 1; @@ -867,8 +868,8 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d, } pads[] = { {"long", d->ptr_sz * 8}, {"int", 32}, {"short", 16}, {"char", 8} }; - int new_off, pad_bits, bits, i; - const char *pad_type; + int new_off = 0, pad_bits = 0, bits, i; + const char *pad_type = NULL; if (cur_off >= next_off) return; /* no gap */ @@ -1304,7 +1305,7 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, * chain, restore stack, emit warning, and try to * proceed nevertheless */ - pr_warn("not enough memory for decl stack:%d", err); + pr_warn("not enough memory for decl stack: %s\n", errstr(err)); d->decl_stack_cnt = stack_start; return; } diff --git a/tools/lib/bpf/btf_relocate.c b/tools/lib/bpf/btf_relocate.c index 4f7399d85eab..b72f83e15156 100644 --- a/tools/lib/bpf/btf_relocate.c +++ b/tools/lib/bpf/btf_relocate.c @@ -428,7 +428,7 @@ static int btf_relocate_rewrite_strs(struct btf_relocate *r, __u32 i) } else { off = r->str_map[*str_off]; if (!off) { - pr_warn("string '%s' [offset %u] is not mapped to base BTF", + pr_warn("string '%s' [offset %u] is not mapped to base BTF\n", btf__str_by_offset(r->btf, off), *str_off); return -ENOENT; } diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index b5ab1cb13e5e..823f83ad819c 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -24,7 +24,6 @@ int elf_open(const char *binary_path, struct elf_fd *elf_fd) { - char errmsg[STRERR_BUFSIZE]; int fd, ret; Elf *elf; @@ -38,8 +37,7 @@ int elf_open(const char *binary_path, struct elf_fd *elf_fd) fd = open(binary_path, O_RDONLY | O_CLOEXEC); if (fd < 0) { ret = -errno; - pr_warn("elf: failed to open %s: %s\n", binary_path, - libbpf_strerror_r(ret, errmsg, sizeof(errmsg))); + pr_warn("elf: failed to open %s: %s\n", binary_path, errstr(ret)); return ret; } elf = elf_begin(fd, ELF_C_READ_MMAP, NULL); diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c index 50befe125ddc..760657f5224c 100644 --- a/tools/lib/bpf/features.c +++ b/tools/lib/bpf/features.c @@ -47,7 +47,6 @@ static int probe_kern_prog_name(int token_fd) static int probe_kern_global_data(int token_fd) { - char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), @@ -67,9 +66,8 @@ static int probe_kern_global_data(int token_fd) map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts); if (map < 0) { ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); + pr_warn("Error in %s(): %s. Couldn't create simple array map.\n", + __func__, errstr(ret)); return ret; } @@ -267,7 +265,6 @@ static int probe_kern_probe_read_kernel(int token_fd) static int probe_prog_bind_map(int token_fd) { - char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), @@ -285,9 +282,8 @@ static int probe_prog_bind_map(int token_fd) map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts); if (map < 0) { ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); + pr_warn("Error in %s(): %s. Couldn't create simple array map.\n", + __func__, errstr(ret)); return ret; } @@ -604,7 +600,8 @@ bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_ } else if (ret == 0) { WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); } else { - pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); + pr_warn("Detection of kernel %s support failed: %s\n", + feat->desc, errstr(ret)); WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); } } diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index cf3323fd47b8..113ae4abd345 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -14,6 +14,7 @@ #include "bpf_gen_internal.h" #include "skel_internal.h" #include <asm/byteorder.h> +#include "str_error.h" #define MAX_USED_MAPS 64 #define MAX_USED_PROGS 32 @@ -393,7 +394,7 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) blob_fd_array_off(gen, i)); emit(gen, BPF_MOV64_IMM(BPF_REG_0, 0)); emit(gen, BPF_EXIT_INSN()); - pr_debug("gen: finish %d\n", gen->error); + pr_debug("gen: finish %s\n", errstr(gen->error)); if (!gen->error) { struct gen_loader_opts *opts = gen->opts; @@ -401,6 +402,15 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps) opts->insns_sz = gen->insn_cur - gen->insn_start; opts->data = gen->data_start; opts->data_sz = gen->data_cur - gen->data_start; + + /* use target endianness for embedded loader */ + if (gen->swapped_endian) { + struct bpf_insn *insn = (struct bpf_insn *)opts->insns; + int insn_cnt = opts->insns_sz / sizeof(struct bpf_insn); + + for (i = 0; i < insn_cnt; i++) + bpf_insn_bswap(insn++); + } } return gen->error; } @@ -414,6 +424,28 @@ void bpf_gen__free(struct bpf_gen *gen) free(gen); } +/* + * Fields of bpf_attr are set to values in native byte-order before being + * written to the target-bound data blob, and may need endian conversion. + * This macro allows providing the correct value in situ more simply than + * writing a separate converter for *all fields* of *all records* included + * in union bpf_attr. Note that sizeof(rval) should match the assignment + * target to avoid runtime problems. + */ +#define tgt_endian(rval) ({ \ + typeof(rval) _val = (rval); \ + if (gen->swapped_endian) { \ + switch (sizeof(_val)) { \ + case 1: break; \ + case 2: _val = bswap_16(_val); break; \ + case 4: _val = bswap_32(_val); break; \ + case 8: _val = bswap_64(_val); break; \ + default: pr_warn("unsupported bswap size!\n"); \ + } \ + } \ + _val; \ +}) + void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, __u32 btf_raw_size) { @@ -422,11 +454,12 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data, union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: load_btf: size %d\n", btf_raw_size); btf_data = add_data(gen, btf_raw_data, btf_raw_size); - attr.btf_size = btf_raw_size; + attr.btf_size = tgt_endian(btf_raw_size); btf_load_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: load_btf: off %d size %d, attr: off %d size %d\n", + btf_data, btf_raw_size, btf_load_attr, attr_size); /* populate union bpf_attr with user provided log details */ move_ctx2blob(gen, attr_field(btf_load_attr, btf_log_level), 4, @@ -457,28 +490,29 @@ void bpf_gen__map_create(struct bpf_gen *gen, union bpf_attr attr; memset(&attr, 0, attr_size); - attr.map_type = map_type; - attr.key_size = key_size; - attr.value_size = value_size; - attr.map_flags = map_attr->map_flags; - attr.map_extra = map_attr->map_extra; + attr.map_type = tgt_endian(map_type); + attr.key_size = tgt_endian(key_size); + attr.value_size = tgt_endian(value_size); + attr.map_flags = tgt_endian(map_attr->map_flags); + attr.map_extra = tgt_endian(map_attr->map_extra); if (map_name) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); - attr.numa_node = map_attr->numa_node; - attr.map_ifindex = map_attr->map_ifindex; - attr.max_entries = max_entries; - attr.btf_key_type_id = map_attr->btf_key_type_id; - attr.btf_value_type_id = map_attr->btf_value_type_id; - - pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n", - attr.map_name, map_idx, map_type, attr.btf_value_type_id); + attr.numa_node = tgt_endian(map_attr->numa_node); + attr.map_ifindex = tgt_endian(map_attr->map_ifindex); + attr.max_entries = tgt_endian(max_entries); + attr.btf_key_type_id = tgt_endian(map_attr->btf_key_type_id); + attr.btf_value_type_id = tgt_endian(map_attr->btf_value_type_id); map_create_attr = add_data(gen, &attr, attr_size); - if (attr.btf_value_type_id) + pr_debug("gen: map_create: %s idx %d type %d value_type_id %d, attr: off %d size %d\n", + map_name, map_idx, map_type, map_attr->btf_value_type_id, + map_create_attr, attr_size); + + if (map_attr->btf_value_type_id) /* populate union bpf_attr with btf_fd saved in the stack earlier */ move_stack2blob(gen, attr_field(map_create_attr, btf_fd), 4, stack_off(btf_fd)); - switch (attr.map_type) { + switch (map_type) { case BPF_MAP_TYPE_ARRAY_OF_MAPS: case BPF_MAP_TYPE_HASH_OF_MAPS: move_stack2blob(gen, attr_field(map_create_attr, inner_map_fd), 4, @@ -498,8 +532,8 @@ void bpf_gen__map_create(struct bpf_gen *gen, /* emit MAP_CREATE command */ emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size); debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d", - attr.map_name, map_idx, map_type, value_size, - attr.btf_value_type_id); + map_name, map_idx, map_type, value_size, + map_attr->btf_value_type_id); emit_check_err(gen); /* remember map_fd in the stack, if successful */ if (map_idx < 0) { @@ -784,12 +818,12 @@ log: emit_ksym_relo_log(gen, relo, kdesc->ref); } -static __u32 src_reg_mask(void) +static __u32 src_reg_mask(struct bpf_gen *gen) { -#if defined(__LITTLE_ENDIAN_BITFIELD) - return 0x0f; /* src_reg,dst_reg,... */ -#elif defined(__BIG_ENDIAN_BITFIELD) - return 0xf0; /* dst_reg,src_reg,... */ +#if defined(__LITTLE_ENDIAN_BITFIELD) /* src_reg,dst_reg,... */ + return gen->swapped_endian ? 0xf0 : 0x0f; +#elif defined(__BIG_ENDIAN_BITFIELD) /* dst_reg,src_reg,... */ + return gen->swapped_endian ? 0x0f : 0xf0; #else #error "Unsupported bit endianness, cannot proceed" #endif @@ -840,7 +874,7 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo, emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 3)); clear_src_reg: /* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */ - reg_mask = src_reg_mask(); + reg_mask = src_reg_mask(gen); emit(gen, BPF_LDX_MEM(BPF_B, BPF_REG_9, BPF_REG_8, offsetofend(struct bpf_insn, code))); emit(gen, BPF_ALU32_IMM(BPF_AND, BPF_REG_9, reg_mask)); emit(gen, BPF_STX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, offsetofend(struct bpf_insn, code))); @@ -931,48 +965,94 @@ static void cleanup_relos(struct bpf_gen *gen, int insns) cleanup_core_relo(gen); } +/* Convert func, line, and core relo info blobs to target endianness */ +static void info_blob_bswap(struct bpf_gen *gen, int func_info, int line_info, + int core_relos, struct bpf_prog_load_opts *load_attr) +{ + struct bpf_func_info *fi = gen->data_start + func_info; + struct bpf_line_info *li = gen->data_start + line_info; + struct bpf_core_relo *cr = gen->data_start + core_relos; + int i; + + for (i = 0; i < load_attr->func_info_cnt; i++) + bpf_func_info_bswap(fi++); + + for (i = 0; i < load_attr->line_info_cnt; i++) + bpf_line_info_bswap(li++); + + for (i = 0; i < gen->core_relo_cnt; i++) + bpf_core_relo_bswap(cr++); +} + void bpf_gen__prog_load(struct bpf_gen *gen, enum bpf_prog_type prog_type, const char *prog_name, const char *license, struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *load_attr, int prog_idx) { + int func_info_tot_sz = load_attr->func_info_cnt * + load_attr->func_info_rec_size; + int line_info_tot_sz = load_attr->line_info_cnt * + load_attr->line_info_rec_size; + int core_relo_tot_sz = gen->core_relo_cnt * + sizeof(struct bpf_core_relo); int prog_load_attr, license_off, insns_off, func_info, line_info, core_relos; int attr_size = offsetofend(union bpf_attr, core_relo_rec_size); union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: prog_load: type %d insns_cnt %zd progi_idx %d\n", - prog_type, insn_cnt, prog_idx); /* add license string to blob of bytes */ license_off = add_data(gen, license, strlen(license) + 1); /* add insns to blob of bytes */ insns_off = add_data(gen, insns, insn_cnt * sizeof(struct bpf_insn)); + pr_debug("gen: prog_load: prog_idx %d type %d insn off %d insns_cnt %zd license off %d\n", + prog_idx, prog_type, insns_off, insn_cnt, license_off); - attr.prog_type = prog_type; - attr.expected_attach_type = load_attr->expected_attach_type; - attr.attach_btf_id = load_attr->attach_btf_id; - attr.prog_ifindex = load_attr->prog_ifindex; - attr.kern_version = 0; - attr.insn_cnt = (__u32)insn_cnt; - attr.prog_flags = load_attr->prog_flags; - - attr.func_info_rec_size = load_attr->func_info_rec_size; - attr.func_info_cnt = load_attr->func_info_cnt; - func_info = add_data(gen, load_attr->func_info, - attr.func_info_cnt * attr.func_info_rec_size); + /* convert blob insns to target endianness */ + if (gen->swapped_endian) { + struct bpf_insn *insn = gen->data_start + insns_off; + int i; - attr.line_info_rec_size = load_attr->line_info_rec_size; - attr.line_info_cnt = load_attr->line_info_cnt; - line_info = add_data(gen, load_attr->line_info, - attr.line_info_cnt * attr.line_info_rec_size); + for (i = 0; i < insn_cnt; i++, insn++) + bpf_insn_bswap(insn); + } - attr.core_relo_rec_size = sizeof(struct bpf_core_relo); - attr.core_relo_cnt = gen->core_relo_cnt; - core_relos = add_data(gen, gen->core_relos, - attr.core_relo_cnt * attr.core_relo_rec_size); + attr.prog_type = tgt_endian(prog_type); + attr.expected_attach_type = tgt_endian(load_attr->expected_attach_type); + attr.attach_btf_id = tgt_endian(load_attr->attach_btf_id); + attr.prog_ifindex = tgt_endian(load_attr->prog_ifindex); + attr.kern_version = 0; + attr.insn_cnt = tgt_endian((__u32)insn_cnt); + attr.prog_flags = tgt_endian(load_attr->prog_flags); + + attr.func_info_rec_size = tgt_endian(load_attr->func_info_rec_size); + attr.func_info_cnt = tgt_endian(load_attr->func_info_cnt); + func_info = add_data(gen, load_attr->func_info, func_info_tot_sz); + pr_debug("gen: prog_load: func_info: off %d cnt %d rec size %d\n", + func_info, load_attr->func_info_cnt, + load_attr->func_info_rec_size); + + attr.line_info_rec_size = tgt_endian(load_attr->line_info_rec_size); + attr.line_info_cnt = tgt_endian(load_attr->line_info_cnt); + line_info = add_data(gen, load_attr->line_info, line_info_tot_sz); + pr_debug("gen: prog_load: line_info: off %d cnt %d rec size %d\n", + line_info, load_attr->line_info_cnt, + load_attr->line_info_rec_size); + + attr.core_relo_rec_size = tgt_endian((__u32)sizeof(struct bpf_core_relo)); + attr.core_relo_cnt = tgt_endian(gen->core_relo_cnt); + core_relos = add_data(gen, gen->core_relos, core_relo_tot_sz); + pr_debug("gen: prog_load: core_relos: off %d cnt %d rec size %zd\n", + core_relos, gen->core_relo_cnt, + sizeof(struct bpf_core_relo)); + + /* convert all info blobs to target endianness */ + if (gen->swapped_endian) + info_blob_bswap(gen, func_info, line_info, core_relos, load_attr); libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); prog_load_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: prog_load: attr: off %d size %d\n", + prog_load_attr, attr_size); /* populate union bpf_attr with a pointer to license */ emit_rel_store(gen, attr_field(prog_load_attr, license), license_off); @@ -1040,7 +1120,6 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, int zero = 0; memset(&attr, 0, attr_size); - pr_debug("gen: map_update_elem: idx %d\n", map_idx); value = add_data(gen, pvalue, value_size); key = add_data(gen, &zero, sizeof(zero)); @@ -1068,6 +1147,8 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue, emit(gen, BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel)); map_update_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: map_update_elem: idx %d, value: off %d size %d, attr: off %d size %d\n", + map_idx, value, value_size, map_update_attr, attr_size); move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, blob_fd_array_off(gen, map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); @@ -1084,14 +1165,16 @@ void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slo int attr_size = offsetofend(union bpf_attr, flags); int map_update_attr, key; union bpf_attr attr; + int tgt_slot; memset(&attr, 0, attr_size); - pr_debug("gen: populate_outer_map: outer %d key %d inner %d\n", - outer_map_idx, slot, inner_map_idx); - key = add_data(gen, &slot, sizeof(slot)); + tgt_slot = tgt_endian(slot); + key = add_data(gen, &tgt_slot, sizeof(tgt_slot)); map_update_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: populate_outer_map: outer %d key %d inner %d, attr: off %d size %d\n", + outer_map_idx, slot, inner_map_idx, map_update_attr, attr_size); move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, blob_fd_array_off(gen, outer_map_idx)); emit_rel_store(gen, attr_field(map_update_attr, key), key); @@ -1112,8 +1195,9 @@ void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx) union bpf_attr attr; memset(&attr, 0, attr_size); - pr_debug("gen: map_freeze: idx %d\n", map_idx); map_freeze_attr = add_data(gen, &attr, attr_size); + pr_debug("gen: map_freeze: idx %d, attr: off %d size %d\n", + map_idx, map_freeze_attr, attr_size); move_blob2blob(gen, attr_field(map_freeze_attr, map_fd), 4, blob_fd_array_off(gen, map_idx)); /* emit MAP_FREEZE command */ diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h index c12f8320e668..0c4f155e8eb7 100644 --- a/tools/lib/bpf/hashmap.h +++ b/tools/lib/bpf/hashmap.h @@ -166,8 +166,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry(map, cur, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; cur; cur = cur->next) + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; cur; cur = cur->next) /* * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe @@ -178,8 +178,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; \ + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; \ cur && ({tmp = cur->next; true; }); \ cur = tmp) @@ -190,19 +190,19 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur; \ cur = cur->next) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #endif /* __LIBBPF_HASHMAP_H */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 219facd0e66e..66173ddb5a2d 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -133,6 +133,7 @@ static const char * const attach_type_name[] = { [BPF_NETKIT_PRIMARY] = "netkit_primary", [BPF_NETKIT_PEER] = "netkit_peer", [BPF_TRACE_KPROBE_SESSION] = "trace_kprobe_session", + [BPF_TRACE_UPROBE_SESSION] = "trace_uprobe_session", }; static const char * const link_type_name[] = { @@ -694,6 +695,8 @@ struct bpf_object { /* Information when doing ELF related work. Only valid if efile.elf is not NULL */ struct elf_state efile; + unsigned char byteorder; + struct btf *btf; struct btf_ext *btf_ext; @@ -940,6 +943,20 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data, return 0; } +static void bpf_object_bswap_progs(struct bpf_object *obj) +{ + struct bpf_program *prog = obj->programs; + struct bpf_insn *insn; + int p, i; + + for (p = 0; p < obj->nr_programs; p++, prog++) { + insn = prog->insns; + for (i = 0; i < prog->insns_cnt; i++, insn++) + bpf_insn_bswap(insn); + } + pr_debug("converted %zu BPF programs to native byte order\n", obj->nr_programs); +} + static const struct btf_member * find_member_by_offset(const struct btf_type *t, __u32 bit_offset) { @@ -1506,6 +1523,7 @@ static void bpf_object__elf_finish(struct bpf_object *obj) elf_end(obj->efile.elf); obj->efile.elf = NULL; + obj->efile.ehdr = NULL; obj->efile.symbols = NULL; obj->efile.arena_data = NULL; @@ -1533,11 +1551,8 @@ static int bpf_object__elf_init(struct bpf_object *obj) } else { obj->efile.fd = open(obj->path, O_RDONLY | O_CLOEXEC); if (obj->efile.fd < 0) { - char errmsg[STRERR_BUFSIZE], *cp; - err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("elf: failed to open %s: %s\n", obj->path, cp); + pr_warn("elf: failed to open %s: %s\n", obj->path, errstr(err)); return err; } @@ -1571,6 +1586,16 @@ static int bpf_object__elf_init(struct bpf_object *obj) goto errout; } + /* Validate ELF object endianness... */ + if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB && + ehdr->e_ident[EI_DATA] != ELFDATA2MSB) { + err = -LIBBPF_ERRNO__ENDIAN; + pr_warn("elf: '%s' has unknown byte order\n", obj->path); + goto errout; + } + /* and save after bpf_object_open() frees ELF data */ + obj->byteorder = ehdr->e_ident[EI_DATA]; + if (elf_getshdrstrndx(elf, &obj->efile.shstrndx)) { pr_warn("elf: failed to get section names section index for %s: %s\n", obj->path, elf_errmsg(-1)); @@ -1599,19 +1624,15 @@ errout: return err; } -static int bpf_object__check_endianness(struct bpf_object *obj) +static bool is_native_endianness(struct bpf_object *obj) { #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2LSB) - return 0; + return obj->byteorder == ELFDATA2LSB; #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - if (obj->efile.ehdr->e_ident[EI_DATA] == ELFDATA2MSB) - return 0; + return obj->byteorder == ELFDATA2MSB; #else # error "Unrecognized __BYTE_ORDER__" #endif - pr_warn("elf: endianness mismatch in %s.\n", obj->path); - return -LIBBPF_ERRNO__ENDIAN; } static int @@ -1937,8 +1958,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, if (map->mmaped == MAP_FAILED) { err = -errno; map->mmaped = NULL; - pr_warn("failed to alloc map '%s' content buffer: %d\n", - map->name, err); + pr_warn("failed to alloc map '%s' content buffer: %s\n", map->name, errstr(err)); zfree(&map->real_name); zfree(&map->name); return err; @@ -2102,7 +2122,7 @@ static int parse_u64(const char *value, __u64 *res) *res = strtoull(value, &value_end, 0); if (errno) { err = -errno; - pr_warn("failed to parse '%s' as integer: %d\n", value, err); + pr_warn("failed to parse '%s': %s\n", value, errstr(err)); return err; } if (*value_end) { @@ -2268,8 +2288,8 @@ static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) while (gzgets(file, buf, sizeof(buf))) { err = bpf_object__process_kconfig_line(obj, buf, data); if (err) { - pr_warn("error parsing system Kconfig line '%s': %d\n", - buf, err); + pr_warn("error parsing system Kconfig line '%s': %s\n", + buf, errstr(err)); goto out; } } @@ -2289,15 +2309,15 @@ static int bpf_object__read_kconfig_mem(struct bpf_object *obj, file = fmemopen((void *)config, strlen(config), "r"); if (!file) { err = -errno; - pr_warn("failed to open in-memory Kconfig: %d\n", err); + pr_warn("failed to open in-memory Kconfig: %s\n", errstr(err)); return err; } while (fgets(buf, sizeof(buf), file)) { err = bpf_object__process_kconfig_line(obj, buf, data); if (err) { - pr_warn("error parsing in-memory Kconfig line '%s': %d\n", - buf, err); + pr_warn("error parsing in-memory Kconfig line '%s': %s\n", + buf, errstr(err)); break; } } @@ -3212,7 +3232,7 @@ static int bpf_object__init_btf(struct bpf_object *obj, err = libbpf_get_error(obj->btf); if (err) { obj->btf = NULL; - pr_warn("Error loading ELF section %s: %d.\n", BTF_ELF_SEC, err); + pr_warn("Error loading ELF section %s: %s.\n", BTF_ELF_SEC, errstr(err)); goto out; } /* enforce 8-byte pointers for BPF-targeted BTFs */ @@ -3230,8 +3250,8 @@ static int bpf_object__init_btf(struct bpf_object *obj, obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, btf_ext_data->d_size); err = libbpf_get_error(obj->btf_ext); if (err) { - pr_warn("Error loading ELF section %s: %d. Ignored and continue.\n", - BTF_EXT_ELF_SEC, err); + pr_warn("Error loading ELF section %s: %s. Ignored and continue.\n", + BTF_EXT_ELF_SEC, errstr(err)); obj->btf_ext = NULL; goto out; } @@ -3323,8 +3343,8 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, if (t->size == 0) { err = find_elf_sec_sz(obj, sec_name, &size); if (err || !size) { - pr_debug("sec '%s': failed to determine size from ELF: size %u, err %d\n", - sec_name, size, err); + pr_debug("sec '%s': failed to determine size from ELF: size %u, err %s\n", + sec_name, size, errstr(err)); return -ENOENT; } @@ -3478,7 +3498,7 @@ static int bpf_object__load_vmlinux_btf(struct bpf_object *obj, bool force) obj->btf_vmlinux = btf__load_vmlinux_btf(); err = libbpf_get_error(obj->btf_vmlinux); if (err) { - pr_warn("Error loading vmlinux BTF: %d\n", err); + pr_warn("Error loading vmlinux BTF: %s\n", errstr(err)); obj->btf_vmlinux = NULL; return err; } @@ -3581,11 +3601,14 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) report: if (err) { btf_mandatory = kernel_needs_btf(obj); - pr_warn("Error loading .BTF into kernel: %d. %s\n", err, - btf_mandatory ? "BTF is mandatory, can't proceed." - : "BTF is optional, ignoring."); - if (!btf_mandatory) + if (btf_mandatory) { + pr_warn("Error loading .BTF into kernel: %s. BTF is mandatory, can't proceed.\n", + errstr(err)); + } else { + pr_info("Error loading .BTF into kernel: %s. BTF is optional, ignoring.\n", + errstr(err)); err = 0; + } } return err; } @@ -3953,6 +3976,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj) return -LIBBPF_ERRNO__FORMAT; } + /* change BPF program insns to native endianness for introspection */ + if (!is_native_endianness(obj)) + bpf_object_bswap_progs(obj); + /* sort BPF programs by section name and in-section instruction offset * for faster search */ @@ -3985,7 +4012,7 @@ static bool sym_is_subprog(const Elf64_Sym *sym, int text_shndx) return true; /* global function */ - return bind == STB_GLOBAL && type == STT_FUNC; + return (bind == STB_GLOBAL || bind == STB_WEAK) && type == STT_FUNC; } static int find_extern_btf_id(const struct btf *btf, const char *ext_name) @@ -4389,7 +4416,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj) static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog) { - return prog->sec_idx == obj->efile.text_shndx && obj->nr_programs > 1; + return prog->sec_idx == obj->efile.text_shndx; } struct bpf_program * @@ -4783,8 +4810,8 @@ static int bpf_get_map_info_from_fdinfo(int fd, struct bpf_map_info *info) fp = fopen(file, "re"); if (!fp) { err = -errno; - pr_warn("failed to open %s: %d. No procfs support?\n", file, - err); + pr_warn("failed to open %s: %s. No procfs support?\n", file, + errstr(err)); return err; } @@ -4939,8 +4966,8 @@ static int bpf_object_prepare_token(struct bpf_object *obj) bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); if (bpffs_fd < 0) { err = -errno; - __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", - obj->name, err, bpffs_path, + __pr(level, "object '%s': failed (%s) to open BPF FS mount at '%s'%s\n", + obj->name, errstr(err), bpffs_path, mandatory ? "" : ", skipping optional step..."); return mandatory ? err : 0; } @@ -4974,7 +5001,6 @@ static int bpf_object_prepare_token(struct bpf_object *obj) static int bpf_object__probe_loading(struct bpf_object *obj) { - char *cp, errmsg[STRERR_BUFSIZE]; struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), @@ -4990,7 +5016,8 @@ bpf_object__probe_loading(struct bpf_object *obj) ret = bump_rlimit_memlock(); if (ret) - pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); + pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %s), you might need to do it explicitly!\n", + errstr(ret)); /* make sure basic loading works */ ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); @@ -4998,11 +5025,8 @@ bpf_object__probe_loading(struct bpf_object *obj) ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) { ret = errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF " - "program. Make sure your kernel supports BPF " - "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is " - "set to big enough value.\n", __func__, cp, ret); + pr_warn("Error in %s(): %s. Couldn't load trivial BPF program. Make sure your kernel supports BPF (CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is set to big enough value.\n", + __func__, errstr(ret)); return -ret; } close(ret); @@ -5027,7 +5051,6 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) { struct bpf_map_info map_info; - char msg[STRERR_BUFSIZE]; __u32 map_info_len = sizeof(map_info); int err; @@ -5037,7 +5060,7 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) err = bpf_get_map_info_from_fdinfo(map_fd, &map_info); if (err) { pr_warn("failed to get map info for map FD %d: %s\n", map_fd, - libbpf_strerror_r(errno, msg, sizeof(msg))); + errstr(err)); return false; } @@ -5052,7 +5075,6 @@ static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) static int bpf_object__reuse_map(struct bpf_map *map) { - char *cp, errmsg[STRERR_BUFSIZE]; int err, pin_fd; pin_fd = bpf_obj_get(map->pin_path); @@ -5064,9 +5086,8 @@ bpf_object__reuse_map(struct bpf_map *map) return 0; } - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); pr_warn("couldn't retrieve pinned map '%s': %s\n", - map->pin_path, cp); + map->pin_path, errstr(err)); return err; } @@ -5092,8 +5113,8 @@ static int bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) { enum libbpf_map_type map_type = map->libbpf_type; - char *cp, errmsg[STRERR_BUFSIZE]; int err, zero = 0; + size_t mmap_sz; if (obj->gen_loader) { bpf_gen__map_update_elem(obj->gen_loader, map - obj->maps, @@ -5106,9 +5127,8 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); if (err) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error setting initial map(%s) contents: %s\n", - map->name, cp); + pr_warn("map '%s': failed to set initial contents: %s\n", + bpf_map__name(map), errstr(err)); return err; } @@ -5117,12 +5137,43 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) err = bpf_map_freeze(map->fd); if (err) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error freezing map(%s) as read-only: %s\n", - map->name, cp); + pr_warn("map '%s': failed to freeze as read-only: %s\n", + bpf_map__name(map), errstr(err)); return err; } } + + /* Remap anonymous mmap()-ed "map initialization image" as + * a BPF map-backed mmap()-ed memory, but preserving the same + * memory address. This will cause kernel to change process' + * page table to point to a different piece of kernel memory, + * but from userspace point of view memory address (and its + * contents, being identical at this point) will stay the + * same. This mapping will be released by bpf_object__close() + * as per normal clean up procedure. + */ + mmap_sz = bpf_map_mmap_sz(map); + if (map->def.map_flags & BPF_F_MMAPABLE) { + void *mmaped; + int prot; + + if (map->def.map_flags & BPF_F_RDONLY_PROG) + prot = PROT_READ; + else + prot = PROT_READ | PROT_WRITE; + mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map->fd, 0); + if (mmaped == MAP_FAILED) { + err = -errno; + pr_warn("map '%s': failed to re-mmap() contents: %s\n", + bpf_map__name(map), errstr(err)); + return err; + } + map->mmaped = mmaped; + } else if (map->mmaped) { + munmap(map->mmaped, mmap_sz); + map->mmaped = NULL; + } + return 0; } @@ -5171,8 +5222,8 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b return err; err = bpf_object__create_map(obj, map->inner_map, true); if (err) { - pr_warn("map '%s': failed to create inner map: %d\n", - map->name, err); + pr_warn("map '%s': failed to create inner map: %s\n", + map->name, errstr(err)); return err; } map->inner_map_fd = map->inner_map->fd; @@ -5226,12 +5277,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b def->max_entries, &create_attr); } if (map_fd < 0 && (create_attr.btf_key_type_id || create_attr.btf_value_type_id)) { - char *cp, errmsg[STRERR_BUFSIZE]; - err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", - map->name, cp, err); + pr_warn("Error in bpf_create_map_xattr(%s): %s. Retrying without BTF.\n", + map->name, errstr(err)); create_attr.btf_fd = 0; create_attr.btf_key_type_id = 0; create_attr.btf_value_type_id = 0; @@ -5286,8 +5334,8 @@ static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map) } if (err) { err = -errno; - pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", - map->name, i, targ_map->name, fd, err); + pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %s\n", + map->name, i, targ_map->name, fd, errstr(err)); return err; } pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", @@ -5319,8 +5367,8 @@ static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) err = bpf_map_update_elem(map->fd, &i, &fd, 0); if (err) { err = -errno; - pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n", - map->name, i, targ_prog->name, fd, err); + pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %s\n", + map->name, i, targ_prog->name, fd, errstr(err)); return err; } pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", @@ -5373,7 +5421,6 @@ static int bpf_object__create_maps(struct bpf_object *obj) { struct bpf_map *map; - char *cp, errmsg[STRERR_BUFSIZE]; unsigned int i, j; int err; bool retried; @@ -5439,8 +5486,7 @@ retry: err = bpf_object__populate_internal_map(obj, map); if (err < 0) goto err_out; - } - if (map->def.type == BPF_MAP_TYPE_ARENA) { + } else if (map->def.type == BPF_MAP_TYPE_ARENA) { map->mmaped = mmap((void *)(long)map->map_extra, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, map->map_extra ? MAP_SHARED | MAP_FIXED : MAP_SHARED, @@ -5448,8 +5494,8 @@ retry: if (map->mmaped == MAP_FAILED) { err = -errno; map->mmaped = NULL; - pr_warn("map '%s': failed to mmap arena: %d\n", - map->name, err); + pr_warn("map '%s': failed to mmap arena: %s\n", + map->name, errstr(err)); return err; } if (obj->arena_data) { @@ -5471,8 +5517,8 @@ retry: retried = true; goto retry; } - pr_warn("map '%s': failed to auto-pin at '%s': %d\n", - map->name, map->pin_path, err); + pr_warn("map '%s': failed to auto-pin at '%s': %s\n", + map->name, map->pin_path, errstr(err)); goto err_out; } } @@ -5481,8 +5527,7 @@ retry: return 0; err_out: - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err); + pr_warn("map '%s': failed to create: %s\n", map->name, errstr(err)); pr_perm_msg(err); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); @@ -5606,7 +5651,7 @@ static int load_module_btfs(struct bpf_object *obj) } if (err) { err = -errno; - pr_warn("failed to iterate BTF objects: %d\n", err); + pr_warn("failed to iterate BTF objects: %s\n", errstr(err)); return err; } @@ -5615,7 +5660,7 @@ static int load_module_btfs(struct bpf_object *obj) if (errno == ENOENT) continue; /* expected race: BTF was unloaded */ err = -errno; - pr_warn("failed to get BTF object #%d FD: %d\n", id, err); + pr_warn("failed to get BTF object #%d FD: %s\n", id, errstr(err)); return err; } @@ -5627,7 +5672,7 @@ static int load_module_btfs(struct bpf_object *obj) err = bpf_btf_get_info_by_fd(fd, &info, &len); if (err) { err = -errno; - pr_warn("failed to get BTF object #%d info: %d\n", id, err); + pr_warn("failed to get BTF object #%d info: %s\n", id, errstr(err)); goto err_out; } @@ -5640,8 +5685,8 @@ static int load_module_btfs(struct bpf_object *obj) btf = btf_get_from_fd(fd, obj->btf_vmlinux); err = libbpf_get_error(btf); if (err) { - pr_warn("failed to load module [%s]'s BTF object #%d: %d\n", - name, id, err); + pr_warn("failed to load module [%s]'s BTF object #%d: %s\n", + name, id, errstr(err)); goto err_out; } @@ -5870,7 +5915,7 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) obj->btf_vmlinux_override = btf__parse(targ_btf_path, NULL); err = libbpf_get_error(obj->btf_vmlinux_override); if (err) { - pr_warn("failed to parse target BTF: %d\n", err); + pr_warn("failed to parse target BTF: %s\n", errstr(err)); return err; } } @@ -5930,8 +5975,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) err = record_relo_core(prog, rec, insn_idx); if (err) { - pr_warn("prog '%s': relo #%d: failed to record relocation: %d\n", - prog->name, i, err); + pr_warn("prog '%s': relo #%d: failed to record relocation: %s\n", + prog->name, i, errstr(err)); goto out; } @@ -5940,15 +5985,15 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path) err = bpf_core_resolve_relo(prog, rec, i, obj->btf, cand_cache, &targ_res); if (err) { - pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", - prog->name, i, err); + pr_warn("prog '%s': relo #%d: failed to relocate: %s\n", + prog->name, i, errstr(err)); goto out; } err = bpf_core_patch_insn(prog->name, insn, insn_idx, rec, i, &targ_res); if (err) { - pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %d\n", - prog->name, i, insn_idx, err); + pr_warn("prog '%s': relo #%d: failed to patch insn #%u: %s\n", + prog->name, i, insn_idx, errstr(err)); goto out; } } @@ -6216,8 +6261,8 @@ reloc_prog_func_and_line_info(const struct bpf_object *obj, &main_prog->func_info_rec_size); if (err) { if (err != -ENOENT) { - pr_warn("prog '%s': error relocating .BTF.ext function info: %d\n", - prog->name, err); + pr_warn("prog '%s': error relocating .BTF.ext function info: %s\n", + prog->name, errstr(err)); return err; } if (main_prog->func_info) { @@ -6244,8 +6289,8 @@ line_info: &main_prog->line_info_rec_size); if (err) { if (err != -ENOENT) { - pr_warn("prog '%s': error relocating .BTF.ext line info: %d\n", - prog->name, err); + pr_warn("prog '%s': error relocating .BTF.ext line info: %s\n", + prog->name, errstr(err)); return err; } if (main_prog->line_info) { @@ -7009,8 +7054,8 @@ static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_pat if (obj->btf_ext) { err = bpf_object__relocate_core(obj, targ_btf_path); if (err) { - pr_warn("failed to perform CO-RE relocations: %d\n", - err); + pr_warn("failed to perform CO-RE relocations: %s\n", + errstr(err)); return err; } bpf_object__sort_relos(obj); @@ -7054,8 +7099,8 @@ static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_pat err = bpf_object__relocate_calls(obj, prog); if (err) { - pr_warn("prog '%s': failed to relocate calls: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to relocate calls: %s\n", + prog->name, errstr(err)); return err; } @@ -7091,16 +7136,16 @@ static int bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_pat /* Process data relos for main programs */ err = bpf_object__relocate_data(obj, prog); if (err) { - pr_warn("prog '%s': failed to relocate data references: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to relocate data references: %s\n", + prog->name, errstr(err)); return err; } /* Fix up .BTF.ext information, if necessary */ err = bpf_program_fixup_func_info(obj, prog); if (err) { - pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to perform .BTF.ext fix ups: %s\n", + prog->name, errstr(err)); return err; } } @@ -7352,8 +7397,14 @@ static int libbpf_prepare_prog_load(struct bpf_program *prog, opts->prog_flags |= BPF_F_XDP_HAS_FRAGS; /* special check for usdt to use uprobe_multi link */ - if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) + if ((def & SEC_USDT) && kernel_supports(prog->obj, FEAT_UPROBE_MULTI_LINK)) { + /* for BPF_TRACE_UPROBE_MULTI, user might want to query expected_attach_type + * in prog, and expected_attach_type we set in kernel is from opts, so we + * update both. + */ prog->expected_attach_type = BPF_TRACE_UPROBE_MULTI; + opts->expected_attach_type = BPF_TRACE_UPROBE_MULTI; + } if ((def & SEC_ATTACH_BTF) && !prog->attach_btf_id) { int btf_obj_fd = 0, btf_type_id = 0, err; @@ -7403,7 +7454,6 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog { LIBBPF_OPTS(bpf_prog_load_opts, load_attr); const char *prog_name = NULL; - char *cp, errmsg[STRERR_BUFSIZE]; size_t log_buf_size = 0; char *log_buf = NULL, *tmp; bool own_log_buf = true; @@ -7443,6 +7493,7 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; + load_attr.expected_attach_type = prog->expected_attach_type; /* specify func_info/line_info only if kernel supports them */ if (obj->btf && btf__fd(obj->btf) >= 0 && kernel_supports(obj, FEAT_BTF_FUNC)) { @@ -7466,17 +7517,14 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); if (err < 0) { - pr_warn("prog '%s': failed to prepare load attributes: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to prepare load attributes: %s\n", + prog->name, errstr(err)); return err; } insns = prog->insns; insns_cnt = prog->insns_cnt; } - /* allow prog_prepare_load_fn to change expected_attach_type */ - load_attr.expected_attach_type = prog->expected_attach_type; - if (obj->gen_loader) { bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, license, insns, insns_cnt, &load_attr, @@ -7534,9 +7582,8 @@ retry_load: continue; if (bpf_prog_bind_map(ret, map->fd, NULL)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("prog '%s': failed to bind map '%s': %s\n", - prog->name, map->real_name, cp); + prog->name, map->real_name, errstr(errno)); /* Don't fail hard if can't bind rodata. */ } } @@ -7566,8 +7613,7 @@ retry_load: /* post-process verifier log to improve error descriptions */ fixup_verifier_log(prog, log_buf, log_buf_size); - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp); + pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, errstr(errno)); pr_perm_msg(ret); if (own_log_buf && log_buf && log_buf[0] != '\0') { @@ -7860,7 +7906,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) err = bpf_object_load_prog(obj, prog, prog->insns, prog->insns_cnt, obj->license, obj->kern_version, &prog->fd); if (err) { - pr_warn("prog '%s': failed to load: %d\n", prog->name, err); + pr_warn("prog '%s': failed to load: %s\n", prog->name, errstr(err)); return err; } } @@ -7894,8 +7940,8 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object if (prog->sec_def->prog_setup_fn) { err = prog->sec_def->prog_setup_fn(prog, prog->sec_def->cookie); if (err < 0) { - pr_warn("prog '%s': failed to initialize: %d\n", - prog->name, err); + pr_warn("prog '%s': failed to initialize: %s\n", + prog->name, errstr(err)); return err; } } @@ -7992,7 +8038,6 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, } err = bpf_object__elf_init(obj); - err = err ? : bpf_object__check_endianness(obj); err = err ? : bpf_object__elf_collect(obj); err = err ? : bpf_object__collect_externs(obj); err = err ? : bpf_object_fixup_btf(obj); @@ -8085,7 +8130,7 @@ static int libbpf_kallsyms_parse(kallsyms_cb_t cb, void *ctx) f = fopen("/proc/kallsyms", "re"); if (!f) { err = -errno; - pr_warn("failed to open /proc/kallsyms: %d\n", err); + pr_warn("failed to open /proc/kallsyms: %s\n", errstr(err)); return err; } @@ -8498,8 +8543,15 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch return libbpf_err(-EINVAL); } - if (obj->gen_loader) + /* Disallow kernel loading programs of non-native endianness but + * permit cross-endian creation of "light skeleton". + */ + if (obj->gen_loader) { bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); + } else if (!is_native_endianness(obj)) { + pr_warn("object '%s': loading non-native endianness is unsupported\n", obj->name); + return libbpf_err(-LIBBPF_ERRNO__ENDIAN); + } err = bpf_object_prepare_token(obj); err = err ? : bpf_object__probe_loading(obj); @@ -8562,7 +8614,6 @@ int bpf_object__load(struct bpf_object *obj) static int make_parent_dir(const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; char *dname, *dir; int err = 0; @@ -8576,15 +8627,13 @@ static int make_parent_dir(const char *path) free(dname); if (err) { - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warn("failed to mkdir %s: %s\n", path, cp); + pr_warn("failed to mkdir %s: %s\n", path, errstr(err)); } return err; } static int check_path(const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; struct statfs st_fs; char *dname, *dir; int err = 0; @@ -8598,8 +8647,7 @@ static int check_path(const char *path) dir = dirname(dname); if (statfs(dir, &st_fs)) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); - pr_warn("failed to statfs %s: %s\n", dir, cp); + pr_warn("failed to statfs %s: %s\n", dir, errstr(errno)); err = -errno; } free(dname); @@ -8614,7 +8662,6 @@ static int check_path(const char *path) int bpf_program__pin(struct bpf_program *prog, const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; int err; if (prog->fd < 0) { @@ -8632,8 +8679,7 @@ int bpf_program__pin(struct bpf_program *prog, const char *path) if (bpf_obj_pin(prog->fd, path)) { err = -errno; - cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); - pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, cp); + pr_warn("prog '%s': failed to pin at '%s': %s\n", prog->name, path, errstr(err)); return libbpf_err(err); } @@ -8664,7 +8710,6 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path) int bpf_map__pin(struct bpf_map *map, const char *path) { - char *cp, errmsg[STRERR_BUFSIZE]; int err; if (map == NULL) { @@ -8723,8 +8768,7 @@ int bpf_map__pin(struct bpf_map *map, const char *path) return 0; out_err: - cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); - pr_warn("failed to pin map: %s\n", cp); + pr_warn("failed to pin map: %s\n", errstr(err)); return libbpf_err(err); } @@ -9096,6 +9140,7 @@ int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) if (!gen) return -ENOMEM; gen->opts = opts; + gen->swapped_endian = !is_native_endianness(obj); obj->gen_loader = gen; return 0; } @@ -9370,8 +9415,10 @@ static const struct bpf_sec_def section_defs[] = { SEC_DEF("kprobe.session+", KPROBE, BPF_TRACE_KPROBE_SESSION, SEC_NONE, attach_kprobe_session), SEC_DEF("uprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), SEC_DEF("uretprobe.multi+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_NONE, attach_uprobe_multi), + SEC_DEF("uprobe.session+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_NONE, attach_uprobe_multi), SEC_DEF("uprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), SEC_DEF("uretprobe.multi.s+", KPROBE, BPF_TRACE_UPROBE_MULTI, SEC_SLEEPABLE, attach_uprobe_multi), + SEC_DEF("uprobe.session.s+", KPROBE, BPF_TRACE_UPROBE_SESSION, SEC_SLEEPABLE, attach_uprobe_multi), SEC_DEF("ksyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), SEC_DEF("kretsyscall+", KPROBE, 0, SEC_NONE, attach_ksyscall), SEC_DEF("usdt+", KPROBE, 0, SEC_USDT, attach_usdt), @@ -9910,8 +9957,8 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) memset(&info, 0, info_len); err = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len); if (err) { - pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %d\n", - attach_prog_fd, err); + pr_warn("failed bpf_prog_get_info_by_fd for FD %d: %s\n", + attach_prog_fd, errstr(err)); return err; } @@ -9923,7 +9970,7 @@ static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) btf = btf__load_from_kernel_by_id(info.btf_id); err = libbpf_get_error(btf); if (err) { - pr_warn("Failed to get BTF %d of the program: %d\n", info.btf_id, err); + pr_warn("Failed to get BTF %d of the program: %s\n", info.btf_id, errstr(err)); goto out; } err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); @@ -10005,8 +10052,8 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac } err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd); if (err < 0) { - pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n", - prog->name, attach_prog_fd, attach_name, err); + pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %s\n", + prog->name, attach_prog_fd, attach_name, errstr(err)); return err; } *btf_obj_fd = 0; @@ -10025,8 +10072,8 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac btf_type_id); } if (err) { - pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n", - prog->name, attach_name, err); + pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %s\n", + prog->name, attach_name, errstr(err)); return err; } return 0; @@ -10254,14 +10301,14 @@ int bpf_map__set_value_size(struct bpf_map *map, __u32 size) mmap_new_sz = array_map_mmap_sz(size, map->def.max_entries); err = bpf_map_mmap_resize(map, mmap_old_sz, mmap_new_sz); if (err) { - pr_warn("map '%s': failed to resize memory-mapped region: %d\n", - bpf_map__name(map), err); + pr_warn("map '%s': failed to resize memory-mapped region: %s\n", + bpf_map__name(map), errstr(err)); return err; } err = map_btf_datasec_resize(map, size); if (err && err != -ENOENT) { - pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %d\n", - bpf_map__name(map), err); + pr_warn("map '%s': failed to adjust resized BTF, clearing BTF key/value info: %s\n", + bpf_map__name(map), errstr(err)); map->btf_value_type_id = 0; map->btf_key_type_id = 0; } @@ -10752,7 +10799,6 @@ static void bpf_link_perf_dealloc(struct bpf_link *link) struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *prog, int pfd, const struct bpf_perf_event_opts *opts) { - char errmsg[STRERR_BUFSIZE]; struct bpf_link_perf *link; int prog_fd, link_fd = -1, err; bool force_ioctl_attach; @@ -10787,9 +10833,8 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p link_fd = bpf_link_create(prog_fd, pfd, BPF_PERF_EVENT, &link_opts); if (link_fd < 0) { err = -errno; - pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %d (%s)\n", - prog->name, pfd, - err, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + pr_warn("prog '%s': failed to create BPF link for perf_event FD %d: %s\n", + prog->name, pfd, errstr(err)); goto err_out; } link->link.fd = link_fd; @@ -10803,7 +10848,7 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { err = -errno; pr_warn("prog '%s': failed to attach to perf_event FD %d: %s\n", - prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + prog->name, pfd, errstr(err)); if (err == -EPROTO) pr_warn("prog '%s': try add PERF_SAMPLE_CALLCHAIN to or remove exclude_callchain_[kernel|user] from pfd %d\n", prog->name, pfd); @@ -10814,7 +10859,7 @@ struct bpf_link *bpf_program__attach_perf_event_opts(const struct bpf_program *p if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) { err = -errno; pr_warn("prog '%s': failed to enable perf_event FD %d: %s\n", - prog->name, pfd, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + prog->name, pfd, errstr(err)); goto err_out; } @@ -10838,22 +10883,19 @@ struct bpf_link *bpf_program__attach_perf_event(const struct bpf_program *prog, */ static int parse_uint_from_file(const char *file, const char *fmt) { - char buf[STRERR_BUFSIZE]; int err, ret; FILE *f; f = fopen(file, "re"); if (!f) { err = -errno; - pr_debug("failed to open '%s': %s\n", file, - libbpf_strerror_r(err, buf, sizeof(buf))); + pr_debug("failed to open '%s': %s\n", file, errstr(err)); return err; } err = fscanf(f, fmt, &ret); if (err != 1) { err = err == EOF ? -EIO : -errno; - pr_debug("failed to parse '%s': %s\n", file, - libbpf_strerror_r(err, buf, sizeof(buf))); + pr_debug("failed to parse '%s': %s\n", file, errstr(err)); fclose(f); return err; } @@ -10897,7 +10939,6 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, { const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_event_attr attr; - char errmsg[STRERR_BUFSIZE]; int type, pfd; if ((__u64)ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS)) @@ -10910,7 +10951,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, if (type < 0) { pr_warn("failed to determine %s perf type: %s\n", uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(type, errmsg, sizeof(errmsg))); + errstr(type)); return type; } if (retprobe) { @@ -10920,7 +10961,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name, if (bit < 0) { pr_warn("failed to determine %s retprobe bit: %s\n", uprobe ? "uprobe" : "kprobe", - libbpf_strerror_r(bit, errmsg, sizeof(errmsg))); + errstr(bit)); return bit; } attr.config |= 1 << bit; @@ -11049,14 +11090,13 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, { const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_event_attr attr; - char errmsg[STRERR_BUFSIZE]; int type, pfd, err; err = add_kprobe_event_legacy(probe_name, retprobe, kfunc_name, offset); if (err < 0) { pr_warn("failed to add legacy kprobe event for '%s+0x%zx': %s\n", kfunc_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); return err; } type = determine_kprobe_perf_type_legacy(probe_name, retprobe); @@ -11064,7 +11104,7 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, err = type; pr_warn("failed to determine legacy kprobe event id for '%s+0x%zx': %s\n", kfunc_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_clean_legacy; } @@ -11080,7 +11120,7 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe, if (pfd < 0) { err = -errno; pr_warn("legacy kprobe perf_event_open() failed: %s\n", - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_clean_legacy; } return pfd; @@ -11156,7 +11196,6 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); enum probe_attach_mode attach_mode; - char errmsg[STRERR_BUFSIZE]; char *legacy_probe = NULL; struct bpf_link *link; size_t offset; @@ -11214,7 +11253,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, pr_warn("prog '%s': failed to create %s '%s+0x%zx' perf event: %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_out; } link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); @@ -11224,7 +11263,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog, pr_warn("prog '%s': failed to attach to %s '%s+0x%zx': %s\n", prog->name, retprobe ? "kretprobe" : "kprobe", func_name, offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_clean_legacy; } if (legacy) { @@ -11360,7 +11399,7 @@ static int libbpf_available_kallsyms_parse(struct kprobe_multi_resolve *res) f = fopen(available_functions_file, "re"); if (!f) { err = -errno; - pr_warn("failed to open %s: %d\n", available_functions_file, err); + pr_warn("failed to open %s: %s\n", available_functions_file, errstr(err)); return err; } @@ -11435,7 +11474,7 @@ static int libbpf_available_kprobes_parse(struct kprobe_multi_resolve *res) f = fopen(available_path, "re"); if (!f) { err = -errno; - pr_warn("failed to open %s: %d\n", available_path, err); + pr_warn("failed to open %s: %s\n", available_path, errstr(err)); return err; } @@ -11481,7 +11520,6 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, }; enum bpf_attach_type attach_type; struct bpf_link *link = NULL; - char errmsg[STRERR_BUFSIZE]; const unsigned long *addrs; int err, link_fd, prog_fd; bool retprobe, session; @@ -11549,7 +11587,7 @@ bpf_program__attach_kprobe_multi_opts(const struct bpf_program *prog, if (link_fd < 0) { err = -errno; pr_warn("prog '%s': failed to attach: %s\n", - prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + prog->name, errstr(err)); goto error; } link->fd = link_fd; @@ -11693,7 +11731,9 @@ static int attach_uprobe_multi(const struct bpf_program *prog, long cookie, stru ret = 0; break; case 3: + opts.session = str_has_pfx(probe_type, "uprobe.session"); opts.retprobe = str_has_pfx(probe_type, "uretprobe.multi"); + *link = bpf_program__attach_uprobe_multi(prog, -1, binary_path, func_name, &opts); ret = libbpf_get_error(*link); break; @@ -11756,15 +11796,15 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, err = add_uprobe_event_legacy(probe_name, retprobe, binary_path, offset); if (err < 0) { - pr_warn("failed to add legacy uprobe event for %s:0x%zx: %d\n", - binary_path, (size_t)offset, err); + pr_warn("failed to add legacy uprobe event for %s:0x%zx: %s\n", + binary_path, (size_t)offset, errstr(err)); return err; } type = determine_uprobe_perf_type_legacy(probe_name, retprobe); if (type < 0) { err = type; - pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %d\n", - binary_path, offset, err); + pr_warn("failed to determine legacy uprobe event id for %s:0x%zx: %s\n", + binary_path, offset, errstr(err)); goto err_clean_legacy; } @@ -11779,7 +11819,7 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe, -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC); if (pfd < 0) { err = -errno; - pr_warn("legacy uprobe perf_event_open() failed: %d\n", err); + pr_warn("legacy uprobe perf_event_open() failed: %s\n", errstr(err)); goto err_clean_legacy; } return pfd; @@ -11942,10 +11982,11 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, const unsigned long *ref_ctr_offsets = NULL, *offsets = NULL; LIBBPF_OPTS(bpf_link_create_opts, lopts); unsigned long *resolved_offsets = NULL; + enum bpf_attach_type attach_type; int err = 0, link_fd, prog_fd; struct bpf_link *link = NULL; - char errmsg[STRERR_BUFSIZE]; char full_path[PATH_MAX]; + bool retprobe, session; const __u64 *cookies; const char **syms; size_t cnt; @@ -11965,6 +12006,8 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, ref_ctr_offsets = OPTS_GET(opts, ref_ctr_offsets, NULL); cookies = OPTS_GET(opts, cookies, NULL); cnt = OPTS_GET(opts, cnt, 0); + retprobe = OPTS_GET(opts, retprobe, false); + session = OPTS_GET(opts, session, false); /* * User can specify 2 mutually exclusive set of inputs: @@ -11993,12 +12036,15 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, return libbpf_err_ptr(-EINVAL); } + if (retprobe && session) + return libbpf_err_ptr(-EINVAL); + if (func_pattern) { if (!strchr(path, '/')) { err = resolve_full_path(path, full_path, sizeof(full_path)); if (err) { - pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", - prog->name, path, err); + pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", + prog->name, path, errstr(err)); return libbpf_err_ptr(err); } path = full_path; @@ -12016,12 +12062,14 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, offsets = resolved_offsets; } + attach_type = session ? BPF_TRACE_UPROBE_SESSION : BPF_TRACE_UPROBE_MULTI; + lopts.uprobe_multi.path = path; lopts.uprobe_multi.offsets = offsets; lopts.uprobe_multi.ref_ctr_offsets = ref_ctr_offsets; lopts.uprobe_multi.cookies = cookies; lopts.uprobe_multi.cnt = cnt; - lopts.uprobe_multi.flags = OPTS_GET(opts, retprobe, false) ? BPF_F_UPROBE_MULTI_RETURN : 0; + lopts.uprobe_multi.flags = retprobe ? BPF_F_UPROBE_MULTI_RETURN : 0; if (pid == 0) pid = getpid(); @@ -12035,11 +12083,11 @@ bpf_program__attach_uprobe_multi(const struct bpf_program *prog, } link->detach = &bpf_link__detach_fd; - link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &lopts); + link_fd = bpf_link_create(prog_fd, 0, attach_type, &lopts); if (link_fd < 0) { err = -errno; pr_warn("prog '%s': failed to attach multi-uprobe: %s\n", - prog->name, libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + prog->name, errstr(err)); goto error; } link->fd = link_fd; @@ -12058,7 +12106,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, const struct bpf_uprobe_opts *opts) { const char *archive_path = NULL, *archive_sep = NULL; - char errmsg[STRERR_BUFSIZE], *legacy_probe = NULL; + char *legacy_probe = NULL; DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); enum probe_attach_mode attach_mode; char full_path[PATH_MAX]; @@ -12090,8 +12138,8 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, } else if (!strchr(binary_path, '/')) { err = resolve_full_path(binary_path, full_path, sizeof(full_path)); if (err) { - pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", - prog->name, binary_path, err); + pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", + prog->name, binary_path, errstr(err)); return libbpf_err_ptr(err); } binary_path = full_path; @@ -12157,7 +12205,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pr_warn("prog '%s': failed to create %s '%s:0x%zx' perf event: %s\n", prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_out; } @@ -12168,7 +12216,7 @@ bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid, pr_warn("prog '%s': failed to attach to %s '%s:0x%zx': %s\n", prog->name, retprobe ? "uretprobe" : "uprobe", binary_path, func_offset, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); goto err_clean_legacy; } if (legacy) { @@ -12289,8 +12337,8 @@ struct bpf_link *bpf_program__attach_usdt(const struct bpf_program *prog, if (!strchr(binary_path, '/')) { err = resolve_full_path(binary_path, resolved_path, sizeof(resolved_path)); if (err) { - pr_warn("prog '%s': failed to resolve full path for '%s': %d\n", - prog->name, binary_path, err); + pr_warn("prog '%s': failed to resolve full path for '%s': %s\n", + prog->name, binary_path, errstr(err)); return libbpf_err_ptr(err); } binary_path = resolved_path; @@ -12368,14 +12416,13 @@ static int perf_event_open_tracepoint(const char *tp_category, { const size_t attr_sz = sizeof(struct perf_event_attr); struct perf_event_attr attr; - char errmsg[STRERR_BUFSIZE]; int tp_id, pfd, err; tp_id = determine_tracepoint_id(tp_category, tp_name); if (tp_id < 0) { pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", tp_category, tp_name, - libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); + errstr(tp_id)); return tp_id; } @@ -12390,7 +12437,7 @@ static int perf_event_open_tracepoint(const char *tp_category, err = -errno; pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); return err; } return pfd; @@ -12402,7 +12449,6 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p const struct bpf_tracepoint_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, pe_opts); - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int pfd, err; @@ -12415,7 +12461,7 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p if (pfd < 0) { pr_warn("prog '%s': failed to create tracepoint '%s/%s' perf event: %s\n", prog->name, tp_category, tp_name, - libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + errstr(pfd)); return libbpf_err_ptr(pfd); } link = bpf_program__attach_perf_event_opts(prog, pfd, &pe_opts); @@ -12424,7 +12470,7 @@ struct bpf_link *bpf_program__attach_tracepoint_opts(const struct bpf_program *p close(pfd); pr_warn("prog '%s': failed to attach to tracepoint '%s/%s': %s\n", prog->name, tp_category, tp_name, - libbpf_strerror_r(err, errmsg, sizeof(errmsg))); + errstr(err)); return libbpf_err_ptr(err); } return link; @@ -12475,7 +12521,6 @@ bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog, struct bpf_raw_tracepoint_opts *opts) { LIBBPF_OPTS(bpf_raw_tp_opts, raw_opts); - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, pfd; @@ -12500,7 +12545,7 @@ bpf_program__attach_raw_tracepoint_opts(const struct bpf_program *prog, pfd = -errno; free(link); pr_warn("prog '%s': failed to attach to raw tracepoint '%s': %s\n", - prog->name, tp_name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + prog->name, tp_name, errstr(pfd)); return libbpf_err_ptr(pfd); } link->fd = pfd; @@ -12559,7 +12604,6 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro const struct bpf_trace_opts *opts) { LIBBPF_OPTS(bpf_link_create_opts, link_opts); - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, pfd; @@ -12584,7 +12628,7 @@ static struct bpf_link *bpf_program__attach_btf_id(const struct bpf_program *pro pfd = -errno; free(link); pr_warn("prog '%s': failed to attach: %s\n", - prog->name, libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); + prog->name, errstr(pfd)); return libbpf_err_ptr(pfd); } link->fd = pfd; @@ -12625,7 +12669,6 @@ bpf_program_attach_fd(const struct bpf_program *prog, const struct bpf_link_create_opts *opts) { enum bpf_attach_type attach_type; - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, link_fd; @@ -12647,7 +12690,7 @@ bpf_program_attach_fd(const struct bpf_program *prog, free(link); pr_warn("prog '%s': failed to attach to %s: %s\n", prog->name, target_name, - libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + errstr(link_fd)); return libbpf_err_ptr(link_fd); } link->fd = link_fd; @@ -12760,7 +12803,7 @@ struct bpf_link *bpf_program__attach_freplace(const struct bpf_program *prog, } if (prog->type != BPF_PROG_TYPE_EXT) { - pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace", + pr_warn("prog '%s': only BPF_PROG_TYPE_EXT can attach as freplace\n", prog->name); return libbpf_err_ptr(-EINVAL); } @@ -12789,7 +12832,6 @@ bpf_program__attach_iter(const struct bpf_program *prog, const struct bpf_iter_attach_opts *opts) { DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); - char errmsg[STRERR_BUFSIZE]; struct bpf_link *link; int prog_fd, link_fd; __u32 target_fd = 0; @@ -12817,7 +12859,7 @@ bpf_program__attach_iter(const struct bpf_program *prog, link_fd = -errno; free(link); pr_warn("prog '%s': failed to attach to iterator: %s\n", - prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + prog->name, errstr(link_fd)); return libbpf_err_ptr(link_fd); } link->fd = link_fd; @@ -12859,12 +12901,10 @@ struct bpf_link *bpf_program__attach_netfilter(const struct bpf_program *prog, link_fd = bpf_link_create(prog_fd, 0, BPF_NETFILTER, &lopts); if (link_fd < 0) { - char errmsg[STRERR_BUFSIZE]; - link_fd = -errno; free(link); pr_warn("prog '%s': failed to attach to netfilter: %s\n", - prog->name, libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg))); + prog->name, errstr(link_fd)); return libbpf_err_ptr(link_fd); } link->fd = link_fd; @@ -13149,7 +13189,6 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, int cpu, int map_key) { struct perf_cpu_buf *cpu_buf; - char msg[STRERR_BUFSIZE]; int err; cpu_buf = calloc(1, sizeof(*cpu_buf)); @@ -13165,7 +13204,7 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, if (cpu_buf->fd < 0) { err = -errno; pr_warn("failed to open perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + cpu, errstr(err)); goto error; } @@ -13176,14 +13215,14 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr, cpu_buf->base = NULL; err = -errno; pr_warn("failed to mmap perf buffer on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + cpu, errstr(err)); goto error; } if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) { err = -errno; pr_warn("failed to enable perf buffer event on cpu #%d: %s\n", - cpu, libbpf_strerror_r(err, msg, sizeof(msg))); + cpu, errstr(err)); goto error; } @@ -13259,7 +13298,6 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, { const char *online_cpus_file = "/sys/devices/system/cpu/online"; struct bpf_map_info map; - char msg[STRERR_BUFSIZE]; struct perf_buffer *pb; bool *online = NULL; __u32 map_info_len; @@ -13282,7 +13320,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, */ if (err != -EINVAL) { pr_warn("failed to get map info for map FD %d: %s\n", - map_fd, libbpf_strerror_r(err, msg, sizeof(msg))); + map_fd, errstr(err)); return ERR_PTR(err); } pr_debug("failed to get map info for FD %d; API not supported? Ignoring...\n", @@ -13312,7 +13350,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, if (pb->epoll_fd < 0) { err = -errno; pr_warn("failed to create epoll instance: %s\n", - libbpf_strerror_r(err, msg, sizeof(msg))); + errstr(err)); goto error; } @@ -13343,7 +13381,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = parse_cpu_mask_file(online_cpus_file, &online, &n); if (err) { - pr_warn("failed to get online CPU mask: %d\n", err); + pr_warn("failed to get online CPU mask: %s\n", errstr(err)); goto error; } @@ -13374,7 +13412,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = -errno; pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n", cpu, map_key, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); + errstr(err)); goto error; } @@ -13385,7 +13423,7 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, err = -errno; pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", cpu, cpu_buf->fd, - libbpf_strerror_r(err, msg, sizeof(msg))); + errstr(err)); goto error; } j++; @@ -13480,7 +13518,7 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms) err = perf_buffer__process_records(pb, cpu_buf); if (err) { - pr_warn("error while processing records: %d\n", err); + pr_warn("error while processing records: %s\n", errstr(err)); return libbpf_err(err); } } @@ -13564,7 +13602,8 @@ int perf_buffer__consume(struct perf_buffer *pb) err = perf_buffer__process_records(pb, cpu_buf); if (err) { - pr_warn("perf_buffer: failed to process records in buffer #%d: %d\n", i, err); + pr_warn("perf_buffer: failed to process records in buffer #%d: %s\n", + i, errstr(err)); return libbpf_err(err); } } @@ -13675,14 +13714,14 @@ int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) fd = open(fcpu, O_RDONLY | O_CLOEXEC); if (fd < 0) { err = -errno; - pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); + pr_warn("Failed to open cpu mask file %s: %s\n", fcpu, errstr(err)); return err; } len = read(fd, buf, sizeof(buf)); close(fd); if (len <= 0) { err = len ? -errno : -EINVAL; - pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err); + pr_warn("Failed to read cpu mask from %s: %s\n", fcpu, errstr(err)); return err; } if (len >= sizeof(buf)) { @@ -13774,20 +13813,21 @@ int bpf_object__open_skeleton(struct bpf_object_skeleton *s, obj = bpf_object_open(NULL, s->data, s->data_sz, s->name, opts); if (IS_ERR(obj)) { err = PTR_ERR(obj); - pr_warn("failed to initialize skeleton BPF object '%s': %d\n", s->name, err); + pr_warn("failed to initialize skeleton BPF object '%s': %s\n", + s->name, errstr(err)); return libbpf_err(err); } *s->obj = obj; err = populate_skeleton_maps(obj, s->maps, s->map_cnt, s->map_skel_sz); if (err) { - pr_warn("failed to populate skeleton maps for '%s': %d\n", s->name, err); + pr_warn("failed to populate skeleton maps for '%s': %s\n", s->name, errstr(err)); return libbpf_err(err); } err = populate_skeleton_progs(obj, s->progs, s->prog_cnt, s->prog_skel_sz); if (err) { - pr_warn("failed to populate skeleton progs for '%s': %d\n", s->name, err); + pr_warn("failed to populate skeleton progs for '%s': %s\n", s->name, errstr(err)); return libbpf_err(err); } @@ -13817,13 +13857,13 @@ int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) err = populate_skeleton_maps(s->obj, s->maps, s->map_cnt, s->map_skel_sz); if (err) { - pr_warn("failed to populate subskeleton maps: %d\n", err); + pr_warn("failed to populate subskeleton maps: %s\n", errstr(err)); return libbpf_err(err); } err = populate_skeleton_progs(s->obj, s->progs, s->prog_cnt, s->prog_skel_sz); if (err) { - pr_warn("failed to populate subskeleton maps: %d\n", err); + pr_warn("failed to populate subskeleton maps: %s\n", errstr(err)); return libbpf_err(err); } @@ -13834,7 +13874,7 @@ int bpf_object__open_subskeleton(struct bpf_object_subskeleton *s) map_type = btf__type_by_id(btf, map_type_id); if (!btf_is_datasec(map_type)) { - pr_warn("type for map '%1$s' is not a datasec: %2$s", + pr_warn("type for map '%1$s' is not a datasec: %2$s\n", bpf_map__name(map), __btf_kind_str(btf_kind(map_type))); return libbpf_err(-EINVAL); @@ -13870,53 +13910,18 @@ int bpf_object__load_skeleton(struct bpf_object_skeleton *s) err = bpf_object__load(*s->obj); if (err) { - pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err); + pr_warn("failed to load BPF skeleton '%s': %s\n", s->name, errstr(err)); return libbpf_err(err); } for (i = 0; i < s->map_cnt; i++) { struct bpf_map_skeleton *map_skel = (void *)s->maps + i * s->map_skel_sz; struct bpf_map *map = *map_skel->map; - size_t mmap_sz = bpf_map_mmap_sz(map); - int prot, map_fd = map->fd; - void **mmaped = map_skel->mmaped; - if (!mmaped) + if (!map_skel->mmaped) continue; - if (!(map->def.map_flags & BPF_F_MMAPABLE)) { - *mmaped = NULL; - continue; - } - - if (map->def.type == BPF_MAP_TYPE_ARENA) { - *mmaped = map->mmaped; - continue; - } - - if (map->def.map_flags & BPF_F_RDONLY_PROG) - prot = PROT_READ; - else - prot = PROT_READ | PROT_WRITE; - - /* Remap anonymous mmap()-ed "map initialization image" as - * a BPF map-backed mmap()-ed memory, but preserving the same - * memory address. This will cause kernel to change process' - * page table to point to a different piece of kernel memory, - * but from userspace point of view memory address (and its - * contents, being identical at this point) will stay the - * same. This mapping will be released by bpf_object__close() - * as per normal clean up procedure, so we don't need to worry - * about it from skeleton's clean up perspective. - */ - *mmaped = mmap(map->mmaped, mmap_sz, prot, MAP_SHARED | MAP_FIXED, map_fd, 0); - if (*mmaped == MAP_FAILED) { - err = -errno; - *mmaped = NULL; - pr_warn("failed to re-mmap() map '%s': %d\n", - bpf_map__name(map), err); - return libbpf_err(err); - } + *map_skel->mmaped = map->mmaped; } return 0; @@ -13944,8 +13949,8 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) err = prog->sec_def->prog_attach_fn(prog, prog->sec_def->cookie, link); if (err) { - pr_warn("prog '%s': failed to auto-attach: %d\n", - bpf_program__name(prog), err); + pr_warn("prog '%s': failed to auto-attach: %s\n", + bpf_program__name(prog), errstr(err)); return libbpf_err(err); } @@ -13988,7 +13993,8 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) *link = bpf_map__attach_struct_ops(map); if (!*link) { err = -errno; - pr_warn("map '%s': failed to auto-attach: %d\n", bpf_map__name(map), err); + pr_warn("map '%s': failed to auto-attach: %s\n", + bpf_map__name(map), errstr(err)); return libbpf_err(err); } } diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 91484303849c..b2ce3a72b11d 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -577,10 +577,12 @@ struct bpf_uprobe_multi_opts { size_t cnt; /* create return uprobes */ bool retprobe; + /* create session kprobes */ + bool session; size_t :0; }; -#define bpf_uprobe_multi_opts__last_field retprobe +#define bpf_uprobe_multi_opts__last_field session /** * @brief **bpf_program__attach_uprobe_multi()** attaches a BPF program diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 0096e483f7eb..54b6f312cfa8 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -421,6 +421,8 @@ LIBBPF_1.5.0 { global: btf__distill_base; btf__relocate; + btf_ext__endianness; + btf_ext__set_endianness; bpf_map__autoattach; bpf_map__set_autoattach; bpf_object__token_fd; @@ -428,3 +430,6 @@ LIBBPF_1.5.0 { ring__consume_n; ring_buffer__consume_n; } LIBBPF_1.4.0; + +LIBBPF_1.6.0 { +} LIBBPF_1.5.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 408df59e0771..de498e2dd6b0 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -10,6 +10,7 @@ #define __LIBBPF_LIBBPF_INTERNAL_H #include <stdlib.h> +#include <byteswap.h> #include <limits.h> #include <errno.h> #include <linux/err.h> @@ -448,11 +449,11 @@ struct btf_ext_info { * * The func_info subsection layout: * record size for struct bpf_func_info in the func_info subsection - * struct btf_sec_func_info for section #1 + * struct btf_ext_info_sec for section #1 * a list of bpf_func_info records for section #1 * where struct bpf_func_info mimics one in include/uapi/linux/bpf.h * but may not be identical - * struct btf_sec_func_info for section #2 + * struct btf_ext_info_sec for section #2 * a list of bpf_func_info records for section #2 * ...... * @@ -484,6 +485,8 @@ struct btf_ext { struct btf_ext_header *hdr; void *data; }; + void *data_swapped; + bool swapped_endian; struct btf_ext_info func_info; struct btf_ext_info line_info; struct btf_ext_info core_relo_info; @@ -511,6 +514,32 @@ struct bpf_line_info_min { __u32 line_col; }; +/* Functions to byte-swap info records */ + +typedef void (*info_rec_bswap_fn)(void *); + +static inline void bpf_func_info_bswap(struct bpf_func_info *i) +{ + i->insn_off = bswap_32(i->insn_off); + i->type_id = bswap_32(i->type_id); +} + +static inline void bpf_line_info_bswap(struct bpf_line_info *i) +{ + i->insn_off = bswap_32(i->insn_off); + i->file_name_off = bswap_32(i->file_name_off); + i->line_off = bswap_32(i->line_off); + i->line_col = bswap_32(i->line_col); +} + +static inline void bpf_core_relo_bswap(struct bpf_core_relo *i) +{ + i->insn_off = bswap_32(i->insn_off); + i->type_id = bswap_32(i->type_id); + i->access_str_off = bswap_32(i->access_str_off); + i->kind = bswap_32(i->kind); +} + enum btf_field_iter_kind { BTF_FIELD_ITER_IDS, BTF_FIELD_ITER_STRS, @@ -588,6 +617,16 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +static inline void bpf_insn_bswap(struct bpf_insn *insn) +{ + __u8 tmp_reg = insn->dst_reg; + + insn->dst_reg = insn->src_reg; + insn->src_reg = tmp_reg; + insn->off = bswap_16(insn->off); + insn->imm = bswap_32(insn->imm); +} + /* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. * Original FD is not closed or altered in any other way. * Preserves original FD value, if it's invalid (negative). diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index d6e5eff967cb..28c58fb17250 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@ #define __LIBBPF_VERSION_H #define LIBBPF_MAJOR_VERSION 1 -#define LIBBPF_MINOR_VERSION 5 +#define LIBBPF_MINOR_VERSION 6 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index e0005c6ade88..cf71d149fe26 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -20,6 +20,7 @@ #include "btf.h" #include "libbpf_internal.h" #include "strset.h" +#include "str_error.h" #define BTF_EXTERN_SEC ".extern" @@ -135,6 +136,7 @@ struct bpf_linker { int fd; Elf *elf; Elf64_Ehdr *elf_hdr; + bool swapped_endian; /* Output sections metadata */ struct dst_sec *secs; @@ -305,7 +307,7 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0644); if (linker->fd < 0) { err = -errno; - pr_warn("failed to create '%s': %d\n", file, err); + pr_warn("failed to create '%s': %s\n", file, errstr(err)); return err; } @@ -324,13 +326,8 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) linker->elf_hdr->e_machine = EM_BPF; linker->elf_hdr->e_type = ET_REL; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB; -#else -#error "Unknown __BYTE_ORDER__" -#endif + /* Set unknown ELF endianness, assign later from input files */ + linker->elf_hdr->e_ident[EI_DATA] = ELFDATANONE; /* STRTAB */ /* initialize strset with an empty string to conform to ELF */ @@ -396,6 +393,8 @@ static int init_output_elf(struct bpf_linker *linker, const char *file) pr_warn_elf("failed to create SYMTAB data"); return -EINVAL; } + /* Ensure libelf translates byte-order of symbol records */ + sec->data->d_type = ELF_T_SYM; str_off = strset__add_str(linker->strtab_strs, sec->sec_name); if (str_off < 0) @@ -539,19 +538,21 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, const struct bpf_linker_file_opts *opts, struct src_obj *obj) { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - const int host_endianness = ELFDATA2LSB; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - const int host_endianness = ELFDATA2MSB; -#else -#error "Unknown __BYTE_ORDER__" -#endif int err = 0; Elf_Scn *scn; Elf_Data *data; Elf64_Ehdr *ehdr; Elf64_Shdr *shdr; struct src_sec *sec; + unsigned char obj_byteorder; + unsigned char link_byteorder = linker->elf_hdr->e_ident[EI_DATA]; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + const unsigned char host_byteorder = ELFDATA2LSB; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + const unsigned char host_byteorder = ELFDATA2MSB; +#else +#error "Unknown __BYTE_ORDER__" +#endif pr_debug("linker: adding object file '%s'...\n", filename); @@ -560,7 +561,7 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, obj->fd = open(filename, O_RDONLY | O_CLOEXEC); if (obj->fd < 0) { err = -errno; - pr_warn("failed to open file '%s': %d\n", filename, err); + pr_warn("failed to open file '%s': %s\n", filename, errstr(err)); return err; } obj->elf = elf_begin(obj->fd, ELF_C_READ_MMAP, NULL); @@ -577,11 +578,25 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, pr_warn_elf("failed to get ELF header for %s", filename); return err; } - if (ehdr->e_ident[EI_DATA] != host_endianness) { + + /* Linker output endianness set by first input object */ + obj_byteorder = ehdr->e_ident[EI_DATA]; + if (obj_byteorder != ELFDATA2LSB && obj_byteorder != ELFDATA2MSB) { err = -EOPNOTSUPP; - pr_warn_elf("unsupported byte order of ELF file %s", filename); + pr_warn("unknown byte order of ELF file %s\n", filename); return err; } + if (link_byteorder == ELFDATANONE) { + linker->elf_hdr->e_ident[EI_DATA] = obj_byteorder; + linker->swapped_endian = obj_byteorder != host_byteorder; + pr_debug("linker: set %s-endian output byte order\n", + obj_byteorder == ELFDATA2MSB ? "big" : "little"); + } else if (link_byteorder != obj_byteorder) { + err = -EOPNOTSUPP; + pr_warn("byte order mismatch with ELF file %s\n", filename); + return err; + } + if (ehdr->e_type != ET_REL || ehdr->e_machine != EM_BPF || ehdr->e_ident[EI_CLASS] != ELFCLASS64) { @@ -656,7 +671,8 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, obj->btf = btf__new(data->d_buf, shdr->sh_size); err = libbpf_get_error(obj->btf); if (err) { - pr_warn("failed to parse .BTF from %s: %d\n", filename, err); + pr_warn("failed to parse .BTF from %s: %s\n", + filename, errstr(err)); return err; } sec->skipped = true; @@ -666,7 +682,8 @@ static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, obj->btf_ext = btf_ext__new(data->d_buf, shdr->sh_size); err = libbpf_get_error(obj->btf_ext); if (err) { - pr_warn("failed to parse .BTF.ext from '%s': %d\n", filename, err); + pr_warn("failed to parse .BTF.ext from '%s': %s\n", + filename, errstr(err)); return err; } sec->skipped = true; @@ -1109,6 +1126,24 @@ static bool sec_content_is_same(struct dst_sec *dst_sec, struct src_sec *src_sec return true; } +static bool is_exec_sec(struct dst_sec *sec) +{ + if (!sec || sec->ephemeral) + return false; + return (sec->shdr->sh_type == SHT_PROGBITS) && + (sec->shdr->sh_flags & SHF_EXECINSTR); +} + +static void exec_sec_bswap(void *raw_data, int size) +{ + const int insn_cnt = size / sizeof(struct bpf_insn); + struct bpf_insn *insn = raw_data; + int i; + + for (i = 0; i < insn_cnt; i++, insn++) + bpf_insn_bswap(insn); +} + static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src_sec *src) { void *tmp; @@ -1168,6 +1203,10 @@ static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src memset(dst->raw_data + dst->sec_sz, 0, dst_align_sz - dst->sec_sz); /* now copy src data at a properly aligned offset */ memcpy(dst->raw_data + dst_align_sz, src->data->d_buf, src->shdr->sh_size); + + /* convert added bpf insns to native byte-order */ + if (linker->swapped_endian && is_exec_sec(dst)) + exec_sec_bswap(dst->raw_data + dst_align_sz, src->shdr->sh_size); } dst->sec_sz = dst_final_sz; @@ -2415,6 +2454,10 @@ static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj) if (glob_sym && glob_sym->var_idx >= 0) { __s64 sz; + /* FUNCs don't have size, nothing to update */ + if (btf_is_func(t)) + continue; + dst_var = &dst_sec->sec_vars[glob_sym->var_idx]; /* Because underlying BTF type might have * changed, so might its size have changed, so @@ -2628,6 +2671,10 @@ int bpf_linker__finalize(struct bpf_linker *linker) if (!sec->scn) continue; + /* restore sections with bpf insns to target byte-order */ + if (linker->swapped_endian && is_exec_sec(sec)) + exec_sec_bswap(sec->raw_data, sec->sec_sz); + sec->data->d_buf = sec->raw_data; } @@ -2696,6 +2743,7 @@ static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name, static int finalize_btf(struct bpf_linker *linker) { + enum btf_endianness link_endianness; LIBBPF_OPTS(btf_dedup_opts, opts); struct btf *btf = linker->btf; const void *raw_data; @@ -2729,17 +2777,24 @@ static int finalize_btf(struct bpf_linker *linker) err = finalize_btf_ext(linker); if (err) { - pr_warn(".BTF.ext generation failed: %d\n", err); + pr_warn(".BTF.ext generation failed: %s\n", errstr(err)); return err; } opts.btf_ext = linker->btf_ext; err = btf__dedup(linker->btf, &opts); if (err) { - pr_warn("BTF dedup failed: %d\n", err); + pr_warn("BTF dedup failed: %s\n", errstr(err)); return err; } + /* Set .BTF and .BTF.ext output byte order */ + link_endianness = linker->elf_hdr->e_ident[EI_DATA] == ELFDATA2MSB ? + BTF_BIG_ENDIAN : BTF_LITTLE_ENDIAN; + btf__set_endianness(linker->btf, link_endianness); + if (linker->btf_ext) + btf_ext__set_endianness(linker->btf_ext, link_endianness); + /* Emit .BTF section */ raw_data = btf__raw_data(linker->btf, &raw_sz); if (!raw_data) @@ -2747,7 +2802,7 @@ static int finalize_btf(struct bpf_linker *linker) err = emit_elf_data_sec(linker, BTF_ELF_SEC, 8, raw_data, raw_sz); if (err) { - pr_warn("failed to write out .BTF ELF section: %d\n", err); + pr_warn("failed to write out .BTF ELF section: %s\n", errstr(err)); return err; } @@ -2759,7 +2814,7 @@ static int finalize_btf(struct bpf_linker *linker) err = emit_elf_data_sec(linker, BTF_EXT_ELF_SEC, 8, raw_data, raw_sz); if (err) { - pr_warn("failed to write out .BTF.ext ELF section: %d\n", err); + pr_warn("failed to write out .BTF.ext ELF section: %s\n", errstr(err)); return err; } } @@ -2935,7 +2990,7 @@ static int finalize_btf_ext(struct bpf_linker *linker) err = libbpf_get_error(linker->btf_ext); if (err) { linker->btf_ext = NULL; - pr_warn("failed to parse final .BTF.ext data: %d\n", err); + pr_warn("failed to parse final .BTF.ext data: %s\n", errstr(err)); goto out; } diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index 63a4d5ad12d1..7632e9d41827 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -1339,7 +1339,7 @@ int bpf_core_calc_relo_insn(const char *prog_name, cands->cands[i].id, cand_spec); if (err < 0) { bpf_core_format_spec(spec_buf, sizeof(spec_buf), cand_spec); - pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n ", + pr_warn("prog '%s': relo #%d: error matching candidate #%d %s: %d\n", prog_name, relo_idx, i, spec_buf, err); return err; } diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index bfd8dac4c0cc..9702b70da444 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -21,6 +21,7 @@ #include "libbpf.h" #include "libbpf_internal.h" #include "bpf.h" +#include "str_error.h" struct ring { ring_buffer_sample_fn sample_cb; @@ -88,8 +89,8 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, err = bpf_map_get_info_by_fd(map_fd, &info, &len); if (err) { err = -errno; - pr_warn("ringbuf: failed to get map info for fd=%d: %d\n", - map_fd, err); + pr_warn("ringbuf: failed to get map info for fd=%d: %s\n", + map_fd, errstr(err)); return libbpf_err(err); } @@ -123,8 +124,8 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, 0); if (tmp == MAP_FAILED) { err = -errno; - pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n", - map_fd, err); + pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %s\n", + map_fd, errstr(err)); goto err_out; } r->consumer_pos = tmp; @@ -142,8 +143,8 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; - pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n", - map_fd, err); + pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %s\n", + map_fd, errstr(err)); goto err_out; } r->producer_pos = tmp; @@ -156,8 +157,8 @@ int ring_buffer__add(struct ring_buffer *rb, int map_fd, e->data.fd = rb->ring_cnt; if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) { err = -errno; - pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n", - map_fd, err); + pr_warn("ringbuf: failed to epoll add map fd=%d: %s\n", + map_fd, errstr(err)); goto err_out; } @@ -205,7 +206,7 @@ ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx, rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (rb->epoll_fd < 0) { err = -errno; - pr_warn("ringbuf: failed to create epoll instance: %d\n", err); + pr_warn("ringbuf: failed to create epoll instance: %s\n", errstr(err)); goto err_out; } @@ -458,7 +459,8 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) err = bpf_map_get_info_by_fd(map_fd, &info, &len); if (err) { err = -errno; - pr_warn("user ringbuf: failed to get map info for fd=%d: %d\n", map_fd, err); + pr_warn("user ringbuf: failed to get map info for fd=%d: %s\n", + map_fd, errstr(err)); return err; } @@ -474,8 +476,8 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) tmp = mmap(NULL, rb->page_size, PROT_READ, MAP_SHARED, map_fd, 0); if (tmp == MAP_FAILED) { err = -errno; - pr_warn("user ringbuf: failed to mmap consumer page for map fd=%d: %d\n", - map_fd, err); + pr_warn("user ringbuf: failed to mmap consumer page for map fd=%d: %s\n", + map_fd, errstr(err)); return err; } rb->consumer_pos = tmp; @@ -494,8 +496,8 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) map_fd, rb->page_size); if (tmp == MAP_FAILED) { err = -errno; - pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n", - map_fd, err); + pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %s\n", + map_fd, errstr(err)); return err; } @@ -506,7 +508,7 @@ static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd) rb_epoll->events = EPOLLOUT; if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, rb_epoll) < 0) { err = -errno; - pr_warn("user ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err); + pr_warn("user ringbuf: failed to epoll add map fd=%d: %s\n", map_fd, errstr(err)); return err; } @@ -531,7 +533,7 @@ user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts) rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC); if (rb->epoll_fd < 0) { err = -errno; - pr_warn("user ringbuf: failed to create epoll instance: %d\n", err); + pr_warn("user ringbuf: failed to create epoll instance: %s\n", errstr(err)); goto err_out; } diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index 0875452521e9..4d5fa079b5d6 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -351,10 +351,11 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts) attr.test.ctx_size_in = opts->ctx->sz; err = skel_sys_bpf(BPF_PROG_RUN, &attr, test_run_attr_sz); if (err < 0 || (int)attr.test.retval < 0) { - opts->errstr = "failed to execute loader prog"; if (err < 0) { + opts->errstr = "failed to execute loader prog"; set_err; } else { + opts->errstr = "error returned by loader prog"; err = (int)attr.test.retval; #ifndef __KERNEL__ errno = -err; diff --git a/tools/lib/bpf/str_error.c b/tools/lib/bpf/str_error.c index 5e6a1e27ddf9..8743049e32b7 100644 --- a/tools/lib/bpf/str_error.c +++ b/tools/lib/bpf/str_error.c @@ -5,6 +5,10 @@ #include <errno.h> #include "str_error.h" +#ifndef ENOTSUPP +#define ENOTSUPP 524 +#endif + /* make sure libbpf doesn't use kernel-only integer typedefs */ #pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64 @@ -31,3 +35,70 @@ char *libbpf_strerror_r(int err, char *dst, int len) } return dst; } + +const char *errstr(int err) +{ + static __thread char buf[12]; + + if (err > 0) + err = -err; + + switch (err) { + case -E2BIG: return "-E2BIG"; + case -EACCES: return "-EACCES"; + case -EADDRINUSE: return "-EADDRINUSE"; + case -EADDRNOTAVAIL: return "-EADDRNOTAVAIL"; + case -EAGAIN: return "-EAGAIN"; + case -EALREADY: return "-EALREADY"; + case -EBADF: return "-EBADF"; + case -EBADFD: return "-EBADFD"; + case -EBUSY: return "-EBUSY"; + case -ECANCELED: return "-ECANCELED"; + case -ECHILD: return "-ECHILD"; + case -EDEADLK: return "-EDEADLK"; + case -EDOM: return "-EDOM"; + case -EEXIST: return "-EEXIST"; + case -EFAULT: return "-EFAULT"; + case -EFBIG: return "-EFBIG"; + case -EILSEQ: return "-EILSEQ"; + case -EINPROGRESS: return "-EINPROGRESS"; + case -EINTR: return "-EINTR"; + case -EINVAL: return "-EINVAL"; + case -EIO: return "-EIO"; + case -EISDIR: return "-EISDIR"; + case -ELOOP: return "-ELOOP"; + case -EMFILE: return "-EMFILE"; + case -EMLINK: return "-EMLINK"; + case -EMSGSIZE: return "-EMSGSIZE"; + case -ENAMETOOLONG: return "-ENAMETOOLONG"; + case -ENFILE: return "-ENFILE"; + case -ENODATA: return "-ENODATA"; + case -ENODEV: return "-ENODEV"; + case -ENOENT: return "-ENOENT"; + case -ENOEXEC: return "-ENOEXEC"; + case -ENOLINK: return "-ENOLINK"; + case -ENOMEM: return "-ENOMEM"; + case -ENOSPC: return "-ENOSPC"; + case -ENOTBLK: return "-ENOTBLK"; + case -ENOTDIR: return "-ENOTDIR"; + case -ENOTSUPP: return "-ENOTSUPP"; + case -ENOTTY: return "-ENOTTY"; + case -ENXIO: return "-ENXIO"; + case -EOPNOTSUPP: return "-EOPNOTSUPP"; + case -EOVERFLOW: return "-EOVERFLOW"; + case -EPERM: return "-EPERM"; + case -EPIPE: return "-EPIPE"; + case -EPROTO: return "-EPROTO"; + case -EPROTONOSUPPORT: return "-EPROTONOSUPPORT"; + case -ERANGE: return "-ERANGE"; + case -EROFS: return "-EROFS"; + case -ESPIPE: return "-ESPIPE"; + case -ESRCH: return "-ESRCH"; + case -ETXTBSY: return "-ETXTBSY"; + case -EUCLEAN: return "-EUCLEAN"; + case -EXDEV: return "-EXDEV"; + default: + snprintf(buf, sizeof(buf), "%d", err); + return buf; + } +} diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h index 626d7ffb03d6..66ffebde0684 100644 --- a/tools/lib/bpf/str_error.h +++ b/tools/lib/bpf/str_error.h @@ -6,4 +6,11 @@ char *libbpf_strerror_r(int err, char *dst, int len); +/** + * @brief **errstr()** returns string corresponding to numeric errno + * @param err negative numeric errno + * @return pointer to string representation of the errno, that is invalidated + * upon the next call. + */ +const char *errstr(int err); #endif /* __LIBBPF_STR_ERROR_H */ diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 93794f01bb67..5f085736c6c4 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -20,6 +20,7 @@ #include "libbpf_common.h" #include "libbpf_internal.h" #include "hashmap.h" +#include "str_error.h" /* libbpf's USDT support consists of BPF-side state/code and user-space * state/code working together in concert. BPF-side parts are defined in @@ -465,8 +466,8 @@ static int parse_vma_segs(int pid, const char *lib_path, struct elf_seg **segs, goto proceed; if (!realpath(lib_path, path)) { - pr_warn("usdt: failed to get absolute path of '%s' (err %d), using path as is...\n", - lib_path, -errno); + pr_warn("usdt: failed to get absolute path of '%s' (err %s), using path as is...\n", + lib_path, errstr(-errno)); libbpf_strlcpy(path, lib_path, sizeof(path)); } @@ -475,8 +476,8 @@ proceed: f = fopen(line, "re"); if (!f) { err = -errno; - pr_warn("usdt: failed to open '%s' to get base addr of '%s': %d\n", - line, lib_path, err); + pr_warn("usdt: failed to open '%s' to get base addr of '%s': %s\n", + line, lib_path, errstr(err)); return err; } @@ -606,7 +607,8 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * err = parse_elf_segs(elf, path, &segs, &seg_cnt); if (err) { - pr_warn("usdt: failed to process ELF program segments for '%s': %d\n", path, err); + pr_warn("usdt: failed to process ELF program segments for '%s': %s\n", + path, errstr(err)); goto err_out; } @@ -708,8 +710,8 @@ static int collect_usdt_targets(struct usdt_manager *man, Elf *elf, const char * if (vma_seg_cnt == 0) { err = parse_vma_segs(pid, path, &vma_segs, &vma_seg_cnt); if (err) { - pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %d\n", - pid, path, err); + pr_warn("usdt: failed to get memory segments in PID %d for shared library '%s': %s\n", + pid, path, errstr(err)); goto err_out; } } @@ -1047,8 +1049,8 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct if (is_new && bpf_map_update_elem(spec_map_fd, &spec_id, &target->spec, BPF_ANY)) { err = -errno; - pr_warn("usdt: failed to set USDT spec #%d for '%s:%s' in '%s': %d\n", - spec_id, usdt_provider, usdt_name, path, err); + pr_warn("usdt: failed to set USDT spec #%d for '%s:%s' in '%s': %s\n", + spec_id, usdt_provider, usdt_name, path, errstr(err)); goto err_out; } if (!man->has_bpf_cookie && @@ -1058,9 +1060,9 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct pr_warn("usdt: IP collision detected for spec #%d for '%s:%s' in '%s'\n", spec_id, usdt_provider, usdt_name, path); } else { - pr_warn("usdt: failed to map IP 0x%lx to spec #%d for '%s:%s' in '%s': %d\n", + pr_warn("usdt: failed to map IP 0x%lx to spec #%d for '%s:%s' in '%s': %s\n", target->abs_ip, spec_id, usdt_provider, usdt_name, - path, err); + path, errstr(err)); } goto err_out; } @@ -1076,8 +1078,8 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct target->rel_ip, &opts); err = libbpf_get_error(uprobe_link); if (err) { - pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %d\n", - i, usdt_provider, usdt_name, path, err); + pr_warn("usdt: failed to attach uprobe #%d for '%s:%s' in '%s': %s\n", + i, usdt_provider, usdt_name, path, errstr(err)); goto err_out; } @@ -1099,8 +1101,8 @@ struct bpf_link *usdt_manager_attach_usdt(struct usdt_manager *man, const struct NULL, &opts_multi); if (!link->multi_link) { err = -errno; - pr_warn("usdt: failed to attach uprobe multi for '%s:%s' in '%s': %d\n", - usdt_provider, usdt_name, path, err); + pr_warn("usdt: failed to attach uprobe multi for '%s:%s' in '%s': %s\n", + usdt_provider, usdt_name, path, errstr(err)); goto err_out; } diff --git a/tools/lib/bpf/zip.c b/tools/lib/bpf/zip.c index 3f26d629b2b4..88c376a8348d 100644 --- a/tools/lib/bpf/zip.c +++ b/tools/lib/bpf/zip.c @@ -223,7 +223,7 @@ struct zip_archive *zip_archive_open(const char *path) if (!archive) { munmap(data, size); return ERR_PTR(-ENOMEM); - }; + } archive->data = data; archive->size = size; diff --git a/tools/lib/list_sort.c b/tools/lib/list_sort.c index 69affa251fa7..bb99e493dcd1 100644 --- a/tools/lib/list_sort.c +++ b/tools/lib/list_sort.c @@ -1,8 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> #include <linux/compiler.h> #include <linux/export.h> -#include <linux/string.h> #include <linux/list_sort.h> #include <linux/list.h> diff --git a/tools/lib/perf/Documentation/Makefile b/tools/lib/perf/Documentation/Makefile index 972754082a85..573ca5b27556 100644 --- a/tools/lib/perf/Documentation/Makefile +++ b/tools/lib/perf/Documentation/Makefile @@ -121,7 +121,7 @@ install-man: all $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \ $(INSTALL) -m 644 $(MAN_7) $(DESTDIR)$(man7dir); -install-html: +install-html: $(MAN_HTML) $(call QUIET_INSTALL, html) \ $(INSTALL) -d -m 755 $(DESTDIR)$(htmldir); \ $(INSTALL) -m 644 $(MAN_HTML) $(DESTDIR)$(htmldir); \ diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index c07160953224..c475319e2e41 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -5,6 +5,7 @@ #include <perf/evsel.h> #include <perf/cpumap.h> #include <perf/threadmap.h> +#include <linux/hash.h> #include <linux/list.h> #include <internal/evsel.h> #include <linux/zalloc.h> @@ -23,6 +24,7 @@ void perf_evsel__init(struct perf_evsel *evsel, struct perf_event_attr *attr, int idx) { INIT_LIST_HEAD(&evsel->node); + INIT_LIST_HEAD(&evsel->per_stream_periods); evsel->attr = *attr; evsel->idx = idx; evsel->leader = evsel; @@ -531,10 +533,56 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) void perf_evsel__free_id(struct perf_evsel *evsel) { + struct perf_sample_id_period *pos, *n; + xyarray__delete(evsel->sample_id); evsel->sample_id = NULL; zfree(&evsel->id); evsel->ids = 0; + + perf_evsel_for_each_per_thread_period_safe(evsel, n, pos) { + list_del_init(&pos->node); + free(pos); + } +} + +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel) +{ + return (evsel->attr.sample_type & PERF_SAMPLE_READ) && + (evsel->attr.sample_type & PERF_SAMPLE_TID) && + evsel->attr.inherit; +} + +u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, bool per_thread) +{ + struct hlist_head *head; + struct perf_sample_id_period *res; + int hash; + + if (!per_thread) + return &sid->period; + + hash = hash_32(tid, PERF_SAMPLE_ID__HLIST_BITS); + head = &sid->periods[hash]; + + hlist_for_each_entry(res, head, hnode) + if (res->tid == tid) + return &res->period; + + if (sid->evsel == NULL) + return NULL; + + res = zalloc(sizeof(struct perf_sample_id_period)); + if (res == NULL) + return NULL; + + INIT_LIST_HEAD(&res->node); + res->tid = tid; + + list_add_tail(&res->node, &sid->evsel->per_stream_periods); + hlist_add_head(&res->hnode, &sid->periods[hash]); + + return &res->period; } void perf_counts_values__scale(struct perf_counts_values *count, diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index 5cd220a61962..ea78defa77d0 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -11,6 +11,32 @@ struct perf_thread_map; struct xyarray; +/** + * The per-thread accumulated period storage node. + */ +struct perf_sample_id_period { + struct list_head node; + struct hlist_node hnode; + /* Holds total ID period value for PERF_SAMPLE_READ processing. */ + u64 period; + /* The TID that the values belongs to */ + u32 tid; +}; + +/** + * perf_evsel_for_each_per_thread_period_safe - safely iterate thru all the + * per_stream_periods + * @evlist:perf_evsel instance to iterate + * @item: struct perf_sample_id_period iterator + * @tmp: struct perf_sample_id_period temp iterator + */ +#define perf_evsel_for_each_per_thread_period_safe(evsel, tmp, item) \ + list_for_each_entry_safe(item, tmp, &(evsel)->per_stream_periods, node) + + +#define PERF_SAMPLE_ID__HLIST_BITS 4 +#define PERF_SAMPLE_ID__HLIST_SIZE (1 << PERF_SAMPLE_ID__HLIST_BITS) + /* * Per fd, to map back from PERF_SAMPLE_ID to evsel, only used when there are * more than one entry in the evlist. @@ -34,8 +60,32 @@ struct perf_sample_id { pid_t machine_pid; struct perf_cpu vcpu; - /* Holds total ID period value for PERF_SAMPLE_READ processing. */ - u64 period; + /* + * Per-thread, and global event counts are mutually exclusive: + * Whilst it is possible to combine events into a group with differing + * values of PERF_SAMPLE_READ, it is not valid to have inconsistent + * values for `inherit`. Therefore it is not possible to have a + * situation where a per-thread event is sampled as a global event; + * all !inherit groups are global, and all groups where the sampling + * event is inherit + PERF_SAMPLE_READ will be per-thread. Any event + * that is part of such a group that is inherit but not PERF_SAMPLE_READ + * will be read as per-thread. If such an event can also trigger a + * sample (such as with sample_period > 0) then it will not cause + * `read_format` to be included in its PERF_RECORD_SAMPLE, and + * therefore will not expose the per-thread group members as global. + */ + union { + /* + * Holds total ID period value for PERF_SAMPLE_READ processing + * (when period is not per-thread). + */ + u64 period; + /* + * Holds total ID period value for PERF_SAMPLE_READ processing + * (when period is per-thread). + */ + struct hlist_head periods[PERF_SAMPLE_ID__HLIST_SIZE]; + }; }; struct perf_evsel { @@ -58,6 +108,10 @@ struct perf_evsel { u32 ids; struct perf_evsel *leader; + /* For events where the read_format value is per-thread rather than + * global, stores the per-thread cumulative period */ + struct list_head per_stream_periods; + /* parse modifier helper */ int nr_members; /* @@ -88,4 +142,9 @@ int perf_evsel__apply_filter(struct perf_evsel *evsel, const char *filter); int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); void perf_evsel__free_id(struct perf_evsel *evsel); +bool perf_evsel__attr_has_per_thread_sample_period(struct perf_evsel *evsel); + +u64 *perf_sample_id__get_period_storage(struct perf_sample_id *sid, u32 tid, + bool per_thread); + #endif /* __LIBPERF_INTERNAL_EVSEL_H */ diff --git a/tools/lib/subcmd/parse-options.c b/tools/lib/subcmd/parse-options.c index eb896d30545b..555d617c1f50 100644 --- a/tools/lib/subcmd/parse-options.c +++ b/tools/lib/subcmd/parse-options.c @@ -807,7 +807,7 @@ static int option__cmp(const void *va, const void *vb) static struct option *options__order(const struct option *opts) { int nr_opts = 0, nr_group = 0, nr_parent = 0, len; - const struct option *o, *p = opts; + const struct option *o = NULL, *p = opts; struct option *opt, *ordered = NULL, *group; /* flatten the options that have parents */ diff --git a/tools/lib/subcmd/run-command.c b/tools/lib/subcmd/run-command.c index 4e3a557a2f37..0a764c25c384 100644 --- a/tools/lib/subcmd/run-command.c +++ b/tools/lib/subcmd/run-command.c @@ -2,6 +2,7 @@ #include <unistd.h> #include <sys/types.h> #include <sys/stat.h> +#include <ctype.h> #include <fcntl.h> #include <string.h> #include <linux/string.h> @@ -217,8 +218,40 @@ static int wait_or_whine(struct child_process *cmd, bool block) int check_if_command_finished(struct child_process *cmd) { +#ifdef __linux__ + char filename[FILENAME_MAX + 12]; + char status_line[256]; + FILE *status_file; + + /* + * Check by reading /proc/<pid>/status as calling waitpid causes + * stdout/stderr to be closed and data lost. + */ + sprintf(filename, "/proc/%d/status", cmd->pid); + status_file = fopen(filename, "r"); + if (status_file == NULL) { + /* Open failed assume finish_command was called. */ + return true; + } + while (fgets(status_line, sizeof(status_line), status_file) != NULL) { + char *p; + + if (strncmp(status_line, "State:", 6)) + continue; + + fclose(status_file); + p = status_line + 6; + while (isspace(*p)) + p++; + return *p == 'Z' ? 1 : 0; + } + /* Read failed assume finish_command was called. */ + fclose(status_file); + return 1; +#else wait_or_whine(cmd, /*block=*/false); return cmd->finished; +#endif } int finish_command(struct child_process *cmd) diff --git a/tools/lib/subcmd/subcmd-util.h b/tools/lib/subcmd/subcmd-util.h index dfac76e35ac7..c742b08815dc 100644 --- a/tools/lib/subcmd/subcmd-util.h +++ b/tools/lib/subcmd/subcmd-util.h @@ -20,8 +20,8 @@ static __noreturn inline void die(const char *err, ...) va_start(params, err); report(" Fatal: ", err, params); - exit(128); va_end(params); + exit(128); } #define zfree(ptr) ({ free(*ptr); *ptr = NULL; }) diff --git a/tools/lib/thermal/Makefile b/tools/lib/thermal/Makefile index 2d0d255fd0e1..8890fd57b110 100644 --- a/tools/lib/thermal/Makefile +++ b/tools/lib/thermal/Makefile @@ -121,7 +121,9 @@ all: fixdep clean: $(call QUIET_CLEAN, libthermal) $(RM) $(LIBTHERMAL_A) \ - *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_VERSION) .*.d .*.cmd LIBTHERMAL-CFLAGS $(LIBTHERMAL_PC) + *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBTHERMAL_VERSION) \ + .*.d .*.cmd LIBTHERMAL-CFLAGS $(LIBTHERMAL_PC) \ + $(srctree)/tools/$(THERMAL_UAPI) $(LIBTHERMAL_PC): $(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \ diff --git a/tools/lib/thermal/commands.c b/tools/lib/thermal/commands.c index 73d4d4e8d6ec..4998cec793ed 100644 --- a/tools/lib/thermal/commands.c +++ b/tools/lib/thermal/commands.c @@ -5,6 +5,7 @@ #include <stdio.h> #include <stdlib.h> #include <unistd.h> +#include <limits.h> #include <thermal.h> #include "thermal_nl.h" @@ -33,6 +34,11 @@ static struct nla_policy thermal_genl_policy[THERMAL_GENL_ATTR_MAX + 1] = { [THERMAL_GENL_ATTR_CDEV_CUR_STATE] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_CDEV_MAX_STATE] = { .type = NLA_U32 }, [THERMAL_GENL_ATTR_CDEV_NAME] = { .type = NLA_STRING }, + + /* Thresholds */ + [THERMAL_GENL_ATTR_THRESHOLD] = { .type = NLA_NESTED }, + [THERMAL_GENL_ATTR_THRESHOLD_TEMP] = { .type = NLA_U32 }, + [THERMAL_GENL_ATTR_THRESHOLD_DIRECTION] = { .type = NLA_U32 }, }; static int parse_tz_get(struct genl_info *info, struct thermal_zone **tz) @@ -182,6 +188,48 @@ static int parse_tz_get_gov(struct genl_info *info, struct thermal_zone *tz) return THERMAL_SUCCESS; } +static int parse_threshold_get(struct genl_info *info, struct thermal_zone *tz) +{ + struct nlattr *attr; + struct thermal_threshold *__tt = NULL; + size_t size = 0; + int rem; + + /* + * The size contains the size of the array and we want to + * access the last element, size - 1. + * + * The variable size is initialized to zero but it will be + * then incremented by the first if() statement. The message + * attributes are ordered, so the first if() statement will be + * always called before the second one. If it happens that is + * not the case, then it is a kernel bug. + */ + nla_for_each_nested(attr, info->attrs[THERMAL_GENL_ATTR_THRESHOLD], rem) { + + if (nla_type(attr) == THERMAL_GENL_ATTR_THRESHOLD_TEMP) { + + size++; + + __tt = realloc(__tt, sizeof(*__tt) * (size + 2)); + if (!__tt) + return THERMAL_ERROR; + + __tt[size - 1].temperature = nla_get_u32(attr); + } + + if (nla_type(attr) == THERMAL_GENL_ATTR_THRESHOLD_DIRECTION) + __tt[size - 1].direction = nla_get_u32(attr); + } + + if (__tt) + __tt[size].temperature = INT_MAX; + + tz->thresholds = __tt; + + return THERMAL_SUCCESS; +} + static int handle_netlink(struct nl_cache_ops *unused, struct genl_cmd *cmd, struct genl_info *info, void *arg) @@ -210,6 +258,10 @@ static int handle_netlink(struct nl_cache_ops *unused, ret = parse_tz_get_gov(info, arg); break; + case THERMAL_GENL_CMD_THRESHOLD_GET: + ret = parse_threshold_get(info, arg); + break; + default: return THERMAL_ERROR; } @@ -253,6 +305,34 @@ static struct genl_cmd thermal_cmds[] = { .c_maxattr = THERMAL_GENL_ATTR_MAX, .c_attr_policy = thermal_genl_policy, }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_GET, + .c_name = (char *)"Get thresholds list", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_ADD, + .c_name = (char *)"Add a threshold", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_DELETE, + .c_name = (char *)"Delete a threshold", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, + { + .c_id = THERMAL_GENL_CMD_THRESHOLD_FLUSH, + .c_name = (char *)"Flush the thresholds", + .c_msg_parser = handle_netlink, + .c_maxattr = THERMAL_GENL_ATTR_MAX, + .c_attr_policy = thermal_genl_policy, + }, }; static struct genl_ops thermal_cmd_ops = { @@ -261,9 +341,41 @@ static struct genl_ops thermal_cmd_ops = { .o_ncmds = ARRAY_SIZE(thermal_cmds), }; -static thermal_error_t thermal_genl_auto(struct thermal_handler *th, int id, int cmd, - int flags, void *arg) +struct cmd_param { + int tz_id; + int temp; + int direction; +}; + +typedef int (*cmd_cb_t)(struct nl_msg *, struct cmd_param *); + +static int thermal_genl_tz_id_encode(struct nl_msg *msg, struct cmd_param *p) { + if (nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, p->tz_id)) + return -1; + + return 0; +} + +static int thermal_genl_threshold_encode(struct nl_msg *msg, struct cmd_param *p) +{ + if (thermal_genl_tz_id_encode(msg, p)) + return -1; + + if (nla_put_u32(msg, THERMAL_GENL_ATTR_THRESHOLD_TEMP, p->temp)) + return -1; + + if (nla_put_u32(msg, THERMAL_GENL_ATTR_THRESHOLD_DIRECTION, p->direction)) + return -1; + + return 0; +} + +static thermal_error_t thermal_genl_auto(struct thermal_handler *th, cmd_cb_t cmd_cb, + struct cmd_param *param, + int cmd, int flags, void *arg) +{ + thermal_error_t ret = THERMAL_ERROR; struct nl_msg *msg; void *hdr; @@ -274,45 +386,95 @@ static thermal_error_t thermal_genl_auto(struct thermal_handler *th, int id, int hdr = genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, thermal_cmd_ops.o_id, 0, flags, cmd, THERMAL_GENL_VERSION); if (!hdr) - return THERMAL_ERROR; + goto out; - if (id >= 0 && nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_ID, id)) - return THERMAL_ERROR; + if (cmd_cb && cmd_cb(msg, param)) + goto out; if (nl_send_msg(th->sk_cmd, th->cb_cmd, msg, genl_handle_msg, arg)) - return THERMAL_ERROR; + goto out; + ret = THERMAL_SUCCESS; +out: nlmsg_free(msg); - return THERMAL_SUCCESS; + return ret; } thermal_error_t thermal_cmd_get_tz(struct thermal_handler *th, struct thermal_zone **tz) { - return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_TZ_GET_ID, + return thermal_genl_auto(th, NULL, NULL, THERMAL_GENL_CMD_TZ_GET_ID, NLM_F_DUMP | NLM_F_ACK, tz); } thermal_error_t thermal_cmd_get_cdev(struct thermal_handler *th, struct thermal_cdev **tc) { - return thermal_genl_auto(th, -1, THERMAL_GENL_CMD_CDEV_GET, + return thermal_genl_auto(th, NULL, NULL, THERMAL_GENL_CMD_CDEV_GET, NLM_F_DUMP | NLM_F_ACK, tc); } thermal_error_t thermal_cmd_get_trip(struct thermal_handler *th, struct thermal_zone *tz) { - return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TRIP, - 0, tz); + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_TZ_GET_TRIP, 0, tz); } thermal_error_t thermal_cmd_get_governor(struct thermal_handler *th, struct thermal_zone *tz) { - return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_GOV, 0, tz); + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_TZ_GET_GOV, 0, tz); } thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, struct thermal_zone *tz) { - return thermal_genl_auto(th, tz->id, THERMAL_GENL_CMD_TZ_GET_TEMP, 0, tz); + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_TZ_GET_TEMP, 0, tz); +} + +thermal_error_t thermal_cmd_threshold_get(struct thermal_handler *th, + struct thermal_zone *tz) +{ + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_GET, 0, tz); +} + +thermal_error_t thermal_cmd_threshold_add(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction) +{ + struct cmd_param p = { .tz_id = tz->id, .temp = temperature, .direction = direction }; + + return thermal_genl_auto(th, thermal_genl_threshold_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_ADD, 0, tz); +} + +thermal_error_t thermal_cmd_threshold_delete(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction) +{ + struct cmd_param p = { .tz_id = tz->id, .temp = temperature, .direction = direction }; + + return thermal_genl_auto(th, thermal_genl_threshold_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_DELETE, 0, tz); +} + +thermal_error_t thermal_cmd_threshold_flush(struct thermal_handler *th, + struct thermal_zone *tz) +{ + struct cmd_param p = { .tz_id = tz->id }; + + return thermal_genl_auto(th, thermal_genl_tz_id_encode, &p, + THERMAL_GENL_CMD_THRESHOLD_FLUSH, 0, tz); } thermal_error_t thermal_cmd_exit(struct thermal_handler *th) diff --git a/tools/lib/thermal/events.c b/tools/lib/thermal/events.c index a7a55d1a0c4c..bd851c869029 100644 --- a/tools/lib/thermal/events.c +++ b/tools/lib/thermal/events.c @@ -94,6 +94,30 @@ static int handle_thermal_event(struct nl_msg *n, void *arg) case THERMAL_GENL_EVENT_TZ_GOV_CHANGE: return ops->gov_change(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), nla_get_string(attrs[THERMAL_GENL_ATTR_GOV_NAME]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_ADD: + return ops->threshold_add(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_DELETE: + return ops->threshold_delete(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_THRESHOLD_DIRECTION]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_FLUSH: + return ops->threshold_flush(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_UP: + return ops->threshold_up(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_PREV_TEMP]), arg); + + case THERMAL_GENL_EVENT_THRESHOLD_DOWN: + return ops->threshold_down(nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_ID]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_TEMP]), + nla_get_u32(attrs[THERMAL_GENL_ATTR_TZ_PREV_TEMP]), arg); + default: return -1; } @@ -101,19 +125,24 @@ static int handle_thermal_event(struct nl_msg *n, void *arg) static void thermal_events_ops_init(struct thermal_events_ops *ops) { - enabled_ops[THERMAL_GENL_EVENT_TZ_CREATE] = !!ops->tz_create; - enabled_ops[THERMAL_GENL_EVENT_TZ_DELETE] = !!ops->tz_delete; - enabled_ops[THERMAL_GENL_EVENT_TZ_DISABLE] = !!ops->tz_disable; - enabled_ops[THERMAL_GENL_EVENT_TZ_ENABLE] = !!ops->tz_enable; - enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_UP] = !!ops->trip_high; - enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DOWN] = !!ops->trip_low; - enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_CHANGE] = !!ops->trip_change; - enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_ADD] = !!ops->trip_add; - enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DELETE] = !!ops->trip_delete; - enabled_ops[THERMAL_GENL_EVENT_CDEV_ADD] = !!ops->cdev_add; - enabled_ops[THERMAL_GENL_EVENT_CDEV_DELETE] = !!ops->cdev_delete; - enabled_ops[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = !!ops->cdev_update; - enabled_ops[THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = !!ops->gov_change; + enabled_ops[THERMAL_GENL_EVENT_TZ_CREATE] = !!ops->tz_create; + enabled_ops[THERMAL_GENL_EVENT_TZ_DELETE] = !!ops->tz_delete; + enabled_ops[THERMAL_GENL_EVENT_TZ_DISABLE] = !!ops->tz_disable; + enabled_ops[THERMAL_GENL_EVENT_TZ_ENABLE] = !!ops->tz_enable; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_UP] = !!ops->trip_high; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DOWN] = !!ops->trip_low; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_CHANGE] = !!ops->trip_change; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_ADD] = !!ops->trip_add; + enabled_ops[THERMAL_GENL_EVENT_TZ_TRIP_DELETE] = !!ops->trip_delete; + enabled_ops[THERMAL_GENL_EVENT_CDEV_ADD] = !!ops->cdev_add; + enabled_ops[THERMAL_GENL_EVENT_CDEV_DELETE] = !!ops->cdev_delete; + enabled_ops[THERMAL_GENL_EVENT_CDEV_STATE_UPDATE] = !!ops->cdev_update; + enabled_ops[THERMAL_GENL_EVENT_TZ_GOV_CHANGE] = !!ops->gov_change; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_ADD] = !!ops->threshold_add; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_DELETE] = !!ops->threshold_delete; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_FLUSH] = !!ops->threshold_flush; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_UP] = !!ops->threshold_up; + enabled_ops[THERMAL_GENL_EVENT_THRESHOLD_DOWN] = !!ops->threshold_down; } thermal_error_t thermal_events_handle(struct thermal_handler *th, void *arg) diff --git a/tools/lib/thermal/include/thermal.h b/tools/lib/thermal/include/thermal.h index 1abc560602cf..818ecdfb46e5 100644 --- a/tools/lib/thermal/include/thermal.h +++ b/tools/lib/thermal/include/thermal.h @@ -4,11 +4,20 @@ #define __LIBTHERMAL_H #include <linux/thermal.h> +#include <sys/types.h> #ifndef LIBTHERMAL_API #define LIBTHERMAL_API __attribute__((visibility("default"))) #endif +#ifndef THERMAL_THRESHOLD_WAY_UP +#define THERMAL_THRESHOLD_WAY_UP 0x1 +#endif + +#ifndef THERMAL_THRESHOLD_WAY_DOWN +#define THERMAL_THRESHOLD_WAY_DOWN 0x2 +#endif + #ifdef __cplusplus extern "C" { #endif @@ -31,6 +40,11 @@ struct thermal_events_ops { int (*cdev_delete)(int cdev_id, void *arg); int (*cdev_update)(int cdev_id, int cur_state, void *arg); int (*gov_change)(int tz_id, const char *gov_name, void *arg); + int (*threshold_add)(int tz_id, int temperature, int direction, void *arg); + int (*threshold_delete)(int tz_id, int temperature, int direction, void *arg); + int (*threshold_flush)(int tz_id, void *arg); + int (*threshold_up)(int tz_id, int temp, int prev_temp, void *arg); + int (*threshold_down)(int tz_id, int temp, int prev_temp, void *arg); }; struct thermal_ops { @@ -45,12 +59,18 @@ struct thermal_trip { int hyst; }; +struct thermal_threshold { + int temperature; + int direction; +}; + struct thermal_zone { int id; int temp; char name[THERMAL_NAME_LENGTH]; char governor[THERMAL_NAME_LENGTH]; struct thermal_trip *trip; + struct thermal_threshold *thresholds; }; struct thermal_cdev { @@ -74,12 +94,16 @@ typedef int (*cb_tt_t)(struct thermal_trip *, void *); typedef int (*cb_tc_t)(struct thermal_cdev *, void *); +typedef int (*cb_th_t)(struct thermal_threshold *, void *); + LIBTHERMAL_API int for_each_thermal_zone(struct thermal_zone *tz, cb_tz_t cb, void *arg); LIBTHERMAL_API int for_each_thermal_trip(struct thermal_trip *tt, cb_tt_t cb, void *arg); LIBTHERMAL_API int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg); +LIBTHERMAL_API int for_each_thermal_threshold(struct thermal_threshold *th, cb_th_t cb, void *arg); + LIBTHERMAL_API struct thermal_zone *thermal_zone_find_by_name(struct thermal_zone *tz, const char *name); @@ -124,6 +148,22 @@ LIBTHERMAL_API thermal_error_t thermal_cmd_get_governor(struct thermal_handler * LIBTHERMAL_API thermal_error_t thermal_cmd_get_temp(struct thermal_handler *th, struct thermal_zone *tz); +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_get(struct thermal_handler *th, + struct thermal_zone *tz); + +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_add(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction); + +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_delete(struct thermal_handler *th, + struct thermal_zone *tz, + int temperature, + int direction); + +LIBTHERMAL_API thermal_error_t thermal_cmd_threshold_flush(struct thermal_handler *th, + struct thermal_zone *tz); + /* * Netlink thermal samples */ diff --git a/tools/lib/thermal/libthermal.map b/tools/lib/thermal/libthermal.map index d5e77738c7a4..d657176aa47f 100644 --- a/tools/lib/thermal/libthermal.map +++ b/tools/lib/thermal/libthermal.map @@ -4,6 +4,7 @@ LIBTHERMAL_0.0.1 { for_each_thermal_zone; for_each_thermal_trip; for_each_thermal_cdev; + for_each_thermal_threshold; thermal_zone_find_by_name; thermal_zone_find_by_id; thermal_zone_discover; @@ -17,6 +18,10 @@ LIBTHERMAL_0.0.1 { thermal_cmd_get_trip; thermal_cmd_get_governor; thermal_cmd_get_temp; + thermal_cmd_threshold_get; + thermal_cmd_threshold_add; + thermal_cmd_threshold_delete; + thermal_cmd_threshold_flush; thermal_sampling_init; thermal_sampling_handle; thermal_sampling_fd; diff --git a/tools/lib/thermal/sampling.c b/tools/lib/thermal/sampling.c index 70577423a9f0..f67c1f9ea1d7 100644 --- a/tools/lib/thermal/sampling.c +++ b/tools/lib/thermal/sampling.c @@ -16,6 +16,8 @@ static int handle_thermal_sample(struct nl_msg *n, void *arg) struct thermal_handler_param *thp = arg; struct thermal_handler *th = thp->th; + arg = thp->arg; + genlmsg_parse(nlh, 0, attrs, THERMAL_GENL_ATTR_MAX, NULL); switch (genlhdr->cmd) { diff --git a/tools/lib/thermal/thermal.c b/tools/lib/thermal/thermal.c index 72a76dc205bc..6f02e3539159 100644 --- a/tools/lib/thermal/thermal.c +++ b/tools/lib/thermal/thermal.c @@ -1,10 +1,24 @@ // SPDX-License-Identifier: LGPL-2.1+ // Copyright (C) 2022, Linaro Ltd - Daniel Lezcano <daniel.lezcano@linaro.org> #include <stdio.h> +#include <limits.h> #include <thermal.h> #include "thermal_nl.h" +int for_each_thermal_threshold(struct thermal_threshold *th, cb_th_t cb, void *arg) +{ + int i, ret = 0; + + if (!th) + return 0; + + for (i = 0; th[i].temperature != INT_MAX; i++) + ret |= cb(&th[i], arg); + + return ret; +} + int for_each_thermal_cdev(struct thermal_cdev *cdev, cb_tc_t cb, void *arg) { int i, ret = 0; @@ -80,6 +94,9 @@ static int __thermal_zone_discover(struct thermal_zone *tz, void *th) if (thermal_cmd_get_trip(th, tz) < 0) return -1; + if (thermal_cmd_threshold_get(th, tz)) + return -1; + if (thermal_cmd_get_governor(th, tz)) return -1; diff --git a/tools/mm/page-types.c b/tools/mm/page-types.c index fa050d5a48cd..bcac7ebfb51f 100644 --- a/tools/mm/page-types.c +++ b/tools/mm/page-types.c @@ -22,6 +22,7 @@ #include <time.h> #include <setjmp.h> #include <signal.h> +#include <inttypes.h> #include <sys/types.h> #include <sys/errno.h> #include <sys/fcntl.h> @@ -391,9 +392,9 @@ static void show_page_range(unsigned long voffset, unsigned long offset, if (opt_file) printf("%lx\t", voff); if (opt_list_cgroup) - printf("@%llu\t", (unsigned long long)cgroup0); + printf("@%" PRIu64 "\t", cgroup0); if (opt_list_mapcnt) - printf("%lu\t", mapcnt0); + printf("%" PRIu64 "\t", mapcnt0); printf("%lx\t%lx\t%s\n", index, count, page_flag_name(flags0)); } @@ -419,9 +420,9 @@ static void show_page(unsigned long voffset, unsigned long offset, if (opt_file) printf("%lx\t", voffset); if (opt_list_cgroup) - printf("@%llu\t", (unsigned long long)cgroup); + printf("@%" PRIu64 "\t", cgroup); if (opt_list_mapcnt) - printf("%lu\t", mapcnt); + printf("%" PRIu64 "\t", mapcnt); printf("%lx\t%s\n", offset, page_flag_name(flags)); } diff --git a/tools/mm/page_owner_sort.c b/tools/mm/page_owner_sort.c index e1f264444342..880e36df0c11 100644 --- a/tools/mm/page_owner_sort.c +++ b/tools/mm/page_owner_sort.c @@ -377,6 +377,7 @@ static char *get_comm(char *buf) if (errno != 0) { if (debug_on) fprintf(stderr, "wrong comm in follow buf:\n%s\n", buf); + free(comm_str); return NULL; } diff --git a/tools/mm/slabinfo.c b/tools/mm/slabinfo.c index cfaeaea71042..1433eff99feb 100644 --- a/tools/mm/slabinfo.c +++ b/tools/mm/slabinfo.c @@ -21,7 +21,7 @@ #include <regex.h> #include <errno.h> -#define MAX_SLABS 500 +#define MAX_SLABS 2000 #define MAX_ALIASES 500 #define MAX_NODES 1024 @@ -1228,6 +1228,8 @@ static void read_slab_dir(void) continue; switch (de->d_type) { case DT_LNK: + if (alias - aliasinfo == MAX_ALIASES) + fatal("Too many aliases\n"); alias->name = strdup(de->d_name); count = readlink(de->d_name, buffer, sizeof(buffer)-1); @@ -1242,6 +1244,8 @@ static void read_slab_dir(void) alias++; break; case DT_DIR: + if (slab - slabinfo == MAX_SLABS) + fatal("Too many slabs\n"); if (chdir(de->d_name)) fatal("Unable to access slab %s\n", slab->name); slab->name = strdup(de->d_name); @@ -1297,7 +1301,9 @@ static void read_slab_dir(void) slab->cpu_partial_free = get_obj("cpu_partial_free"); slab->alloc_node_mismatch = get_obj("alloc_node_mismatch"); slab->deactivate_bypass = get_obj("deactivate_bypass"); - chdir(".."); + if (chdir("..")) + fatal("Unable to chdir from slab ../%s\n", + slab->name); if (slab->name[0] == ':') alias_targets++; slab++; @@ -1310,10 +1316,6 @@ static void read_slab_dir(void) slabs = slab - slabinfo; actual_slabs = slabs; aliases = alias - aliasinfo; - if (slabs > MAX_SLABS) - fatal("Too many slabs\n"); - if (aliases > MAX_ALIASES) - fatal("Too many aliases\n"); } static void output_slabs(void) diff --git a/tools/net/sunrpc/extract.sh b/tools/net/sunrpc/extract.sh new file mode 100755 index 000000000000..f944066f25bc --- /dev/null +++ b/tools/net/sunrpc/extract.sh @@ -0,0 +1,11 @@ +#! /bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Extract an RPC protocol specification from an RFC document. +# The version of this script comes from RFC 8166. +# +# Usage: +# $ extract.sh < rfcNNNN.txt > protocol.x +# + +grep '^ *///' | sed 's?^ */// ??' | sed 's?^ *///$??' diff --git a/tools/net/sunrpc/xdrgen/README b/tools/net/sunrpc/xdrgen/README index 92f7738ad50c..27218a78ab40 100644 --- a/tools/net/sunrpc/xdrgen/README +++ b/tools/net/sunrpc/xdrgen/README @@ -150,6 +150,23 @@ Pragma directives specify exceptions to the normal generation of encoding and decoding functions. Currently one directive is implemented: "public". +Pragma big_endian +------ ---------- + + pragma big_endian <enum> ; + +For variables that might contain only a small number values, it +is more efficient to avoid the byte-swap when encoding or decoding +on little-endian machines. Such is often the case with error status +codes. For example: + + pragma big_endian nfsstat3; + +In this case, when generating an XDR struct or union containing a +field of type "nfsstat3", xdrgen will make the type of that field +"__be32" instead of "enum nfsstat3". XDR unions then switch on the +non-byte-swapped value of that field. + Pragma exclude ------ ------- diff --git a/tools/net/sunrpc/xdrgen/generators/__init__.py b/tools/net/sunrpc/xdrgen/generators/__init__.py index fd2457461274..b98574a36a4a 100644 --- a/tools/net/sunrpc/xdrgen/generators/__init__.py +++ b/tools/net/sunrpc/xdrgen/generators/__init__.py @@ -111,3 +111,7 @@ class SourceGenerator: def emit_encoder(self, node: _XdrAst) -> None: """Emit one encoder function for this XDR type""" raise NotImplementedError("Encoder generation not supported") + + def emit_maxsize(self, node: _XdrAst) -> None: + """Emit one maxsize macro for this XDR type""" + raise NotImplementedError("Maxsize macro generation not supported") diff --git a/tools/net/sunrpc/xdrgen/generators/enum.py b/tools/net/sunrpc/xdrgen/generators/enum.py index 855e43f4ae38..e62f715d3996 100644 --- a/tools/net/sunrpc/xdrgen/generators/enum.py +++ b/tools/net/sunrpc/xdrgen/generators/enum.py @@ -4,7 +4,7 @@ """Generate code to handle XDR enum types""" from generators import SourceGenerator, create_jinja2_environment -from xdr_ast import _XdrEnum, public_apis +from xdr_ast import _XdrEnum, public_apis, big_endian, get_header_name class XdrEnumGenerator(SourceGenerator): @@ -18,7 +18,7 @@ class XdrEnumGenerator(SourceGenerator): def emit_declaration(self, node: _XdrEnum) -> None: """Emit one declaration pair for an XDR enum type""" if node.name in public_apis: - template = self.environment.get_template("declaration/close.j2") + template = self.environment.get_template("declaration/enum.j2") print(template.render(name=node.name)) def emit_definition(self, node: _XdrEnum) -> None: @@ -30,15 +30,35 @@ class XdrEnumGenerator(SourceGenerator): for enumerator in node.enumerators: print(template.render(name=enumerator.name, value=enumerator.value)) - template = self.environment.get_template("definition/close.j2") + if node.name in big_endian: + template = self.environment.get_template("definition/close_be.j2") + else: + template = self.environment.get_template("definition/close.j2") print(template.render(name=node.name)) def emit_decoder(self, node: _XdrEnum) -> None: """Emit one decoder function for an XDR enum type""" - template = self.environment.get_template("decoder/enum.j2") + if node.name in big_endian: + template = self.environment.get_template("decoder/enum_be.j2") + else: + template = self.environment.get_template("decoder/enum.j2") print(template.render(name=node.name)) def emit_encoder(self, node: _XdrEnum) -> None: """Emit one encoder function for an XDR enum type""" - template = self.environment.get_template("encoder/enum.j2") + if node.name in big_endian: + template = self.environment.get_template("encoder/enum_be.j2") + else: + template = self.environment.get_template("encoder/enum.j2") print(template.render(name=node.name)) + + def emit_maxsize(self, node: _XdrEnum) -> None: + """Emit one maxsize macro for an XDR enum type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = self.environment.get_template("maxsize/enum.j2") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) diff --git a/tools/net/sunrpc/xdrgen/generators/pointer.py b/tools/net/sunrpc/xdrgen/generators/pointer.py index b0b27f1819c8..6dbda60ad2db 100644 --- a/tools/net/sunrpc/xdrgen/generators/pointer.py +++ b/tools/net/sunrpc/xdrgen/generators/pointer.py @@ -8,11 +8,11 @@ from jinja2 import Environment from generators import SourceGenerator, kernel_c_type from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrVariableLengthString +from xdr_ast import _XdrBasic, _XdrString from xdr_ast import _XdrFixedLengthOpaque, _XdrVariableLengthOpaque from xdr_ast import _XdrFixedLengthArray, _XdrVariableLengthArray from xdr_ast import _XdrOptionalData, _XdrPointer, _XdrDeclaration -from xdr_ast import public_apis +from xdr_ast import public_apis, get_header_name def emit_pointer_declaration(environment: Environment, node: _XdrPointer) -> None: @@ -46,7 +46,7 @@ def emit_pointer_member_definition( elif isinstance(field, _XdrVariableLengthOpaque): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) elif isinstance(field, _XdrFixedLengthArray): @@ -119,7 +119,7 @@ def emit_pointer_member_decoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "decoder", field.template) print( template.render( @@ -198,7 +198,7 @@ def emit_pointer_member_encoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "encoder", field.template) print( template.render( @@ -247,6 +247,18 @@ def emit_pointer_encoder(environment: Environment, node: _XdrPointer) -> None: print(template.render()) +def emit_pointer_maxsize(environment: Environment, node: _XdrPointer) -> None: + """Emit one maxsize macro for an XDR pointer type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", "pointer") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrPointerGenerator(SourceGenerator): """Generate source code for XDR pointer""" @@ -270,3 +282,7 @@ class XdrPointerGenerator(SourceGenerator): def emit_encoder(self, node: _XdrPointer) -> None: """Emit one encoder function for an XDR pointer type""" emit_pointer_encoder(self.environment, node) + + def emit_maxsize(self, node: _XdrPointer) -> None: + """Emit one maxsize macro for an XDR pointer type""" + emit_pointer_maxsize(self.environment, node) diff --git a/tools/net/sunrpc/xdrgen/generators/struct.py b/tools/net/sunrpc/xdrgen/generators/struct.py index b694cd470829..64911de46f62 100644 --- a/tools/net/sunrpc/xdrgen/generators/struct.py +++ b/tools/net/sunrpc/xdrgen/generators/struct.py @@ -8,11 +8,11 @@ from jinja2 import Environment from generators import SourceGenerator, kernel_c_type from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrVariableLengthString +from xdr_ast import _XdrBasic, _XdrString from xdr_ast import _XdrFixedLengthOpaque, _XdrVariableLengthOpaque from xdr_ast import _XdrFixedLengthArray, _XdrVariableLengthArray from xdr_ast import _XdrOptionalData, _XdrStruct, _XdrDeclaration -from xdr_ast import public_apis +from xdr_ast import public_apis, get_header_name def emit_struct_declaration(environment: Environment, node: _XdrStruct) -> None: @@ -46,7 +46,7 @@ def emit_struct_member_definition( elif isinstance(field, _XdrVariableLengthOpaque): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "definition", field.template) print(template.render(name=field.name)) elif isinstance(field, _XdrFixedLengthArray): @@ -119,7 +119,7 @@ def emit_struct_member_decoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "decoder", field.template) print( template.render( @@ -198,7 +198,7 @@ def emit_struct_member_encoder( maxsize=field.maxsize, ) ) - elif isinstance(field, _XdrVariableLengthString): + elif isinstance(field, _XdrString): template = get_jinja2_template(environment, "encoder", field.template) print( template.render( @@ -247,6 +247,18 @@ def emit_struct_encoder(environment: Environment, node: _XdrStruct) -> None: print(template.render()) +def emit_struct_maxsize(environment: Environment, node: _XdrStruct) -> None: + """Emit one maxsize macro for an XDR struct type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", "struct") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrStructGenerator(SourceGenerator): """Generate source code for XDR structs""" @@ -270,3 +282,7 @@ class XdrStructGenerator(SourceGenerator): def emit_encoder(self, node: _XdrStruct) -> None: """Emit one encoder function for an XDR struct type""" emit_struct_encoder(self.environment, node) + + def emit_maxsize(self, node: _XdrStruct) -> None: + """Emit one maxsize macro for an XDR struct type""" + emit_struct_maxsize(self.environment, node) diff --git a/tools/net/sunrpc/xdrgen/generators/typedef.py b/tools/net/sunrpc/xdrgen/generators/typedef.py index 85a1b2303333..fab72e9d6915 100644 --- a/tools/net/sunrpc/xdrgen/generators/typedef.py +++ b/tools/net/sunrpc/xdrgen/generators/typedef.py @@ -8,11 +8,11 @@ from jinja2 import Environment from generators import SourceGenerator, kernel_c_type from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrTypedef, _XdrVariableLengthString +from xdr_ast import _XdrBasic, _XdrTypedef, _XdrString from xdr_ast import _XdrFixedLengthOpaque, _XdrVariableLengthOpaque from xdr_ast import _XdrFixedLengthArray, _XdrVariableLengthArray from xdr_ast import _XdrOptionalData, _XdrVoid, _XdrDeclaration -from xdr_ast import public_apis +from xdr_ast import public_apis, get_header_name def emit_typedef_declaration(environment: Environment, node: _XdrDeclaration) -> None: @@ -28,7 +28,7 @@ def emit_typedef_declaration(environment: Environment, node: _XdrDeclaration) -> classifier=node.spec.c_classifier, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "declaration", node.template) print(template.render(name=node.name)) elif isinstance(node, _XdrFixedLengthOpaque): @@ -74,7 +74,7 @@ def emit_type_definition(environment: Environment, node: _XdrDeclaration) -> Non classifier=node.spec.c_classifier, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "definition", node.template) print(template.render(name=node.name)) elif isinstance(node, _XdrFixedLengthOpaque): @@ -119,7 +119,7 @@ def emit_typedef_decoder(environment: Environment, node: _XdrDeclaration) -> Non type=node.spec.type_name, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "decoder", node.template) print( template.render( @@ -180,7 +180,7 @@ def emit_typedef_encoder(environment: Environment, node: _XdrDeclaration) -> Non type=node.spec.type_name, ) ) - elif isinstance(node, _XdrVariableLengthString): + elif isinstance(node, _XdrString): template = get_jinja2_template(environment, "encoder", node.template) print( template.render( @@ -230,6 +230,18 @@ def emit_typedef_encoder(environment: Environment, node: _XdrDeclaration) -> Non raise NotImplementedError("typedef: type not recognized") +def emit_typedef_maxsize(environment: Environment, node: _XdrDeclaration) -> None: + """Emit a maxsize macro for an XDR typedef""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", node.template) + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrTypedefGenerator(SourceGenerator): """Generate source code for XDR typedefs""" @@ -253,3 +265,7 @@ class XdrTypedefGenerator(SourceGenerator): def emit_encoder(self, node: _XdrTypedef) -> None: """Emit one encoder function for an XDR typedef""" emit_typedef_encoder(self.environment, node.declaration) + + def emit_maxsize(self, node: _XdrTypedef) -> None: + """Emit one maxsize macro for an XDR typedef""" + emit_typedef_maxsize(self.environment, node.declaration) diff --git a/tools/net/sunrpc/xdrgen/generators/union.py b/tools/net/sunrpc/xdrgen/generators/union.py index 7974967bbb9f..2cca00e279cd 100644 --- a/tools/net/sunrpc/xdrgen/generators/union.py +++ b/tools/net/sunrpc/xdrgen/generators/union.py @@ -8,8 +8,8 @@ from jinja2 import Environment from generators import SourceGenerator from generators import create_jinja2_environment, get_jinja2_template -from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid -from xdr_ast import _XdrDeclaration, _XdrCaseSpec, public_apis +from xdr_ast import _XdrBasic, _XdrUnion, _XdrVoid, get_header_name +from xdr_ast import _XdrDeclaration, _XdrCaseSpec, public_apis, big_endian def emit_union_declaration(environment: Environment, node: _XdrUnion) -> None: @@ -77,13 +77,18 @@ def emit_union_switch_spec_decoder( print(template.render(name=node.name, type=node.spec.type_name)) -def emit_union_case_spec_decoder(environment: Environment, node: _XdrCaseSpec) -> None: +def emit_union_case_spec_decoder( + environment: Environment, node: _XdrCaseSpec, big_endian_discriminant: bool +) -> None: """Emit decoder functions for an XDR union's case arm""" if isinstance(node.arm, _XdrVoid): return - template = get_jinja2_template(environment, "decoder", "case_spec") + if big_endian_discriminant: + template = get_jinja2_template(environment, "decoder", "case_spec_be") + else: + template = get_jinja2_template(environment, "decoder", "case_spec") for case in node.values: print(template.render(case=case)) @@ -136,7 +141,11 @@ def emit_union_decoder(environment: Environment, node: _XdrUnion) -> None: emit_union_switch_spec_decoder(environment, node.discriminant) for case in node.cases: - emit_union_case_spec_decoder(environment, case) + emit_union_case_spec_decoder( + environment, + case, + node.discriminant.spec.type_name in big_endian, + ) emit_union_default_spec_decoder(environment, node) @@ -153,17 +162,21 @@ def emit_union_switch_spec_encoder( print(template.render(name=node.name, type=node.spec.type_name)) -def emit_union_case_spec_encoder(environment: Environment, node: _XdrCaseSpec) -> None: +def emit_union_case_spec_encoder( + environment: Environment, node: _XdrCaseSpec, big_endian_discriminant: bool +) -> None: """Emit encoder functions for an XDR union's case arm""" if isinstance(node.arm, _XdrVoid): return - template = get_jinja2_template(environment, "encoder", "case_spec") + if big_endian_discriminant: + template = get_jinja2_template(environment, "encoder", "case_spec_be") + else: + template = get_jinja2_template(environment, "encoder", "case_spec") for case in node.values: print(template.render(case=case)) - assert isinstance(node.arm, _XdrBasic) template = get_jinja2_template(environment, "encoder", node.arm.template) print( template.render( @@ -192,7 +205,6 @@ def emit_union_default_spec_encoder(environment: Environment, node: _XdrUnion) - print(template.render()) return - assert isinstance(default_case.arm, _XdrBasic) template = get_jinja2_template(environment, "encoder", default_case.arm.template) print( template.render( @@ -210,7 +222,11 @@ def emit_union_encoder(environment, node: _XdrUnion) -> None: emit_union_switch_spec_encoder(environment, node.discriminant) for case in node.cases: - emit_union_case_spec_encoder(environment, case) + emit_union_case_spec_encoder( + environment, + case, + node.discriminant.spec.type_name in big_endian, + ) emit_union_default_spec_encoder(environment, node) @@ -218,6 +234,18 @@ def emit_union_encoder(environment, node: _XdrUnion) -> None: print(template.render()) +def emit_union_maxsize(environment: Environment, node: _XdrUnion) -> None: + """Emit one maxsize macro for an XDR union type""" + macro_name = get_header_name().upper() + "_" + node.name + "_sz" + template = get_jinja2_template(environment, "maxsize", "union") + print( + template.render( + macro=macro_name, + width=" + ".join(node.symbolic_width()), + ) + ) + + class XdrUnionGenerator(SourceGenerator): """Generate source code for XDR unions""" @@ -241,3 +269,7 @@ class XdrUnionGenerator(SourceGenerator): def emit_encoder(self, node: _XdrUnion) -> None: """Emit one encoder function for an XDR union""" emit_union_encoder(self.environment, node) + + def emit_maxsize(self, node: _XdrUnion) -> None: + """Emit one maxsize macro for an XDR union""" + emit_union_maxsize(self.environment, node) diff --git a/tools/net/sunrpc/xdrgen/grammars/xdr.lark b/tools/net/sunrpc/xdrgen/grammars/xdr.lark index f3c4552e548d..7c2c1b8c86d1 100644 --- a/tools/net/sunrpc/xdrgen/grammars/xdr.lark +++ b/tools/net/sunrpc/xdrgen/grammars/xdr.lark @@ -3,7 +3,7 @@ declaration : "opaque" identifier "[" value "]" -> fixed_length_opaque | "opaque" identifier "<" [ value ] ">" -> variable_length_opaque - | "string" identifier "<" [ value ] ">" -> variable_length_string + | "string" identifier "<" [ value ] ">" -> string | type_specifier identifier "[" value "]" -> fixed_length_array | type_specifier identifier "<" [ value ] ">" -> variable_length_array | type_specifier "*" identifier -> optional_data @@ -87,12 +87,14 @@ procedure_def : type_specifier identifier "(" type_specifier ")" "=" c pragma_def : "pragma" directive identifier [ identifier ] ";" -directive : exclude_directive +directive : big_endian_directive + | exclude_directive | header_directive | pages_directive | public_directive | skip_directive +big_endian_directive : "big_endian" exclude_directive : "exclude" header_directive : "header" pages_directive : "pages" diff --git a/tools/net/sunrpc/xdrgen/subcmds/definitions.py b/tools/net/sunrpc/xdrgen/subcmds/definitions.py index 5cd13d53221f..c956e27f37c0 100644 --- a/tools/net/sunrpc/xdrgen/subcmds/definitions.py +++ b/tools/net/sunrpc/xdrgen/subcmds/definitions.py @@ -28,9 +28,7 @@ from xdr_parse import xdr_parser, set_xdr_annotate logger.setLevel(logging.INFO) -def emit_header_definitions( - root: Specification, language: str, peer: str -) -> None: +def emit_header_definitions(root: Specification, language: str, peer: str) -> None: """Emit header definitions""" for definition in root.definitions: if isinstance(definition.value, _XdrConstant): @@ -52,6 +50,25 @@ def emit_header_definitions( gen.emit_definition(definition.value) +def emit_header_maxsize(root: Specification, language: str, peer: str) -> None: + """Emit header maxsize macros""" + print("") + for definition in root.definitions: + if isinstance(definition.value, _XdrEnum): + gen = XdrEnumGenerator(language, peer) + elif isinstance(definition.value, _XdrPointer): + gen = XdrPointerGenerator(language, peer) + elif isinstance(definition.value, _XdrTypedef): + gen = XdrTypedefGenerator(language, peer) + elif isinstance(definition.value, _XdrStruct): + gen = XdrStructGenerator(language, peer) + elif isinstance(definition.value, _XdrUnion): + gen = XdrUnionGenerator(language, peer) + else: + continue + gen.emit_maxsize(definition.value) + + def handle_parse_error(e: UnexpectedInput) -> bool: """Simple parse error reporting, no recovery attempted""" print(e) @@ -71,6 +88,7 @@ def subcmd(args: Namespace) -> int: gen.emit_definition(args.filename, ast) emit_header_definitions(ast, args.language, args.peer) + emit_header_maxsize(ast, args.language, args.peer) gen = XdrHeaderBottomGenerator(args.language, args.peer) gen.emit_definition(args.filename, ast) diff --git a/tools/net/sunrpc/xdrgen/subcmds/source.py b/tools/net/sunrpc/xdrgen/subcmds/source.py index 00c04ad15b89..2024954748f0 100644 --- a/tools/net/sunrpc/xdrgen/subcmds/source.py +++ b/tools/net/sunrpc/xdrgen/subcmds/source.py @@ -83,8 +83,7 @@ def generate_client_source(filename: str, root: Specification, language: str) -> gen = XdrSourceTopGenerator(language, "client") gen.emit_source(filename, root) - # cel: todo: client needs XDR size macros - + print("") for definition in root.definitions: emit_source_encoder(definition.value, language, "client") for definition in root.definitions: diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/close.j2 deleted file mode 100644 index ab1e576c9531..000000000000 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/close.j2 +++ /dev/null @@ -1,4 +0,0 @@ -{# SPDX-License-Identifier: GPL-2.0 #} - -bool xdrgen_decode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} *ptr); -bool xdrgen_encode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} value); diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 new file mode 100644 index 000000000000..d1405c7c5354 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/declaration/enum.j2 @@ -0,0 +1,4 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + +bool xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ name }} *ptr); +bool xdrgen_encode_{{ name }}(struct xdr_stream *xdr, {{ name }} value); diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 index 341d829afeda..6482984f1cb7 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum.j2 @@ -8,7 +8,7 @@ bool {% else %} static bool __maybe_unused {% endif %} -xdrgen_decode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} *ptr) +xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ name }} *ptr) { u32 val; diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum_be.j2 new file mode 100644 index 000000000000..44c391c10b42 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/decoder/enum_be.j2 @@ -0,0 +1,14 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + +{% if annotate %} +/* enum {{ name }} (big-endian) */ +{% endif %} +{% if name in public_apis %} +bool +{% else %} +static bool __maybe_unused +{% endif %} +xdrgen_decode_{{ name }}(struct xdr_stream *xdr, {{ name }} *ptr) +{ + return xdr_stream_decode_be32(xdr, ptr) == 0; +} diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 index 9e62344a976a..a07586cbee17 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close.j2 @@ -1,2 +1,3 @@ {# SPDX-License-Identifier: GPL-2.0 #} }; +typedef enum {{ name }} {{ name }}; diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close_be.j2 new file mode 100644 index 000000000000..2c18948bddf7 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/definition/close_be.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +}; +typedef __be32 {{ name }}; diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 index bd0a770e50f2..67245b9a914d 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum.j2 @@ -8,7 +8,7 @@ bool {% else %} static bool __maybe_unused {% endif %} -xdrgen_encode_{{ name }}(struct xdr_stream *xdr, enum {{ name }} value) +xdrgen_encode_{{ name }}(struct xdr_stream *xdr, {{ name }} value) { return xdr_stream_encode_u32(xdr, value) == XDR_UNIT; } diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum_be.j2 new file mode 100644 index 000000000000..fbbcc45948d6 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/encoder/enum_be.j2 @@ -0,0 +1,14 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + +{% if annotate %} +/* enum {{ name }} (big-endian) */ +{% endif %} +{% if name in public_apis %} +bool +{% else %} +static bool __maybe_unused +{% endif %} +xdrgen_encode_{{ name }}(struct xdr_stream *xdr, {{ name }} value) +{ + return xdr_stream_encode_be32(xdr, value) == XDR_UNIT; +} diff --git a/tools/net/sunrpc/xdrgen/templates/C/enum/maxsize/enum.j2 b/tools/net/sunrpc/xdrgen/templates/C/enum/maxsize/enum.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/enum/maxsize/enum.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/string.j2 index 12d20b143b43..12d20b143b43 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/definition/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/definition/string.j2 index 2de2feec77db..2de2feec77db 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/pointer/definition/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/definition/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/string.j2 index cf65b71eaef3..cf65b71eaef3 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/encoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/pointer/maxsize/pointer.j2 b/tools/net/sunrpc/xdrgen/templates/C/pointer/maxsize/pointer.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/pointer/maxsize/pointer.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 b/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 index d304eccb5c40..aa9940e322db 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/program/decoder/result.j2 @@ -13,10 +13,6 @@ static int {{ program }}_xdr_dec_{{ result }}(struct rpc_rqst *req, if (!xdrgen_decode_{{ result }}(xdr, result)) return -EIO; - if (result->stat != nfs_ok) { - trace_nfs_xdr_status(xdr, (int)result->stat); - return {{ program }}_stat_to_errno(result->stat); - } {% endif %} return 0; } diff --git a/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 b/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 index e3a802cbc4d7..c5518c519854 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/source_top/client.j2 @@ -3,6 +3,11 @@ // XDR specification file: {{ filename }} // XDR specification modification time: {{ mtime }} -#include <linux/sunrpc/xprt.h> +#include <linux/types.h> -#include "{{ program }}xdr_gen.h" +#include <linux/sunrpc/xdr.h> +#include <linux/sunrpc/xdrgen/_defs.h> +#include <linux/sunrpc/xdrgen/_builtins.h> +#include <linux/sunrpc/xdrgen/nlm4.h> + +#include <linux/sunrpc/clnt.h> diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/string.j2 index 12d20b143b43..12d20b143b43 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/struct/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/definition/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/definition/string.j2 index 2de2feec77db..2de2feec77db 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/struct/definition/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/struct/definition/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/string.j2 index cf65b71eaef3..cf65b71eaef3 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/struct/encoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/struct/maxsize/struct.j2 b/tools/net/sunrpc/xdrgen/templates/C/struct/maxsize/struct.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/struct/maxsize/struct.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/string.j2 index 3fe3ddd9f359..3fe3ddd9f359 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/declaration/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2 index 56c5a17d6a70..56c5a17d6a70 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/definition/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/definition/string.j2 index c03c2df8e625..c03c2df8e625 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/typedef/definition/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/definition/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2 index 3d490ff180d0..3d490ff180d0 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/encoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/basic.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/basic.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/basic.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/fixed_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/fixed_length_opaque.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/fixed_length_opaque.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/string.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/string.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/string.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_array.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_array.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_array.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_opaque.j2 b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_opaque.j2 new file mode 100644 index 000000000000..45c1d4c21b22 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/typedef/maxsize/variable_length_opaque.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/case_spec_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/case_spec_be.j2 new file mode 100644 index 000000000000..917f3a1c4588 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/case_spec_be.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + case __constant_cpu_to_be32({{ case }}): diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/variable_length_string.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/string.j2 index 83b6e5a14e7f..83b6e5a14e7f 100644 --- a/tools/net/sunrpc/xdrgen/templates/C/union/decoder/variable_length_string.j2 +++ b/tools/net/sunrpc/xdrgen/templates/C/union/decoder/string.j2 diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/encoder/case_spec_be.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/case_spec_be.j2 new file mode 100644 index 000000000000..917f3a1c4588 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/union/encoder/case_spec_be.j2 @@ -0,0 +1,2 @@ +{# SPDX-License-Identifier: GPL-2.0 #} + case __constant_cpu_to_be32({{ case }}): diff --git a/tools/net/sunrpc/xdrgen/templates/C/union/maxsize/union.j2 b/tools/net/sunrpc/xdrgen/templates/C/union/maxsize/union.j2 new file mode 100644 index 000000000000..9f3bfb47d2f4 --- /dev/null +++ b/tools/net/sunrpc/xdrgen/templates/C/union/maxsize/union.j2 @@ -0,0 +1,3 @@ +{# SPDX-License-Identifier: GPL-2.0 #} +#define {{ '{:<31}'.format(macro) }} \ + ({{ width }}) diff --git a/tools/net/sunrpc/xdrgen/xdr_ast.py b/tools/net/sunrpc/xdrgen/xdr_ast.py index dbd3fcf9c957..5233e73c7046 100644 --- a/tools/net/sunrpc/xdrgen/xdr_ast.py +++ b/tools/net/sunrpc/xdrgen/xdr_ast.py @@ -12,13 +12,50 @@ from lark.tree import Meta this_module = sys.modules[__name__] +big_endian = [] excluded_apis = [] header_name = "none" public_apis = [] -enums = set() structs = set() pass_by_reference = set() +constants = {} + + +def xdr_quadlen(val: str) -> int: + """Return integer XDR width of an XDR type""" + if val in constants: + octets = constants[val] + else: + octets = int(val) + return int((octets + 3) / 4) + + +symbolic_widths = { + "void": ["XDR_void"], + "bool": ["XDR_bool"], + "int": ["XDR_int"], + "unsigned_int": ["XDR_unsigned_int"], + "long": ["XDR_long"], + "unsigned_long": ["XDR_unsigned_long"], + "hyper": ["XDR_hyper"], + "unsigned_hyper": ["XDR_unsigned_hyper"], +} + +# Numeric XDR widths are tracked in a dictionary that is keyed +# by type_name because sometimes a caller has nothing more than +# the type_name to use to figure out the numeric width. +max_widths = { + "void": 0, + "bool": 1, + "int": 1, + "unsigned_int": 1, + "long": 1, + "unsigned_long": 1, + "hyper": 2, + "unsigned_hyper": 2, +} + @dataclass class _XdrAst(ast_utils.Ast): @@ -51,18 +88,31 @@ class _XdrTypeSpecifier(_XdrAst): """Corresponds to 'type_specifier' in the XDR language grammar""" type_name: str - c_classifier: str + c_classifier: str = "" @dataclass class _XdrDefinedType(_XdrTypeSpecifier): """Corresponds to a type defined by the input specification""" + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return [get_header_name().upper() + "_" + self.type_name + "_sz"] + + def __post_init__(self): + if self.type_name in structs: + self.c_classifier = "struct " + symbolic_widths[self.type_name] = self.symbolic_width() + @dataclass class _XdrBuiltInType(_XdrTypeSpecifier): """Corresponds to a built-in XDR type""" + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return symbolic_widths[self.type_name] + @dataclass class _XdrDeclaration(_XdrAst): @@ -77,6 +127,18 @@ class _XdrFixedLengthOpaque(_XdrDeclaration): size: str template: str = "fixed_length_opaque" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return xdr_quadlen(self.size) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return ["XDR_QUADLEN(" + self.size + ")"] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrVariableLengthOpaque(_XdrDeclaration): @@ -86,14 +148,44 @@ class _XdrVariableLengthOpaque(_XdrDeclaration): maxsize: str template: str = "variable_length_opaque" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + xdr_quadlen(self.maxsize) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = ["XDR_unsigned_int"] + if self.maxsize != "0": + widths.append("XDR_QUADLEN(" + self.maxsize + ")") + return widths + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass -class _XdrVariableLengthString(_XdrDeclaration): +class _XdrString(_XdrDeclaration): """A (NUL-terminated) variable-length string declaration""" name: str maxsize: str - template: str = "variable_length_string" + template: str = "string" + + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + xdr_quadlen(self.maxsize) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = ["XDR_unsigned_int"] + if self.maxsize != "0": + widths.append("XDR_QUADLEN(" + self.maxsize + ")") + return widths + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() @dataclass @@ -105,6 +197,19 @@ class _XdrFixedLengthArray(_XdrDeclaration): size: str template: str = "fixed_length_array" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return xdr_quadlen(self.size) * max_widths[self.spec.type_name] + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + item_width = " + ".join(symbolic_widths[self.spec.type_name]) + return ["(" + self.size + " * (" + item_width + "))"] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrVariableLengthArray(_XdrDeclaration): @@ -115,6 +220,22 @@ class _XdrVariableLengthArray(_XdrDeclaration): maxsize: str template: str = "variable_length_array" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + (xdr_quadlen(self.maxsize) * max_widths[self.spec.type_name]) + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = ["XDR_unsigned_int"] + if self.maxsize != "0": + item_width = " + ".join(symbolic_widths[self.spec.type_name]) + widths.append("(" + self.maxsize + " * (" + item_width + "))") + return widths + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrOptionalData(_XdrDeclaration): @@ -124,6 +245,20 @@ class _XdrOptionalData(_XdrDeclaration): spec: _XdrTypeSpecifier template: str = "optional_data" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return ["XDR_bool"] + + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrBasic(_XdrDeclaration): @@ -133,13 +268,34 @@ class _XdrBasic(_XdrDeclaration): spec: _XdrTypeSpecifier template: str = "basic" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return max_widths[self.spec.type_name] + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return symbolic_widths[self.spec.type_name] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrVoid(_XdrDeclaration): """A void declaration""" + name: str = "void" template: str = "void" + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 0 + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return [] + @dataclass class _XdrConstant(_XdrAst): @@ -148,6 +304,10 @@ class _XdrConstant(_XdrAst): name: str value: str + def __post_init__(self): + if self.value not in constants: + constants[self.name] = int(self.value, 0) + @dataclass class _XdrEnumerator(_XdrAst): @@ -156,6 +316,10 @@ class _XdrEnumerator(_XdrAst): name: str value: str + def __post_init__(self): + if self.value not in constants: + constants[self.name] = int(self.value, 0) + @dataclass class _XdrEnum(_XdrAst): @@ -166,6 +330,18 @@ class _XdrEnum(_XdrAst): maximum: int enumerators: List[_XdrEnumerator] + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return 1 + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return ["XDR_int"] + + def __post_init__(self): + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrStruct(_XdrAst): @@ -174,6 +350,26 @@ class _XdrStruct(_XdrAst): name: str fields: List[_XdrDeclaration] + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + width = 0 + for field in self.fields: + width += field.max_width() + return width + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = [] + for field in self.fields: + widths += field.symbolic_width() + return widths + + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrPointer(_XdrAst): @@ -182,6 +378,27 @@ class _XdrPointer(_XdrAst): name: str fields: List[_XdrDeclaration] + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + width = 1 + for field in self.fields[0:-1]: + width += field.max_width() + return width + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + widths = [] + widths += ["XDR_bool"] + for field in self.fields[0:-1]: + widths += field.symbolic_width() + return widths + + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _XdrTypedef(_XdrAst): @@ -189,6 +406,23 @@ class _XdrTypedef(_XdrAst): declaration: _XdrDeclaration + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + return self.declaration.max_width() + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + return self.declaration.symbolic_width() + + def __post_init__(self): + if isinstance(self.declaration, _XdrBasic): + new_type = self.declaration + if isinstance(new_type.spec, _XdrDefinedType): + if new_type.spec.type_name in pass_by_reference: + pass_by_reference.add(new_type.name) + max_widths[new_type.name] = self.max_width() + symbolic_widths[new_type.name] = self.symbolic_width() + @dataclass class _XdrCaseSpec(_XdrAst): @@ -216,6 +450,36 @@ class _XdrUnion(_XdrAst): cases: List[_XdrCaseSpec] default: _XdrDeclaration + def max_width(self) -> int: + """Return width of type in XDR_UNITS""" + max_width = 0 + for case in self.cases: + if case.arm.max_width() > max_width: + max_width = case.arm.max_width() + if self.default: + if self.default.arm.max_width() > max_width: + max_width = self.default.arm.max_width() + return 1 + max_width + + def symbolic_width(self) -> List: + """Return list containing XDR width of type's components""" + max_width = 0 + for case in self.cases: + if case.arm.max_width() > max_width: + max_width = case.arm.max_width() + width = case.arm.symbolic_width() + if self.default: + if self.default.arm.max_width() > max_width: + max_width = self.default.arm.max_width() + width = self.default.arm.symbolic_width() + return symbolic_widths[self.discriminant.name] + width + + def __post_init__(self): + structs.add(self.name) + pass_by_reference.add(self.name) + max_widths[self.name] = self.max_width() + symbolic_widths[self.name] = self.symbolic_width() + @dataclass class _RpcProcedure(_XdrAst): @@ -290,24 +554,13 @@ class ParseToAst(Transformer): return _XdrConstantValue(value) def type_specifier(self, children): - """Instantiate one type_specifier object""" - c_classifier = "" + """Instantiate one _XdrTypeSpecifier object""" if isinstance(children[0], _XdrIdentifier): name = children[0].symbol - if name in enums: - c_classifier = "enum " - if name in structs: - c_classifier = "struct " - return _XdrDefinedType( - type_name=name, - c_classifier=c_classifier, - ) - - token = children[0].data - return _XdrBuiltInType( - type_name=token.value, - c_classifier=c_classifier, - ) + return _XdrDefinedType(type_name=name) + + name = children[0].data.value + return _XdrBuiltInType(type_name=name) def constant_def(self, children): """Instantiate one _XdrConstant object""" @@ -320,7 +573,6 @@ class ParseToAst(Transformer): def enum(self, children): """Instantiate one _XdrEnum object""" enum_name = children[0].symbol - enums.add(enum_name) i = 0 enumerators = [] @@ -350,15 +602,15 @@ class ParseToAst(Transformer): return _XdrVariableLengthOpaque(name, maxsize) - def variable_length_string(self, children): - """Instantiate one _XdrVariableLengthString declaration object""" + def string(self, children): + """Instantiate one _XdrString declaration object""" name = children[0].symbol if children[1] is not None: maxsize = children[1].value else: maxsize = "0" - return _XdrVariableLengthString(name, maxsize) + return _XdrString(name, maxsize) def fixed_length_array(self, children): """Instantiate one _XdrFixedLengthArray declaration object""" @@ -383,8 +635,6 @@ class ParseToAst(Transformer): """Instantiate one _XdrOptionalData declaration object""" spec = children[0] name = children[1].symbol - structs.add(name) - pass_by_reference.add(name) return _XdrOptionalData(name, spec) @@ -403,8 +653,6 @@ class ParseToAst(Transformer): def struct(self, children): """Instantiate one _XdrStruct object""" name = children[0].symbol - structs.add(name) - pass_by_reference.add(name) fields = children[1].children last_field = fields[-1] @@ -419,11 +667,6 @@ class ParseToAst(Transformer): def typedef(self, children): """Instantiate one _XdrTypedef object""" new_type = children[0] - if isinstance(new_type, _XdrBasic) and isinstance( - new_type.spec, _XdrDefinedType - ): - if new_type.spec.type_name in pass_by_reference: - pass_by_reference.add(new_type.name) return _XdrTypedef(new_type) @@ -445,8 +688,6 @@ class ParseToAst(Transformer): def union(self, children): """Instantiate one _XdrUnion object""" name = children[0].symbol - structs.add(name) - pass_by_reference.add(name) body = children[1] discriminant = body.children[0].children[0] @@ -484,6 +725,8 @@ class ParseToAst(Transformer): """Instantiate one _Pragma object""" directive = children[0].children[0].data match directive: + case "big_endian_directive": + big_endian.append(children[1].symbol) case "exclude_directive": excluded_apis.append(children[1].symbol) case "header_directive": diff --git a/tools/net/sunrpc/xdrgen/xdrgen b/tools/net/sunrpc/xdrgen/xdrgen index 95f303b2861b..43762be39252 100755 --- a/tools/net/sunrpc/xdrgen/xdrgen +++ b/tools/net/sunrpc/xdrgen/xdrgen @@ -128,5 +128,7 @@ There is NO WARRANTY, to the extent permitted by law.""", try: if __name__ == "__main__": sys.exit(main()) -except (SystemExit, KeyboardInterrupt, BrokenPipeError): +except SystemExit: + sys.exit(0) +except (KeyboardInterrupt, BrokenPipeError): sys.exit(1) diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/cli.py index b8481f401376..41d9fa5c818d 100755 --- a/tools/net/ynl/cli.py +++ b/tools/net/ynl/cli.py @@ -3,9 +3,11 @@ import argparse import json +import pathlib import pprint -import time +import sys +sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) from lib import YnlFamily, Netlink, NlError @@ -43,7 +45,10 @@ def main(): group.add_argument('--list-ops', action='store_true') group.add_argument('--list-msgs', action='store_true') - parser.add_argument('--sleep', dest='sleep', type=int) + parser.add_argument('--duration', dest='duration', type=int, + help='when subscribed, watch for DURATION seconds') + parser.add_argument('--sleep', dest='duration', type=int, + help='alias for duration') parser.add_argument('--subscribe', dest='ntf', type=str) parser.add_argument('--replace', dest='flags', action='append_const', const=Netlink.NLM_F_REPLACE) @@ -80,9 +85,6 @@ def main(): if args.ntf: ynl.ntf_subscribe(args.ntf) - if args.sleep: - time.sleep(args.sleep) - if args.list_ops: for op_name, op in ynl.ops.items(): print(op_name, " [", ", ".join(op.modes), "]") @@ -106,8 +108,11 @@ def main(): exit(1) if args.ntf: - ynl.check_ntf() - output(ynl.async_msg_queue) + try: + for msg in ynl.poll_ntf(duration=args.duration): + output(msg) + except KeyboardInterrupt: + pass if __name__ == "__main__": diff --git a/tools/net/ynl/ethtool.py b/tools/net/ynl/ethtool.py index 63c471f075ab..ebb0a11f67bf 100755 --- a/tools/net/ynl/ethtool.py +++ b/tools/net/ynl/ethtool.py @@ -3,11 +3,13 @@ import argparse import json +import pathlib import pprint import sys import re import os +sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) from lib import YnlFamily def args_to_req(ynl, op_name, args, req): diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile index 713f5fb9cc2d..7db5240de58a 100644 --- a/tools/net/ynl/generated/Makefile +++ b/tools/net/ynl/generated/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CC=gcc -CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \ +CFLAGS += -std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \ -I../lib/ -idirafter $(UAPI_PATH) ifeq ("$(DEBUG)","1") CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile index 2887cc5de530..94c49cca3dca 100644 --- a/tools/net/ynl/lib/Makefile +++ b/tools/net/ynl/lib/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CC=gcc -CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow +CFLAGS += -std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow ifeq ("$(DEBUG)","1") CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan endif diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index b6d6f8aef423..a745739655ad 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -131,6 +131,9 @@ class SpecEnumSet(SpecElement): def has_doc(self): if 'doc' in self.yaml: return True + return self.has_entry_doc() + + def has_entry_doc(self): for entry in self.entries.values(): if entry.has_doc(): return True diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index c22c22bf2cb7..01ec01a90e76 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -12,6 +12,9 @@ import sys import yaml import ipaddress import uuid +import queue +import selectors +import time from .nlspec import SpecFamily @@ -489,7 +492,7 @@ class YnlFamily(SpecFamily): self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_GET_STRICT_CHK, 1) self.async_msg_ids = set() - self.async_msg_queue = [] + self.async_msg_queue = queue.Queue() for msg in self.msgs.values(): if msg.is_async: @@ -903,7 +906,7 @@ class YnlFamily(SpecFamily): msg['name'] = op['name'] msg['msg'] = attrs - self.async_msg_queue.append(msg) + self.async_msg_queue.put(msg) def check_ntf(self): while True: @@ -925,11 +928,30 @@ class YnlFamily(SpecFamily): decoded = self.nlproto.decode(self, nl_msg, None) if decoded.cmd() not in self.async_msg_ids: - print("Unexpected msg id done while checking for ntf", decoded) + print("Unexpected msg id while checking for ntf", decoded) continue self.handle_ntf(decoded) + def poll_ntf(self, duration=None): + start_time = time.time() + selector = selectors.DefaultSelector() + selector.register(self.sock, selectors.EVENT_READ) + + while True: + try: + yield self.async_msg_queue.get_nowait() + except queue.Empty: + if duration is not None: + timeout = start_time + duration - time.time() + if timeout <= 0: + return + else: + timeout = None + events = selector.select(timeout) + if events: + self.check_ntf() + def operation_do_attributes(self, name): """ For a given operation name, find and return a supported diff --git a/tools/net/ynl/samples/Makefile b/tools/net/ynl/samples/Makefile index e194a7565861..c9494a564da4 100644 --- a/tools/net/ynl/samples/Makefile +++ b/tools/net/ynl/samples/Makefile @@ -3,7 +3,7 @@ include ../Makefile.deps CC=gcc -CFLAGS=-std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \ +CFLAGS += -std=gnu11 -O2 -W -Wall -Wextra -Wno-unused-parameter -Wshadow \ -I../lib/ -I../generated/ -idirafter $(UAPI_PATH) ifeq ("$(DEBUG)","1") CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan diff --git a/tools/net/ynl/samples/page-pool.c b/tools/net/ynl/samples/page-pool.c index 332f281ee5cb..e5d521320fbf 100644 --- a/tools/net/ynl/samples/page-pool.c +++ b/tools/net/ynl/samples/page-pool.c @@ -118,7 +118,7 @@ int main(int argc, char **argv) name = if_indextoname(s->ifc, ifname); if (name) printf("%8s", name); - printf("[%d]\t", s->ifc); + printf("[%u]\t", s->ifc); } printf("page pools: %u (zombies: %u)\n", diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/ynl-gen-c.py index 717530bc9c52..d8201c4b1520 100755 --- a/tools/net/ynl/ynl-gen-c.py +++ b/tools/net/ynl/ynl-gen-c.py @@ -4,12 +4,15 @@ import argparse import collections import filecmp +import pathlib import os import re import shutil +import sys import tempfile import yaml +sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix()) from lib import SpecFamily, SpecAttrSet, SpecAttr, SpecOperation, SpecEnumSet, SpecEnumEntry @@ -80,11 +83,21 @@ class Type(SpecAttr): value = self.checks.get(limit, default) if value is None: return value - elif value in self.family.consts: + if isinstance(value, int): + return value + if value in self.family.consts: + raise Exception("Resolving family constants not implemented, yet") + return limit_to_number(value) + + def get_limit_str(self, limit, default=None, suffix=''): + value = self.checks.get(limit, default) + if value is None: + return '' + if isinstance(value, int): + return str(value) + suffix + if value in self.family.consts: return c_upper(f"{self.family['name']}-{value}") - if not isinstance(value, int): - value = limit_to_number(value) - return value + return c_upper(value) def resolve(self): if 'name-prefix' in self.attr: @@ -157,7 +170,10 @@ class Type(SpecAttr): return '{ .type = ' + policy + ', }' def attr_policy(self, cw): - policy = c_upper('nla-' + self.attr['type']) + policy = f'NLA_{c_upper(self.type)}' + if self.attr.get('byte-order') == 'big-endian': + if self.type in {'u16', 'u32'}: + policy = f'NLA_BE{self.type[1:]}' spec = self._attr_policy(policy) cw.p(f"\t[{self.enum_name}] = {spec},") @@ -358,11 +374,11 @@ class TypeScalar(Type): elif 'full-range' in self.checks: return f"NLA_POLICY_FULL_RANGE({policy}, &{c_lower(self.enum_name)}_range)" elif 'range' in self.checks: - return f"NLA_POLICY_RANGE({policy}, {self.get_limit('min')}, {self.get_limit('max')})" + return f"NLA_POLICY_RANGE({policy}, {self.get_limit_str('min')}, {self.get_limit_str('max')})" elif 'min' in self.checks: - return f"NLA_POLICY_MIN({policy}, {self.get_limit('min')})" + return f"NLA_POLICY_MIN({policy}, {self.get_limit_str('min')})" elif 'max' in self.checks: - return f"NLA_POLICY_MAX({policy}, {self.get_limit('max')})" + return f"NLA_POLICY_MAX({policy}, {self.get_limit_str('max')})" return super()._attr_policy(policy) def _attr_typol(self): @@ -413,11 +429,11 @@ class TypeString(Type): def _attr_policy(self, policy): if 'exact-len' in self.checks: - mem = 'NLA_POLICY_EXACT_LEN(' + str(self.get_limit('exact-len')) + ')' + mem = 'NLA_POLICY_EXACT_LEN(' + self.get_limit_str('exact-len') + ')' else: mem = '{ .type = ' + policy if 'max-len' in self.checks: - mem += ', .len = ' + str(self.get_limit('max-len')) + mem += ', .len = ' + self.get_limit_str('max-len') mem += ', }' return mem @@ -464,17 +480,24 @@ class TypeBinary(Type): return f'.type = YNL_PT_BINARY,' def _attr_policy(self, policy): - if 'exact-len' in self.checks: - mem = 'NLA_POLICY_EXACT_LEN(' + str(self.get_limit('exact-len')) + ')' + if len(self.checks) == 0: + pass + elif len(self.checks) == 1: + check_name = list(self.checks)[0] + if check_name not in {'exact-len', 'min-len', 'max-len'}: + raise Exception('Unsupported check for binary type: ' + check_name) else: - mem = '{ ' - if len(self.checks) == 1 and 'min-len' in self.checks: - mem += '.len = ' + str(self.get_limit('min-len')) - elif len(self.checks) == 0: - mem += '.type = NLA_BINARY' - else: - raise Exception('One or more of binary type checks not implemented, yet') - mem += ', }' + raise Exception('More than one check for binary type not implemented, yet') + + if len(self.checks) == 0: + mem = '{ .type = NLA_BINARY, }' + elif 'exact-len' in self.checks: + mem = 'NLA_POLICY_EXACT_LEN(' + self.get_limit_str('exact-len') + ')' + elif 'min-len' in self.checks: + mem = '{ .len = ' + self.get_limit_str('min-len') + ', }' + elif 'max-len' in self.checks: + mem = 'NLA_POLICY_MAX_LEN(' + self.get_limit_str('max-len') + ')' + return mem def attr_put(self, ri, var): @@ -2161,9 +2184,9 @@ def print_kernel_policy_ranges(family, cw): cw.block_start(line=f'static const struct netlink_range_validation{sign} {c_lower(attr.enum_name)}_range =') members = [] if 'min' in attr.checks: - members.append(('min', str(attr.get_limit('min')) + suffix)) + members.append(('min', attr.get_limit_str('min', suffix=suffix))) if 'max' in attr.checks: - members.append(('max', str(attr.get_limit('max')) + suffix)) + members.append(('max', attr.get_limit_str('max', suffix=suffix))) cw.write_struct_init(members) cw.block_end(line=';') cw.nl() @@ -2396,6 +2419,7 @@ def uapi_enum_start(family, cw, obj, ckey='', enum_name='enum-name'): def render_uapi(family, cw): hdr_prot = f"_UAPI_LINUX_{c_upper(family.uapi_header_name)}_H" + hdr_prot = hdr_prot.replace('/', '_') cw.p('#ifndef ' + hdr_prot) cw.p('#define ' + hdr_prot) cw.nl() @@ -2417,11 +2441,15 @@ def render_uapi(family, cw): enum = family.consts[const['name']] if enum.has_doc(): - cw.p('/**') - doc = '' - if 'doc' in enum: - doc = ' - ' + enum['doc'] - cw.write_doc_line(enum.enum_name + doc) + if enum.has_entry_doc(): + cw.p('/**') + doc = '' + if 'doc' in enum: + doc = ' - ' + enum['doc'] + cw.write_doc_line(enum.enum_name + doc) + else: + cw.p('/*') + cw.write_doc_line(enum['doc'], indent=False) for entry in enum.entries.values(): if entry.has_doc(): doc = '@' + entry.c_name + ': ' + entry['doc'] diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile index bf7f7f84ac62..f56e27727534 100644 --- a/tools/objtool/Makefile +++ b/tools/objtool/Makefile @@ -24,6 +24,7 @@ LIBELF_LIBS := $(shell $(HOSTPKG_CONFIG) libelf --libs 2>/dev/null || echo -lel all: $(OBJTOOL) INCLUDES := -I$(srctree)/tools/include \ + -I$(srctree)/tools/include/uapi \ -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \ -I$(srctree)/tools/arch/$(SRCARCH)/include \ -I$(srctree)/tools/objtool/include \ diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index ed6bff0e01dc..fe1362c34564 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -456,10 +456,6 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec if (!rex_w) break; - /* skip RIP relative displacement */ - if (is_RIP()) - break; - /* skip nontrivial SIB */ if (have_SIB()) { modrm_rm = sib_base; @@ -467,6 +463,12 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec break; } + /* lea disp(%rip), %dst */ + if (is_RIP()) { + insn->type = INSN_LEA_RIP; + break; + } + /* lea disp(%src), %dst */ ADD_OP(op) { op->src.offset = ins.displacement.value; @@ -737,7 +739,10 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec break; } - insn->immediate = ins.immediate.nbytes ? ins.immediate.value : 0; + if (ins.immediate.nbytes) + insn->immediate = ins.immediate.value; + else if (ins.displacement.nbytes) + insn->immediate = ins.displacement.value; return 0; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 6604f5d038aa..f7586f82b967 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -4392,6 +4392,51 @@ static bool noendbr_range(struct objtool_file *file, struct instruction *insn) return insn->offset == sym->offset + sym->len; } +static int __validate_ibt_insn(struct objtool_file *file, struct instruction *insn, + struct instruction *dest) +{ + if (dest->type == INSN_ENDBR) { + mark_endbr_used(dest); + return 0; + } + + if (insn_func(dest) && insn_func(insn) && + insn_func(dest)->pfunc == insn_func(insn)->pfunc) { + /* + * Anything from->to self is either _THIS_IP_ or + * IRET-to-self. + * + * There is no sane way to annotate _THIS_IP_ since the + * compiler treats the relocation as a constant and is + * happy to fold in offsets, skewing any annotation we + * do, leading to vast amounts of false-positives. + * + * There's also compiler generated _THIS_IP_ through + * KCOV and such which we have no hope of annotating. + * + * As such, blanket accept self-references without + * issue. + */ + return 0; + } + + /* + * Accept anything ANNOTATE_NOENDBR. + */ + if (dest->noendbr) + return 0; + + /* + * Accept if this is the instruction after a symbol + * that is (no)endbr -- typical code-range usage. + */ + if (noendbr_range(file, dest)) + return 0; + + WARN_INSN(insn, "relocation to !ENDBR: %s", offstr(dest->sec, dest->offset)); + return 1; +} + static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn) { struct instruction *dest; @@ -4404,6 +4449,7 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn * direct/indirect branches: */ switch (insn->type) { + case INSN_CALL: case INSN_CALL_DYNAMIC: case INSN_JUMP_CONDITIONAL: @@ -4413,6 +4459,23 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn case INSN_RETURN: case INSN_NOP: return 0; + + case INSN_LEA_RIP: + if (!insn_reloc(file, insn)) { + /* local function pointer reference without reloc */ + + off = arch_jump_destination(insn); + + dest = find_insn(file, insn->sec, off); + if (!dest) { + WARN_INSN(insn, "corrupt function pointer reference"); + return 1; + } + + return __validate_ibt_insn(file, insn, dest); + } + break; + default: break; } @@ -4423,13 +4486,6 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn reloc_offset(reloc) + 1, (insn->offset + insn->len) - (reloc_offset(reloc) + 1))) { - /* - * static_call_update() references the trampoline, which - * doesn't have (or need) ENDBR. Skip warning in that case. - */ - if (reloc->sym->static_call_tramp) - continue; - off = reloc->sym->offset; if (reloc_type(reloc) == R_X86_64_PC32 || reloc_type(reloc) == R_X86_64_PLT32) @@ -4441,47 +4497,7 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn if (!dest) continue; - if (dest->type == INSN_ENDBR) { - mark_endbr_used(dest); - continue; - } - - if (insn_func(dest) && insn_func(insn) && - insn_func(dest)->pfunc == insn_func(insn)->pfunc) { - /* - * Anything from->to self is either _THIS_IP_ or - * IRET-to-self. - * - * There is no sane way to annotate _THIS_IP_ since the - * compiler treats the relocation as a constant and is - * happy to fold in offsets, skewing any annotation we - * do, leading to vast amounts of false-positives. - * - * There's also compiler generated _THIS_IP_ through - * KCOV and such which we have no hope of annotating. - * - * As such, blanket accept self-references without - * issue. - */ - continue; - } - - /* - * Accept anything ANNOTATE_NOENDBR. - */ - if (dest->noendbr) - continue; - - /* - * Accept if this is the instruction after a symbol - * that is (no)endbr -- typical code-range usage. - */ - if (noendbr_range(file, dest)) - continue; - - WARN_INSN(insn, "relocation to !ENDBR: %s", offstr(dest->sec, dest->offset)); - - warnings++; + warnings += __validate_ibt_insn(file, insn, dest); } return warnings; @@ -4557,6 +4573,7 @@ static int validate_ibt(struct objtool_file *file) !strcmp(sec->name, "__jump_table") || !strcmp(sec->name, "__mcount_loc") || !strcmp(sec->name, ".kcfi_traps") || + !strcmp(sec->name, "__tracepoints") || strstr(sec->name, "__patchable_function_entries")) continue; diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 0b303eba660e..d63b46a19f39 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -28,6 +28,7 @@ enum insn_type { INSN_CLD, INSN_TRAP, INSN_ENDBR, + INSN_LEA_RIP, INSN_OTHER, }; diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index e7da92489167..f37614cc2c1b 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -11,7 +11,6 @@ NORETURN(__ia32_sys_exit) NORETURN(__ia32_sys_exit_group) NORETURN(__kunit_abort) NORETURN(__module_put_and_kthread_exit) -NORETURN(__reiserfs_panic) NORETURN(__stack_chk_fail) NORETURN(__tdx_hypercall_failed) NORETURN(__ubsan_handle_builtin_unreachable) diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c index 470258009ddc..7b530d838d40 100644 --- a/tools/pci/pcitest.c +++ b/tools/pci/pcitest.c @@ -95,7 +95,7 @@ static int run_test(struct pci_test *test) if (test->msinum > 0 && test->msinum <= 32) { ret = ioctl(fd, PCITEST_MSI, test->msinum); - fprintf(stdout, "MSI%d:\t\t", test->msinum); + fprintf(stdout, "MSI%u:\t\t", test->msinum); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -104,7 +104,7 @@ static int run_test(struct pci_test *test) if (test->msixnum > 0 && test->msixnum <= 2048) { ret = ioctl(fd, PCITEST_MSIX, test->msixnum); - fprintf(stdout, "MSI-X%d:\t\t", test->msixnum); + fprintf(stdout, "MSI-X%u:\t\t", test->msixnum); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -116,7 +116,7 @@ static int run_test(struct pci_test *test) if (test->use_dma) param.flags = PCITEST_FLAGS_USE_DMA; ret = ioctl(fd, PCITEST_WRITE, ¶m); - fprintf(stdout, "WRITE (%7ld bytes):\t\t", test->size); + fprintf(stdout, "WRITE (%7lu bytes):\t\t", test->size); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -128,7 +128,7 @@ static int run_test(struct pci_test *test) if (test->use_dma) param.flags = PCITEST_FLAGS_USE_DMA; ret = ioctl(fd, PCITEST_READ, ¶m); - fprintf(stdout, "READ (%7ld bytes):\t\t", test->size); + fprintf(stdout, "READ (%7lu bytes):\t\t", test->size); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else @@ -140,7 +140,7 @@ static int run_test(struct pci_test *test) if (test->use_dma) param.flags = PCITEST_FLAGS_USE_DMA; ret = ioctl(fd, PCITEST_COPY, ¶m); - fprintf(stdout, "COPY (%7ld bytes):\t\t", test->size); + fprintf(stdout, "COPY (%7lu bytes):\t\t", test->size); if (ret < 0) fprintf(stdout, "TEST FAILED\n"); else diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index f5b81d439387..5aaf73df6700 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -39,9 +39,9 @@ trace/beauty/generated/ pmu-events/pmu-events.c pmu-events/jevents pmu-events/metric_test.log -tests/shell/*.shellcheck_log -tests/shell/coresight/*.shellcheck_log -tests/shell/lib/*.shellcheck_log +pmu-events/empty-pmu-events.log +pmu-events/test-empty-pmu-events.c +*.shellcheck_log feature/ libapi/ libbpf/ diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt index 19cc179be9a7..40476b227f8d 100644 --- a/tools/perf/Documentation/itrace.txt +++ b/tools/perf/Documentation/itrace.txt @@ -1,6 +1,6 @@ i synthesize instructions events y synthesize cycles events - b synthesize branches events (branch misses for Arm SPE) + b synthesize branches events c synthesize branches events (calls only) r synthesize branches events (returns only) x synthesize transactions events diff --git a/tools/perf/Documentation/perf-arm-spe.txt b/tools/perf/Documentation/perf-arm-spe.txt index 0a3eda482307..de2b0b479249 100644 --- a/tools/perf/Documentation/perf-arm-spe.txt +++ b/tools/perf/Documentation/perf-arm-spe.txt @@ -187,7 +187,7 @@ groups: 7 llc-access 2 tlb-miss 1K tlb-access - 36 branch-miss + 36 branch 0 remote-access 900 memory diff --git a/tools/perf/Documentation/perf-check.txt b/tools/perf/Documentation/perf-check.txt index 10f69fb6850b..31741499e786 100644 --- a/tools/perf/Documentation/perf-check.txt +++ b/tools/perf/Documentation/perf-check.txt @@ -47,15 +47,15 @@ feature:: bpf / HAVE_LIBBPF_SUPPORT bpf_skeletons / HAVE_BPF_SKEL debuginfod / HAVE_DEBUGINFOD_SUPPORT - dwarf / HAVE_DWARF_SUPPORT - dwarf_getlocations / HAVE_DWARF_GETLOCATIONS_SUPPORT + dwarf / HAVE_LIBDW_SUPPORT + dwarf_getlocations / HAVE_LIBDW_SUPPORT dwarf-unwind / HAVE_DWARF_UNWIND_SUPPORT auxtrace / HAVE_AUXTRACE_SUPPORT libaudit / HAVE_LIBAUDIT_SUPPORT libbfd / HAVE_LIBBFD_SUPPORT libcapstone / HAVE_LIBCAPSTONE_SUPPORT libcrypto / HAVE_LIBCRYPTO_SUPPORT - libdw-dwarf-unwind / HAVE_DWARF_SUPPORT + libdw-dwarf-unwind / HAVE_LIBDW_SUPPORT libelf / HAVE_LIBELF_SUPPORT libnuma / HAVE_LIBNUMA_SUPPORT libopencsd / HAVE_CSTRACE_SUPPORT diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt index 379f9d7a8ab1..1f668d4724e3 100644 --- a/tools/perf/Documentation/perf-config.txt +++ b/tools/perf/Documentation/perf-config.txt @@ -247,6 +247,19 @@ annotate.*:: These are in control of addresses, jump function, source code in lines of assembly code from a specific program. + annotate.disassemblers:: + Choose the disassembler to use: "objdump", "llvm", "capstone", + if not specified it will first try, if available, the "llvm" one, + then, if it fails, "capstone", and finally the original "objdump" + based one. + + Choosing a different one is useful when handling some feature that + is known to be best support at some point by one of the options, + to compare the output when in doubt about some bug, etc. + + This can be a list, in order of preference, the first one that works + finishes the process. + annotate.addr2line:: addr2line binary to use for file names and line numbers. diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index dea005410ec0..d0c65fad419a 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -8,7 +8,7 @@ perf-list - List all symbolic event types SYNOPSIS -------- [verse] -'perf list' [--no-desc] [--long-desc] +'perf list' [<options>] [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob] DESCRIPTION @@ -243,6 +243,21 @@ For accessing trace point events perf needs to have read access to /sys/kernel/tracing, even when perf_event_paranoid is in a relaxed setting. +TOOL/HWMON EVENTS +----------------- + +Some events don't have an associated PMU instead reading values +available to software without perf_event_open. As these events don't +support sampling they can only really be read by tools like perf stat. + +Tool events provide times and certain system parameters. Examples +include duration_time, user_time, system_time and num_cpus_online. + +Hwmon events provide easy access to hwmon sysfs data typically in +/sys/class/hwmon. This information includes temperatures, fan speeds +and energy usage. + + TRACING ------- diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 7c66d81ab978..87f864519406 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -391,6 +391,14 @@ OPTIONS This allows to examine the path the program took to each sample. The data collection must have used -b (or -j) and -g. + Also show with some branch flags that can be: + - Predicted: display the average percentage of predicated branches. + (predicated number / total number) + - Abort: display the number of tsx aborted branches. + - Cycles: cycles in basic block. + + - iterations: display the average number of iterations in callchain list. + --addr2line=<path>:: Path to addr2line binary. diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 3db64954a267..6dbbddb6464d 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -221,6 +221,14 @@ OPTIONS for 'perf sched timehist' priorities are specified with -: 120-129. A combination of both can also be provided: 0,120-129. +-P:: +--pre-migrations:: + Show pre-migration wait time. pre-migration wait time is the time spent + by a task waiting on a runqueue but not getting the chance to run there + and is migrated to a different runqueue where it is finally run. This + time between sched_wakeup and migrate_task is the pre-migration wait + time. + OPTIONS for 'perf sched replay' ------------------------------ diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index 13e37e9385ee..27a1cac6fe76 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -624,7 +624,7 @@ as perf_trace_context.perf_script_context . perf_set_itrace_options(context, itrace_options) - set --itrace options if they have not been set already perf_sample_srcline(context) - returns source_file_name, line_number perf_sample_srccode(context) - returns source_file_name, line_number, source_line - + perf_config_get(config_name) - returns the value of the named config item, or None if unset Util.py Module ~~~~~~~~~~~~~~ diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt index 9acb8d1f6588..efcdec528a8f 100644 --- a/tools/perf/Documentation/perf-test.txt +++ b/tools/perf/Documentation/perf-test.txt @@ -48,3 +48,20 @@ OPTIONS --dso:: Specify a DSO for the "Symbols" test. + +-w:: +--workload=:: + Run a built-in workload, to list them use '--list-workloads', current ones include: + noploop, thloop, leafloop, sqrtloop, brstack, datasym and landlock. + + Used with the shell script regression tests. + + Some accept an extra parameter: + + seconds: leafloop, noploop, sqrtloop, thloop + nrloops: brstack + + The datasym and landlock workloads don't accept any. + +--list-workloads:: + List the available workloads to use with -w/--workload. diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 4dcf7a0fd235..2916d59c88cd 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -31,7 +31,7 @@ $(call detected_var,SRCARCH) ifneq ($(NO_SYSCALL_TABLE),1) NO_SYSCALL_TABLE := 1 - ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 powerpc arm64 s390 mips loongarch)) + ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 powerpc arm64 s390 mips loongarch riscv)) NO_SYSCALL_TABLE := 0 endif @@ -83,6 +83,10 @@ ifeq ($(ARCH),mips) LIBUNWIND_LIBS = -lunwind -lunwind-mips endif +ifeq ($(ARCH),riscv) + CFLAGS += -I$(OUTPUT)arch/riscv/include/generated +endif + # So far there's only x86 and arm libdw unwind support merged in perf. # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures @@ -91,6 +95,10 @@ ifneq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc s390 csky riscv loon NO_LIBDW_DWARF_UNWIND := 1 endif +ifneq ($(LIBUNWIND),1) + NO_LIBUNWIND := 1 +endif + ifeq ($(LIBUNWIND_LIBS),) NO_LIBUNWIND := 1 endif @@ -147,9 +155,9 @@ ifdef LIBDW_DIR endif DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) - DWARFLIBS += -lelf -ldl -lz -llzma -lbz2 -lzstd + DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd - LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw).0.0 LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) @@ -158,9 +166,12 @@ ifeq ($(findstring -static,${LDFLAGS}),-static) ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) DWARFLIBS += -lebl endif + + # Must put -ldl after -lebl for dependency + DWARFLIBS += -ldl endif -FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) -FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) $(DWARFLIBS) +FEATURE_CHECK_CFLAGS-libdw := $(LIBDW_CFLAGS) +FEATURE_CHECK_LDFLAGS-libdw := $(LIBDW_LDFLAGS) $(DWARFLIBS) # for linking with debug library, run like: # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ @@ -200,10 +211,6 @@ FEATURE_CHECK_CFLAGS-bpf = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arc # include ARCH specific config -include $(src-perf)/arch/$(SRCARCH)/Makefile -ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET - CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET -endif - include $(srctree)/tools/scripts/utilities.mak ifeq ($(call get-executable,$(FLEX)),) @@ -423,7 +430,7 @@ ifeq ($(feature-file-handle), 1) endif ifdef NO_LIBELF - NO_DWARF := 1 + NO_LIBDW := 1 NO_LIBUNWIND := 1 NO_LIBDW_DWARF_UNWIND := 1 NO_LIBBPF := 1 @@ -458,28 +465,11 @@ else endif endif else - ifndef NO_LIBDW_DWARF_UNWIND - ifneq ($(feature-libdw-dwarf-unwind),1) - NO_LIBDW_DWARF_UNWIND := 1 - $(warning No libdw DWARF unwind found, Please install elfutils-devel/libdw-dev >= 0.158 and/or set LIBDW_DIR) + ifneq ($(feature-libdw), 1) + ifndef NO_LIBDW + $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.157, disables dwarf support. Please install new elfutils-devel/libdw-dev) + NO_LIBDW := 1 endif - endif - ifneq ($(feature-dwarf), 1) - ifndef NO_DWARF - $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev) - NO_DWARF := 1 - endif - else - ifneq ($(feature-dwarf_getlocations), 1) - $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.157) - else - CFLAGS += -DHAVE_DWARF_GETLOCATIONS_SUPPORT - endif # dwarf_getlocations - ifneq ($(feature-dwarf_getcfi), 1) - $(warning Old libdw.h, finding variables at given 'perf probe' point will not work, install elfutils-devel/libdw-dev >= 0.142) - else - CFLAGS += -DHAVE_DWARF_CFI_SUPPORT - endif # dwarf_getcfi endif # Dwarf support endif # libelf support endif # NO_LIBELF @@ -490,7 +480,7 @@ ifeq ($(feature-libaio), 1) endif endif -ifdef NO_DWARF +ifdef NO_LIBDW NO_LIBDW_DWARF_UNWIND := 1 endif @@ -568,17 +558,12 @@ ifndef NO_LIBELF endif endif - ifndef NO_DWARF - ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined) - $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled) - NO_DWARF := 1 - else - CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) - LDFLAGS += $(LIBDW_LDFLAGS) - EXTLIBS += ${DWARFLIBS} - $(call detected,CONFIG_DWARF) - endif # PERF_HAVE_DWARF_REGS - endif # NO_DWARF + ifndef NO_LIBDW + CFLAGS += -DHAVE_LIBDW_SUPPORT $(LIBDW_CFLAGS) + LDFLAGS += $(LIBDW_LDFLAGS) + EXTLIBS += ${DWARFLIBS} + $(call detected,CONFIG_LIBDW) + endif # NO_LIBDW ifndef NO_LIBBPF ifeq ($(feature-bpf), 1) @@ -627,7 +612,7 @@ ifdef PERF_HAVE_JITDUMP endif ifeq ($(SRCARCH),powerpc) - ifndef NO_DWARF + ifndef NO_LIBDW CFLAGS += -DHAVE_SKIP_CALLCHAIN_IDX endif endif @@ -701,8 +686,8 @@ ifeq ($(BUILD_BPF_SKEL),1) BUILD_BPF_SKEL := 0 else CLANG_VERSION := $(shell $(CLANG) --version | head -1 | sed 's/.*clang version \([[:digit:]]\+.[[:digit:]]\+.[[:digit:]]\+\).*/\1/g') - ifeq ($(call version-lt3,$(CLANG_VERSION),16.0.6),1) - $(warning Warning: Disabled BPF skeletons as at least $(CLANG) version 16.0.6 is reported to be a working setup with the current of BPF based perf features) + ifeq ($(call version-lt3,$(CLANG_VERSION),12.0.1),1) + $(warning Warning: Disabled BPF skeletons as reliable BTF generation needs at least $(CLANG) version 12.0.1) BUILD_BPF_SKEL := 0 endif endif @@ -747,8 +732,6 @@ endif ifeq ($(dwarf-post-unwind),1) CFLAGS += -DHAVE_DWARF_UNWIND_SUPPORT $(call detected,CONFIG_DWARF_UNWIND) -else - NO_DWARF_UNWIND := 1 endif ifndef NO_LOCAL_LIBUNWIND @@ -1191,7 +1174,7 @@ endif ifneq ($(NO_LIBTRACEEVENT),1) $(call feature_check,libtraceevent) ifeq ($(feature-libtraceevent), 1) - CFLAGS += -DHAVE_LIBTRACEEVENT + CFLAGS += -DHAVE_LIBTRACEEVENT $(shell $(PKG_CONFIG) --cflags libtraceevent) LDFLAGS += $(shell $(PKG_CONFIG) --libs-only-L libtraceevent) EXTLIBS += $(shell $(PKG_CONFIG) --libs-only-l libtraceevent) LIBTRACEEVENT_VERSION := $(shell $(PKG_CONFIG) --modversion libtraceevent).0.0 diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 9dd2e8d3f3c9..d74241a15131 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -40,7 +40,7 @@ include ../scripts/utilities.mak # # Define EXTRA_PERFLIBS to pass extra libraries to PERFLIBS. # -# Define NO_DWARF if you do not want debug-info analysis feature at all. +# Define NO_LIBDW if you do not want debug-info analysis feature at all. # # Define WERROR=0 to disable treating any warnings as errors. # @@ -52,7 +52,7 @@ include ../scripts/utilities.mak # # Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds) # -# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf +# Define LIBUNWIND if you do not want libunwind dependency for dwarf # backtrace post unwind. # # Define NO_BACKTRACE if you do not want stack backtrace debug feature @@ -1128,12 +1128,11 @@ endif install-tests: all install-gtk $(call QUIET_INSTALL, tests) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ - $(INSTALL) tests/attr.py -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ $(INSTALL) tests/pe-file.exe* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ - $(INSTALL) tests/attr/* -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \ $(INSTALL) tests/shell/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell'; \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/attr'; \ + $(INSTALL) tests/shell/attr/* -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/attr'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \ $(INSTALL) tests/shell/lib/*.sh -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \ $(INSTALL) tests/shell/lib/*.py -m 644 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/lib'; \ diff --git a/tools/perf/arch/arc/annotate/instructions.c b/tools/perf/arch/arc/annotate/instructions.c index 2f00e995c7e3..e5619770a1af 100644 --- a/tools/perf/arch/arc/annotate/instructions.c +++ b/tools/perf/arch/arc/annotate/instructions.c @@ -5,5 +5,7 @@ static int arc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) { arch->initialized = true; arch->objdump.comment_char = ';'; + arch->e_machine = EM_ARC; + arch->e_flags = 0; return 0; } diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile index 1d88fdab13bf..8b59ce8efb89 100644 --- a/tools/perf/arch/arm/Makefile +++ b/tools/perf/arch/arm/Makefile @@ -1,5 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/arm/annotate/instructions.c b/tools/perf/arch/arm/annotate/instructions.c index 2ff6cedeb9c5..cf91a43362b0 100644 --- a/tools/perf/arch/arm/annotate/instructions.c +++ b/tools/perf/arch/arm/annotate/instructions.c @@ -53,6 +53,8 @@ static int arm__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = arm__associate_instruction_ops; arch->objdump.comment_char = ';'; arch->objdump.skip_functions_char = '+'; + arch->e_machine = EM_ARM; + arch->e_flags = 0; return 0; out_free_call: diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build index e6dd7cd79ebd..f7a8b37d1c68 100644 --- a/tools/perf/arch/arm/util/Build +++ b/tools/perf/arch/arm/util/Build @@ -1,7 +1,5 @@ perf-util-y += perf_regs.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o - perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm/util/dwarf-regs.c b/tools/perf/arch/arm/util/dwarf-regs.c deleted file mode 100644 index fc5f71c91802..000000000000 --- a/tools/perf/arch/arm/util/dwarf-regs.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2010 Will Deacon, ARM Ltd. - */ - -#include <stddef.h> -#include <linux/stringify.h> -#include <dwarf-regs.h> - -struct pt_regs_dwarfnum { - const char *name; - unsigned int dwarfnum; -}; - -#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} -#define GPR_DWARFNUM_NAME(num) \ - {.name = __stringify(%r##num), .dwarfnum = num} -#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} - -/* - * Reference: - * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0040a/IHI0040A_aadwarf.pdf - */ -static const struct pt_regs_dwarfnum regdwarfnum_table[] = { - GPR_DWARFNUM_NAME(0), - GPR_DWARFNUM_NAME(1), - GPR_DWARFNUM_NAME(2), - GPR_DWARFNUM_NAME(3), - GPR_DWARFNUM_NAME(4), - GPR_DWARFNUM_NAME(5), - GPR_DWARFNUM_NAME(6), - GPR_DWARFNUM_NAME(7), - GPR_DWARFNUM_NAME(8), - GPR_DWARFNUM_NAME(9), - GPR_DWARFNUM_NAME(10), - REG_DWARFNUM_NAME("%fp", 11), - REG_DWARFNUM_NAME("%ip", 12), - REG_DWARFNUM_NAME("%sp", 13), - REG_DWARFNUM_NAME("%lr", 14), - REG_DWARFNUM_NAME("%pc", 15), - REG_DWARFNUM_END, -}; - -/** - * get_arch_regstr() - lookup register name from it's DWARF register number - * @n: the DWARF register number - * - * get_arch_regstr() returns the name of the register in struct - * regdwarfnum_table from it's DWARF register number. If the register is not - * found in the table, this returns NULL; - */ -const char *get_arch_regstr(unsigned int n) -{ - const struct pt_regs_dwarfnum *roff; - for (roff = regdwarfnum_table; roff->name != NULL; roff++) - if (roff->dwarfnum == n) - return roff->name; - return NULL; -} diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index 5735ed4479bb..91570d5d428e 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -1,9 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif PERF_HAVE_JITDUMP := 1 -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 HAVE_KVM_STAT_SUPPORT := 1 # diff --git a/tools/perf/arch/arm64/annotate/instructions.c b/tools/perf/arch/arm64/annotate/instructions.c index f86d9f4798bd..d465d093e7eb 100644 --- a/tools/perf/arch/arm64/annotate/instructions.c +++ b/tools/perf/arch/arm64/annotate/instructions.c @@ -113,6 +113,8 @@ static int arm64__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = arm64__associate_instruction_ops; arch->objdump.comment_char = '/'; arch->objdump.skip_functions_char = '+'; + arch->e_machine = EM_AARCH64; + arch->e_flags = 0; return 0; out_free_call: diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index 343ef7589a77..a74521b79eaa 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -4,7 +4,6 @@ perf-util-y += perf_regs.o perf-util-y += tsc.o perf-util-y += pmu.o perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c index 2be99fdf997d..22b19dcc6beb 100644 --- a/tools/perf/arch/arm64/util/arm-spe.c +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -23,9 +23,12 @@ #include "../../../util/debug.h" #include "../../../util/auxtrace.h" #include "../../../util/record.h" +#include "../../../util/header.h" #include "../../../util/arm-spe.h" #include <tools/libc_compat.h> // reallocarray +#define ARM_SPE_CPU_MAGIC 0x1010101010101010ULL + #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) @@ -37,11 +40,84 @@ struct arm_spe_recording { bool *wrapped; }; +/* + * arm_spe_find_cpus() returns a new cpu map, and the caller should invoke + * perf_cpu_map__put() to release the map after use. + */ +static struct perf_cpu_map *arm_spe_find_cpus(struct evlist *evlist) +{ + struct perf_cpu_map *event_cpus = evlist->core.user_requested_cpus; + struct perf_cpu_map *online_cpus = perf_cpu_map__new_online_cpus(); + struct perf_cpu_map *intersect_cpus; + + /* cpu map is not "any" CPU , we have specific CPUs to work with */ + if (!perf_cpu_map__has_any_cpu(event_cpus)) { + intersect_cpus = perf_cpu_map__intersect(event_cpus, online_cpus); + perf_cpu_map__put(online_cpus); + /* Event can be "any" CPU so count all CPUs. */ + } else { + intersect_cpus = online_cpus; + } + + return intersect_cpus; +} + static size_t arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused, - struct evlist *evlist __maybe_unused) + struct evlist *evlist) +{ + struct perf_cpu_map *cpu_map = arm_spe_find_cpus(evlist); + size_t size; + + if (!cpu_map) + return 0; + + size = ARM_SPE_AUXTRACE_PRIV_MAX + + ARM_SPE_CPU_PRIV_MAX * perf_cpu_map__nr(cpu_map); + size *= sizeof(u64); + + perf_cpu_map__put(cpu_map); + return size; +} + +static int arm_spe_save_cpu_header(struct auxtrace_record *itr, + struct perf_cpu cpu, __u64 data[]) { - return ARM_SPE_AUXTRACE_PRIV_SIZE; + struct arm_spe_recording *sper = + container_of(itr, struct arm_spe_recording, itr); + struct perf_pmu *pmu = NULL; + char *cpuid = NULL; + u64 val; + + /* Read CPU MIDR */ + cpuid = get_cpuid_allow_env_override(cpu); + if (!cpuid) + return -ENOMEM; + val = strtol(cpuid, NULL, 16); + + data[ARM_SPE_MAGIC] = ARM_SPE_CPU_MAGIC; + data[ARM_SPE_CPU] = cpu.cpu; + data[ARM_SPE_CPU_NR_PARAMS] = ARM_SPE_CPU_PRIV_MAX - ARM_SPE_CPU_MIDR; + data[ARM_SPE_CPU_MIDR] = val; + + /* Find the associate Arm SPE PMU for the CPU */ + if (perf_cpu_map__has(sper->arm_spe_pmu->cpus, cpu)) + pmu = sper->arm_spe_pmu; + + if (!pmu) { + /* No Arm SPE PMU is found */ + data[ARM_SPE_CPU_PMU_TYPE] = ULLONG_MAX; + data[ARM_SPE_CAP_MIN_IVAL] = 0; + } else { + data[ARM_SPE_CPU_PMU_TYPE] = pmu->type; + + if (perf_pmu__scan_file(pmu, "caps/min_interval", "%lu", &val) != 1) + val = 0; + data[ARM_SPE_CAP_MIN_IVAL] = val; + } + + free(cpuid); + return ARM_SPE_CPU_PRIV_MAX; } static int arm_spe_info_fill(struct auxtrace_record *itr, @@ -49,20 +125,46 @@ static int arm_spe_info_fill(struct auxtrace_record *itr, struct perf_record_auxtrace_info *auxtrace_info, size_t priv_size) { + int i, ret; + size_t offset; struct arm_spe_recording *sper = container_of(itr, struct arm_spe_recording, itr); struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; + struct perf_cpu_map *cpu_map; + struct perf_cpu cpu; + __u64 *data; - if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE) + if (priv_size != arm_spe_info_priv_size(itr, session->evlist)) return -EINVAL; if (!session->evlist->core.nr_mmaps) return -EINVAL; + cpu_map = arm_spe_find_cpus(session->evlist); + if (!cpu_map) + return -EINVAL; + auxtrace_info->type = PERF_AUXTRACE_ARM_SPE; - auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type; + auxtrace_info->priv[ARM_SPE_HEADER_VERSION] = ARM_SPE_HEADER_CURRENT_VERSION; + auxtrace_info->priv[ARM_SPE_HEADER_SIZE] = + ARM_SPE_AUXTRACE_PRIV_MAX - ARM_SPE_HEADER_VERSION; + auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2] = arm_spe_pmu->type; + auxtrace_info->priv[ARM_SPE_CPUS_NUM] = perf_cpu_map__nr(cpu_map); + + offset = ARM_SPE_AUXTRACE_PRIV_MAX; + perf_cpu_map__for_each_cpu(cpu, i, cpu_map) { + assert(offset < priv_size); + data = &auxtrace_info->priv[offset]; + ret = arm_spe_save_cpu_header(itr, cpu, data); + if (ret < 0) + goto out; + offset += ret; + } - return 0; + ret = 0; +out: + perf_cpu_map__put(cpu_map); + return ret; } static void @@ -188,9 +290,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr, evlist__for_each_entry(evlist, evsel) { if (evsel__is_aux_event(evsel)) { - if (!strstarts(evsel->pmu_name, ARM_SPE_PMU_NAME)) { + if (!strstarts(evsel->pmu->name, ARM_SPE_PMU_NAME)) { pr_err("Found unexpected auxtrace event: %s\n", - evsel->pmu_name); + evsel->pmu->name); return -EINVAL; } opts->full_auxtrace = true; diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c deleted file mode 100644 index 917b97d7c5d3..000000000000 --- a/tools/perf/arch/arm64/util/dwarf-regs.c +++ /dev/null @@ -1,92 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2010 Will Deacon, ARM Ltd. - */ - -#include <errno.h> -#include <stddef.h> -#include <string.h> -#include <dwarf-regs.h> -#include <linux/ptrace.h> /* for struct user_pt_regs */ -#include <linux/stringify.h> - -struct pt_regs_dwarfnum { - const char *name; - unsigned int dwarfnum; -}; - -#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} -#define GPR_DWARFNUM_NAME(num) \ - {.name = __stringify(%x##num), .dwarfnum = num} -#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} -#define DWARFNUM2OFFSET(index) \ - (index * sizeof((struct user_pt_regs *)0)->regs[0]) - -/* - * Reference: - * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0057b/IHI0057B_aadwarf64.pdf - */ -static const struct pt_regs_dwarfnum regdwarfnum_table[] = { - GPR_DWARFNUM_NAME(0), - GPR_DWARFNUM_NAME(1), - GPR_DWARFNUM_NAME(2), - GPR_DWARFNUM_NAME(3), - GPR_DWARFNUM_NAME(4), - GPR_DWARFNUM_NAME(5), - GPR_DWARFNUM_NAME(6), - GPR_DWARFNUM_NAME(7), - GPR_DWARFNUM_NAME(8), - GPR_DWARFNUM_NAME(9), - GPR_DWARFNUM_NAME(10), - GPR_DWARFNUM_NAME(11), - GPR_DWARFNUM_NAME(12), - GPR_DWARFNUM_NAME(13), - GPR_DWARFNUM_NAME(14), - GPR_DWARFNUM_NAME(15), - GPR_DWARFNUM_NAME(16), - GPR_DWARFNUM_NAME(17), - GPR_DWARFNUM_NAME(18), - GPR_DWARFNUM_NAME(19), - GPR_DWARFNUM_NAME(20), - GPR_DWARFNUM_NAME(21), - GPR_DWARFNUM_NAME(22), - GPR_DWARFNUM_NAME(23), - GPR_DWARFNUM_NAME(24), - GPR_DWARFNUM_NAME(25), - GPR_DWARFNUM_NAME(26), - GPR_DWARFNUM_NAME(27), - GPR_DWARFNUM_NAME(28), - GPR_DWARFNUM_NAME(29), - REG_DWARFNUM_NAME("%lr", 30), - REG_DWARFNUM_NAME("%sp", 31), - REG_DWARFNUM_END, -}; - -/** - * get_arch_regstr() - lookup register name from it's DWARF register number - * @n: the DWARF register number - * - * get_arch_regstr() returns the name of the register in struct - * regdwarfnum_table from it's DWARF register number. If the register is not - * found in the table, this returns NULL; - */ -const char *get_arch_regstr(unsigned int n) -{ - const struct pt_regs_dwarfnum *roff; - for (roff = regdwarfnum_table; roff->name != NULL; roff++) - if (roff->dwarfnum == n) - return roff->name; - return NULL; -} - -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_dwarfnum *roff; - - for (roff = regdwarfnum_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return DWARFNUM2OFFSET(roff->dwarfnum); - return -EINVAL; -} diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c index 741df3614a09..f445a2dd6293 100644 --- a/tools/perf/arch/arm64/util/header.c +++ b/tools/perf/arch/arm64/util/header.c @@ -14,73 +14,66 @@ #define MIDR_REVISION_MASK GENMASK(3, 0) #define MIDR_VARIANT_MASK GENMASK(23, 20) -static int _get_cpuid(char *buf, size_t sz, struct perf_cpu_map *cpus) +static int _get_cpuid(char *buf, size_t sz, struct perf_cpu cpu) { + char path[PATH_MAX]; + FILE *file; const char *sysfs = sysfs__mountpoint(); - struct perf_cpu cpu; - int idx, ret = EINVAL; + assert(cpu.cpu != -1); if (!sysfs || sz < MIDR_SIZE) return EINVAL; - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - char path[PATH_MAX]; - FILE *file; - - scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d" MIDR, - sysfs, cpu.cpu); - - file = fopen(path, "r"); - if (!file) { - pr_debug("fopen failed for file %s\n", path); - continue; - } - - if (!fgets(buf, MIDR_SIZE, file)) { - fclose(file); - continue; - } - fclose(file); + scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d" MIDR, sysfs, cpu.cpu); - /* got midr break loop */ - ret = 0; - break; + file = fopen(path, "r"); + if (!file) { + pr_debug("fopen failed for file %s\n", path); + return EINVAL; } - return ret; + if (!fgets(buf, MIDR_SIZE, file)) { + pr_debug("Failed to read file %s\n", path); + fclose(file); + return EINVAL; + } + fclose(file); + return 0; } -int get_cpuid(char *buf, size_t sz) +int get_cpuid(char *buf, size_t sz, struct perf_cpu cpu) { - struct perf_cpu_map *cpus = perf_cpu_map__new_online_cpus(); - int ret; + struct perf_cpu_map *cpus; + int idx; + if (cpu.cpu != -1) + return _get_cpuid(buf, sz, cpu); + + cpus = perf_cpu_map__new_online_cpus(); if (!cpus) return EINVAL; - ret = _get_cpuid(buf, sz, cpus); - - perf_cpu_map__put(cpus); + perf_cpu_map__for_each_cpu(cpu, idx, cpus) { + int ret = _get_cpuid(buf, sz, cpu); - return ret; + if (ret == 0) + return 0; + } + return EINVAL; } -char *get_cpuid_str(struct perf_pmu *pmu) +char *get_cpuid_str(struct perf_cpu cpu) { - char *buf = NULL; + char *buf = malloc(MIDR_SIZE); int res; - if (!pmu || !pmu->cpus) - return NULL; - - buf = malloc(MIDR_SIZE); if (!buf) return NULL; /* read midr from list of cpus mapped to this pmu */ - res = _get_cpuid(buf, MIDR_SIZE, pmu->cpus); + res = get_cpuid(buf, MIDR_SIZE, cpu); if (res) { - pr_err("failed to get cpuid string for PMU %s\n", pmu->name); + pr_err("failed to get cpuid string for CPU %d\n", cpu.cpu); free(buf); buf = NULL; } diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index 2a4eab2d160e..895fb0d0610c 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -1,30 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 -#include <internal/cpumap.h> -#include "../../../util/cpumap.h" -#include "../../../util/header.h" #include "../../../util/pmu.h" #include "../../../util/pmus.h" +#include "../../../util/tool_pmu.h" #include <api/fs/fs.h> -#include <math.h> -const struct pmu_metrics_table *pmu_metrics_table__find(void) -{ - struct perf_pmu *pmu; - - /* Metrics aren't currently supported on heterogeneous Arm systems */ - if (perf_pmus__num_core_pmus() > 1) - return NULL; - - /* Doesn't matter which one here because they'll all be the same */ - pmu = perf_pmus__find_core_pmu(); - if (pmu) - return perf_pmu__find_metrics_table(pmu); - - return NULL; -} - -double perf_pmu__cpu_slots_per_cycle(void) +u64 tool_pmu__cpu_slots_per_cycle(void) { char path[PATH_MAX]; unsigned long long slots = 0; @@ -41,5 +22,5 @@ double perf_pmu__cpu_slots_per_cycle(void) filename__read_ull(path, &slots); } - return slots ? (double)slots : NAN; + return slots; } diff --git a/tools/perf/arch/csky/Makefile b/tools/perf/arch/csky/Makefile deleted file mode 100644 index 88c08eed9c7b..000000000000 --- a/tools/perf/arch/csky/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif diff --git a/tools/perf/arch/csky/annotate/instructions.c b/tools/perf/arch/csky/annotate/instructions.c index 5337bfb7d5fc..14270311d215 100644 --- a/tools/perf/arch/csky/annotate/instructions.c +++ b/tools/perf/arch/csky/annotate/instructions.c @@ -43,6 +43,11 @@ static int csky__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->initialized = true; arch->objdump.comment_char = '/'; arch->associate_instruction_ops = csky__associate_ins_ops; - + arch->e_machine = EM_CSKY; +#if defined(__CSKYABIV2__) + arch->e_flags = EF_CSKY_ABIV2; +#else + arch->e_flags = EF_CSKY_ABIV1; +#endif return 0; } diff --git a/tools/perf/arch/csky/util/Build b/tools/perf/arch/csky/util/Build index 99d83f41bf43..5e6ea82c4202 100644 --- a/tools/perf/arch/csky/util/Build +++ b/tools/perf/arch/csky/util/Build @@ -1,4 +1,3 @@ perf-util-y += perf_regs.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/loongarch/Makefile b/tools/perf/arch/loongarch/Makefile index c89d6bb6b184..52544d59245b 100644 --- a/tools/perf/arch/loongarch/Makefile +++ b/tools/perf/arch/loongarch/Makefile @@ -1,8 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 HAVE_KVM_STAT_SUPPORT := 1 diff --git a/tools/perf/arch/loongarch/annotate/instructions.c b/tools/perf/arch/loongarch/annotate/instructions.c index ab43b1ab51e3..70262d5f1444 100644 --- a/tools/perf/arch/loongarch/annotate/instructions.c +++ b/tools/perf/arch/loongarch/annotate/instructions.c @@ -131,6 +131,8 @@ int loongarch__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = loongarch__associate_ins_ops; arch->initialized = true; arch->objdump.comment_char = '#'; + arch->e_machine = EM_LOONGARCH; + arch->e_flags = 0; } return 0; diff --git a/tools/perf/arch/loongarch/util/Build b/tools/perf/arch/loongarch/util/Build index b6b97de48233..0aa31986ecb5 100644 --- a/tools/perf/arch/loongarch/util/Build +++ b/tools/perf/arch/loongarch/util/Build @@ -1,7 +1,6 @@ perf-util-y += header.o perf-util-y += perf_regs.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o diff --git a/tools/perf/arch/loongarch/util/dwarf-regs.c b/tools/perf/arch/loongarch/util/dwarf-regs.c deleted file mode 100644 index 0f6ebc387463..000000000000 --- a/tools/perf/arch/loongarch/util/dwarf-regs.c +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2020-2023 Loongson Technology Corporation Limited - */ - -#include <stdio.h> -#include <errno.h> /* for EINVAL */ -#include <string.h> /* for strcmp */ -#include <dwarf-regs.h> - -struct pt_regs_dwarfnum { - const char *name; - unsigned int dwarfnum; -}; - -static struct pt_regs_dwarfnum loongarch_gpr_table[] = { - {"%r0", 0}, {"%r1", 1}, {"%r2", 2}, {"%r3", 3}, - {"%r4", 4}, {"%r5", 5}, {"%r6", 6}, {"%r7", 7}, - {"%r8", 8}, {"%r9", 9}, {"%r10", 10}, {"%r11", 11}, - {"%r12", 12}, {"%r13", 13}, {"%r14", 14}, {"%r15", 15}, - {"%r16", 16}, {"%r17", 17}, {"%r18", 18}, {"%r19", 19}, - {"%r20", 20}, {"%r21", 21}, {"%r22", 22}, {"%r23", 23}, - {"%r24", 24}, {"%r25", 25}, {"%r26", 26}, {"%r27", 27}, - {"%r28", 28}, {"%r29", 29}, {"%r30", 30}, {"%r31", 31}, - {NULL, 0} -}; - -const char *get_arch_regstr(unsigned int n) -{ - n %= 32; - return loongarch_gpr_table[n].name; -} - -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_dwarfnum *roff; - - for (roff = loongarch_gpr_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return roff->dwarfnum; - return -EINVAL; -} diff --git a/tools/perf/arch/loongarch/util/header.c b/tools/perf/arch/loongarch/util/header.c index d962dff55512..0c6d823334a2 100644 --- a/tools/perf/arch/loongarch/util/header.c +++ b/tools/perf/arch/loongarch/util/header.c @@ -70,7 +70,7 @@ out_free: return cpuid; } -int get_cpuid(char *buffer, size_t sz) +int get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused) { int ret = 0; char *cpuid = _get_cpuid(); @@ -90,7 +90,7 @@ out_free: return ret; } -char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +char *get_cpuid_str(struct perf_cpu cpu __maybe_unused) { return _get_cpuid(); } diff --git a/tools/perf/arch/mips/Makefile b/tools/perf/arch/mips/Makefile index cd0b011b3be5..827168f1077a 100644 --- a/tools/perf/arch/mips/Makefile +++ b/tools/perf/arch/mips/Makefile @@ -1,8 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif - # Syscall table generation for perf out := $(OUTPUT)arch/mips/include/generated/asm header := $(out)/syscalls_n64.c diff --git a/tools/perf/arch/mips/annotate/instructions.c b/tools/perf/arch/mips/annotate/instructions.c index 340993f2a897..b50b46c613d6 100644 --- a/tools/perf/arch/mips/annotate/instructions.c +++ b/tools/perf/arch/mips/annotate/instructions.c @@ -40,6 +40,8 @@ int mips__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = mips__associate_ins_ops; arch->initialized = true; arch->objdump.comment_char = '#'; + arch->e_machine = EM_MIPS; + arch->e_flags = 0; } return 0; diff --git a/tools/perf/arch/mips/util/Build b/tools/perf/arch/mips/util/Build index e4644f1e68a0..691fa2051958 100644 --- a/tools/perf/arch/mips/util/Build +++ b/tools/perf/arch/mips/util/Build @@ -1,3 +1,2 @@ perf-util-y += perf_regs.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/mips/util/dwarf-regs.c b/tools/perf/arch/mips/util/dwarf-regs.c deleted file mode 100644 index 25c13a91c2a7..000000000000 --- a/tools/perf/arch/mips/util/dwarf-regs.c +++ /dev/null @@ -1,38 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2013 Cavium, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#include <stdio.h> -#include <dwarf-regs.h> - -static const char *mips_gpr_names[32] = { - "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", - "$10", "$11", "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", - "$20", "$21", "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", - "$30", "$31" -}; - -const char *get_arch_regstr(unsigned int n) -{ - if (n < 32) - return mips_gpr_names[n]; - if (n == 64) - return "hi"; - if (n == 65) - return "lo"; - return NULL; -} diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index bf6d323574f6..dc8f4fb8e324 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -1,10 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif - HAVE_KVM_STAT_SUPPORT := 1 -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 # diff --git a/tools/perf/arch/powerpc/annotate/instructions.c b/tools/perf/arch/powerpc/annotate/instructions.c index ede9eeade0ab..ca567cfdcbdb 100644 --- a/tools/perf/arch/powerpc/annotate/instructions.c +++ b/tools/perf/arch/powerpc/annotate/instructions.c @@ -255,7 +255,7 @@ static struct ins_ops *check_ppc_insn(struct disasm_line *dl) * is moved to r31. update_insn_state_powerpc tracks these state * changes */ -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT static void update_insn_state_powerpc(struct type_state *state, struct data_loc_info *dloc, Dwarf_Die * cu_die __maybe_unused, struct disasm_line *dl) @@ -300,7 +300,7 @@ static void update_insn_state_powerpc(struct type_state *state, insn_offset, src->reg1, dst->reg1); pr_debug_type_name(&tsr->type, tsr->kind); } -#endif /* HAVE_DWARF_SUPPORT */ +#endif /* HAVE_LIBDW_SUPPORT */ static int powerpc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) { @@ -309,6 +309,8 @@ static int powerpc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = powerpc__associate_instruction_ops; arch->objdump.comment_char = '#'; annotate_opts.show_asm_raw = true; + arch->e_machine = EM_PPC; + arch->e_flags = 0; } return 0; diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 6c588ecdf3bd..ed82715080f9 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -7,8 +7,7 @@ perf-util-y += sym-handling.o perf-util-y += evsel.o perf-util-y += event.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o -perf-util-$(CONFIG_DWARF) += skip-callchain-idx.o +perf-util-$(CONFIG_LIBDW) += skip-callchain-idx.o perf-util-$(CONFIG_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c deleted file mode 100644 index 104c7ae5c433..000000000000 --- a/tools/perf/arch/powerpc/util/dwarf-regs.c +++ /dev/null @@ -1,153 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2010 Ian Munsie, IBM Corporation. - */ - -#include <stddef.h> -#include <errno.h> -#include <string.h> -#include <dwarf-regs.h> -#include <linux/ptrace.h> -#include <linux/kernel.h> -#include <linux/stringify.h> - -struct pt_regs_dwarfnum { - const char *name; - unsigned int dwarfnum; - unsigned int ptregs_offset; -}; - -#define REG_DWARFNUM_NAME(r, num) \ - {.name = __stringify(%)__stringify(r), .dwarfnum = num, \ - .ptregs_offset = offsetof(struct pt_regs, r)} -#define GPR_DWARFNUM_NAME(num) \ - {.name = __stringify(%gpr##num), .dwarfnum = num, \ - .ptregs_offset = offsetof(struct pt_regs, gpr[num])} -#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0, .ptregs_offset = 0} - -/* - * Reference: - * http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html - */ -static const struct pt_regs_dwarfnum regdwarfnum_table[] = { - GPR_DWARFNUM_NAME(0), - GPR_DWARFNUM_NAME(1), - GPR_DWARFNUM_NAME(2), - GPR_DWARFNUM_NAME(3), - GPR_DWARFNUM_NAME(4), - GPR_DWARFNUM_NAME(5), - GPR_DWARFNUM_NAME(6), - GPR_DWARFNUM_NAME(7), - GPR_DWARFNUM_NAME(8), - GPR_DWARFNUM_NAME(9), - GPR_DWARFNUM_NAME(10), - GPR_DWARFNUM_NAME(11), - GPR_DWARFNUM_NAME(12), - GPR_DWARFNUM_NAME(13), - GPR_DWARFNUM_NAME(14), - GPR_DWARFNUM_NAME(15), - GPR_DWARFNUM_NAME(16), - GPR_DWARFNUM_NAME(17), - GPR_DWARFNUM_NAME(18), - GPR_DWARFNUM_NAME(19), - GPR_DWARFNUM_NAME(20), - GPR_DWARFNUM_NAME(21), - GPR_DWARFNUM_NAME(22), - GPR_DWARFNUM_NAME(23), - GPR_DWARFNUM_NAME(24), - GPR_DWARFNUM_NAME(25), - GPR_DWARFNUM_NAME(26), - GPR_DWARFNUM_NAME(27), - GPR_DWARFNUM_NAME(28), - GPR_DWARFNUM_NAME(29), - GPR_DWARFNUM_NAME(30), - GPR_DWARFNUM_NAME(31), - REG_DWARFNUM_NAME(msr, 66), - REG_DWARFNUM_NAME(ctr, 109), - REG_DWARFNUM_NAME(link, 108), - REG_DWARFNUM_NAME(xer, 101), - REG_DWARFNUM_NAME(dar, 119), - REG_DWARFNUM_NAME(dsisr, 118), - REG_DWARFNUM_END, -}; - -/** - * get_arch_regstr() - lookup register name from it's DWARF register number - * @n: the DWARF register number - * - * get_arch_regstr() returns the name of the register in struct - * regdwarfnum_table from it's DWARF register number. If the register is not - * found in the table, this returns NULL; - */ -const char *get_arch_regstr(unsigned int n) -{ - const struct pt_regs_dwarfnum *roff; - for (roff = regdwarfnum_table; roff->name != NULL; roff++) - if (roff->dwarfnum == n) - return roff->name; - return NULL; -} - -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_dwarfnum *roff; - for (roff = regdwarfnum_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return roff->ptregs_offset; - return -EINVAL; -} - -#define PPC_OP(op) (((op) >> 26) & 0x3F) -#define PPC_RA(a) (((a) >> 16) & 0x1f) -#define PPC_RT(t) (((t) >> 21) & 0x1f) -#define PPC_RB(b) (((b) >> 11) & 0x1f) -#define PPC_D(D) ((D) & 0xfffe) -#define PPC_DS(DS) ((DS) & 0xfffc) -#define OP_LD 58 -#define OP_STD 62 - -static int get_source_reg(u32 raw_insn) -{ - return PPC_RA(raw_insn); -} - -static int get_target_reg(u32 raw_insn) -{ - return PPC_RT(raw_insn); -} - -static int get_offset_opcode(u32 raw_insn) -{ - int opcode = PPC_OP(raw_insn); - - /* DS- form */ - if ((opcode == OP_LD) || (opcode == OP_STD)) - return PPC_DS(raw_insn); - else - return PPC_D(raw_insn); -} - -/* - * Fills the required fields for op_loc depending on if it - * is a source or target. - * D form: ins RT,D(RA) -> src_reg1 = RA, offset = D, dst_reg1 = RT - * DS form: ins RT,DS(RA) -> src_reg1 = RA, offset = DS, dst_reg1 = RT - * X form: ins RT,RA,RB -> src_reg1 = RA, src_reg2 = RB, dst_reg1 = RT - */ -void get_powerpc_regs(u32 raw_insn, int is_source, - struct annotated_op_loc *op_loc) -{ - if (is_source) - op_loc->reg1 = get_source_reg(raw_insn); - else - op_loc->reg1 = get_target_reg(raw_insn); - - if (op_loc->multi_regs) - op_loc->reg2 = PPC_RB(raw_insn); - - /* TODO: Implement offset handling for X Form */ - if ((op_loc->mem_ref) && (PPC_OP(raw_insn) != 31)) - op_loc->offset = get_offset_opcode(raw_insn); -} diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 6b00efd53638..c7df534dbf8f 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -10,9 +10,21 @@ #include "utils_header.h" #include "metricgroup.h" #include <api/fs/fs.h> +#include <sys/auxv.h> + +static bool is_compat_mode(void) +{ + u64 base_platform = getauxval(AT_BASE_PLATFORM); + u64 platform = getauxval(AT_PLATFORM); + + if (!strcmp((char *)platform, (char *)base_platform)) + return false; + + return true; +} int -get_cpuid(char *buffer, size_t sz) +get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused) { unsigned long pvr; int nb; @@ -30,11 +42,29 @@ get_cpuid(char *buffer, size_t sz) } char * -get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +get_cpuid_str(struct perf_cpu cpu __maybe_unused) { char *bufp; + unsigned long pvr; + + /* + * IBM Power System supports compatible mode. That is + * Nth generation platform can support previous generation + * OS in a mode called compatibile mode. For ex. LPAR can be + * booted in a Power9 mode when the system is a Power10. + * + * In the compatible mode, care must be taken when generating + * PVR value. When read, PVR will be of the AT_BASE_PLATFORM + * To support generic events, return 0x00ffffff as pvr when + * booted in compat mode. Based on this pvr value, json will + * pick events from pmu-events/arch/powerpc/compat + */ + if (!is_compat_mode()) + pvr = mfspr(SPRN_PVR); + else + pvr = 0x00ffffff; - if (asprintf(&bufp, "0x%.8lx", mfspr(SPRN_PVR)) < 0) + if (asprintf(&bufp, "0x%.8lx", pvr) < 0) bufp = NULL; return bufp; diff --git a/tools/perf/arch/riscv/Makefile b/tools/perf/arch/riscv/Makefile index 90c3c476a242..18ad078000e2 100644 --- a/tools/perf/arch/riscv/Makefile +++ b/tools/perf/arch/riscv/Makefile @@ -1,6 +1,25 @@ -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 +# SPDX-License-Identifier: GPL-2.0 PERF_HAVE_JITDUMP := 1 HAVE_KVM_STAT_SUPPORT := 1 + +# +# Syscall table generation for perf +# + +out := $(OUTPUT)arch/riscv/include/generated/asm +header := $(out)/syscalls.c +incpath := $(srctree)/tools +sysdef := $(srctree)/tools/arch/riscv/include/uapi/asm/unistd.h +sysprf := $(srctree)/tools/perf/arch/riscv/entry/syscalls/ +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +$(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '$(CC)' '$(HOSTCC)' $(incpath) $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, riscv) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/riscv/entry/syscalls/mksyscalltbl b/tools/perf/arch/riscv/entry/syscalls/mksyscalltbl new file mode 100755 index 000000000000..c59f5e852b97 --- /dev/null +++ b/tools/perf/arch/riscv/entry/syscalls/mksyscalltbl @@ -0,0 +1,47 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf. Derived from +# powerpc script. +# +# Copyright IBM Corp. 2017 +# Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> +# Changed by: Ravi Bangoria <ravi.bangoria@linux.vnet.ibm.com> +# Changed by: Kim Phillips <kim.phillips@arm.com> +# Changed by: Björn Töpel <bjorn@rivosinc.com> + +gcc=$1 +hostcc=$2 +incpath=$3 +input=$4 + +if ! test -r $input; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_sc_table() +{ + local sc nr max_nr + + while read sc nr; do + printf "%s\n" " [$nr] = \"$sc\"," + max_nr=$nr + done + + echo "#define SYSCALLTBL_RISCV_MAX_ID $max_nr" +} + +create_table() +{ + echo "#include \"$input\"" + echo "static const char *const syscalltbl_riscv[] = {" + create_sc_table + echo "};" +} + +$gcc -E -dM -x c -I $incpath/include/uapi $input \ + |awk '$2 ~ "__NR" && $3 !~ "__NR3264_" { + sub("^#define __NR(3264)?_", ""); + print | "sort -k2 -n"}' \ + |create_table diff --git a/tools/perf/arch/riscv/include/dwarf-regs-table.h b/tools/perf/arch/riscv/include/dwarf-regs-table.h new file mode 100644 index 000000000000..a45b63a6d5a8 --- /dev/null +++ b/tools/perf/arch/riscv/include/dwarf-regs-table.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifdef DEFINE_DWARF_REGSTR_TABLE +/* This is included in perf/util/dwarf-regs.c */ + +#define REG_DWARFNUM_NAME(reg, idx) [idx] = "%" #reg + +static const char * const riscv_regstr_tbl[] = { + REG_DWARFNUM_NAME("%zero", 0), + REG_DWARFNUM_NAME("%ra", 1), + REG_DWARFNUM_NAME("%sp", 2), + REG_DWARFNUM_NAME("%gp", 3), + REG_DWARFNUM_NAME("%tp", 4), + REG_DWARFNUM_NAME("%t0", 5), + REG_DWARFNUM_NAME("%t1", 6), + REG_DWARFNUM_NAME("%t2", 7), + REG_DWARFNUM_NAME("%s0", 8), + REG_DWARFNUM_NAME("%s1", 9), + REG_DWARFNUM_NAME("%a0", 10), + REG_DWARFNUM_NAME("%a1", 11), + REG_DWARFNUM_NAME("%a2", 12), + REG_DWARFNUM_NAME("%a3", 13), + REG_DWARFNUM_NAME("%a4", 14), + REG_DWARFNUM_NAME("%a5", 15), + REG_DWARFNUM_NAME("%a6", 16), + REG_DWARFNUM_NAME("%a7", 17), + REG_DWARFNUM_NAME("%s2", 18), + REG_DWARFNUM_NAME("%s3", 19), + REG_DWARFNUM_NAME("%s4", 20), + REG_DWARFNUM_NAME("%s5", 21), + REG_DWARFNUM_NAME("%s6", 22), + REG_DWARFNUM_NAME("%s7", 23), + REG_DWARFNUM_NAME("%s8", 24), + REG_DWARFNUM_NAME("%s9", 25), + REG_DWARFNUM_NAME("%s10", 26), + REG_DWARFNUM_NAME("%s11", 27), + REG_DWARFNUM_NAME("%t3", 28), + REG_DWARFNUM_NAME("%t4", 29), + REG_DWARFNUM_NAME("%t5", 30), + REG_DWARFNUM_NAME("%t6", 31), +}; + +#endif diff --git a/tools/perf/arch/riscv/util/Build b/tools/perf/arch/riscv/util/Build index f865cb0489ec..58a672246024 100644 --- a/tools/perf/arch/riscv/util/Build +++ b/tools/perf/arch/riscv/util/Build @@ -2,5 +2,4 @@ perf-util-y += perf_regs.o perf-util-y += header.o perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/riscv/util/dwarf-regs.c b/tools/perf/arch/riscv/util/dwarf-regs.c deleted file mode 100644 index cd0504c02e2e..000000000000 --- a/tools/perf/arch/riscv/util/dwarf-regs.c +++ /dev/null @@ -1,72 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd. - * Mapping of DWARF debug register numbers into register names. - */ - -#include <stddef.h> -#include <errno.h> /* for EINVAL */ -#include <string.h> /* for strcmp */ -#include <dwarf-regs.h> - -struct pt_regs_dwarfnum { - const char *name; - unsigned int dwarfnum; -}; - -#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num} -#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0} - -struct pt_regs_dwarfnum riscv_dwarf_regs_table[] = { - REG_DWARFNUM_NAME("%zero", 0), - REG_DWARFNUM_NAME("%ra", 1), - REG_DWARFNUM_NAME("%sp", 2), - REG_DWARFNUM_NAME("%gp", 3), - REG_DWARFNUM_NAME("%tp", 4), - REG_DWARFNUM_NAME("%t0", 5), - REG_DWARFNUM_NAME("%t1", 6), - REG_DWARFNUM_NAME("%t2", 7), - REG_DWARFNUM_NAME("%s0", 8), - REG_DWARFNUM_NAME("%s1", 9), - REG_DWARFNUM_NAME("%a0", 10), - REG_DWARFNUM_NAME("%a1", 11), - REG_DWARFNUM_NAME("%a2", 12), - REG_DWARFNUM_NAME("%a3", 13), - REG_DWARFNUM_NAME("%a4", 14), - REG_DWARFNUM_NAME("%a5", 15), - REG_DWARFNUM_NAME("%a6", 16), - REG_DWARFNUM_NAME("%a7", 17), - REG_DWARFNUM_NAME("%s2", 18), - REG_DWARFNUM_NAME("%s3", 19), - REG_DWARFNUM_NAME("%s4", 20), - REG_DWARFNUM_NAME("%s5", 21), - REG_DWARFNUM_NAME("%s6", 22), - REG_DWARFNUM_NAME("%s7", 23), - REG_DWARFNUM_NAME("%s8", 24), - REG_DWARFNUM_NAME("%s9", 25), - REG_DWARFNUM_NAME("%s10", 26), - REG_DWARFNUM_NAME("%s11", 27), - REG_DWARFNUM_NAME("%t3", 28), - REG_DWARFNUM_NAME("%t4", 29), - REG_DWARFNUM_NAME("%t5", 30), - REG_DWARFNUM_NAME("%t6", 31), - REG_DWARFNUM_END, -}; - -#define RISCV_MAX_REGS ((sizeof(riscv_dwarf_regs_table) / \ - sizeof(riscv_dwarf_regs_table[0])) - 1) - -const char *get_arch_regstr(unsigned int n) -{ - return (n < RISCV_MAX_REGS) ? riscv_dwarf_regs_table[n].name : NULL; -} - -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_dwarfnum *roff; - - for (roff = riscv_dwarf_regs_table; roff->name; roff++) - if (!strcmp(roff->name, name)) - return roff->dwarfnum; - return -EINVAL; -} diff --git a/tools/perf/arch/riscv/util/header.c b/tools/perf/arch/riscv/util/header.c index 1b29030021ee..4b839203d4a5 100644 --- a/tools/perf/arch/riscv/util/header.c +++ b/tools/perf/arch/riscv/util/header.c @@ -81,7 +81,7 @@ free: return cpuid; } -int get_cpuid(char *buffer, size_t sz) +int get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused) { char *cpuid = _get_cpuid(); int ret = 0; @@ -98,7 +98,7 @@ free: } char * -get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +get_cpuid_str(struct perf_cpu cpu __maybe_unused) { return _get_cpuid(); } diff --git a/tools/perf/arch/riscv64/annotate/instructions.c b/tools/perf/arch/riscv64/annotate/instructions.c index 869a0eb28953..55cf911633f8 100644 --- a/tools/perf/arch/riscv64/annotate/instructions.c +++ b/tools/perf/arch/riscv64/annotate/instructions.c @@ -28,6 +28,8 @@ int riscv64__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->associate_instruction_ops = riscv64__associate_ins_ops; arch->initialized = true; arch->objdump.comment_char = '#'; + arch->e_machine = EM_RISCV; + arch->e_flags = 0; } return 0; diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 56994e63b43a..c431c21b11ef 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -1,9 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif HAVE_KVM_STAT_SUPPORT := 1 -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 # diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c index eeac25cca699..c61193f1e096 100644 --- a/tools/perf/arch/s390/annotate/instructions.c +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -166,6 +166,8 @@ static int s390__annotate_init(struct arch *arch, char *cpuid __maybe_unused) if (s390__cpuid_parse(arch, cpuid)) err = SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING; } + arch->e_machine = EM_S390; + arch->e_flags = 0; } return err; diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build index 1ac830030ff3..736c0ad09194 100644 --- a/tools/perf/arch/s390/util/Build +++ b/tools/perf/arch/s390/util/Build @@ -2,7 +2,6 @@ perf-util-y += header.o perf-util-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o perf-util-y += perf_regs.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-util-y += machine.o diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c deleted file mode 100644 index dfddb3099bfa..000000000000 --- a/tools/perf/arch/s390/util/dwarf-regs.c +++ /dev/null @@ -1,43 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright IBM Corp. 2010, 2017 - * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com> - * - */ - -#include <errno.h> -#include <stddef.h> -#include <stdlib.h> -#include <linux/kernel.h> -#include <asm/ptrace.h> -#include <string.h> -#include <dwarf-regs.h> -#include "dwarf-regs-table.h" - -const char *get_arch_regstr(unsigned int n) -{ - return (n >= ARRAY_SIZE(s390_dwarf_regs)) ? NULL : s390_dwarf_regs[n]; -} - -/* - * Convert the register name into an offset to struct pt_regs (kernel). - * This is required by the BPF prologue generator. The BPF - * program is called in the BPF overflow handler in the perf - * core. - */ -int regs_query_register_offset(const char *name) -{ - unsigned long gpr; - - if (!name || strncmp(name, "%r", 2)) - return -EINVAL; - - errno = 0; - gpr = strtoul(name + 2, NULL, 10); - if (errno || gpr >= 16) - return -EINVAL; - - return offsetof(user_pt_regs, gprs) + 8 * gpr; -} diff --git a/tools/perf/arch/s390/util/header.c b/tools/perf/arch/s390/util/header.c index 7933f6871c81..db54677a17d2 100644 --- a/tools/perf/arch/s390/util/header.c +++ b/tools/perf/arch/s390/util/header.c @@ -27,7 +27,7 @@ #define SYSINFO "/proc/sysinfo" #define SRVLVL "/proc/service_levels" -int get_cpuid(char *buffer, size_t sz) +int get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused) { char *cp, *line = NULL, *line2; char type[8], model[33], version[8], manufacturer[32], authorization[8]; @@ -137,11 +137,11 @@ skip_sysinfo: return (nbytes >= sz) ? ENOBUFS : 0; } -char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +char *get_cpuid_str(struct perf_cpu cpu) { char *buf = malloc(128); - if (buf && get_cpuid(buf, 128)) + if (buf && get_cpuid(buf, 128, cpu)) zfree(&buf); return buf; } diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build deleted file mode 100644 index e63eabc2c8f4..000000000000 --- a/tools/perf/arch/sh/Build +++ /dev/null @@ -1 +0,0 @@ -perf-util-y += util/ diff --git a/tools/perf/arch/sh/Makefile b/tools/perf/arch/sh/Makefile deleted file mode 100644 index 88c08eed9c7b..000000000000 --- a/tools/perf/arch/sh/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/sh/util/Build deleted file mode 100644 index 32f44fc4ab98..000000000000 --- a/tools/perf/arch/sh/util/Build +++ /dev/null @@ -1 +0,0 @@ -perf-util-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sh/util/dwarf-regs.c b/tools/perf/arch/sh/util/dwarf-regs.c deleted file mode 100644 index 4b17fc86c73b..000000000000 --- a/tools/perf/arch/sh/util/dwarf-regs.c +++ /dev/null @@ -1,41 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2010 Matt Fleming <matt@console-pimps.org> - */ - -#include <stddef.h> -#include <dwarf-regs.h> - -/* - * Generic dwarf analysis helpers - */ - -#define SH_MAX_REGS 18 -const char *sh_regs_table[SH_MAX_REGS] = { - "r0", - "r1", - "r2", - "r3", - "r4", - "r5", - "r6", - "r7", - "r8", - "r9", - "r10", - "r11", - "r12", - "r13", - "r14", - "r15", - "pc", - "pr", -}; - -/* Return architecture dependent register string (for kprobe-tracer) */ -const char *get_arch_regstr(unsigned int n) -{ - return (n < SH_MAX_REGS) ? sh_regs_table[n] : NULL; -} diff --git a/tools/perf/arch/sparc/Build b/tools/perf/arch/sparc/Build deleted file mode 100644 index e63eabc2c8f4..000000000000 --- a/tools/perf/arch/sparc/Build +++ /dev/null @@ -1 +0,0 @@ -perf-util-y += util/ diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile index 4031db72ba71..8b59ce8efb89 100644 --- a/tools/perf/arch/sparc/Makefile +++ b/tools/perf/arch/sparc/Makefile @@ -1,6 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif - PERF_HAVE_JITDUMP := 1 diff --git a/tools/perf/arch/sparc/annotate/instructions.c b/tools/perf/arch/sparc/annotate/instructions.c index 2614c010c235..68c31580ccfc 100644 --- a/tools/perf/arch/sparc/annotate/instructions.c +++ b/tools/perf/arch/sparc/annotate/instructions.c @@ -163,6 +163,8 @@ static int sparc__annotate_init(struct arch *arch, char *cpuid __maybe_unused) arch->initialized = true; arch->associate_instruction_ops = sparc__associate_instruction_ops; arch->objdump.comment_char = '#'; + arch->e_machine = EM_SPARC; + arch->e_flags = 0; } return 0; diff --git a/tools/perf/arch/sparc/util/Build b/tools/perf/arch/sparc/util/Build deleted file mode 100644 index 32f44fc4ab98..000000000000 --- a/tools/perf/arch/sparc/util/Build +++ /dev/null @@ -1 +0,0 @@ -perf-util-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sparc/util/dwarf-regs.c b/tools/perf/arch/sparc/util/dwarf-regs.c deleted file mode 100644 index 1282cb2dc7bd..000000000000 --- a/tools/perf/arch/sparc/util/dwarf-regs.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (C) 2010 David S. Miller <davem@davemloft.net> - */ - -#include <stddef.h> -#include <dwarf-regs.h> - -#define SPARC_MAX_REGS 96 - -const char *sparc_regs_table[SPARC_MAX_REGS] = { - "%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7", - "%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7", - "%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7", - "%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7", - "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7", - "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15", - "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23", - "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31", - "%f32", "%f33", "%f34", "%f35", "%f36", "%f37", "%f38", "%f39", - "%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47", - "%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55", - "%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63", -}; - -/** - * get_arch_regstr() - lookup register name from it's DWARF register number - * @n: the DWARF register number - * - * get_arch_regstr() returns the name of the register in struct - * regdwarfnum_table from it's DWARF register number. If the register is not - * found in the table, this returns NULL; - */ -const char *get_arch_regstr(unsigned int n) -{ - return (n < SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL; -} diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 67b4969a6738..a6b6e0a9308a 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -1,9 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif HAVE_KVM_STAT_SUPPORT := 1 -PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 PERF_HAVE_JITDUMP := 1 ### diff --git a/tools/perf/arch/x86/annotate/instructions.c b/tools/perf/arch/x86/annotate/instructions.c index 5caf5a17f03d..ae94b1f0b9cc 100644 --- a/tools/perf/arch/x86/annotate/instructions.c +++ b/tools/perf/arch/x86/annotate/instructions.c @@ -202,12 +202,13 @@ static int x86__annotate_init(struct arch *arch, char *cpuid) if (x86__cpuid_parse(arch, cpuid)) err = SYMBOL_ANNOTATE_ERRNO__ARCH_INIT_CPUID_PARSING; } - + arch->e_machine = EM_X86_64; + arch->e_flags = 0; arch->initialized = true; return err; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT static void update_insn_state_x86(struct type_state *state, struct data_loc_info *dloc, Dwarf_Die *cu_die, struct disasm_line *dl) diff --git a/tools/perf/arch/x86/tests/intel-cqm.c b/tools/perf/arch/x86/tests/intel-cqm.c deleted file mode 100644 index 360a082fc928..000000000000 --- a/tools/perf/arch/x86/tests/intel-cqm.c +++ /dev/null @@ -1,128 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "tests/tests.h" -#include "cloexec.h" -#include "debug.h" -#include "evlist.h" -#include "evsel.h" -#include "arch-tests.h" -#include <internal/lib.h> // page_size - -#include <signal.h> -#include <sys/mman.h> -#include <sys/wait.h> -#include <errno.h> -#include <string.h> - -static pid_t spawn(void) -{ - pid_t pid; - - pid = fork(); - if (pid) - return pid; - - while(1) - sleep(5); - return 0; -} - -/* - * Create an event group that contains both a sampled hardware - * (cpu-cycles) and software (intel_cqm/llc_occupancy/) event. We then - * wait for the hardware perf counter to overflow and generate a PMI, - * which triggers an event read for both of the events in the group. - * - * Since reading Intel CQM event counters requires sending SMP IPIs, the - * CQM pmu needs to handle the above situation gracefully, and return - * the last read counter value to avoid triggering a WARN_ON_ONCE() in - * smp_call_function_many() caused by sending IPIs from NMI context. - */ -int test__intel_cqm_count_nmi_context(struct test_suite *test __maybe_unused, int subtest __maybe_unused) -{ - struct evlist *evlist = NULL; - struct evsel *evsel = NULL; - struct perf_event_attr pe; - int i, fd[2], flag, ret; - size_t mmap_len; - void *event; - pid_t pid; - int err = TEST_FAIL; - - flag = perf_event_open_cloexec_flag(); - - evlist = evlist__new(); - if (!evlist) { - pr_debug("evlist__new failed\n"); - return TEST_FAIL; - } - - ret = parse_event(evlist, "intel_cqm/llc_occupancy/"); - if (ret) { - pr_debug("parse_events failed, is \"intel_cqm/llc_occupancy/\" available?\n"); - err = TEST_SKIP; - goto out; - } - - evsel = evlist__first(evlist); - if (!evsel) { - pr_debug("evlist__first failed\n"); - goto out; - } - - memset(&pe, 0, sizeof(pe)); - pe.size = sizeof(pe); - - pe.type = PERF_TYPE_HARDWARE; - pe.config = PERF_COUNT_HW_CPU_CYCLES; - pe.read_format = PERF_FORMAT_GROUP; - - pe.sample_period = 128; - pe.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ; - - pid = spawn(); - - fd[0] = sys_perf_event_open(&pe, pid, -1, -1, flag); - if (fd[0] < 0) { - pr_debug("failed to open event\n"); - goto out; - } - - memset(&pe, 0, sizeof(pe)); - pe.size = sizeof(pe); - - pe.type = evsel->attr.type; - pe.config = evsel->attr.config; - - fd[1] = sys_perf_event_open(&pe, pid, -1, fd[0], flag); - if (fd[1] < 0) { - pr_debug("failed to open event\n"); - goto out; - } - - /* - * Pick a power-of-two number of pages + 1 for the meta-data - * page (struct perf_event_mmap_page). See tools/perf/design.txt. - */ - mmap_len = page_size * 65; - - event = mmap(NULL, mmap_len, PROT_READ, MAP_SHARED, fd[0], 0); - if (event == (void *)(-1)) { - pr_debug("failed to mmap %d\n", errno); - goto out; - } - - sleep(1); - - err = TEST_OK; - - munmap(event, mmap_len); - - for (i = 0; i < 2; i++) - close(fd[i]); - - kill(pid, SIGKILL); - wait(NULL); -out: - evlist__delete(evlist); - return err; -} diff --git a/tools/perf/arch/x86/tests/intel-pt-test.c b/tools/perf/arch/x86/tests/intel-pt-test.c index 09d61fa736e3..b217ed67cd4e 100644 --- a/tools/perf/arch/x86/tests/intel-pt-test.c +++ b/tools/perf/arch/x86/tests/intel-pt-test.c @@ -375,7 +375,7 @@ static int get_pt_caps(int cpu, struct pt_caps *caps) return 0; } -static bool is_hydrid(void) +static bool is_hybrid(void) { unsigned int eax, ebx, ecx, edx = 0; bool result; @@ -441,7 +441,7 @@ int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest) int ret = TEST_OK; int cpu; - if (!is_hydrid()) { + if (!is_hybrid()) { test->test_cases[subtest].skip_reason = "not hybrid"; return TEST_SKIP; } diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build index 2607ed5c4296..848327378694 100644 --- a/tools/perf/arch/x86/util/Build +++ b/tools/perf/arch/x86/util/Build @@ -10,10 +10,6 @@ perf-util-y += evlist.o perf-util-y += mem-events.o perf-util-y += evsel.o perf-util-y += iostat.o -perf-util-y += env.o - -perf-util-$(CONFIG_DWARF) += dwarf-regs.o -perf-util-$(CONFIG_BPF_PROLOGUE) += dwarf-regs.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c index 354780ff1605..ecbf61a7eb3a 100644 --- a/tools/perf/arch/x86/util/auxtrace.c +++ b/tools/perf/arch/x86/util/auxtrace.c @@ -55,11 +55,12 @@ struct auxtrace_record *auxtrace_record__init(struct evlist *evlist, int *err) { char buffer[64]; + struct perf_cpu cpu = perf_cpu_map__min(evlist->core.all_cpus); int ret; *err = 0; - ret = get_cpuid(buffer, sizeof(buffer)); + ret = get_cpuid(buffer, sizeof(buffer), cpu); if (ret) { *err = ret; return NULL; diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c deleted file mode 100644 index 399c4a0a29d8..000000000000 --- a/tools/perf/arch/x86/util/dwarf-regs.c +++ /dev/null @@ -1,153 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. - * Extracted from probe-finder.c - * - * Written by Masami Hiramatsu <mhiramat@redhat.com> - */ - -#include <stddef.h> -#include <errno.h> /* for EINVAL */ -#include <string.h> /* for strcmp */ -#include <linux/ptrace.h> /* for struct pt_regs */ -#include <linux/kernel.h> /* for offsetof */ -#include <dwarf-regs.h> - -/* - * See arch/x86/kernel/ptrace.c. - * Different from it: - * - * - Since struct pt_regs is defined differently for user and kernel, - * but we want to use 'ax, bx' instead of 'rax, rbx' (which is struct - * field name of user's pt_regs), we make REG_OFFSET_NAME to accept - * both string name and reg field name. - * - * - Since accessing x86_32's pt_regs from x86_64 building is difficult - * and vise versa, we simply fill offset with -1, so - * get_arch_regstr() still works but regs_query_register_offset() - * returns error. - * The only inconvenience caused by it now is that we are not allowed - * to generate BPF prologue for a x86_64 kernel if perf is built for - * x86_32. This is really a rare usecase. - * - * - Order is different from kernel's ptrace.c for get_arch_regstr(). Use - * the order defined by dwarf. - */ - -struct pt_regs_offset { - const char *name; - int offset; -}; - -#define REG_OFFSET_END {.name = NULL, .offset = 0} - -#ifdef __x86_64__ -# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} -# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = -1} -#else -# define REG_OFFSET_NAME_64(n, r) {.name = n, .offset = -1} -# define REG_OFFSET_NAME_32(n, r) {.name = n, .offset = offsetof(struct pt_regs, r)} -#endif - -/* TODO: switching by dwarf address size */ -#ifndef __x86_64__ -static const struct pt_regs_offset x86_32_regoffset_table[] = { - REG_OFFSET_NAME_32("%ax", eax), - REG_OFFSET_NAME_32("%cx", ecx), - REG_OFFSET_NAME_32("%dx", edx), - REG_OFFSET_NAME_32("%bx", ebx), - REG_OFFSET_NAME_32("$stack", esp), /* Stack address instead of %sp */ - REG_OFFSET_NAME_32("%bp", ebp), - REG_OFFSET_NAME_32("%si", esi), - REG_OFFSET_NAME_32("%di", edi), - REG_OFFSET_END, -}; - -#define regoffset_table x86_32_regoffset_table -#else -static const struct pt_regs_offset x86_64_regoffset_table[] = { - REG_OFFSET_NAME_64("%ax", rax), - REG_OFFSET_NAME_64("%dx", rdx), - REG_OFFSET_NAME_64("%cx", rcx), - REG_OFFSET_NAME_64("%bx", rbx), - REG_OFFSET_NAME_64("%si", rsi), - REG_OFFSET_NAME_64("%di", rdi), - REG_OFFSET_NAME_64("%bp", rbp), - REG_OFFSET_NAME_64("%sp", rsp), - REG_OFFSET_NAME_64("%r8", r8), - REG_OFFSET_NAME_64("%r9", r9), - REG_OFFSET_NAME_64("%r10", r10), - REG_OFFSET_NAME_64("%r11", r11), - REG_OFFSET_NAME_64("%r12", r12), - REG_OFFSET_NAME_64("%r13", r13), - REG_OFFSET_NAME_64("%r14", r14), - REG_OFFSET_NAME_64("%r15", r15), - REG_OFFSET_END, -}; - -#define regoffset_table x86_64_regoffset_table -#endif - -/* Minus 1 for the ending REG_OFFSET_END */ -#define ARCH_MAX_REGS ((sizeof(regoffset_table) / sizeof(regoffset_table[0])) - 1) - -/* Return architecture dependent register string (for kprobe-tracer) */ -const char *get_arch_regstr(unsigned int n) -{ - return (n < ARCH_MAX_REGS) ? regoffset_table[n].name : NULL; -} - -/* Reuse code from arch/x86/kernel/ptrace.c */ -/** - * regs_query_register_offset() - query register offset from its name - * @name: the name of a register - * - * regs_query_register_offset() returns the offset of a register in struct - * pt_regs from its name. If the name is invalid, this returns -EINVAL; - */ -int regs_query_register_offset(const char *name) -{ - const struct pt_regs_offset *roff; - for (roff = regoffset_table; roff->name != NULL; roff++) - if (!strcmp(roff->name, name)) - return roff->offset; - return -EINVAL; -} - -struct dwarf_regs_idx { - const char *name; - int idx; -}; - -static const struct dwarf_regs_idx x86_regidx_table[] = { - { "rax", 0 }, { "eax", 0 }, { "ax", 0 }, { "al", 0 }, - { "rdx", 1 }, { "edx", 1 }, { "dx", 1 }, { "dl", 1 }, - { "rcx", 2 }, { "ecx", 2 }, { "cx", 2 }, { "cl", 2 }, - { "rbx", 3 }, { "edx", 3 }, { "bx", 3 }, { "bl", 3 }, - { "rsi", 4 }, { "esi", 4 }, { "si", 4 }, { "sil", 4 }, - { "rdi", 5 }, { "edi", 5 }, { "di", 5 }, { "dil", 5 }, - { "rbp", 6 }, { "ebp", 6 }, { "bp", 6 }, { "bpl", 6 }, - { "rsp", 7 }, { "esp", 7 }, { "sp", 7 }, { "spl", 7 }, - { "r8", 8 }, { "r8d", 8 }, { "r8w", 8 }, { "r8b", 8 }, - { "r9", 9 }, { "r9d", 9 }, { "r9w", 9 }, { "r9b", 9 }, - { "r10", 10 }, { "r10d", 10 }, { "r10w", 10 }, { "r10b", 10 }, - { "r11", 11 }, { "r11d", 11 }, { "r11w", 11 }, { "r11b", 11 }, - { "r12", 12 }, { "r12d", 12 }, { "r12w", 12 }, { "r12b", 12 }, - { "r13", 13 }, { "r13d", 13 }, { "r13w", 13 }, { "r13b", 13 }, - { "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 }, - { "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 }, - { "rip", DWARF_REG_PC }, -}; - -int get_arch_regnum(const char *name) -{ - unsigned int i; - - if (*name != '%') - return -EINVAL; - - for (i = 0; i < ARRAY_SIZE(x86_regidx_table); i++) - if (!strcmp(x86_regidx_table[i].name, name + 1)) - return x86_regidx_table[i].idx; - return -ENOENT; -} diff --git a/tools/perf/arch/x86/util/env.c b/tools/perf/arch/x86/util/env.c deleted file mode 100644 index 3e537ffb1353..000000000000 --- a/tools/perf/arch/x86/util/env.c +++ /dev/null @@ -1,19 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include "linux/string.h" -#include "util/env.h" -#include "env.h" - -bool x86__is_amd_cpu(void) -{ - struct perf_env env = { .total_mem = 0, }; - static int is_amd; /* 0: Uninitialized, 1: Yes, -1: No */ - - if (is_amd) - goto ret; - - perf_env__cpuid(&env); - is_amd = env.cpuid && strstarts(env.cpuid, "AuthenticAMD") ? 1 : -1; - perf_env__exit(&env); -ret: - return is_amd >= 1 ? true : false; -} diff --git a/tools/perf/arch/x86/util/env.h b/tools/perf/arch/x86/util/env.h deleted file mode 100644 index d78f080b6b3f..000000000000 --- a/tools/perf/arch/x86/util/env.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _X86_ENV_H -#define _X86_ENV_H - -bool x86__is_amd_cpu(void); - -#endif /* _X86_ENV_H */ diff --git a/tools/perf/arch/x86/util/evlist.c b/tools/perf/arch/x86/util/evlist.c index cebdd483149e..447a734e591c 100644 --- a/tools/perf/arch/x86/util/evlist.c +++ b/tools/perf/arch/x86/util/evlist.c @@ -1,91 +1,86 @@ // SPDX-License-Identifier: GPL-2.0 -#include <stdio.h> -#include "util/pmu.h" -#include "util/pmus.h" -#include "util/evlist.h" -#include "util/parse-events.h" -#include "util/event.h" +#include <string.h> +#include "../../../util/evlist.h" +#include "../../../util/evsel.h" #include "topdown.h" #include "evsel.h" -static int ___evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs) -{ - LIST_HEAD(head); - size_t i = 0; - - for (i = 0; i < nr_attrs; i++) - event_attr_init(attrs + i); - - if (perf_pmus__num_core_pmus() == 1) - return evlist__add_attrs(evlist, attrs, nr_attrs); - - for (i = 0; i < nr_attrs; i++) { - struct perf_pmu *pmu = NULL; - - if (attrs[i].type == PERF_TYPE_SOFTWARE) { - struct evsel *evsel = evsel__new(attrs + i); - - if (evsel == NULL) - goto out_delete_partial_list; - list_add_tail(&evsel->core.node, &head); - continue; - } - - while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { - struct perf_cpu_map *cpus; - struct evsel *evsel; - - evsel = evsel__new(attrs + i); - if (evsel == NULL) - goto out_delete_partial_list; - evsel->core.attr.config |= (__u64)pmu->type << PERF_PMU_TYPE_SHIFT; - cpus = perf_cpu_map__get(pmu->cpus); - evsel->core.cpus = cpus; - evsel->core.own_cpus = perf_cpu_map__get(cpus); - evsel->pmu_name = strdup(pmu->name); - list_add_tail(&evsel->core.node, &head); - } - } - - evlist__splice_list_tail(evlist, &head); - - return 0; - -out_delete_partial_list: - { - struct evsel *evsel, *n; - - __evlist__for_each_entry_safe(&head, n, evsel) - evsel__delete(evsel); - } - return -1; -} - -int arch_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs) -{ - if (!nr_attrs) - return 0; - - return ___evlist__add_default_attrs(evlist, attrs, nr_attrs); -} - int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs) { + /* + * Currently the following topdown events sequence are supported to + * move and regroup correctly. + * + * a. all events in a group + * perf stat -e "{instructions,topdown-retiring,slots}" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Performance counter stats for 'CPU(s) 0': + * 15,066,240 slots + * 1,899,760 instructions + * 2,126,998 topdown-retiring + * b. all events not in a group + * perf stat -e "instructions,topdown-retiring,slots" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Performance counter stats for 'CPU(s) 0': + * 2,045,561 instructions + * 17,108,370 slots + * 2,281,116 topdown-retiring + * c. slots event in a group but topdown metrics events outside the group + * perf stat -e "{instructions,slots},topdown-retiring" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Performance counter stats for 'CPU(s) 0': + * 20,323,878 slots + * 2,634,884 instructions + * 3,028,656 topdown-retiring + * d. slots event and topdown metrics events in two groups + * perf stat -e "{instructions,slots},{topdown-retiring}" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Performance counter stats for 'CPU(s) 0': + * 26,319,024 slots + * 2,427,791 instructions + * 2,683,508 topdown-retiring + * + * If slots event and topdown metrics events are not in same group, the + * topdown metrics events must be first event after the slots event group, + * otherwise topdown metrics events can't be regrouped correctly, e.g. + * + * a. perf stat -e "{instructions,slots},cycles,topdown-retiring" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Performance counter stats for 'CPU(s) 0': + * 17,923,134 slots + * 2,154,855 instructions + * 3,015,058 cycles + * <not supported> topdown-retiring + * + * If slots event and topdown metrics events are in two groups, the group which + * has topdown metrics events must contain only the topdown metrics event, + * otherwise topdown metrics event can't be regrouped correctly as well, e.g. + * + * a. perf stat -e "{instructions,slots},{topdown-retiring,cycles}" -C0 sleep 1 + * WARNING: events were regrouped to match PMUs + * Error: + * The sys_perf_event_open() syscall returned with 22 (Invalid argument) for + * event (topdown-retiring) + */ if (topdown_sys_has_perf_metrics() && (arch_evsel__must_be_in_group(lhs) || arch_evsel__must_be_in_group(rhs))) { /* Ensure the topdown slots comes first. */ - if (strcasestr(lhs->name, "slots") && !strcasestr(lhs->name, "uops_retired.slots")) + if (arch_is_topdown_slots(lhs)) return -1; - if (strcasestr(rhs->name, "slots") && !strcasestr(rhs->name, "uops_retired.slots")) + if (arch_is_topdown_slots(rhs)) return 1; - /* Followed by topdown events. */ - if (strcasestr(lhs->name, "topdown") && !strcasestr(rhs->name, "topdown")) + + /* + * Move topdown metrics events forward only when topdown metrics + * events are not in same group with previous slots event. If + * topdown metrics events are already in same group with slots + * event, do nothing. + */ + if (arch_is_topdown_metrics(lhs) && !arch_is_topdown_metrics(rhs) && + lhs->core.leader != rhs->core.leader) return -1; - if (!strcasestr(lhs->name, "topdown") && strcasestr(rhs->name, "topdown")) + if (!arch_is_topdown_metrics(lhs) && arch_is_topdown_metrics(rhs) && + lhs->core.leader != rhs->core.leader) return 1; } diff --git a/tools/perf/arch/x86/util/evsel.c b/tools/perf/arch/x86/util/evsel.c index 090d0f371891..3dd29ba2c23b 100644 --- a/tools/perf/arch/x86/util/evsel.c +++ b/tools/perf/arch/x86/util/evsel.c @@ -6,6 +6,7 @@ #include "util/pmu.h" #include "util/pmus.h" #include "linux/string.h" +#include "topdown.h" #include "evsel.h" #include "util/debug.h" #include "env.h" @@ -21,7 +22,8 @@ void arch_evsel__set_sample_weight(struct evsel *evsel) /* Check whether the evsel's PMU supports the perf metrics */ bool evsel__sys_has_perf_metrics(const struct evsel *evsel) { - const char *pmu_name = evsel->pmu_name ? evsel->pmu_name : "cpu"; + struct perf_pmu *pmu; + u32 type = evsel->core.attr.type; /* * The PERF_TYPE_RAW type is the core PMU type, e.g., "cpu" PMU @@ -31,11 +33,31 @@ bool evsel__sys_has_perf_metrics(const struct evsel *evsel) * Checking both the PERF_TYPE_RAW type and the slots event * should be good enough to detect the perf metrics feature. */ - if ((evsel->core.attr.type == PERF_TYPE_RAW) && - perf_pmus__have_event(pmu_name, "slots")) - return true; +again: + switch (type) { + case PERF_TYPE_HARDWARE: + case PERF_TYPE_HW_CACHE: + type = evsel->core.attr.config >> PERF_PMU_TYPE_SHIFT; + if (type) + goto again; + break; + case PERF_TYPE_RAW: + break; + default: + return false; + } + + pmu = evsel->pmu; + if (pmu && perf_pmu__is_fake(pmu)) + pmu = NULL; - return false; + if (!pmu) { + while ((pmu = perf_pmus__scan_core(pmu)) != NULL) { + if (pmu->type == PERF_TYPE_RAW) + break; + } + } + return pmu && perf_pmu__have_event(pmu, "slots"); } bool arch_evsel__must_be_in_group(const struct evsel *evsel) @@ -44,7 +66,7 @@ bool arch_evsel__must_be_in_group(const struct evsel *evsel) strcasestr(evsel->name, "uops_retired.slots")) return false; - return strcasestr(evsel->name, "topdown") || strcasestr(evsel->name, "slots"); + return arch_is_topdown_metrics(evsel) || arch_is_topdown_slots(evsel); } int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size) @@ -63,7 +85,7 @@ int arch_evsel__hw_name(struct evsel *evsel, char *bf, size_t size) return scnprintf(bf, size, "%s", event_name); return scnprintf(bf, size, "%s/%s/", - evsel->pmu_name ? evsel->pmu_name : "cpu", + evsel->pmu ? evsel->pmu->name : "cpu", event_name); } @@ -108,7 +130,7 @@ int arch_evsel__open_strerror(struct evsel *evsel, char *msg, size_t size) return 0; if (!evsel->core.attr.precise_ip && - !(evsel->pmu_name && !strncmp(evsel->pmu_name, "ibs", 3))) + !(evsel->pmu && !strncmp(evsel->pmu->name, "ibs", 3))) return 0; /* More verbose IBS errors. */ diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index a51444a77a5f..412977f8aa83 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -58,13 +58,12 @@ __get_cpuid(char *buffer, size_t sz, const char *fmt) } int -get_cpuid(char *buffer, size_t sz) +get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu __maybe_unused) { return __get_cpuid(buffer, sz, "%s,%u,%u,%u$"); } -char * -get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +char *get_cpuid_str(struct perf_cpu cpu __maybe_unused) { char *buf = malloc(128); diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c index ea510a7486b1..8f235d8b67b6 100644 --- a/tools/perf/arch/x86/util/intel-pt.c +++ b/tools/perf/arch/x86/util/intel-pt.c @@ -75,7 +75,8 @@ static int intel_pt_parse_terms_with_default(const struct perf_pmu *pmu, goto out_free; attr.config = *config; - err = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/true, /*err=*/NULL); + err = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/true, /*apply_hardcoded=*/false, + /*err=*/NULL); if (err) goto out_free; diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c index df7b5dfcc26a..366b44d0bb7e 100644 --- a/tools/perf/arch/x86/util/iostat.c +++ b/tools/perf/arch/x86/util/iostat.c @@ -444,7 +444,7 @@ void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel, iostat_value = (count->val - prev_count_val) / ((double) count->run / count->ena); } - out->print_metric(config, out->ctx, NULL, "%8.0f", iostat_metric, + out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, "%8.0f", iostat_metric, iostat_value / (256 * 1024)); } diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c index c3d89d6ba1bf..e0060dac2a9f 100644 --- a/tools/perf/arch/x86/util/pmu.c +++ b/tools/perf/arch/x86/util/pmu.c @@ -16,7 +16,7 @@ #include "../../../util/fncache.h" #include "../../../util/pmus.h" #include "mem-events.h" -#include "env.h" +#include "util/env.h" void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused) { diff --git a/tools/perf/arch/x86/util/topdown.c b/tools/perf/arch/x86/util/topdown.c index 3f9a267d4501..f63747d0abdf 100644 --- a/tools/perf/arch/x86/util/topdown.c +++ b/tools/perf/arch/x86/util/topdown.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "api/fs/fs.h" #include "util/evsel.h" +#include "util/evlist.h" #include "util/pmu.h" #include "util/pmus.h" #include "util/topdown.h" @@ -32,6 +33,31 @@ bool topdown_sys_has_perf_metrics(void) } #define TOPDOWN_SLOTS 0x0400 +bool arch_is_topdown_slots(const struct evsel *evsel) +{ + if (evsel->core.attr.config == TOPDOWN_SLOTS) + return true; + + return false; +} + +bool arch_is_topdown_metrics(const struct evsel *evsel) +{ + int config = evsel->core.attr.config; + const char *name_from_config; + struct perf_pmu *pmu; + + /* All topdown events have an event code of 0. */ + if ((config & 0xFF) != 0) + return false; + + pmu = evsel__find_pmu(evsel); + if (!pmu || !pmu->is_core) + return false; + + name_from_config = perf_pmu__name_from_config(pmu, config); + return name_from_config && strcasestr(name_from_config, "topdown"); +} /* * Check whether a topdown group supports sample-read. @@ -41,11 +67,24 @@ bool topdown_sys_has_perf_metrics(void) */ bool arch_topdown_sample_read(struct evsel *leader) { + struct evsel *evsel; + if (!evsel__sys_has_perf_metrics(leader)) return false; - if (leader->core.attr.config == TOPDOWN_SLOTS) - return true; + if (!arch_is_topdown_slots(leader)) + return false; + + /* + * If slots event as leader event but no topdown metric events + * in group, slots event should still sample as leader. + */ + evlist__for_each_entry(leader->evlist, evsel) { + if (evsel->core.leader != leader->core.leader) + return false; + if (evsel != leader && arch_is_topdown_metrics(evsel)) + return true; + } return false; } diff --git a/tools/perf/arch/x86/util/topdown.h b/tools/perf/arch/x86/util/topdown.h index 46bf9273e572..1bae9b1822d7 100644 --- a/tools/perf/arch/x86/util/topdown.h +++ b/tools/perf/arch/x86/util/topdown.h @@ -3,5 +3,7 @@ #define _TOPDOWN_H 1 bool topdown_sys_has_perf_metrics(void); +bool arch_is_topdown_slots(const struct evsel *evsel); +bool arch_is_topdown_metrics(const struct evsel *evsel); #endif diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index e2d6cfe21057..3a439e4b12d2 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -24,9 +24,9 @@ u64 rdtsc(void) * ... * will return 3000000000. */ -static double cpuinfo_tsc_freq(void) +static u64 cpuinfo_tsc_freq(void) { - double result = 0; + u64 result = 0; FILE *cpuinfo; char *line = NULL; size_t len = 0; @@ -34,20 +34,22 @@ static double cpuinfo_tsc_freq(void) cpuinfo = fopen("/proc/cpuinfo", "r"); if (!cpuinfo) { pr_err("Failed to read /proc/cpuinfo for TSC frequency\n"); - return NAN; + return 0; } while (getline(&line, &len, cpuinfo) > 0) { if (!strncmp(line, "model name", 10)) { char *pos = strstr(line + 11, " @ "); + double float_result; - if (pos && sscanf(pos, " @ %lfGHz", &result) == 1) { - result *= 1000000000; + if (pos && sscanf(pos, " @ %lfGHz", &float_result) == 1) { + float_result *= 1000000000; + result = (u64)float_result; goto out; } } } out: - if (fpclassify(result) == FP_ZERO) + if (result == 0) pr_err("Failed to find TSC frequency in /proc/cpuinfo\n"); free(line); @@ -55,7 +57,7 @@ out: return result; } -double arch_get_tsc_freq(void) +u64 arch_get_tsc_freq(void) { unsigned int a, b, c, d, lvl; static bool cached; @@ -86,6 +88,6 @@ double arch_get_tsc_freq(void) return tsc; } - tsc = (double)c * (double)b / (double)a; + tsc = (u64)c * (u64)b / (u64)a; return tsc; } diff --git a/tools/perf/arch/xtensa/Build b/tools/perf/arch/xtensa/Build deleted file mode 100644 index e63eabc2c8f4..000000000000 --- a/tools/perf/arch/xtensa/Build +++ /dev/null @@ -1 +0,0 @@ -perf-util-y += util/ diff --git a/tools/perf/arch/xtensa/Makefile b/tools/perf/arch/xtensa/Makefile deleted file mode 100644 index 88c08eed9c7b..000000000000 --- a/tools/perf/arch/xtensa/Makefile +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -ifndef NO_DWARF -PERF_HAVE_DWARF_REGS := 1 -endif diff --git a/tools/perf/arch/xtensa/util/Build b/tools/perf/arch/xtensa/util/Build deleted file mode 100644 index e813e618954b..000000000000 --- a/tools/perf/arch/xtensa/util/Build +++ /dev/null @@ -1 +0,0 @@ -perf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/xtensa/util/dwarf-regs.c b/tools/perf/arch/xtensa/util/dwarf-regs.c deleted file mode 100644 index 12f5457300f5..000000000000 --- a/tools/perf/arch/xtensa/util/dwarf-regs.c +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Mapping of DWARF debug register numbers into register names. - * - * Copyright (c) 2015 Cadence Design Systems Inc. - */ - -#include <stddef.h> -#include <dwarf-regs.h> - -#define XTENSA_MAX_REGS 16 - -const char *xtensa_regs_table[XTENSA_MAX_REGS] = { - "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", - "a8", "a9", "a10", "a11", "a12", "a13", "a14", "a15", -}; - -const char *get_arch_regstr(unsigned int n) -{ - return n < XTENSA_MAX_REGS ? xtensa_regs_table[n] : NULL; -} diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 1fbd7c947abc..19be2aaf4dc0 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -27,6 +27,7 @@ #include <sys/resource.h> #include <sys/wait.h> #include <sys/prctl.h> +#include <sys/stat.h> #include <sys/types.h> #include <linux/kernel.h> #include <linux/time64.h> @@ -35,6 +36,7 @@ #include "../util/header.h" #include "../util/mutex.h" +#include <api/fs/fs.h> #include <numa.h> #include <numaif.h> @@ -533,6 +535,57 @@ static int parse_cpu_list(const char *arg) return 0; } +/* + * Check whether a CPU is online + * + * Returns: + * 1 -> if CPU is online + * 0 -> if CPU is offline + * -1 -> error case + */ +static int is_cpu_online(unsigned int cpu) +{ + char *str; + size_t strlen; + char buf[256]; + int status = -1; + struct stat statbuf; + + snprintf(buf, sizeof(buf), + "/sys/devices/system/cpu/cpu%d", cpu); + if (stat(buf, &statbuf) != 0) + return 0; + + /* + * Check if /sys/devices/system/cpu/cpux/online file + * exists. Some cases cpu0 won't have online file since + * it is not expected to be turned off generally. + * In kernels without CONFIG_HOTPLUG_CPU, this + * file won't exist + */ + snprintf(buf, sizeof(buf), + "/sys/devices/system/cpu/cpu%d/online", cpu); + if (stat(buf, &statbuf) != 0) + return 1; + + /* + * Read online file using sysfs__read_str. + * If read or open fails, return -1. + * If read succeeds, return value from file + * which gets stored in "str" + */ + snprintf(buf, sizeof(buf), + "devices/system/cpu/cpu%d/online", cpu); + + if (sysfs__read_str(buf, &str, &strlen) < 0) + return status; + + status = atoi(str); + + free(str); + return status; +} + static int parse_setup_cpu_list(void) { struct thread_data *td; diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 3af6d3c55aba..e2562677df96 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -23,6 +23,7 @@ #include <errno.h> #include <fcntl.h> #include <assert.h> +#include <sys/epoll.h> #include <sys/time.h> #include <sys/types.h> #include <sys/syscall.h> @@ -34,6 +35,8 @@ struct thread_data { int nr; int pipe_read; int pipe_write; + struct epoll_event epoll_ev; + int epoll_fd; bool cgroup_failed; pthread_t pthread; }; @@ -44,6 +47,7 @@ static int loops = LOOPS_DEFAULT; /* Use processes by default: */ static bool threaded; +static bool nonblocking; static char *cgrp_names[2]; static struct cgroup *cgrps[2]; @@ -81,6 +85,7 @@ out: } static const struct option options[] = { + OPT_BOOLEAN('n', "nonblocking", &nonblocking, "Use non-blocking operations"), OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), OPT_BOOLEAN('T', "threaded", &threaded, "Specify threads/process based task setup"), OPT_CALLBACK('G', "cgroups", NULL, "SEND,RECV", @@ -165,11 +170,25 @@ static void exit_cgroup(int nr) free(cgrp_names[nr]); } +static inline int read_pipe(struct thread_data *td) +{ + int ret, m; +retry: + if (nonblocking) { + ret = epoll_wait(td->epoll_fd, &td->epoll_ev, 1, -1); + if (ret < 0) + return ret; + } + ret = read(td->pipe_read, &m, sizeof(int)); + if (nonblocking && ret < 0 && errno == EWOULDBLOCK) + goto retry; + return ret; +} + static void *worker_thread(void *__tdata) { struct thread_data *td = __tdata; - int m = 0, i; - int ret; + int i, ret, m = 0; ret = enter_cgroup(td->nr); if (ret < 0) { @@ -177,16 +196,23 @@ static void *worker_thread(void *__tdata) return NULL; } + if (nonblocking) { + td->epoll_ev.events = EPOLLIN; + td->epoll_fd = epoll_create(1); + BUG_ON(td->epoll_fd < 0); + BUG_ON(epoll_ctl(td->epoll_fd, EPOLL_CTL_ADD, td->pipe_read, &td->epoll_ev) < 0); + } + for (i = 0; i < loops; i++) { if (!td->nr) { - ret = read(td->pipe_read, &m, sizeof(int)); + ret = read_pipe(td); BUG_ON(ret != sizeof(int)); ret = write(td->pipe_write, &m, sizeof(int)); BUG_ON(ret != sizeof(int)); } else { ret = write(td->pipe_write, &m, sizeof(int)); BUG_ON(ret != sizeof(int)); - ret = read(td->pipe_read, &m, sizeof(int)); + ret = read_pipe(td); BUG_ON(ret != sizeof(int)); } } @@ -209,13 +235,16 @@ int bench_sched_pipe(int argc, const char **argv) * discarding returned value of read(), write() * causes error in building environment for perf */ - int __maybe_unused ret, wait_stat; + int __maybe_unused ret, wait_stat, flags = 0; pid_t pid, retpid __maybe_unused; argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0); - BUG_ON(pipe(pipe_1)); - BUG_ON(pipe(pipe_2)); + if (nonblocking) + flags |= O_NONBLOCK; + + BUG_ON(pipe2(pipe_1, flags)); + BUG_ON(pipe2(pipe_2, flags)); gettimeofday(&start, NULL); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 3dc6197ef3fa..bb87e6e7687d 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -840,7 +840,7 @@ int cmd_annotate(int argc, const char **argv) } #endif -#ifndef HAVE_DWARF_GETLOCATIONS_SUPPORT +#ifndef HAVE_LIBDW_SUPPORT if (annotate.data_type) { pr_err("Error: Data type profiling is disabled due to missing DWARF support\n"); return -ENOTSUP; diff --git a/tools/perf/builtin-check.c b/tools/perf/builtin-check.c index 0b76b6e42b78..2346536a5ee1 100644 --- a/tools/perf/builtin-check.c +++ b/tools/perf/builtin-check.c @@ -27,15 +27,15 @@ struct feature_status supported_features[] = { FEATURE_STATUS("bpf", HAVE_LIBBPF_SUPPORT), FEATURE_STATUS("bpf_skeletons", HAVE_BPF_SKEL), FEATURE_STATUS("debuginfod", HAVE_DEBUGINFOD_SUPPORT), - FEATURE_STATUS("dwarf", HAVE_DWARF_SUPPORT), - FEATURE_STATUS("dwarf_getlocations", HAVE_DWARF_GETLOCATIONS_SUPPORT), + FEATURE_STATUS("dwarf", HAVE_LIBDW_SUPPORT), + FEATURE_STATUS("dwarf_getlocations", HAVE_LIBDW_SUPPORT), FEATURE_STATUS("dwarf-unwind", HAVE_DWARF_UNWIND_SUPPORT), FEATURE_STATUS("auxtrace", HAVE_AUXTRACE_SUPPORT), FEATURE_STATUS("libaudit", HAVE_LIBAUDIT_SUPPORT), FEATURE_STATUS("libbfd", HAVE_LIBBFD_SUPPORT), FEATURE_STATUS("libcapstone", HAVE_LIBCAPSTONE_SUPPORT), FEATURE_STATUS("libcrypto", HAVE_LIBCRYPTO_SUPPORT), - FEATURE_STATUS("libdw-dwarf-unwind", HAVE_DWARF_SUPPORT), + FEATURE_STATUS("libdw-dwarf-unwind", HAVE_LIBDW_SUPPORT), FEATURE_STATUS("libelf", HAVE_LIBELF_SUPPORT), FEATURE_STATUS("libnuma", HAVE_LIBNUMA_SUPPORT), FEATURE_STATUS("libopencsd", HAVE_CSTRACE_SUPPORT), diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 23326dd20333..82fb7773e03e 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -469,13 +469,13 @@ out: static struct perf_diff pdiff; -static struct evsel *evsel_match(struct evsel *evsel, - struct evlist *evlist) +static struct evsel *evsel_match(struct evsel *evsel, struct evlist *evlist) { struct evsel *e; evlist__for_each_entry(evlist, e) { - if (evsel__match2(evsel, e)) + if ((evsel->core.attr.type == e->core.attr.type) && + (evsel->core.attr.config == e->core.attr.config)) return e; } diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index abcdc49b7a98..272d3c70810e 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -815,7 +815,7 @@ static void display_histogram(int buckets[], bool use_nsec) bar_len = buckets[0] * bar_total / total; printf(" %4d - %-4d %s | %10d | %.*s%*s |\n", - 0, 1, "us", buckets[0], bar_len, bar, bar_total - bar_len, ""); + 0, 1, use_nsec ? "ns" : "us", buckets[0], bar_len, bar, bar_total - bar_len, ""); for (i = 1; i < NUM_BUCKET - 1; i++) { int start = (1 << (i - 1)); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index a756147e2eec..4d8d94146f8d 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -36,7 +36,7 @@ #include <regex.h> #include <linux/ctype.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> static int kmem_slab; static int kmem_page; diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 55ea17c5ff02..274568d712d1 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1226,7 +1226,9 @@ static int cpu_isa_config(struct perf_kvm_stat *kvm) int err; if (kvm->live) { - err = get_cpuid(buf, sizeof(buf)); + struct perf_cpu cpu = {-1}; + + err = get_cpuid(buf, sizeof(buf), cpu); if (err != 0) { pr_err("Failed to look up CPU type: %s\n", str_error_r(err, buf, sizeof(buf))); @@ -2147,6 +2149,7 @@ int cmd_kvm(int argc, const char **argv) "buildid-list", "stat", NULL }; const char *kvm_usage[] = { NULL, NULL }; + exclude_GH_default = true; perf_host = 0; perf_guest = 1; diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c index c1daf82c9b92..8234410cba4c 100644 --- a/tools/perf/builtin-kwork.c +++ b/tools/perf/builtin-kwork.c @@ -23,7 +23,7 @@ #include <subcmd/pager.h> #include <subcmd/parse-options.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <errno.h> #include <inttypes.h> diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 65b8cba324be..9e7fdfcdd7ff 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -19,6 +19,7 @@ #include "util/string2.h" #include "util/strlist.h" #include "util/strbuf.h" +#include "util/tool_pmu.h" #include <subcmd/pager.h> #include <subcmd/parse-options.h> #include <linux/zalloc.h> @@ -112,7 +113,7 @@ static void wordwrap(FILE *fp, const char *s, int start, int max, int corr) } } -static void default_print_event(void *ps, const char *pmu_name, const char *topic, +static void default_print_event(void *ps, const char *topic, const char *pmu_name, const char *event_name, const char *event_alias, const char *scale_unit __maybe_unused, bool deprecated, const char *event_type_desc, @@ -353,7 +354,7 @@ static void fix_escape_fprintf(FILE *fp, struct strbuf *buf, const char *fmt, .. fputs(buf->buf, fp); } -static void json_print_event(void *ps, const char *pmu_name, const char *topic, +static void json_print_event(void *ps, const char *topic, const char *pmu_name, const char *event_name, const char *event_alias, const char *scale_unit, bool deprecated, const char *event_type_desc, @@ -614,9 +615,18 @@ int cmd_list(int argc, const char **argv) event_symbols_hw, PERF_COUNT_HW_MAX); else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) { + char *old_pmu_glob = default_ps.pmu_glob; + print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX); - print_tool_events(&print_cb, ps); + default_ps.pmu_glob = strdup("tool"); + if (!default_ps.pmu_glob) { + ret = -1; + goto out; + } + perf_pmus__print_pmu_events(&print_cb, ps); + zfree(&default_ps.pmu_glob); + default_ps.pmu_glob = old_pmu_glob; } else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) print_hwcache_events(&print_cb, ps); @@ -664,7 +674,6 @@ int cmd_list(int argc, const char **argv) event_symbols_hw, PERF_COUNT_HW_MAX); print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX); - print_tool_events(&print_cb, ps); print_hwcache_events(&print_cb, ps); perf_pmus__print_pmu_events(&print_cb, ps); print_tracepoint_events(&print_cb, ps); diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 003a3bcebfdf..69800e4d9530 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -229,7 +229,7 @@ static int opt_set_target_ns(const struct option *opt __maybe_unused, /* Command option callbacks */ -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT static int opt_show_lines(const struct option *opt, const char *str, int unset __maybe_unused) { @@ -505,7 +505,7 @@ out: return ret; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #define PROBEDEF_STR \ "[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT [[NAME=]ARG ...]" #else @@ -521,7 +521,7 @@ __cmd_probe(int argc, const char **argv) "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", "perf probe [<options>] --del '[GROUP:]EVENT' ...", "perf probe --list [GROUP:]EVENT ...", -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT "perf probe [<options>] --line 'LINEDESC'", "perf probe [<options>] --vars 'PROBEPOINT'", #endif @@ -545,7 +545,7 @@ __cmd_probe(int argc, const char **argv) "\t\tFUNC:\tFunction name\n" "\t\tOFF:\tOffset from function entry (in byte)\n" "\t\t%return:\tPut the probe at function return\n" -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT "\t\tSRC:\tSource code path\n" "\t\tRL:\tRelative line number from function entry.\n" "\t\tAL:\tAbsolute line number in file.\n" @@ -612,11 +612,11 @@ __cmd_probe(int argc, const char **argv) set_option_flag(options, 'd', "del", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 'D', "definition", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 'l', "list", PARSE_OPT_EXCLUSIVE); -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE); #else -# define set_nobuild(s, l, c) set_option_nobuild(options, s, l, "NO_DWARF=1", c) +# define set_nobuild(s, l, c) set_option_nobuild(options, s, l, "NO_LIBDW=1", c) set_nobuild('L', "line", false); set_nobuild('V', "vars", false); set_nobuild('\0', "externs", false); @@ -694,7 +694,7 @@ __cmd_probe(int argc, const char **argv) if (ret < 0) pr_err_with_code(" Error: Failed to show functions.", ret); return ret; -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT case 'L': ret = show_line_range(¶ms->line_range, params->target, params->nsi, params->uprobes); diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index adbaf80b398c..f83252472921 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -4157,9 +4157,7 @@ int cmd_record(int argc, const char **argv) record.opts.tail_synthesize = true; if (rec->evlist->core.nr_entries == 0) { - bool can_profile_kernel = perf_event_paranoid_check(1); - - err = parse_event(rec->evlist, can_profile_kernel ? "cycles:P" : "cycles:Pu"); + err = parse_event(rec->evlist, "cycles:P"); if (err) goto out; } diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 5dc17ffee27a..048c91960ba9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -70,7 +70,7 @@ #include <linux/mman.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif struct report { @@ -455,7 +455,7 @@ static int report__setup_sample_type(struct report *rep) if (!(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) rep->nonany_branch_mode = true; -#if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_DWARF_SUPPORT) +#if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_LIBDW_SUPPORT) if (dwarf_callchain_users) { ui__warning("Please install libunwind or libdw " "development packages during the perf build.\n"); @@ -1271,6 +1271,10 @@ static int process_attr(const struct perf_tool *tool __maybe_unused, return 0; } +#define CALLCHAIN_BRANCH_SORT_ORDER \ + "srcline,symbol,dso,callchain_branch_predicted," \ + "callchain_branch_abort,callchain_branch_cycles" + int cmd_report(int argc, const char **argv) { struct perf_session *session; @@ -1639,7 +1643,7 @@ repeat: symbol_conf.use_callchain = true; callchain_register_param(&callchain_param); if (sort_order == NULL) - sort_order = "srcline,symbol,dso"; + sort_order = CALLCHAIN_BRANCH_SORT_ORDER; } if (report.mem_mode) { @@ -1701,7 +1705,7 @@ repeat: report.data_type = true; annotate_opts.annotate_src = false; -#ifndef HAVE_DWARF_GETLOCATIONS_SUPPORT +#ifndef HAVE_LIBDW_SUPPORT pr_err("Error: Data type profiling is disabled due to missing DWARF support\n"); goto error; #endif diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 5981cc51abc8..7049c60ebf77 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -68,7 +68,6 @@ struct task_desc { struct sched_atom **atoms; pthread_t thread; - sem_t sleep_sem; sem_t ready_for_work; sem_t work_done_sem; @@ -80,12 +79,10 @@ enum sched_event_type { SCHED_EVENT_RUN, SCHED_EVENT_SLEEP, SCHED_EVENT_WAKEUP, - SCHED_EVENT_MIGRATION, }; struct sched_atom { enum sched_event_type type; - int specific_wait; u64 timestamp; u64 duration; unsigned long nr; @@ -228,6 +225,7 @@ struct perf_sched { bool show_wakeups; bool show_next; bool show_migrations; + bool pre_migrations; bool show_state; bool show_prio; u64 skipped_samples; @@ -247,7 +245,9 @@ struct thread_runtime { u64 dt_iowait; /* time between CPU access by iowait (off cpu) */ u64 dt_preempt; /* time between CPU access by preempt (off cpu) */ u64 dt_delay; /* time between wakeup and sched-in */ + u64 dt_pre_mig; /* time between migration and wakeup */ u64 ready_to_run; /* time of wakeup */ + u64 migrated; /* time when a thread is migrated */ struct stats run_stats; u64 total_run_time; @@ -255,6 +255,7 @@ struct thread_runtime { u64 total_iowait_time; u64 total_preempt_time; u64 total_delay_time; + u64 total_pre_mig_time; char last_state; @@ -421,14 +422,13 @@ static void add_sched_event_wakeup(struct perf_sched *sched, struct task_desc *t wakee_event->wait_sem = zalloc(sizeof(*wakee_event->wait_sem)); sem_init(wakee_event->wait_sem, 0, 0); - wakee_event->specific_wait = 1; event->wait_sem = wakee_event->wait_sem; sched->nr_wakeup_events++; } static void add_sched_event_sleep(struct perf_sched *sched, struct task_desc *task, - u64 timestamp, const char task_state __maybe_unused) + u64 timestamp) { struct sched_atom *event = get_new_event(task, timestamp); @@ -468,7 +468,7 @@ static struct task_desc *register_pid(struct perf_sched *sched, * every task starts in sleeping state - this gets ignored * if there's no wakeup pointing to this sleep state: */ - add_sched_event_sleep(sched, task, 0, 0); + add_sched_event_sleep(sched, task, 0); sched->pid_to_task[pid] = task; sched->nr_tasks++; @@ -529,8 +529,6 @@ static void perf_sched__process_event(struct perf_sched *sched, ret = sem_post(atom->wait_sem); BUG_ON(ret); break; - case SCHED_EVENT_MIGRATION: - break; default: BUG_ON(1); } @@ -673,7 +671,6 @@ static void create_tasks(struct perf_sched *sched) parms->task = task = sched->tasks[i]; parms->sched = sched; parms->fd = self_open_counters(sched, i); - sem_init(&task->sleep_sem, 0, 0); sem_init(&task->ready_for_work, 0, 0); sem_init(&task->work_done_sem, 0, 0); task->curr_event = 0; @@ -697,7 +694,6 @@ static void destroy_tasks(struct perf_sched *sched) task = sched->tasks[i]; err = pthread_join(task->thread, NULL); BUG_ON(err); - sem_destroy(&task->sleep_sem); sem_destroy(&task->ready_for_work); sem_destroy(&task->work_done_sem); } @@ -751,7 +747,6 @@ static void wait_for_tasks(struct perf_sched *sched) for (i = 0; i < sched->nr_tasks; i++) { task = sched->tasks[i]; - sem_init(&task->sleep_sem, 0, 0); task->curr_event = 0; } } @@ -852,7 +847,6 @@ static int replay_switch_event(struct perf_sched *sched, *next_comm = evsel__strval(evsel, sample, "next_comm"); const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"), next_pid = evsel__intval(evsel, sample, "next_pid"); - const char prev_state = evsel__taskstate(evsel, sample, "prev_state"); struct task_desc *prev, __maybe_unused *next; u64 timestamp0, timestamp = sample->time; int cpu = sample->cpu; @@ -884,7 +878,7 @@ static int replay_switch_event(struct perf_sched *sched, sched->cpu_last_switched[cpu] = timestamp; add_sched_event_run(sched, prev, timestamp, delta); - add_sched_event_sleep(sched, prev, timestamp, prev_state); + add_sched_event_sleep(sched, prev, timestamp); return 0; } @@ -1749,7 +1743,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel, } if (sched->map.comp && new_cpu) - color_fprintf(stdout, color, " (CPU %d)", this_cpu); + color_fprintf(stdout, color, " (CPU %d)", this_cpu.cpu); if (proceed != 1) { color_fprintf(stdout, color, "\n"); @@ -2083,14 +2077,15 @@ static void timehist_header(struct perf_sched *sched) printf(" "); } - if (sched->show_prio) { - printf(" %-*s %-*s %9s %9s %9s", - comm_width, "task name", MAX_PRIO_STR_LEN, "prio", - "wait time", "sch delay", "run time"); - } else { - printf(" %-*s %9s %9s %9s", comm_width, - "task name", "wait time", "sch delay", "run time"); - } + printf(" %-*s", comm_width, "task name"); + + if (sched->show_prio) + printf(" %-*s", MAX_PRIO_STR_LEN, "prio"); + + printf(" %9s %9s %9s", "wait time", "sch delay", "run time"); + + if (sched->pre_migrations) + printf(" %9s", "pre-mig time"); if (sched->show_state) printf(" %s", "state"); @@ -2105,17 +2100,15 @@ static void timehist_header(struct perf_sched *sched) if (sched->show_cpu_visual) printf(" %*s ", ncpus, ""); - if (sched->show_prio) { - printf(" %-*s %-*s %9s %9s %9s", - comm_width, "[tid/pid]", MAX_PRIO_STR_LEN, "", - "(msec)", "(msec)", "(msec)"); - } else { - printf(" %-*s %9s %9s %9s", comm_width, - "[tid/pid]", "(msec)", "(msec)", "(msec)"); - } + printf(" %-*s", comm_width, "[tid/pid]"); - if (sched->show_state) - printf(" %5s", ""); + if (sched->show_prio) + printf(" %-*s", MAX_PRIO_STR_LEN, ""); + + printf(" %9s %9s %9s", "(msec)", "(msec)", "(msec)"); + + if (sched->pre_migrations) + printf(" %9s", "(msec)"); printf("\n"); @@ -2127,15 +2120,15 @@ static void timehist_header(struct perf_sched *sched) if (sched->show_cpu_visual) printf(" %.*s ", ncpus, graph_dotted_line); - if (sched->show_prio) { - printf(" %.*s %.*s %.9s %.9s %.9s", - comm_width, graph_dotted_line, MAX_PRIO_STR_LEN, graph_dotted_line, - graph_dotted_line, graph_dotted_line, graph_dotted_line); - } else { - printf(" %.*s %.9s %.9s %.9s", comm_width, - graph_dotted_line, graph_dotted_line, graph_dotted_line, - graph_dotted_line); - } + printf(" %.*s", comm_width, graph_dotted_line); + + if (sched->show_prio) + printf(" %.*s", MAX_PRIO_STR_LEN, graph_dotted_line); + + printf(" %.9s %.9s %.9s", graph_dotted_line, graph_dotted_line, graph_dotted_line); + + if (sched->pre_migrations) + printf(" %.9s", graph_dotted_line); if (sched->show_state) printf(" %.5s", graph_dotted_line); @@ -2190,6 +2183,8 @@ static void timehist_print_sample(struct perf_sched *sched, print_sched_time(tr->dt_delay, 6); print_sched_time(tr->dt_run, 6); + if (sched->pre_migrations) + print_sched_time(tr->dt_pre_mig, 6); if (sched->show_state) printf(" %5c ", thread__tid(thread) == 0 ? 'I' : state); @@ -2227,18 +2222,21 @@ out: * last_time = time of last sched change event for current task * (i.e, time process was last scheduled out) * ready_to_run = time of wakeup for current task + * migrated = time of task migration to another CPU * - * -----|------------|------------|------------|------ - * last ready tprev t + * -----|-------------|-------------|-------------|-------------|----- + * last ready migrated tprev t * time to run * - * |-------- dt_wait --------| - * |- dt_delay -|-- dt_run --| + * |---------------- dt_wait ----------------| + * |--------- dt_delay ---------|-- dt_run --| + * |- dt_pre_mig -| * - * dt_run = run time of current task - * dt_wait = time between last schedule out event for task and tprev - * represents time spent off the cpu - * dt_delay = time between wakeup and schedule-in of task + * dt_run = run time of current task + * dt_wait = time between last schedule out event for task and tprev + * represents time spent off the cpu + * dt_delay = time between wakeup and schedule-in of task + * dt_pre_mig = time between wakeup and migration to another CPU */ static void timehist_update_runtime_stats(struct thread_runtime *r, @@ -2249,6 +2247,7 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, r->dt_iowait = 0; r->dt_preempt = 0; r->dt_run = 0; + r->dt_pre_mig = 0; if (tprev) { r->dt_run = t - tprev; @@ -2257,6 +2256,9 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, pr_debug("time travel: wakeup time for task > previous sched_switch event\n"); else r->dt_delay = tprev - r->ready_to_run; + + if ((r->migrated > r->ready_to_run) && (r->migrated < tprev)) + r->dt_pre_mig = r->migrated - r->ready_to_run; } if (r->last_time > tprev) @@ -2280,6 +2282,7 @@ static void timehist_update_runtime_stats(struct thread_runtime *r, r->total_sleep_time += r->dt_sleep; r->total_iowait_time += r->dt_iowait; r->total_preempt_time += r->dt_preempt; + r->total_pre_mig_time += r->dt_pre_mig; } static bool is_idle_sample(struct perf_sample *sample, @@ -2693,9 +2696,13 @@ static int timehist_migrate_task_event(const struct perf_tool *tool, return -1; tr->migrations++; + tr->migrated = sample->time; /* show migrations if requested */ - timehist_print_migration_event(sched, evsel, sample, machine, thread); + if (sched->show_migrations) { + timehist_print_migration_event(sched, evsel, sample, + machine, thread); + } return 0; } @@ -2846,11 +2853,13 @@ out: /* last state is used to determine where to account wait time */ tr->last_state = state; - /* sched out event for task so reset ready to run time */ + /* sched out event for task so reset ready to run time and migrated time */ if (state == 'R') tr->ready_to_run = t; else tr->ready_to_run = 0; + + tr->migrated = 0; } evsel__save_time(evsel, sample->time, sample->cpu); @@ -3290,8 +3299,8 @@ static int perf_sched__timehist(struct perf_sched *sched) goto out; } - if (sched->show_migrations && - perf_session__set_tracepoints_handlers(session, migrate_handlers)) + if ((sched->show_migrations || sched->pre_migrations) && + perf_session__set_tracepoints_handlers(session, migrate_handlers)) goto out; /* pre-allocate struct for per-CPU idle stats */ @@ -3833,6 +3842,7 @@ int cmd_sched(int argc, const char **argv) OPT_BOOLEAN(0, "show-prio", &sched.show_prio, "Show task priority"), OPT_STRING(0, "prio", &sched.prio_str, "prio", "analyze events only for given task priority(ies)"), + OPT_BOOLEAN('P', "pre-migrations", &sched.pre_migrations, "Show pre-migration wait time"), OPT_PARENT(sched_options) }; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index a644787fa9e1..9e47905f75a6 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -67,7 +67,7 @@ #include <linux/ctype.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif static char const *script_name; @@ -1728,6 +1728,7 @@ static struct { {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_TRACE_END, "tr end"}, {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMENTRY, "vmentry"}, {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_CALL | PERF_IP_FLAG_VMEXIT, "vmexit"}, + {PERF_IP_FLAG_BRANCH | PERF_IP_FLAG_BRANCH_MISS, "br miss"}, {0, NULL} }; @@ -2136,11 +2137,11 @@ struct metric_ctx { }; static void script_print_metric(struct perf_stat_config *config __maybe_unused, - void *ctx, const char *color, - const char *fmt, - const char *unit, double val) + void *ctx, enum metric_threshold_classify thresh, + const char *fmt, const char *unit, double val) { struct metric_ctx *mctx = ctx; + const char *color = metric_threshold_classify__color(thresh); if (!fmt) return; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 689a3d43c258..fdf5172646a5 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -46,6 +46,7 @@ #include "util/parse-events.h" #include "util/pmus.h" #include "util/pmu.h" +#include "util/tool_pmu.h" #include "util/event.h" #include "util/evlist.h" #include "util/evsel.h" @@ -294,14 +295,14 @@ static int read_single_counter(struct evsel *counter, int cpu_map_idx, int threa * terminates. Use the wait4 values in that case. */ if (err && cpu_map_idx == 0 && - (evsel__tool_event(counter) == PERF_TOOL_USER_TIME || - evsel__tool_event(counter) == PERF_TOOL_SYSTEM_TIME)) { + (evsel__tool_event(counter) == TOOL_PMU__EVENT_USER_TIME || + evsel__tool_event(counter) == TOOL_PMU__EVENT_SYSTEM_TIME)) { u64 val, *start_time; struct perf_counts_values *count = perf_counts(counter->counts, cpu_map_idx, thread); start_time = xyarray__entry(counter->start_times, cpu_map_idx, thread); - if (evsel__tool_event(counter) == PERF_TOOL_USER_TIME) + if (evsel__tool_event(counter) == TOOL_PMU__EVENT_USER_TIME) val = ru_stats.ru_utime_usec_stat.mean; else val = ru_stats.ru_stime_usec_stat.mean; @@ -639,8 +640,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) * (behavior changed with commit b0a873e). */ if (errno == EINVAL || errno == ENOSYS || - errno == ENOENT || errno == EOPNOTSUPP || - errno == ENXIO) { + errno == ENOENT || errno == ENXIO) { if (verbose > 0) ui__warning("%s event is not supported by the kernel.\n", evsel__name(counter)); @@ -658,7 +658,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) if (verbose > 0) ui__warning("%s\n", msg); return COUNTER_RETRY; - } else if (target__has_per_thread(&target) && + } else if (target__has_per_thread(&target) && errno != EOPNOTSUPP && evsel_list->core.threads && evsel_list->core.threads->err_thread != -1) { /* @@ -679,6 +679,19 @@ static enum counter_recovery stat_handle_error(struct evsel *counter) return COUNTER_SKIP; } + if (errno == EOPNOTSUPP) { + if (verbose > 0) { + ui__warning("%s event is not supported by the kernel.\n", + evsel__name(counter)); + } + counter->supported = false; + counter->errored = true; + + if ((evsel__leader(counter) != counter) || + !(counter->core.leader->nr_members > 1)) + return COUNTER_SKIP; + } + evsel__open_strerror(counter, &target, errno, msg, sizeof(msg)); ui__error("%s\n", msg); @@ -716,15 +729,19 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx) } if (!cpu_map__is_dummy(evsel_list->core.user_requested_cpus)) { - if (affinity__setup(&saved_affinity) < 0) - return -1; + if (affinity__setup(&saved_affinity) < 0) { + err = -1; + goto err_out; + } affinity = &saved_affinity; } evlist__for_each_entry(evsel_list, counter) { counter->reset_group = false; - if (bpf_counter__load(counter, &target)) - return -1; + if (bpf_counter__load(counter, &target)) { + err = -1; + goto err_out; + } if (!(evsel__is_bperf(counter))) all_counters_use_bpf = false; } @@ -767,7 +784,8 @@ try_again: switch (stat_handle_error(counter)) { case COUNTER_FATAL: - return -1; + err = -1; + goto err_out; case COUNTER_RETRY: goto try_again; case COUNTER_SKIP: @@ -808,7 +826,8 @@ try_again_reset: switch (stat_handle_error(counter)) { case COUNTER_FATAL: - return -1; + err = -1; + goto err_out; case COUNTER_RETRY: goto try_again_reset; case COUNTER_SKIP: @@ -821,6 +840,7 @@ try_again_reset: } } affinity__cleanup(affinity); + affinity = NULL; evlist__for_each_entry(evsel_list, counter) { if (!counter->supported) { @@ -833,8 +853,10 @@ try_again_reset: stat_config.unit_width = l; if (evsel__should_store_id(counter) && - evsel__store_ids(counter, evsel_list)) - return -1; + evsel__store_ids(counter, evsel_list)) { + err = -1; + goto err_out; + } } if (evlist__apply_filters(evsel_list, &counter, &target)) { @@ -855,20 +877,23 @@ try_again_reset: } if (err < 0) - return err; + goto err_out; err = perf_event__synthesize_stat_events(&stat_config, NULL, evsel_list, process_synthesized_event, is_pipe); if (err < 0) - return err; + goto err_out; + } if (target.initial_delay) { pr_info(EVLIST_DISABLED_MSG); } else { err = enable_counters(); - if (err) - return -1; + if (err) { + err = -1; + goto err_out; + } } /* Exec the command, if any */ @@ -878,8 +903,10 @@ try_again_reset: if (target.initial_delay > 0) { usleep(target.initial_delay * USEC_PER_MSEC); err = enable_counters(); - if (err) - return -1; + if (err) { + err = -1; + goto err_out; + } pr_info(EVLIST_ENABLED_MSG); } @@ -899,7 +926,8 @@ try_again_reset: if (workload_exec_errno) { const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg)); pr_err("Workload failed: %s\n", emsg); - return -1; + err = -1; + goto err_out; } if (WIFSIGNALED(status)) @@ -946,8 +974,23 @@ try_again_reset: evlist__close(evsel_list); return WEXITSTATUS(status); + +err_out: + if (forks) + evlist__cancel_workload(evsel_list); + + affinity__cleanup(affinity); + return err; } +/* + * Returns -1 for fatal errors which signifies to not continue + * when in repeat mode. + * + * Returns < -1 error codes when stat record is used. These + * result in the stat information being displayed, but writing + * to the file fails and is non fatal. + */ static int run_perf_stat(int argc, const char **argv, int run_idx) { int ret; @@ -1814,130 +1857,25 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) } /* - * Add default attributes, if there were no attributes specified or + * Add default events, if there were no attributes specified or * if -d/--detailed, -d -d or -d -d -d is used: */ -static int add_default_attributes(void) +static int add_default_events(void) { - struct perf_event_attr default_attrs0[] = { - - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CONTEXT_SWITCHES }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_MIGRATIONS }, - { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_PAGE_FAULTS }, - - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }, -}; - struct perf_event_attr frontend_attrs[] = { - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, -}; - struct perf_event_attr backend_attrs[] = { - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, -}; - struct perf_event_attr default_attrs1[] = { - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, - { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_BRANCH_MISSES }, - -}; - -/* - * Detailed stats (-d), covering the L1 and last level data caches: - */ - struct perf_event_attr detailed_attrs[] = { - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_LL << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_LL << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, -}; - -/* - * Very detailed stats (-d -d), covering the instruction cache and the TLB caches: - */ - struct perf_event_attr very_detailed_attrs[] = { - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1I << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1I << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_DTLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_DTLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_ITLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_ITLB << 0 | - (PERF_COUNT_HW_CACHE_OP_READ << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, - -}; + const char *pmu = parse_events_option_args.pmu_filter ?: "all"; + struct parse_events_error err; + struct evlist *evlist = evlist__new(); + struct evsel *evsel; + int ret = 0; -/* - * Very, very detailed stats (-d -d -d), adding prefetch events: - */ - struct perf_event_attr very_very_detailed_attrs[] = { - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | - (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) }, - - { .type = PERF_TYPE_HW_CACHE, - .config = - PERF_COUNT_HW_CACHE_L1D << 0 | - (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | - (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) }, -}; + if (!evlist) + return -ENOMEM; - struct perf_event_attr default_null_attrs[] = {}; - const char *pmu = parse_events_option_args.pmu_filter ?: "all"; + parse_events_error__init(&err); /* Set attrs if no event is selected and !null_run: */ if (stat_config.null_run) - return 0; + goto out; if (transaction_run) { /* Handle -T as -M transaction. Once platform specific metrics @@ -1947,9 +1885,10 @@ static int add_default_attributes(void) */ if (!metricgroup__has_metric(pmu, "transaction")) { pr_err("Missing transaction metrics\n"); - return -1; + ret = -1; + goto out; } - return metricgroup__parse_groups(evsel_list, pmu, "transaction", + ret = metricgroup__parse_groups(evlist, pmu, "transaction", stat_config.metric_no_group, stat_config.metric_no_merge, stat_config.metric_no_threshold, @@ -1957,6 +1896,7 @@ static int add_default_attributes(void) stat_config.system_wide, stat_config.hardware_aware_grouping, &stat_config.metric_events); + goto out; } if (smi_cost) { @@ -1964,26 +1904,29 @@ static int add_default_attributes(void) if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) { pr_err("freeze_on_smi is not supported.\n"); - return -1; + ret = -1; + goto out; } if (!smi) { if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) { - fprintf(stderr, "Failed to set freeze_on_smi.\n"); - return -1; + pr_err("Failed to set freeze_on_smi.\n"); + ret = -1; + goto out; } smi_reset = true; } if (!metricgroup__has_metric(pmu, "smi")) { pr_err("Missing smi metrics\n"); - return -1; + ret = -1; + goto out; } if (!force_metric_only) stat_config.metric_only = true; - return metricgroup__parse_groups(evsel_list, pmu, "smi", + ret = metricgroup__parse_groups(evlist, pmu, "smi", stat_config.metric_no_group, stat_config.metric_no_merge, stat_config.metric_no_threshold, @@ -1991,6 +1934,7 @@ static int add_default_attributes(void) stat_config.system_wide, stat_config.hardware_aware_grouping, &stat_config.metric_events); + goto out; } if (topdown_run) { @@ -2003,21 +1947,23 @@ static int add_default_attributes(void) if (!max_level) { pr_err("Topdown requested but the topdown metric groups aren't present.\n" "(See perf list the metric groups have names like TopdownL1)\n"); - return -1; + ret = -1; + goto out; } if (stat_config.topdown_level > max_level) { pr_err("Invalid top-down metrics level. The max level is %u.\n", max_level); - return -1; - } else if (!stat_config.topdown_level) + ret = -1; + goto out; + } else if (!stat_config.topdown_level) { stat_config.topdown_level = 1; - + } if (!stat_config.interval && !stat_config.metric_only) { fprintf(stat_config.output, "Topdown accuracy may decrease when measuring long periods.\n" "Please print the result regularly, e.g. -I1000\n"); } str[8] = stat_config.topdown_level + '0'; - if (metricgroup__parse_groups(evsel_list, + if (metricgroup__parse_groups(evlist, pmu, str, /*metric_no_group=*/false, /*metric_no_merge=*/false, @@ -2025,41 +1971,49 @@ static int add_default_attributes(void) stat_config.user_requested_cpu_list, stat_config.system_wide, stat_config.hardware_aware_grouping, - &stat_config.metric_events) < 0) - return -1; + &stat_config.metric_events) < 0) { + ret = -1; + goto out; + } } if (!stat_config.topdown_level) stat_config.topdown_level = 1; - if (!evsel_list->core.nr_entries) { + if (!evlist->core.nr_entries && !evsel_list->core.nr_entries) { /* No events so add defaults. */ if (target__has_cpu(&target)) - default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK; + ret = parse_events(evlist, "cpu-clock", &err); + else + ret = parse_events(evlist, "task-clock", &err); + if (ret) + goto out; + + ret = parse_events(evlist, + "context-switches," + "cpu-migrations," + "page-faults," + "instructions," + "cycles," + "stalled-cycles-frontend," + "stalled-cycles-backend," + "branches," + "branch-misses", + &err); + if (ret) + goto out; - if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0) - return -1; - if (perf_pmus__have_event("cpu", "stalled-cycles-frontend")) { - if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0) - return -1; - } - if (perf_pmus__have_event("cpu", "stalled-cycles-backend")) { - if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0) - return -1; - } - if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0) - return -1; /* * Add TopdownL1 metrics if they exist. To minimize * multiplexing, don't request threshold computation. */ if (metricgroup__has_metric(pmu, "Default")) { struct evlist *metric_evlist = evlist__new(); - struct evsel *metric_evsel; - - if (!metric_evlist) - return -1; + if (!metric_evlist) { + ret = -ENOMEM; + goto out; + } if (metricgroup__parse_groups(metric_evlist, pmu, "Default", /*metric_no_group=*/false, /*metric_no_merge=*/false, @@ -2067,43 +2021,71 @@ static int add_default_attributes(void) stat_config.user_requested_cpu_list, stat_config.system_wide, stat_config.hardware_aware_grouping, - &stat_config.metric_events) < 0) - return -1; - - evlist__for_each_entry(metric_evlist, metric_evsel) { - metric_evsel->skippable = true; - metric_evsel->default_metricgroup = true; + &stat_config.metric_events) < 0) { + ret = -1; + goto out; } - evlist__splice_list_tail(evsel_list, &metric_evlist->core.entries); + + evlist__for_each_entry(metric_evlist, evsel) + evsel->default_metricgroup = true; + + evlist__splice_list_tail(evlist, &metric_evlist->core.entries); evlist__delete(metric_evlist); } - - /* Platform specific attrs */ - if (evlist__add_default_attrs(evsel_list, default_null_attrs) < 0) - return -1; } /* Detailed events get appended to the event list: */ - if (detailed_run < 1) - return 0; - - /* Append detailed run extra attributes: */ - if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0) - return -1; - - if (detailed_run < 2) - return 0; - - /* Append very detailed run extra attributes: */ - if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0) - return -1; - - if (detailed_run < 3) - return 0; - - /* Append very, very detailed run extra attributes: */ - return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs); + if (!ret && detailed_run >= 1) { + /* + * Detailed stats (-d), covering the L1 and last level data + * caches: + */ + ret = parse_events(evlist, + "L1-dcache-loads," + "L1-dcache-load-misses," + "LLC-loads," + "LLC-load-misses", + &err); + } + if (!ret && detailed_run >= 2) { + /* + * Very detailed stats (-d -d), covering the instruction cache + * and the TLB caches: + */ + ret = parse_events(evlist, + "L1-icache-loads," + "L1-icache-load-misses," + "dTLB-loads," + "dTLB-load-misses," + "iTLB-loads," + "iTLB-load-misses", + &err); + } + if (!ret && detailed_run >= 3) { + /* + * Very, very detailed stats (-d -d -d), adding prefetch events: + */ + ret = parse_events(evlist, + "L1-dcache-prefetches," + "L1-dcache-prefetch-misses", + &err); + } +out: + if (!ret) { + evlist__for_each_entry(evlist, evsel) { + /* + * Make at least one event non-skippable so fatal errors are visible. + * 'cycles' always used to be default and non-skippable, so use that. + */ + if (strcmp("cycles", evsel__name(evsel))) + evsel->skippable = true; + } + } + parse_events_error__exit(&err); + evlist__splice_list_tail(evsel_list, &evlist->core.entries); + evlist__delete(evlist); + return ret; } static const char * const stat_record_usage[] = { @@ -2591,6 +2573,14 @@ int cmd_stat(int argc, const char **argv) goto out; } + if (stat_config.csv_output || (stat_config.metric_only && stat_config.json_output)) { + /* + * Current CSV and metric-only JSON output doesn't display the + * metric threshold so don't compute it. + */ + stat_config.metric_no_threshold = true; + } + if (stat_config.walltime_run_table && stat_config.run_count <= 1) { fprintf(stderr, "--table is only supported with -r\n"); parse_options_usage(stat_usage, stat_options, "r", 1); @@ -2651,6 +2641,7 @@ int cmd_stat(int argc, const char **argv) } else if (big_num_opt == 0) /* User passed --no-big-num */ stat_config.big_num = false; + target.inherit = !stat_config.no_inherit; err = target__validate(&target); if (err) { target__strerror(&target, err, errbuf, BUFSIZ); @@ -2760,7 +2751,7 @@ int cmd_stat(int argc, const char **argv) } } - if (add_default_attributes()) + if (add_default_events()) goto out; if (stat_config.cgroup_list) { @@ -2879,7 +2870,10 @@ int cmd_stat(int argc, const char **argv) evlist__reset_prev_raw_counts(evsel_list); status = run_perf_stat(argc, argv, run_idx); - if (forever && status != -1 && !interval) { + if (status == -1) + break; + + if (forever && !interval) { print_counters(NULL, argc, argv); perf_stat__reset_stats(); } diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index 218c8b44d7be..068d297aaf44 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -38,7 +38,7 @@ #include "util/tracepoint.h" #include "util/util.h" #include <linux/err.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #ifdef LACKS_OPEN_MEMSTREAM_PROTOTYPE FILE *open_memstream(char **ptr, size_t *sizeloc); @@ -1158,7 +1158,6 @@ static void draw_io_bars(struct timechart *tchart) } svg_box(Y, c->start_time, c->end_time, "process3"); - sample = c->io_samples; for (sample = c->io_samples; sample; sample = sample->next) { double h = (double)sample->bytes / c->max_bytes; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f6e847529073..6a1a128fe645 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -88,7 +88,7 @@ #include <perf/mmap.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #ifndef O_CLOEXEC @@ -1399,7 +1399,7 @@ static const struct syscall_fmt syscall_fmts[] = { .arg = { [2] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, { .name = "waitid", .errpid = true, .arg = { [3] = { .scnprintf = SCA_WAITID_OPTIONS, /* options */ }, }, }, - { .name = "write", .errpid = true, + { .name = "write", .arg = { [1] = { .scnprintf = SCA_BUF /* buf */, .from_user = true, }, }, }, }; @@ -1873,7 +1873,7 @@ static int trace__process_event(struct trace *trace, struct machine *machine, switch (event->header.type) { case PERF_RECORD_LOST: color_fprintf(trace->output, PERF_COLOR_RED, - "LOST %" PRIu64 " events!\n", event->lost.lost); + "LOST %" PRIu64 " events!\n", (u64)event->lost.lost); ret = machine__process_lost_event(machine, event, sample); break; default: @@ -2702,6 +2702,7 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel, char msg[1024]; void *args, *augmented_args = NULL; int augmented_args_size; + size_t printed = 0; if (sc == NULL) return -1; @@ -2717,8 +2718,8 @@ static int trace__fprintf_sys_enter(struct trace *trace, struct evsel *evsel, args = perf_evsel__sc_tp_ptr(evsel, args, sample); augmented_args = syscall__augmented_args(sc, sample, &augmented_args_size, trace->raw_augmented_syscalls_args_size); - syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); - fprintf(trace->output, "%s", msg); + printed += syscall__scnprintf_args(sc, msg, sizeof(msg), args, augmented_args, augmented_args_size, trace, thread); + fprintf(trace->output, "%.*s", (int)printed, msg); err = 0; out_put: thread__put(thread); @@ -3087,7 +3088,7 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel, printed += syscall_arg_fmt__scnprintf_val(arg, bf + printed, size - printed, &syscall_arg, val); } - return printed + fprintf(trace->output, "%s", bf); + return printed + fprintf(trace->output, "%.*s", (int)printed, bf); } static int trace__event_handler(struct trace *trace, struct evsel *evsel, @@ -3096,13 +3097,8 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel, { struct thread *thread; int callchain_ret = 0; - /* - * Check if we called perf_evsel__disable(evsel) due to, for instance, - * this event's max_events having been hit and this is an entry coming - * from the ring buffer that we should discard, since the max events - * have already been considered/printed. - */ - if (evsel->disabled) + + if (evsel->nr_events_printed >= evsel->max_events) return 0; thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); @@ -4326,6 +4322,9 @@ static int trace__run(struct trace *trace, int argc, const char **argv) sizeof(__u32), BPF_ANY); } } + + if (trace->skel) + trace->filter_pids.map = trace->skel->maps.pids_filtered; #endif err = trace__set_filter_pids(trace); if (err < 0) @@ -5449,6 +5448,10 @@ init_augmented_syscall_tp: if (trace.summary_only) trace.summary = trace.summary_only; + /* Keep exited threads, otherwise information might be lost for summary */ + if (trace.summary) + symbol_conf.keep_exited_threads = true; + if (output_name != NULL) { err = trace__open_output(&trace, output_name); if (err < 0) { diff --git a/tools/perf/check-header_ignore_hunks/lib/list_sort.c b/tools/perf/check-header_ignore_hunks/lib/list_sort.c new file mode 100644 index 000000000000..b7316d29857d --- /dev/null +++ b/tools/perf/check-header_ignore_hunks/lib/list_sort.c @@ -0,0 +1,24 @@ +@@ -50,6 +50,7 @@ + struct list_head *a, struct list_head *b) + { + struct list_head *tail = head; ++ u8 count = 0; + + for (;;) { + /* if equal, take 'a' -- important for sort stability */ +@@ -75,6 +76,15 @@ + /* Finish linking remainder of list b on to tail */ + tail->next = b; + do { ++ /* ++ * If the merge is highly unbalanced (e.g. the input is ++ * already sorted), this loop may run many iterations. ++ * Continue callbacks to the client even though no ++ * element comparison is needed, so the client's cmp() ++ * routine can invoke cond_resched() periodically. ++ */ ++ if (unlikely(!++count)) ++ cmp(priv, b, b); + b->prev = tail; + tail = b; + b = b->next; diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 714c78e5da07..a05c1c105c51 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -22,6 +22,7 @@ FILES=( "include/vdso/bits.h" "include/linux/const.h" "include/vdso/const.h" + "include/vdso/unaligned.h" "include/linux/hash.h" "include/linux/list-sort.h" "include/uapi/linux/hw_breakpoint.h" @@ -136,6 +137,30 @@ beauty_check () { check_2 "tools/perf/trace/beauty/$file" "$file" "$@" } +check_ignore_some_hunks () { + orig_file="$1" + tools_file="tools/$orig_file" + hunks_to_ignore="tools/perf/check-header_ignore_hunks/$orig_file" + + if [ ! -f "$hunks_to_ignore" ]; then + echo "$hunks_to_ignore not found. Skipping $orig_file check." + FAILURES+=( + "$tools_file $orig_file" + ) + return + fi + + cmd="diff -u \"$tools_file\" \"$orig_file\" | grep -vf \"$hunks_to_ignore\" | wc -l | grep -qw 0" + + if [ -f "$orig_file" ] && ! eval "$cmd" + then + FAILURES+=( + "$tools_file $orig_file" + ) + fi +} + + # Check if we have the kernel headers (tools/perf/../../include), else # we're probably on a detached tarball, so no point in trying to check # differences. @@ -163,13 +188,12 @@ check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/ex check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' check arch/x86/include/asm/amd-ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"' -check include/asm-generic/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"' +check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' check include/linux/ctype.h '-I "isdigit("' check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B' -check lib/list_sort.c '-I "^#include <linux/bug.h>"' # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_32.tbl arch/x86/entry/syscalls/syscall_32.tbl @@ -187,6 +211,10 @@ done check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c +# Files with larger differences + +check_ignore_some_hunks lib/list_sort.c + cd tools/perf || exit if [ ${#FAILURES[@]} -gt 0 ] diff --git a/tools/perf/dlfilters/dlfilter-test-api-v0.c b/tools/perf/dlfilters/dlfilter-test-api-v0.c index 4083b1abeaab..4ca2d7b2ea6c 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v0.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v0.c @@ -220,7 +220,7 @@ static int check_sample(struct filter_data *d, const struct perf_dlfilter_sample CHECK_SAMPLE(raw_callchain_nr); CHECK(!sample->raw_callchain); -#define EVENT_NAME "branches:" +#define EVENT_NAME "branches" CHECK(!strncmp(sample->event, EVENT_NAME, strlen(EVENT_NAME))); return 0; diff --git a/tools/perf/dlfilters/dlfilter-test-api-v2.c b/tools/perf/dlfilters/dlfilter-test-api-v2.c index 32ff619e881c..00d73a16c4fd 100644 --- a/tools/perf/dlfilters/dlfilter-test-api-v2.c +++ b/tools/perf/dlfilters/dlfilter-test-api-v2.c @@ -235,7 +235,7 @@ static int check_sample(struct filter_data *d, const struct perf_dlfilter_sample CHECK_SAMPLE(raw_callchain_nr); CHECK(!sample->raw_callchain); -#define EVENT_NAME "branches:" +#define EVENT_NAME "branches" CHECK(!strncmp(sample->event, EVENT_NAME, strlen(EVENT_NAME))); return 0; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 4def800f4089..a2987f2cfe1a 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -542,8 +542,6 @@ int main(int argc, const char **argv) } cmd = argv[0]; - test_attr__init(); - /* * We use PATH to find perf commands, but we prepend some higher * precedence paths: the "--exec-path" option, the PERF_EXEC_PATH diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/ddrc.json b/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/ddrc.json new file mode 100644 index 000000000000..74ac12660a29 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/ddrc.json @@ -0,0 +1,9 @@ +[ + { + "BriefDescription": "ddr cycles event", + "EventCode": "0x00", + "EventName": "imx91_ddr.cycles", + "Unit": "imx9_ddr", + "Compat": "imx91" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/metrics.json new file mode 100644 index 000000000000..f0c5911eb2d0 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx91/sys/metrics.json @@ -0,0 +1,26 @@ +[ + { + "BriefDescription": "bandwidth usage for lpddr4 evk board", + "MetricName": "imx91_bandwidth_usage.lpddr4", + "MetricExpr": "(((( imx9_ddr0@ddrc_pm_0@ ) * 2 * 8 ) + (( imx9_ddr0@ddrc_pm_3@ + imx9_ddr0@ddrc_pm_5@ + imx9_ddr0@ddrc_pm_7@ + imx9_ddr0@ddrc_pm_9@ - imx9_ddr0@ddrc_pm_2@ - imx9_ddr0@ddrc_pm_4@ - imx9_ddr0@ddrc_pm_6@ - imx9_ddr0@ddrc_pm_8@ ) * 32 )) / duration_time) / (2400 * 1000000 * 2)", + "ScaleUnit": "1e2%", + "Unit": "imx9_ddr", + "Compat": "imx91" + }, + { + "BriefDescription": "bytes all masters read from ddr", + "MetricName": "imx91_ddr_read.all", + "MetricExpr": "( imx9_ddr0@ddrc_pm_0@ ) * 2 * 8", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx9_ddr", + "Compat": "imx91" + }, + { + "BriefDescription": "bytes all masters write to ddr", + "MetricName": "imx91_ddr_write.all", + "MetricExpr": "( imx9_ddr0@ddrc_pm_3@ + imx9_ddr0@ddrc_pm_5@ + imx9_ddr0@ddrc_pm_7@ + imx9_ddr0@ddrc_pm_9@ - imx9_ddr0@ddrc_pm_2@ - imx9_ddr0@ddrc_pm_4@ - imx9_ddr0@ddrc_pm_6@ - imx9_ddr0@ddrc_pm_8@ ) * 32", + "ScaleUnit": "9.765625e-4KB", + "Unit": "imx9_ddr", + "Compat": "imx91" + } +] diff --git a/tools/perf/pmu-events/arch/arm64/freescale/imx95/sys/metrics.json b/tools/perf/pmu-events/arch/arm64/freescale/imx95/sys/metrics.json index 126ce980f6f2..45a0d51dfb63 100644 --- a/tools/perf/pmu-events/arch/arm64/freescale/imx95/sys/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/freescale/imx95/sys/metrics.json @@ -8,6 +8,14 @@ "Compat": "imx95" }, { + "BriefDescription": "bandwidth usage for lpddr4x evk board", + "MetricName": "imx95_bandwidth_usage.lpddr4x", + "MetricExpr": "(( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x000\\,axi_id\\=0x000@ + imx9_ddr0@eddrtq_pm_wr_beat_filt\\,axi_mask\\=0x000\\,axi_id\\=0x000@ ) * 32 / duration_time) / (4000 * 1000000 * 4)", + "ScaleUnit": "1e2%", + "Unit": "imx9_ddr", + "Compat": "imx95" + }, + { "BriefDescription": "bytes of all masters read from ddr", "MetricName": "imx95_ddr_read.all", "MetricExpr": "( imx9_ddr0@eddrtq_pm_rd_beat_filt0\\,axi_mask\\=0x000\\,axi_id\\=0x000@ ) * 32", diff --git a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json index 6463531b9941..b6a0d2de8534 100644 --- a/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json +++ b/tools/perf/pmu-events/arch/arm64/hisilicon/hip08/metrics.json @@ -3,235 +3,235 @@ "MetricExpr": "FETCH_BUBBLE / (4 * CPU_CYCLES)", "PublicDescription": "Frontend bound L1 topdown metric", "BriefDescription": "Frontend bound L1 topdown metric", - "DefaultMetricgroupName": "TopDownL1", - "MetricGroup": "Default;TopDownL1", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", "MetricName": "frontend_bound" }, { "MetricExpr": "(INST_SPEC - INST_RETIRED) / (4 * CPU_CYCLES)", "PublicDescription": "Bad Speculation L1 topdown metric", "BriefDescription": "Bad Speculation L1 topdown metric", - "DefaultMetricgroupName": "TopDownL1", - "MetricGroup": "Default;TopDownL1", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", "MetricName": "bad_speculation" }, { "MetricExpr": "INST_RETIRED / (CPU_CYCLES * 4)", "PublicDescription": "Retiring L1 topdown metric", "BriefDescription": "Retiring L1 topdown metric", - "DefaultMetricgroupName": "TopDownL1", - "MetricGroup": "Default;TopDownL1", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", "MetricName": "retiring" }, { "MetricExpr": "1 - (frontend_bound + bad_speculation + retiring)", "PublicDescription": "Backend Bound L1 topdown metric", "BriefDescription": "Backend Bound L1 topdown metric", - "DefaultMetricgroupName": "TopDownL1", - "MetricGroup": "Default;TopDownL1", + "DefaultMetricgroupName": "TopdownL1", + "MetricGroup": "Default;TopdownL1", "MetricName": "backend_bound" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x201d@ / CPU_CYCLES", "PublicDescription": "Fetch latency bound L2 topdown metric", "BriefDescription": "Fetch latency bound L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "fetch_latency_bound" }, { "MetricExpr": "frontend_bound - fetch_latency_bound", "PublicDescription": "Fetch bandwidth bound L2 topdown metric", "BriefDescription": "Fetch bandwidth bound L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "fetch_bandwidth_bound" }, { "MetricExpr": "(bad_speculation * BR_MIS_PRED) / (BR_MIS_PRED + armv8_pmuv3_0@event\\=0x2013@)", "PublicDescription": "Branch mispredicts L2 topdown metric", "BriefDescription": "Branch mispredicts L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "branch_mispredicts" }, { "MetricExpr": "bad_speculation - branch_mispredicts", "PublicDescription": "Machine clears L2 topdown metric", "BriefDescription": "Machine clears L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "machine_clears" }, { "MetricExpr": "(EXE_STALL_CYCLE - (MEM_STALL_ANYLOAD + armv8_pmuv3_0@event\\=0x7005@)) / CPU_CYCLES", "PublicDescription": "Core bound L2 topdown metric", "BriefDescription": "Core bound L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "core_bound" }, { "MetricExpr": "(MEM_STALL_ANYLOAD + armv8_pmuv3_0@event\\=0x7005@) / CPU_CYCLES", "PublicDescription": "Memory bound L2 topdown metric", "BriefDescription": "Memory bound L2 topdown metric", - "MetricGroup": "TopDownL2", + "MetricGroup": "TopdownL2", "MetricName": "memory_bound" }, { "MetricExpr": "(((L2I_TLB - L2I_TLB_REFILL) * 15) + (L2I_TLB_REFILL * 100)) / CPU_CYCLES", "PublicDescription": "Idle by itlb miss L3 topdown metric", "BriefDescription": "Idle by itlb miss L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "idle_by_itlb_miss" }, { "MetricExpr": "(((L2I_CACHE - L2I_CACHE_REFILL) * 15) + (L2I_CACHE_REFILL * 100)) / CPU_CYCLES", "PublicDescription": "Idle by icache miss L3 topdown metric", "BriefDescription": "Idle by icache miss L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "idle_by_icache_miss" }, { "MetricExpr": "(BR_MIS_PRED * 5) / CPU_CYCLES", "PublicDescription": "BP misp flush L3 topdown metric", "BriefDescription": "BP misp flush L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "bp_misp_flush" }, { "MetricExpr": "(armv8_pmuv3_0@event\\=0x2013@ * 5) / CPU_CYCLES", "PublicDescription": "OOO flush L3 topdown metric", "BriefDescription": "OOO flush L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "ooo_flush" }, { "MetricExpr": "(armv8_pmuv3_0@event\\=0x1001@ * 5) / CPU_CYCLES", "PublicDescription": "Static predictor flush L3 topdown metric", "BriefDescription": "Static predictor flush L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "sp_flush" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x1010@ / BR_MIS_PRED", "PublicDescription": "Indirect branch L3 topdown metric", "BriefDescription": "Indirect branch L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "indirect_branch" }, { "MetricExpr": "(armv8_pmuv3_0@event\\=0x1013@ + armv8_pmuv3_0@event\\=0x1016@) / BR_MIS_PRED", "PublicDescription": "Push branch L3 topdown metric", "BriefDescription": "Push branch L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "push_branch" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x100d@ / BR_MIS_PRED", "PublicDescription": "Pop branch L3 topdown metric", "BriefDescription": "Pop branch L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "pop_branch" }, { "MetricExpr": "(BR_MIS_PRED - armv8_pmuv3_0@event\\=0x1010@ - armv8_pmuv3_0@event\\=0x1013@ - armv8_pmuv3_0@event\\=0x1016@ - armv8_pmuv3_0@event\\=0x100d@) / BR_MIS_PRED", "PublicDescription": "Other branch L3 topdown metric", "BriefDescription": "Other branch L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "other_branch" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x2012@ / armv8_pmuv3_0@event\\=0x2013@", "PublicDescription": "Nuke flush L3 topdown metric", "BriefDescription": "Nuke flush L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "nuke_flush" }, { "MetricExpr": "1 - nuke_flush", "PublicDescription": "Other flush L3 topdown metric", "BriefDescription": "Other flush L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "other_flush" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x2010@ / CPU_CYCLES", "PublicDescription": "Sync stall L3 topdown metric", "BriefDescription": "Sync stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "sync_stall" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x2004@ / CPU_CYCLES", "PublicDescription": "Rob stall L3 topdown metric", "BriefDescription": "Rob stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "rob_stall" }, { "MetricExpr": "(armv8_pmuv3_0@event\\=0x2006@ + armv8_pmuv3_0@event\\=0x2007@ + armv8_pmuv3_0@event\\=0x2008@) / CPU_CYCLES", "PublicDescription": "Ptag stall L3 topdown metric", "BriefDescription": "Ptag stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "ptag_stall" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x201e@ / CPU_CYCLES", "PublicDescription": "SaveOpQ stall L3 topdown metric", "BriefDescription": "SaveOpQ stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "saveopq_stall" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x2005@ / CPU_CYCLES", "PublicDescription": "PC buffer stall L3 topdown metric", "BriefDescription": "PC buffer stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "pc_buffer_stall" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x7002@ / CPU_CYCLES", "PublicDescription": "Divider L3 topdown metric", "BriefDescription": "Divider L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "divider" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x7003@ / CPU_CYCLES", "PublicDescription": "FSU stall L3 topdown metric", "BriefDescription": "FSU stall L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "fsu_stall" }, { "MetricExpr": "core_bound - divider - fsu_stall", "PublicDescription": "EXE ports util L3 topdown metric", "BriefDescription": "EXE ports util L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "exe_ports_util" }, { "MetricExpr": "(MEM_STALL_ANYLOAD - MEM_STALL_L1MISS) / CPU_CYCLES", "PublicDescription": "L1 bound L3 topdown metric", "BriefDescription": "L1 bound L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "l1_bound" }, { "MetricExpr": "(MEM_STALL_L1MISS - MEM_STALL_L2MISS) / CPU_CYCLES", "PublicDescription": "L2 bound L3 topdown metric", "BriefDescription": "L2 bound L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "l2_bound" }, { "MetricExpr": "MEM_STALL_L2MISS / CPU_CYCLES", "PublicDescription": "Mem bound L3 topdown metric", "BriefDescription": "Mem bound L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "mem_bound" }, { "MetricExpr": "armv8_pmuv3_0@event\\=0x7005@ / CPU_CYCLES", "PublicDescription": "Store bound L3 topdown metric", "BriefDescription": "Store bound L3 topdown metric", - "MetricGroup": "TopDownL3", + "MetricGroup": "TopdownL3", "MetricName": "store_bound" } ] diff --git a/tools/perf/pmu-events/arch/common/common/tool.json b/tools/perf/pmu-events/arch/common/common/tool.json new file mode 100644 index 000000000000..12f2ef1813a6 --- /dev/null +++ b/tools/perf/pmu-events/arch/common/common/tool.json @@ -0,0 +1,74 @@ +[ + { + "Unit": "tool", + "EventName": "duration_time", + "BriefDescription": "Wall clock interval time in nanoseconds", + "ConfigCode": "1" + }, + { + "Unit": "tool", + "EventName": "user_time", + "BriefDescription": "User (non-kernel) time in nanoseconds", + "ConfigCode": "2" + }, + { + "Unit": "tool", + "EventName": "system_time", + "BriefDescription": "System/kernel time in nanoseconds", + "ConfigCode": "3" + }, + { + "Unit": "tool", + "EventName": "has_pmem", + "BriefDescription": "1 if persistent memory installed otherwise 0", + "ConfigCode": "4" + }, + { + "Unit": "tool", + "EventName": "num_cores", + "BriefDescription": "Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU", + "ConfigCode": "5" + }, + { + "Unit": "tool", + "EventName": "num_cpus", + "BriefDescription": "Number of logical Linux CPUs. There may be multiple such CPUs on a core", + "ConfigCode": "6" + }, + { + "Unit": "tool", + "EventName": "num_cpus_online", + "BriefDescription": "Number of online logical Linux CPUs. There may be multiple such CPUs on a core", + "ConfigCode": "7" + }, + { + "Unit": "tool", + "EventName": "num_dies", + "BriefDescription": "Number of dies. Each die has 1 or more cores", + "ConfigCode": "8" + }, + { + "Unit": "tool", + "EventName": "num_packages", + "BriefDescription": "Number of packages. Each package has 1 or more die", + "ConfigCode": "9" + }, + { + "Unit": "tool", + "EventName": "slots", + "BriefDescription": "Number of functional units that in parallel can execute parts of an instruction", + "ConfigCode": "10" + }, + { + "Unit": "tool", + "EventName": "smt_on", + "BriefDescription": "1 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0", + "ConfigCode": "11" + }, + { + "Unit": "tool", + "EventName": "system_tsc_freq", + "BriefDescription": "The amount a Time Stamp Counter (TSC) increases per second", + "ConfigCode": "12" + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/compat/generic-events.json b/tools/perf/pmu-events/arch/powerpc/compat/generic-events.json new file mode 100644 index 000000000000..6f5e8efcb098 --- /dev/null +++ b/tools/perf/pmu-events/arch/powerpc/compat/generic-events.json @@ -0,0 +1,117 @@ +[ + { + "EventCode": "0x600F4", + "EventName": "PM_CYC", + "BriefDescription": "Processor cycles." + }, + { + "EventCode": "0x100F2", + "EventName": "PM_CYC_INST_CMPL", + "BriefDescription": "1 or more ppc insts finished" + }, + { + "EventCode": "0x100f4", + "EventName": "PM_FLOP_CMPL", + "BriefDescription": "Floating Point Operations Finished." + }, + { + "EventCode": "0x100F6", + "EventName": "PM_L1_ITLB_MISS", + "BriefDescription": "Number of I-ERAT reloads." + }, + { + "EventCode": "0x100F8", + "EventName": "PM_NO_INST_AVAIL", + "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread." + }, + { + "EventCode": "0x100fc", + "EventName": "PM_LD_CMPL", + "BriefDescription": "Load instruction completed." + }, + { + "EventCode": "0x200F0", + "EventName": "PM_ST_CMPL", + "BriefDescription": "Stores completed from S2Q (2nd-level store queue)." + }, + { + "EventCode": "0x200F2", + "EventName": "PM_INST_DISP", + "BriefDescription": "PowerPC instruction dispatched." + }, + { + "EventCode": "0x200F4", + "EventName": "PM_RUN_CYC", + "BriefDescription": "Processor cycles gated by the run latch." + }, + { + "EventCode": "0x200F6", + "EventName": "PM_L1_DTLB_RELOAD", + "BriefDescription": "DERAT Reloaded due to a DERAT miss." + }, + { + "EventCode": "0x200FA", + "EventName": "PM_BR_TAKEN_CMPL", + "BriefDescription": "Branch Taken instruction completed." + }, + { + "EventCode": "0x200FC", + "EventName": "PM_L1_ICACHE_MISS", + "BriefDescription": "Demand instruction cache miss." + }, + { + "EventCode": "0x200FE", + "EventName": "PM_L1_RELOAD_FROM_MEM", + "BriefDescription": "L1 Dcache reload from memory" + }, + { + "EventCode": "0x300F0", + "EventName": "PM_ST_MISS_L1", + "BriefDescription": "Store Missed L1" + }, + { + "EventCode": "0x300FC", + "EventName": "PM_DTLB_MISS", + "BriefDescription": "Data PTEG reload" + }, + { + "EventCode": "0x300FE", + "EventName": "PM_DATA_FROM_L3MISS", + "BriefDescription": "Demand LD - L3 Miss (not L2 hit and not L3 hit)" + }, + { + "EventCode": "0x400F0", + "EventName": "PM_LD_MISS_L1", + "BriefDescription": "L1 Dcache load miss" + }, + { + "EventCode": "0x400F2", + "EventName": "PM_CYC_INST_DISP", + "BriefDescription": "Cycle when instruction(s) dispatched." + }, + { + "EventCode": "0x400F6", + "EventName": "PM_BR_MPRED_CMPL", + "BriefDescription": "A mispredicted branch completed. Includes direction and target." + }, + { + "EventCode": "0x400FA", + "EventName": "PM_RUN_INST_CMPL", + "BriefDescription": "PowerPC instruction completed while the run latch is set." + }, + { + "EventCode": "0x400FC", + "EventName": "PM_ITLB_MISS", + "BriefDescription": "Instruction TLB reload (after a miss), all page sizes. Includes only demand misses." + }, + { + "EventCode": "0x400fe", + "EventName": "PM_LD_NOT_CACHED", + "BriefDescription": "Load data not cached." + }, + { + "EventCode": "0x500fa", + "EventName": "PM_INST_CMPL", + "BriefDescription": "Instructions." + } +] diff --git a/tools/perf/pmu-events/arch/powerpc/mapfile.csv b/tools/perf/pmu-events/arch/powerpc/mapfile.csv index 4d5e9138d4cc..cbd3cb443784 100644 --- a/tools/perf/pmu-events/arch/powerpc/mapfile.csv +++ b/tools/perf/pmu-events/arch/powerpc/mapfile.csv @@ -16,3 +16,4 @@ 0x004e[[:xdigit:]]{4},1,power9,core 0x0080[[:xdigit:]]{4},1,power10,core 0x0082[[:xdigit:]]{4},1,power10,core +0x00ffffff,1,compat,core diff --git a/tools/perf/pmu-events/arch/x86/amdzen5/data-fabric.json b/tools/perf/pmu-events/arch/x86/amdzen5/data-fabric.json new file mode 100644 index 000000000000..fa06569d881d --- /dev/null +++ b/tools/perf/pmu-events/arch/x86/amdzen5/data-fabric.json @@ -0,0 +1,1634 @@ +[ + { + "EventName": "local_or_remote_socket_read_data_beats_dram_0", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 0.", + "EventCode": "0x1f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_1", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 1.", + "EventCode": "0x5f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_2", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 2.", + "EventCode": "0x9f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_3", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 3.", + "EventCode": "0xdf", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_4", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 4.", + "EventCode": "0x11f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_5", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 5.", + "EventCode": "0x15f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_6", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 6.", + "EventCode": "0x19f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_7", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 7.", + "EventCode": "0x1df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_8", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 8.", + "EventCode": "0x21f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_9", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 9.", + "EventCode": "0x25f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_10", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 10.", + "EventCode": "0x29f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_read_data_beats_dram_11", + "PublicDescription": "Read data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 11.", + "EventCode": "0x2df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_0", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 0.", + "EventCode": "0x1f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_1", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 1.", + "EventCode": "0x5f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_2", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 2.", + "EventCode": "0x9f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_3", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 3.", + "EventCode": "0xdf", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_4", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 4.", + "EventCode": "0x11f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_5", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 5.", + "EventCode": "0x15f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_6", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 6.", + "EventCode": "0x19f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_7", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 7.", + "EventCode": "0x1df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_8", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 8.", + "EventCode": "0x21f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_9", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 9.", + "EventCode": "0x25f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_10", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 10.", + "EventCode": "0x29f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_write_data_beats_dram_11", + "PublicDescription": "Write data beats (64 bytes) for transactions between local socket and DRAM Channel 11.", + "EventCode": "0x2df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_0", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 0.", + "EventCode": "0x1f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_1", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 1.", + "EventCode": "0x5f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_2", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 2.", + "EventCode": "0x9f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_3", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 3.", + "EventCode": "0xdf", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_4", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 4.", + "EventCode": "0x11f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_5", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 5.", + "EventCode": "0x15f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_6", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 6.", + "EventCode": "0x19f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_7", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 7.", + "EventCode": "0x1df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_8", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 8.", + "EventCode": "0x21f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_9", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 9.", + "EventCode": "0x25f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_10", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 10.", + "EventCode": "0x29f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_write_data_beats_dram_11", + "PublicDescription": "Write data beats (64 bytes) for transactions between remote socket and DRAM Channel 11.", + "EventCode": "0x2df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_0", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 0.", + "EventCode": "0x1f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_1", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 1.", + "EventCode": "0x5f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_2", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 2.", + "EventCode": "0x9f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_3", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 3.", + "EventCode": "0xdf", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_4", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 4.", + "EventCode": "0x11f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_5", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 5.", + "EventCode": "0x15f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_6", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 6.", + "EventCode": "0x19f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_7", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 7.", + "EventCode": "0x1df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_8", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 8.", + "EventCode": "0x21f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_9", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 9.", + "EventCode": "0x25f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_10", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 10.", + "EventCode": "0x29f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_write_data_beats_dram_11", + "PublicDescription": "Write data beats (64 bytes) for transactions between local or remote socket and DRAM Channel 11.", + "EventCode": "0x2df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_0", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_1", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_2", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_3", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_4", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_5", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_6", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_read_data_beats_io_7", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_0", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_1", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_2", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_3", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_4", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_5", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_6", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_upstream_write_data_beats_io_7", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_0", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_1", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_2", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_3", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_4", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_5", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_6", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_read_data_beats_io_7", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between remote socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_0", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_1", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_2", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_3", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_4", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_5", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_6", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_upstream_write_data_beats_io_7", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between remote socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_0", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_1", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_2", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_3", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_4", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_5", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_6", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_read_data_beats_io_7", + "PublicDescription": "Upstream DMA read data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_0", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 0.", + "EventCode": "0x81f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_1", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 1.", + "EventCode": "0x85f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_2", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 2.", + "EventCode": "0x89f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_3", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 3.", + "EventCode": "0x8df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_4", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 4.", + "EventCode": "0x91f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_5", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 5.", + "EventCode": "0x95f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_6", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 6.", + "EventCode": "0x99f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_upstream_write_data_beats_io_7", + "PublicDescription": "Upstream DMA write data beats (64 bytes) for transactions between local or remote socket and IO Root Complex 7.", + "EventCode": "0x9df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_0", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_1", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_2", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_3", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_4", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_5", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_6", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_7", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_8", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_9", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_10", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_11", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_12", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_13", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_14", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_cfi_15", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0x7fe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_0", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_1", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_2", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_3", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_4", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_5", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_6", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_7", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_8", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_9", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_10", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_11", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_12", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_13", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_14", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_cfi_15", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0x7ff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_0", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_1", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_2", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_3", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_4", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_5", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_6", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_7", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_8", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_9", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_10", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_11", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_12", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_13", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_14", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_inbound_data_beats_cfi_15", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between remote socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0xbfe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_0", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_1", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_2", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_3", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_4", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_5", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_6", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_7", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_8", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_9", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_10", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_11", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_12", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_13", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_14", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "remote_socket_outbound_data_beats_cfi_15", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between remote socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0xbff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_0", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_1", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_2", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_3", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_4", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_5", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_6", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_7", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_8", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_9", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_10", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_11", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_12", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_13", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_14", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_inbound_data_beats_cfi_15", + "PublicDescription": "Inbound data beats (32 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0xffe", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_0", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 0.", + "EventCode": "0x41e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_1", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 1.", + "EventCode": "0x45e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_2", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 2.", + "EventCode": "0x49e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_3", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 3.", + "EventCode": "0x4de", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_4", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 4.", + "EventCode": "0x51e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_5", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 5.", + "EventCode": "0x55e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_6", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 6.", + "EventCode": "0x59e", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_7", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 7.", + "EventCode": "0x5de", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_8", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 8.", + "EventCode": "0x41f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_9", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 9.", + "EventCode": "0x45f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_10", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 10.", + "EventCode": "0x49f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_11", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 11.", + "EventCode": "0x4df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_12", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 12.", + "EventCode": "0x51f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_13", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 13.", + "EventCode": "0x55f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_14", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 14.", + "EventCode": "0x59f", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_or_remote_socket_outbound_data_beats_cfi_15", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local or remote socket and Core-to-Fabric Interface 15.", + "EventCode": "0x5df", + "UMask": "0xfff", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_0", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 0.", + "EventCode": "0xd5f", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_1", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 1.", + "EventCode": "0xd9f", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_2", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 2.", + "EventCode": "0xddf", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_3", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 3.", + "EventCode": "0xe1f", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_4", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 4.", + "EventCode": "0xe5f", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_inbound_data_beats_link_5", + "PublicDescription": "Inbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 5.", + "EventCode": "0xe9f", + "UMask": "0xf3f", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_0", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 0.", + "EventCode": "0xd5f", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_1", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 1.", + "EventCode": "0xd9f", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_2", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 2.", + "EventCode": "0xddf", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_3", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 3.", + "EventCode": "0xe1f", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_4", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 4.", + "EventCode": "0xe5f", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + }, + { + "EventName": "local_socket_outbound_data_beats_link_5", + "PublicDescription": "Outbound data beats (64 bytes) for transactions between local socket and remote socket over Cross-socket Link 5.", + "EventCode": "0xe9f", + "UMask": "0xf3e", + "PerPkg": "1", + "Unit": "DFPMC" + } +] diff --git a/tools/perf/pmu-events/arch/x86/amdzen5/load-store.json b/tools/perf/pmu-events/arch/x86/amdzen5/load-store.json index af2fdf1f55d6..ff6627a77805 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen5/load-store.json +++ b/tools/perf/pmu-events/arch/x86/amdzen5/load-store.json @@ -97,6 +97,12 @@ "UMask": "0x02" }, { + "EventName": "ls_dmnd_fills_from_sys.local_all", + "EventCode": "0x43", + "BriefDescription": "Demand data cache fills from local L2 cache, L3 cache or different L2 cache in the same CCX.", + "UMask": "0x03" + }, + { "EventName": "ls_dmnd_fills_from_sys.near_cache", "EventCode": "0x43", "BriefDescription": "Demand data cache fills from cache of another CCX when the address was in the same NUMA node.", @@ -115,12 +121,30 @@ "UMask": "0x10" }, { + "EventName": "ls_dmnd_fills_from_sys.remote_cache", + "EventCode": "0x43", + "BriefDescription": "Demand data cache fills from cache of another CCX when the address was in the same or a different NUMA node.", + "UMask": "0x14" + }, + { "EventName": "ls_dmnd_fills_from_sys.dram_io_far", "EventCode": "0x43", "BriefDescription": "Demand data cache fills from either DRAM or MMIO in a different NUMA node (same or different socket).", "UMask": "0x40" }, { + "EventName": "ls_dmnd_fills_from_sys.dram_io_all", + "EventCode": "0x43", + "BriefDescription": "Demand data cache fills from either DRAM or MMIO in the same or a different NUMA node (same or different socket).", + "UMask": "0x48" + }, + { + "EventName": "ls_dmnd_fills_from_sys.far_all", + "EventCode": "0x43", + "BriefDescription": "Demand data cache fills from either cache of another CCX, DRAM or MMIO when the address was in a different NUMA node (same or different socket).", + "UMask": "0x50" + }, + { "EventName": "ls_dmnd_fills_from_sys.alternate_memories", "EventCode": "0x43", "BriefDescription": "Demand data cache fills from extension memory.", @@ -193,12 +217,6 @@ "UMask": "0x50" }, { - "EventName": "ls_any_fills_from_sys.all_dram_io", - "EventCode": "0x44", - "BriefDescription": "Any data cache fills from either DRAM or MMIO in any NUMA node (same or different socket).", - "UMask": "0x48" - }, - { "EventName": "ls_any_fills_from_sys.alternate_memories", "EventCode": "0x44", "BriefDescription": "Any data cache fills from extension memory.", @@ -343,6 +361,12 @@ "UMask": "0x02" }, { + "EventName": "ls_sw_pf_dc_fills.local_all", + "EventCode": "0x59", + "BriefDescription": "Software prefetch data cache fills from local L2 cache, L3 cache or different L2 cache in the same CCX.", + "UMask": "0x03" + }, + { "EventName": "ls_sw_pf_dc_fills.near_cache", "EventCode": "0x59", "BriefDescription": "Software prefetch data cache fills from cache of another CCX in the same NUMA node.", @@ -361,12 +385,30 @@ "UMask": "0x10" }, { + "EventName": "ls_sw_pf_dc_fills.remote_cache", + "EventCode": "0x59", + "BriefDescription": "Software prefetch data cache fills from cache of another CCX when the address was in the same or a different NUMA node.", + "UMask": "0x14" + }, + { "EventName": "ls_sw_pf_dc_fills.dram_io_far", "EventCode": "0x59", "BriefDescription": "Software prefetch data cache fills from either DRAM or MMIO in a different NUMA node (same or different socket).", "UMask": "0x40" }, { + "EventName": "ls_sw_pf_dc_fills.dram_io_all", + "EventCode": "0x59", + "BriefDescription": "Software prefetch data cache fills from either DRAM or MMIO in the same or a different NUMA node (same or different socket).", + "UMask": "0x48" + }, + { + "EventName": "ls_sw_pf_dc_fills.far_all", + "EventCode": "0x59", + "BriefDescription": "Software prefetch data cache fills from either cache of another CCX, DRAM or MMIO when the address was in a different NUMA node (same or different socket).", + "UMask": "0x50" + }, + { "EventName": "ls_sw_pf_dc_fills.alternate_memories", "EventCode": "0x59", "BriefDescription": "Software prefetch data cache fills from extension memory.", @@ -391,6 +433,12 @@ "UMask": "0x02" }, { + "EventName": "ls_hw_pf_dc_fills.local_all", + "EventCode": "0x5a", + "BriefDescription": "Hardware prefetch data cache fills from local L2 cache, L3 cache or different L2 cache in the same CCX.", + "UMask": "0x03" + }, + { "EventName": "ls_hw_pf_dc_fills.near_cache", "EventCode": "0x5a", "BriefDescription": "Hardware prefetch data cache fills from cache of another CCX when the address was in the same NUMA node.", @@ -409,12 +457,30 @@ "UMask": "0x10" }, { + "EventName": "ls_hw_pf_dc_fills.remote_cache", + "EventCode": "0x5a", + "BriefDescription": "Hardware prefetch data cache fills from cache of another CCX when the address was in the same or a different NUMA node.", + "UMask": "0x14" + }, + { "EventName": "ls_hw_pf_dc_fills.dram_io_far", "EventCode": "0x5a", "BriefDescription": "Hardware prefetch data cache fills from either DRAM or MMIO in a different NUMA node (same or different socket).", "UMask": "0x40" }, { + "EventName": "ls_hw_pf_dc_fills.dram_io_all", + "EventCode": "0x5a", + "BriefDescription": "Hardware prefetch data cache fills from either DRAM or MMIO in the same or a different NUMA node (same or different socket).", + "UMask": "0x48" + }, + { + "EventName": "ls_hw_pf_dc_fills.far_all", + "EventCode": "0x5a", + "BriefDescription": "Hardware prefetch data cache fills from either cache of another CCX, DRAM or MMIO when the address was in a different NUMA node (same or different socket).", + "UMask": "0x50" + }, + { "EventName": "ls_hw_pf_dc_fills.alternate_memories", "EventCode": "0x5a", "BriefDescription": "Hardware prefetch data cache fills from extension memory.", diff --git a/tools/perf/pmu-events/arch/x86/amdzen5/recommended.json b/tools/perf/pmu-events/arch/x86/amdzen5/recommended.json index c97874039c1e..635d57e3bc15 100644 --- a/tools/perf/pmu-events/arch/x86/amdzen5/recommended.json +++ b/tools/perf/pmu-events/arch/x86/amdzen5/recommended.json @@ -341,5 +341,117 @@ "MetricGroup": "memory_controller", "PerPkg": "1", "ScaleUnit": "1per_memclk" + }, + { + "MetricName": "dram_read_bandwidth_for_local_or_remote_socket", + "BriefDescription": "DRAM read data bandwidth for accesses in local or remote socket.", + "MetricExpr": "(local_or_remote_socket_read_data_beats_dram_0 + local_or_remote_socket_read_data_beats_dram_1 + local_or_remote_socket_read_data_beats_dram_2 + local_or_remote_socket_read_data_beats_dram_3 + local_or_remote_socket_read_data_beats_dram_4 + local_or_remote_socket_read_data_beats_dram_5 + local_or_remote_socket_read_data_beats_dram_6 + local_or_remote_socket_read_data_beats_dram_7 + local_or_remote_socket_read_data_beats_dram_8 + local_or_remote_socket_read_data_beats_dram_9 + local_or_remote_socket_read_data_beats_dram_10 + local_or_remote_socket_read_data_beats_dram_11) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "dram_write_bandwidth_for_local_socket", + "BriefDescription": "DRAM write data bandwidth for accesses in local socket.", + "MetricExpr": "(local_socket_write_data_beats_dram_0 + local_socket_write_data_beats_dram_1 + local_socket_write_data_beats_dram_2 + local_socket_write_data_beats_dram_3 + local_socket_write_data_beats_dram_4 + local_socket_write_data_beats_dram_5 + local_socket_write_data_beats_dram_6 + local_socket_write_data_beats_dram_7 + local_socket_write_data_beats_dram_8 + local_socket_write_data_beats_dram_9 + local_socket_write_data_beats_dram_10 + local_socket_write_data_beats_dram_11) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "dram_write_bandwidth_for_remote_socket", + "BriefDescription": "DRAM write data bandwidth for accesses in remote socket.", + "MetricExpr": "(remote_socket_write_data_beats_dram_0 + remote_socket_write_data_beats_dram_1 + remote_socket_write_data_beats_dram_2 + remote_socket_write_data_beats_dram_3 + remote_socket_write_data_beats_dram_4 + remote_socket_write_data_beats_dram_5 + remote_socket_write_data_beats_dram_6 + remote_socket_write_data_beats_dram_7 + remote_socket_write_data_beats_dram_8 + remote_socket_write_data_beats_dram_9 + remote_socket_write_data_beats_dram_10 + remote_socket_write_data_beats_dram_11) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "dram_write_bandwidth_for_local_or_remote_socket", + "BriefDescription": "DRAM write data bandwidth for accesses in local or remote socket.", + "MetricExpr": "(local_or_remote_socket_write_data_beats_dram_0 + local_or_remote_socket_write_data_beats_dram_1 + local_or_remote_socket_write_data_beats_dram_2 + local_or_remote_socket_write_data_beats_dram_3 + local_or_remote_socket_write_data_beats_dram_4 + local_or_remote_socket_write_data_beats_dram_5 + local_or_remote_socket_write_data_beats_dram_6 + local_or_remote_socket_write_data_beats_dram_7 + local_or_remote_socket_write_data_beats_dram_8 + local_or_remote_socket_write_data_beats_dram_9 + local_or_remote_socket_write_data_beats_dram_10 + local_or_remote_socket_write_data_beats_dram_11) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "upstream_dma_read_bandwidth_for_local_socket", + "BriefDescription": "Upstream DMA read data bandwidth for accesses in local socket.", + "MetricExpr": "(local_socket_upstream_read_data_beats_io_0 + local_socket_upstream_read_data_beats_io_1 + local_socket_upstream_read_data_beats_io_2 + local_socket_upstream_read_data_beats_io_3 + local_socket_upstream_read_data_beats_io_4 + local_socket_upstream_read_data_beats_io_5 + local_socket_upstream_read_data_beats_io_6 + local_socket_upstream_read_data_beats_io_7) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "upstream_dma_write_bandwidth_for_local_socket", + "BriefDescription": "Upstream DMA write data bandwidth for accesses in local socket.", + "MetricExpr": "(local_socket_upstream_write_data_beats_io_0 + local_socket_upstream_write_data_beats_io_1 + local_socket_upstream_write_data_beats_io_2 + local_socket_upstream_write_data_beats_io_3 + local_socket_upstream_write_data_beats_io_4 + local_socket_upstream_write_data_beats_io_5 + local_socket_upstream_write_data_beats_io_6 + local_socket_upstream_write_data_beats_io_7) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "upstream_dma_read_bandwidth_for_remote_socket", + "BriefDescription": "Upstream DMA read data bandwidth for accesses in remote socket.", + "MetricExpr": "(remote_socket_upstream_read_data_beats_io_0 + remote_socket_upstream_read_data_beats_io_1 + remote_socket_upstream_read_data_beats_io_2 + remote_socket_upstream_read_data_beats_io_3 + remote_socket_upstream_read_data_beats_io_4 + remote_socket_upstream_read_data_beats_io_5 + remote_socket_upstream_read_data_beats_io_6 + remote_socket_upstream_read_data_beats_io_7) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "upstream_dma_write_bandwidth_for_remote_socket", + "BriefDescription": "Upstream DMA write data bandwidth for accesses in remote socket.", + "MetricExpr": "(remote_socket_upstream_write_data_beats_io_0 + remote_socket_upstream_write_data_beats_io_1 + remote_socket_upstream_write_data_beats_io_2 + remote_socket_upstream_write_data_beats_io_3 + remote_socket_upstream_write_data_beats_io_4 + remote_socket_upstream_write_data_beats_io_5 + remote_socket_upstream_write_data_beats_io_6 + remote_socket_upstream_write_data_beats_io_7) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "core_inbound_data_bandwidth_for_local_socket", + "BriefDescription": "Core inbound data bandwidth for accesses in local socket.", + "MetricExpr": "(local_socket_inbound_data_beats_cfi_0 + local_socket_inbound_data_beats_cfi_1 + local_socket_inbound_data_beats_cfi_2 + local_socket_inbound_data_beats_cfi_3 + local_socket_inbound_data_beats_cfi_4 + local_socket_inbound_data_beats_cfi_5 + local_socket_inbound_data_beats_cfi_6 + local_socket_inbound_data_beats_cfi_7 + local_socket_inbound_data_beats_cfi_8 + local_socket_inbound_data_beats_cfi_9 + local_socket_inbound_data_beats_cfi_10 + local_socket_inbound_data_beats_cfi_11 + local_socket_inbound_data_beats_cfi_12 + local_socket_inbound_data_beats_cfi_13 + local_socket_inbound_data_beats_cfi_14 + local_socket_inbound_data_beats_cfi_15) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "3.2e-5MB/s" + }, + { + "MetricName": "core_outbound_data_bandwidth_for_local_socket", + "BriefDescription": "Core outbound data bandwidth for accesses in local socket.", + "MetricExpr": "(local_socket_outbound_data_beats_cfi_0 + local_socket_outbound_data_beats_cfi_1 + local_socket_outbound_data_beats_cfi_2 + local_socket_outbound_data_beats_cfi_3 + local_socket_outbound_data_beats_cfi_4 + local_socket_outbound_data_beats_cfi_5 + local_socket_outbound_data_beats_cfi_6 + local_socket_outbound_data_beats_cfi_7 + local_socket_outbound_data_beats_cfi_8 + local_socket_outbound_data_beats_cfi_9 + local_socket_outbound_data_beats_cfi_10 + local_socket_outbound_data_beats_cfi_11 + local_socket_outbound_data_beats_cfi_12 + local_socket_outbound_data_beats_cfi_13 + local_socket_outbound_data_beats_cfi_14 + local_socket_outbound_data_beats_cfi_15) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "core_inbound_data_bandwidth_for_remote_socket", + "BriefDescription": "Core inbound data bandwidth for accesses in remote socket.", + "MetricExpr": "(remote_socket_inbound_data_beats_cfi_0 + remote_socket_inbound_data_beats_cfi_1 + remote_socket_inbound_data_beats_cfi_2 + remote_socket_inbound_data_beats_cfi_3 + remote_socket_inbound_data_beats_cfi_4 + remote_socket_inbound_data_beats_cfi_5 + remote_socket_inbound_data_beats_cfi_6 + remote_socket_inbound_data_beats_cfi_7 + remote_socket_inbound_data_beats_cfi_8 + remote_socket_inbound_data_beats_cfi_9 + remote_socket_inbound_data_beats_cfi_10 + remote_socket_inbound_data_beats_cfi_11 + remote_socket_inbound_data_beats_cfi_12 + remote_socket_inbound_data_beats_cfi_13 + remote_socket_inbound_data_beats_cfi_14 + remote_socket_inbound_data_beats_cfi_15) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "3.2e-5MB/s" + }, + { + "MetricName": "core_outbound_data_bandwidth_for_remote_socket", + "BriefDescription": "Core outbound data bandwidth for accesses in remote socket.", + "MetricExpr": "(remote_socket_outbound_data_beats_cfi_0 + remote_socket_outbound_data_beats_cfi_1 + remote_socket_outbound_data_beats_cfi_2 + remote_socket_outbound_data_beats_cfi_3 + remote_socket_outbound_data_beats_cfi_4 + remote_socket_outbound_data_beats_cfi_5 + remote_socket_outbound_data_beats_cfi_6 + remote_socket_outbound_data_beats_cfi_7 + remote_socket_outbound_data_beats_cfi_8 + remote_socket_outbound_data_beats_cfi_9 + remote_socket_outbound_data_beats_cfi_10 + remote_socket_outbound_data_beats_cfi_11 + remote_socket_outbound_data_beats_cfi_12 + remote_socket_outbound_data_beats_cfi_13 + remote_socket_outbound_data_beats_cfi_14 + remote_socket_outbound_data_beats_cfi_15) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "cross_socket_inbound_data_bandwidth_for_local_socket", + "BriefDescription": "Inbound data bandwidth for accesses between local socket and remote socket.", + "MetricExpr": "(local_socket_inbound_data_beats_link_0 + local_socket_inbound_data_beats_link_1 + local_socket_inbound_data_beats_link_2 + local_socket_inbound_data_beats_link_3 + local_socket_inbound_data_beats_link_4 + local_socket_inbound_data_beats_link_5) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" + }, + { + "MetricName": "cross_socket_outbound_data_bandwidth_for_local_socket", + "BriefDescription": "Outbound data bandwidth for accesses between local socket and remote socket.", + "MetricExpr": "(local_socket_outbound_data_beats_link_0 + local_socket_outbound_data_beats_link_1 + local_socket_outbound_data_beats_link_2 + local_socket_outbound_data_beats_link_3 + local_socket_outbound_data_beats_link_4 + local_socket_outbound_data_beats_link_5) / duration_time", + "MetricGroup": "data_fabric", + "PerPkg": "1", + "ScaleUnit": "6.4e-5MB/s" } ] diff --git a/tools/perf/pmu-events/empty-pmu-events.c b/tools/perf/pmu-events/empty-pmu-events.c index c592079982fb..1c7a2cfa321f 100644 --- a/tools/perf/pmu-events/empty-pmu-events.c +++ b/tools/perf/pmu-events/empty-pmu-events.c @@ -19,72 +19,109 @@ struct pmu_table_entry { }; static const char *const big_c_string = -/* offset=0 */ "default_core\000" -/* offset=13 */ "bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000" -/* offset=72 */ "bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000" -/* offset=131 */ "l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000" -/* offset=226 */ "segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000" -/* offset=325 */ "dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000" -/* offset=455 */ "eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000" -/* offset=570 */ "hisi_sccl,ddrc\000" -/* offset=585 */ "uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000" -/* offset=671 */ "uncore_cbox\000" -/* offset=683 */ "unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000" -/* offset=914 */ "event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000" -/* offset=979 */ "event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000" -/* offset=1050 */ "hisi_sccl,l3c\000" -/* offset=1064 */ "uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000" -/* offset=1144 */ "uncore_imc_free_running\000" -/* offset=1168 */ "uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000" -/* offset=1263 */ "uncore_imc\000" -/* offset=1274 */ "uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000" -/* offset=1352 */ "uncore_sys_ddr_pmu\000" -/* offset=1371 */ "sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000" -/* offset=1444 */ "uncore_sys_ccn_pmu\000" -/* offset=1463 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000" -/* offset=1537 */ "uncore_sys_cmn_pmu\000" -/* offset=1556 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000" -/* offset=1696 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000" -/* offset=1718 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000" -/* offset=1781 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000" -/* offset=1947 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" -/* offset=2011 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" -/* offset=2078 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000" -/* offset=2149 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000" -/* offset=2243 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000" -/* offset=2377 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000" -/* offset=2441 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000" -/* offset=2509 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000" -/* offset=2579 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000" -/* offset=2601 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000" -/* offset=2623 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\00000" -/* offset=2643 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000" +/* offset=0 */ "tool\000" +/* offset=5 */ "duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000" +/* offset=78 */ "user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000" +/* offset=145 */ "system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000" +/* offset=210 */ "has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000" +/* offset=283 */ "num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000" +/* offset=425 */ "num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000" +/* offset=525 */ "num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000" +/* offset=639 */ "num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000" +/* offset=712 */ "num_packages\000tool\000Number of packages. Each package has 1 or more die\000config=9\000\00000\000\000" +/* offset=795 */ "slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000" +/* offset=902 */ "smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000" +/* offset=1006 */ "system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000" +/* offset=1102 */ "default_core\000" +/* offset=1115 */ "bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000" +/* offset=1174 */ "bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000" +/* offset=1233 */ "l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000" +/* offset=1328 */ "segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000" +/* offset=1427 */ "dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000" +/* offset=1557 */ "eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000" +/* offset=1672 */ "hisi_sccl,ddrc\000" +/* offset=1687 */ "uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000" +/* offset=1773 */ "uncore_cbox\000" +/* offset=1785 */ "unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000" +/* offset=2016 */ "event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000" +/* offset=2081 */ "event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000" +/* offset=2152 */ "hisi_sccl,l3c\000" +/* offset=2166 */ "uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000" +/* offset=2246 */ "uncore_imc_free_running\000" +/* offset=2270 */ "uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000" +/* offset=2365 */ "uncore_imc\000" +/* offset=2376 */ "uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000" +/* offset=2454 */ "uncore_sys_ddr_pmu\000" +/* offset=2473 */ "sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000" +/* offset=2546 */ "uncore_sys_ccn_pmu\000" +/* offset=2565 */ "sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000" +/* offset=2639 */ "uncore_sys_cmn_pmu\000" +/* offset=2658 */ "sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000" +/* offset=2798 */ "CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000" +/* offset=2820 */ "IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000" +/* offset=2883 */ "Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000" +/* offset=3049 */ "dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" +/* offset=3113 */ "icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000" +/* offset=3180 */ "cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000" +/* offset=3251 */ "DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000" +/* offset=3345 */ "DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000" +/* offset=3479 */ "DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000" +/* offset=3543 */ "DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000" +/* offset=3611 */ "DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000" +/* offset=3681 */ "M1\000\000ipc + M2\000\000\000\000\000\000\000\00000" +/* offset=3703 */ "M2\000\000ipc + M1\000\000\000\000\000\000\000\00000" +/* offset=3725 */ "M3\000\0001 / M3\000\000\000\000\000\000\000\00000" +/* offset=3745 */ "L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000" ; +static const struct compact_pmu_event pmu_events__common_tool[] = { +{ 5 }, /* duration_time\000tool\000Wall clock interval time in nanoseconds\000config=1\000\00000\000\000 */ +{ 210 }, /* has_pmem\000tool\0001 if persistent memory installed otherwise 0\000config=4\000\00000\000\000 */ +{ 283 }, /* num_cores\000tool\000Number of cores. A core consists of 1 or more thread, with each thread being associated with a logical Linux CPU\000config=5\000\00000\000\000 */ +{ 425 }, /* num_cpus\000tool\000Number of logical Linux CPUs. There may be multiple such CPUs on a core\000config=6\000\00000\000\000 */ +{ 525 }, /* num_cpus_online\000tool\000Number of online logical Linux CPUs. There may be multiple such CPUs on a core\000config=7\000\00000\000\000 */ +{ 639 }, /* num_dies\000tool\000Number of dies. Each die has 1 or more cores\000config=8\000\00000\000\000 */ +{ 712 }, /* num_packages\000tool\000Number of packages. Each package has 1 or more die\000config=9\000\00000\000\000 */ +{ 795 }, /* slots\000tool\000Number of functional units that in parallel can execute parts of an instruction\000config=0xa\000\00000\000\000 */ +{ 902 }, /* smt_on\000tool\0001 if simultaneous multithreading (aka hyperthreading) is enable otherwise 0\000config=0xb\000\00000\000\000 */ +{ 145 }, /* system_time\000tool\000System/kernel time in nanoseconds\000config=3\000\00000\000\000 */ +{ 1006 }, /* system_tsc_freq\000tool\000The amount a Time Stamp Counter (TSC) increases per second\000config=0xc\000\00000\000\000 */ +{ 78 }, /* user_time\000tool\000User (non-kernel) time in nanoseconds\000config=2\000\00000\000\000 */ + +}; + +const struct pmu_table_entry pmu_events__common[] = { +{ + .entries = pmu_events__common_tool, + .num_entries = ARRAY_SIZE(pmu_events__common_tool), + .pmu_name = { 0 /* tool\000 */ }, +}, +}; + static const struct compact_pmu_event pmu_events__test_soc_cpu_default_core[] = { -{ 13 }, /* bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000 */ -{ 72 }, /* bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000 */ -{ 325 }, /* dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000 */ -{ 455 }, /* eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000 */ -{ 131 }, /* l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000 */ -{ 226 }, /* segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000 */ +{ 1115 }, /* bp_l1_btb_correct\000branch\000L1 BTB Correction\000event=0x8a\000\00000\000\000 */ +{ 1174 }, /* bp_l2_btb_correct\000branch\000L2 BTB Correction\000event=0x8b\000\00000\000\000 */ +{ 1427 }, /* dispatch_blocked.any\000other\000Memory cluster signals to block micro-op dispatch for any reason\000event=9,period=200000,umask=0x20\000\00000\000\000 */ +{ 1557 }, /* eist_trans\000other\000Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions\000event=0x3a,period=200000\000\00000\000\000 */ +{ 1233 }, /* l3_cache_rd\000cache\000L3 cache access, read\000event=0x40\000\00000\000Attributable Level 3 cache access, read\000 */ +{ 1328 }, /* segment_reg_loads.any\000other\000Number of segment register loads\000event=6,period=200000,umask=0x80\000\00000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_ddrc[] = { -{ 585 }, /* uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000 */ +{ 1687 }, /* uncore_hisi_ddrc.flux_wcmd\000uncore\000DDRC write commands\000event=2\000\00000\000DDRC write commands\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_hisi_sccl_l3c[] = { -{ 1064 }, /* uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000 */ +{ 2166 }, /* uncore_hisi_l3c.rd_hit_cpipe\000uncore\000Total read hits\000event=7\000\00000\000Total read hits\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_cbox[] = { -{ 914 }, /* event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000 */ -{ 979 }, /* event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000 */ -{ 683 }, /* unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000 */ +{ 2016 }, /* event-hyphen\000uncore\000UNC_CBO_HYPHEN\000event=0xe0\000\00000\000UNC_CBO_HYPHEN\000 */ +{ 2081 }, /* event-two-hyph\000uncore\000UNC_CBO_TWO_HYPH\000event=0xc0\000\00000\000UNC_CBO_TWO_HYPH\000 */ +{ 1785 }, /* unc_cbo_xsnp_response.miss_eviction\000uncore\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000event=0x22,umask=0x81\000\00000\000A cross-core snoop resulted from L3 Eviction which misses in some processor core\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc[] = { -{ 1274 }, /* uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000 */ +{ 2376 }, /* uncore_imc.cache_hits\000uncore\000Total cache hits\000event=0x34\000\00000\000Total cache hits\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_cpu_uncore_imc_free_running[] = { -{ 1168 }, /* uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000 */ +{ 2270 }, /* uncore_imc_free_running.cache_miss\000uncore\000Total cache misses\000event=0x12\000\00000\000Total cache misses\000 */ }; @@ -92,51 +129,51 @@ const struct pmu_table_entry pmu_events__test_soc_cpu[] = { { .entries = pmu_events__test_soc_cpu_default_core, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_default_core), - .pmu_name = { 0 /* default_core\000 */ }, + .pmu_name = { 1102 /* default_core\000 */ }, }, { .entries = pmu_events__test_soc_cpu_hisi_sccl_ddrc, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_hisi_sccl_ddrc), - .pmu_name = { 570 /* hisi_sccl,ddrc\000 */ }, + .pmu_name = { 1672 /* hisi_sccl,ddrc\000 */ }, }, { .entries = pmu_events__test_soc_cpu_hisi_sccl_l3c, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_hisi_sccl_l3c), - .pmu_name = { 1050 /* hisi_sccl,l3c\000 */ }, + .pmu_name = { 2152 /* hisi_sccl,l3c\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_cbox, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_cbox), - .pmu_name = { 671 /* uncore_cbox\000 */ }, + .pmu_name = { 1773 /* uncore_cbox\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_imc, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc), - .pmu_name = { 1263 /* uncore_imc\000 */ }, + .pmu_name = { 2365 /* uncore_imc\000 */ }, }, { .entries = pmu_events__test_soc_cpu_uncore_imc_free_running, .num_entries = ARRAY_SIZE(pmu_events__test_soc_cpu_uncore_imc_free_running), - .pmu_name = { 1144 /* uncore_imc_free_running\000 */ }, + .pmu_name = { 2246 /* uncore_imc_free_running\000 */ }, }, }; static const struct compact_pmu_event pmu_metrics__test_soc_cpu_default_core[] = { -{ 1696 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000 */ -{ 2377 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */ -{ 2149 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */ -{ 2243 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */ -{ 2441 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ -{ 2509 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ -{ 1781 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */ -{ 1718 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */ -{ 2643 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */ -{ 2579 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */ -{ 2601 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\00000 */ -{ 2623 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */ -{ 2078 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */ -{ 1947 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ -{ 2011 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ +{ 2798 }, /* CPI\000\0001 / IPC\000\000\000\000\000\000\000\00000 */ +{ 3479 }, /* DCache_L2_All\000\000DCache_L2_All_Hits + DCache_L2_All_Miss\000\000\000\000\000\000\000\00000 */ +{ 3251 }, /* DCache_L2_All_Hits\000\000l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit\000\000\000\000\000\000\000\00000 */ +{ 3345 }, /* DCache_L2_All_Miss\000\000max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + l2_rqsts.pf_miss + l2_rqsts.rfo_miss\000\000\000\000\000\000\000\00000 */ +{ 3543 }, /* DCache_L2_Hits\000\000d_ratio(DCache_L2_All_Hits, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ +{ 3611 }, /* DCache_L2_Misses\000\000d_ratio(DCache_L2_All_Miss, DCache_L2_All)\000\000\000\000\000\000\000\00000 */ +{ 2883 }, /* Frontend_Bound_SMT\000\000idq_uops_not_delivered.core / (4 * (cpu_clk_unhalted.thread / 2 * (1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk)))\000\000\000\000\000\000\000\00000 */ +{ 2820 }, /* IPC\000group1\000inst_retired.any / cpu_clk_unhalted.thread\000\000\000\000\000\000\000\00000 */ +{ 3745 }, /* L1D_Cache_Fill_BW\000\00064 * l1d.replacement / 1e9 / duration_time\000\000\000\000\000\000\000\00000 */ +{ 3681 }, /* M1\000\000ipc + M2\000\000\000\000\000\000\000\00000 */ +{ 3703 }, /* M2\000\000ipc + M1\000\000\000\000\000\000\000\00000 */ +{ 3725 }, /* M3\000\0001 / M3\000\000\000\000\000\000\000\00000 */ +{ 3180 }, /* cache_miss_cycles\000group1\000dcache_miss_cpi + icache_miss_cycles\000\000\000\000\000\000\000\00000 */ +{ 3049 }, /* dcache_miss_cpi\000\000l1d\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ +{ 3113 }, /* icache_miss_cycles\000\000l1i\\-loads\\-misses / inst_retired.any\000\000\000\000\000\000\000\00000 */ }; @@ -144,18 +181,18 @@ const struct pmu_table_entry pmu_metrics__test_soc_cpu[] = { { .entries = pmu_metrics__test_soc_cpu_default_core, .num_entries = ARRAY_SIZE(pmu_metrics__test_soc_cpu_default_core), - .pmu_name = { 0 /* default_core\000 */ }, + .pmu_name = { 1102 /* default_core\000 */ }, }, }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ccn_pmu[] = { -{ 1463 }, /* sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000 */ +{ 2565 }, /* sys_ccn_pmu.read_cycles\000uncore\000ccn read-cycles event\000config=0x2c\0000x01\00000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_cmn_pmu[] = { -{ 1556 }, /* sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000 */ +{ 2658 }, /* sys_cmn_pmu.hnf_cache_miss\000uncore\000Counts total cache misses in first lookup result (high priority)\000eventid=1,type=5\000(434|436|43c|43a).*\00000\000\000 */ }; static const struct compact_pmu_event pmu_events__test_soc_sys_uncore_sys_ddr_pmu[] = { -{ 1371 }, /* sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000 */ +{ 2473 }, /* sys_ddr_pmu.write_cycles\000uncore\000ddr write-cycles event\000event=0x2b\000v8\00000\000\000 */ }; @@ -163,17 +200,17 @@ const struct pmu_table_entry pmu_events__test_soc_sys[] = { { .entries = pmu_events__test_soc_sys_uncore_sys_ccn_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ccn_pmu), - .pmu_name = { 1444 /* uncore_sys_ccn_pmu\000 */ }, + .pmu_name = { 2546 /* uncore_sys_ccn_pmu\000 */ }, }, { .entries = pmu_events__test_soc_sys_uncore_sys_cmn_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_cmn_pmu), - .pmu_name = { 1537 /* uncore_sys_cmn_pmu\000 */ }, + .pmu_name = { 2639 /* uncore_sys_cmn_pmu\000 */ }, }, { .entries = pmu_events__test_soc_sys_uncore_sys_ddr_pmu, .num_entries = ARRAY_SIZE(pmu_events__test_soc_sys_uncore_sys_ddr_pmu), - .pmu_name = { 1352 /* uncore_sys_ddr_pmu\000 */ }, + .pmu_name = { 2454 /* uncore_sys_ddr_pmu\000 */ }, }, }; @@ -211,6 +248,15 @@ struct pmu_events_map { */ const struct pmu_events_map pmu_events_map[] = { { + .arch = "common", + .cpuid = "common", + .event_table = { + .pmus = pmu_events__common, + .num_pmus = ARRAY_SIZE(pmu_events__common), + }, + .metric_table = {}, +}, +{ .arch = "testarch", .cpuid = "testcpu", .event_table = { @@ -380,7 +426,7 @@ int pmu_events_table__for_each_event(const struct pmu_events_table *table, continue; ret = pmu_events_table__for_each_event_pmu(table, table_pmu, fn, data); - if (pmu || ret) + if (ret) return ret; } return 0; @@ -457,11 +503,11 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, return 0; } -static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) +static const struct pmu_events_map *map_for_cpu(struct perf_cpu cpu) { static struct { const struct pmu_events_map *map; - struct perf_pmu *pmu; + struct perf_cpu cpu; } last_result; static struct { const struct pmu_events_map *map; @@ -472,10 +518,10 @@ static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) char *cpuid = NULL; size_t i; - if (has_last_result && last_result.pmu == pmu) + if (has_last_result && last_result.cpu.cpu == cpu.cpu) return last_result.map; - cpuid = perf_pmu__getcpuid(pmu); + cpuid = get_cpuid_allow_env_override(cpu); /* * On some platforms which uses cpus map, cpuid can be NULL for @@ -506,12 +552,21 @@ static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) has_last_map_search = true; } out_update_last_result: - last_result.pmu = pmu; + last_result.cpu = cpu; last_result.map = map; has_last_result = true; return map; } +static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) +{ + struct perf_cpu cpu = {-1}; + + if (pmu) + cpu = perf_cpu_map__min(pmu->cpus); + return map_for_cpu(cpu); +} + const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) { const struct pmu_events_map *map = map_for_pmu(pmu); @@ -532,24 +587,12 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) return NULL; } -const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu) +const struct pmu_metrics_table *pmu_metrics_table__find(void) { - const struct pmu_events_map *map = map_for_pmu(pmu); - - if (!map) - return NULL; + struct perf_cpu cpu = {-1}; + const struct pmu_events_map *map = map_for_cpu(cpu); - if (!pmu) - return &map->metric_table; - - for (size_t i = 0; i < map->metric_table.num_pmus; i++) { - const struct pmu_table_entry *table_pmu = &map->metric_table.pmus[i]; - const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset]; - - if (pmu__name_match(pmu, pmu_name)) - return &map->metric_table; - } - return NULL; + return map ? &map->metric_table : NULL; } const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid) diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py index bb0a5d92df4a..d781a377757a 100755 --- a/tools/perf/pmu-events/jevents.py +++ b/tools/perf/pmu-events/jevents.py @@ -292,6 +292,7 @@ class JsonEvent: 'cpu_atom': 'cpu_atom', 'ali_drw': 'ali_drw', 'arm_cmn': 'arm_cmn', + 'tool': 'tool', } return table[unit] if unit in table else f'uncore_{unit.lower()}' @@ -722,6 +723,17 @@ const struct pmu_events_map pmu_events_map[] = { \t} }, """) + elif arch == 'common': + _args.output_file.write("""{ +\t.arch = "common", +\t.cpuid = "common", +\t.event_table = { +\t\t.pmus = pmu_events__common, +\t\t.num_pmus = ARRAY_SIZE(pmu_events__common), +\t}, +\t.metric_table = {}, +}, +""") else: with open(f'{_args.starting_dir}/{arch}/mapfile.csv') as csvfile: table = csv.reader(csvfile) @@ -930,7 +942,7 @@ int pmu_events_table__for_each_event(const struct pmu_events_table *table, continue; ret = pmu_events_table__for_each_event_pmu(table, table_pmu, fn, data); - if (pmu || ret) + if (ret) return ret; } return 0; @@ -1007,11 +1019,11 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, return 0; } -static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) +static const struct pmu_events_map *map_for_cpu(struct perf_cpu cpu) { static struct { const struct pmu_events_map *map; - struct perf_pmu *pmu; + struct perf_cpu cpu; } last_result; static struct { const struct pmu_events_map *map; @@ -1022,10 +1034,10 @@ static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) char *cpuid = NULL; size_t i; - if (has_last_result && last_result.pmu == pmu) + if (has_last_result && last_result.cpu.cpu == cpu.cpu) return last_result.map; - cpuid = perf_pmu__getcpuid(pmu); + cpuid = get_cpuid_allow_env_override(cpu); /* * On some platforms which uses cpus map, cpuid can be NULL for @@ -1056,12 +1068,21 @@ static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) has_last_map_search = true; } out_update_last_result: - last_result.pmu = pmu; + last_result.cpu = cpu; last_result.map = map; has_last_result = true; return map; } +static const struct pmu_events_map *map_for_pmu(struct perf_pmu *pmu) +{ + struct perf_cpu cpu = {-1}; + + if (pmu) + cpu = perf_cpu_map__min(pmu->cpus); + return map_for_cpu(cpu); +} + const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) { const struct pmu_events_map *map = map_for_pmu(pmu); @@ -1082,24 +1103,12 @@ const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu) return NULL; } -const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu) +const struct pmu_metrics_table *pmu_metrics_table__find(void) { - const struct pmu_events_map *map = map_for_pmu(pmu); - - if (!map) - return NULL; - - if (!pmu) - return &map->metric_table; + struct perf_cpu cpu = {-1}; + const struct pmu_events_map *map = map_for_cpu(cpu); - for (size_t i = 0; i < map->metric_table.num_pmus; i++) { - const struct pmu_table_entry *table_pmu = &map->metric_table.pmus[i]; - const char *pmu_name = &big_c_string[table_pmu->pmu_name.offset]; - - if (pmu__name_match(pmu, pmu_name)) - return &map->metric_table; - } - return NULL; + return map ? &map->metric_table : NULL; } const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid) @@ -1241,7 +1250,7 @@ def main() -> None: if len(parents) == _args.model.split(',')[0].count('/'): # We're testing the correct directory. item_path = '/'.join(parents) + ('/' if len(parents) > 0 else '') + item.name - if 'test' not in item_path and item_path not in _args.model.split(','): + if 'test' not in item_path and 'common' not in item_path and item_path not in _args.model.split(','): continue action(parents, item) if item.is_dir(): @@ -1289,7 +1298,7 @@ struct pmu_table_entry { for item in os.scandir(_args.starting_dir): if not item.is_dir(): continue - if item.name == _args.arch or _args.arch == 'all' or item.name == 'test': + if item.name == _args.arch or _args.arch == 'all' or item.name == 'test' or item.name == 'common': archs.append(item.name) if len(archs) < 2 and _args.arch != 'none': diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h index 5435ad92180c..675562e6f770 100644 --- a/tools/perf/pmu-events/pmu-events.h +++ b/tools/perf/pmu-events/pmu-events.h @@ -103,7 +103,7 @@ int pmu_metrics_table__for_each_metric(const struct pmu_metrics_table *table, pm void *data); const struct pmu_events_table *perf_pmu__find_events_table(struct perf_pmu *pmu); -const struct pmu_metrics_table *perf_pmu__find_metrics_table(struct perf_pmu *pmu); +const struct pmu_metrics_table *pmu_metrics_table__find(void); const struct pmu_events_table *find_core_events_table(const char *arch, const char *cpuid); const struct pmu_metrics_table *find_core_metrics_table(const char *arch, const char *cpuid); int pmu_for_each_core_event(pmu_event_iter_fn fn, void *data); diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c index 3954bd1587ce..01f54d6724a5 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c @@ -12,6 +12,7 @@ #define PY_SSIZE_T_CLEAN #include <Python.h> +#include "../../../util/config.h" #include "../../../util/trace-event.h" #include "../../../util/event.h" #include "../../../util/symbol.h" @@ -182,6 +183,15 @@ static PyObject *perf_sample_srccode(PyObject *obj, PyObject *args) return perf_sample_src(obj, args, true); } +static PyObject *__perf_config_get(PyObject *obj, PyObject *args) +{ + const char *config_name; + + if (!PyArg_ParseTuple(args, "s", &config_name)) + return NULL; + return Py_BuildValue("s", perf_config_get(config_name)); +} + static PyMethodDef ContextMethods[] = { #ifdef HAVE_LIBTRACEEVENT { "common_pc", perf_trace_context_common_pc, METH_VARARGS, @@ -199,6 +209,7 @@ static PyMethodDef ContextMethods[] = { METH_VARARGS, "Get source file name and line number."}, { "perf_sample_srccode", perf_sample_srccode, METH_VARARGS, "Get source file name, line number and line."}, + { "perf_config_get", __perf_config_get, METH_VARARGS, "Get perf config entry"}, { NULL, NULL, 0, NULL} }; diff --git a/tools/perf/scripts/python/arm-cs-trace-disasm.py b/tools/perf/scripts/python/arm-cs-trace-disasm.py index 7aff02d84ffb..ba208c90d631 100755 --- a/tools/perf/scripts/python/arm-cs-trace-disasm.py +++ b/tools/perf/scripts/python/arm-cs-trace-disasm.py @@ -11,36 +11,74 @@ import os from os import path import re from subprocess import * -from optparse import OptionParser, make_option +import argparse +import platform -from perf_trace_context import perf_set_itrace_options, \ - perf_sample_insn, perf_sample_srccode +from perf_trace_context import perf_sample_srccode, perf_config_get # Below are some example commands for using this script. +# Note a --kcore recording is required for accurate decode +# due to the alternatives patching mechanism. However this +# script only supports reading vmlinux for disassembly dump, +# meaning that any patched instructions will appear +# as unpatched, but the instruction ranges themselves will +# be correct. In addition to this, source line info comes +# from Perf, and when using kcore there is no debug info. The +# following lists the supported features in each mode: +# +# +-----------+-----------------+------------------+------------------+ +# | Recording | Accurate decode | Source line dump | Disassembly dump | +# +-----------+-----------------+------------------+------------------+ +# | --kcore | yes | no | yes | +# | normal | no | yes | yes | +# +-----------+-----------------+------------------+------------------+ +# +# Output disassembly with objdump and auto detect vmlinux +# (when running on same machine.) +# perf script -s scripts/python/arm-cs-trace-disasm.py -d # -# Output disassembly with objdump: -# perf script -s scripts/python/arm-cs-trace-disasm.py \ -# -- -d objdump -k path/to/vmlinux # Output disassembly with llvm-objdump: # perf script -s scripts/python/arm-cs-trace-disasm.py \ # -- -d llvm-objdump-11 -k path/to/vmlinux +# # Output only source line and symbols: # perf script -s scripts/python/arm-cs-trace-disasm.py -# Command line parsing. -option_list = [ - # formatting options for the bottom entry of the stack - make_option("-k", "--vmlinux", dest="vmlinux_name", - help="Set path to vmlinux file"), - make_option("-d", "--objdump", dest="objdump_name", - help="Set path to objdump executable file"), - make_option("-v", "--verbose", dest="verbose", - action="store_true", default=False, - help="Enable debugging log") -] +def default_objdump(): + config = perf_config_get("annotate.objdump") + return config if config else "objdump" -parser = OptionParser(option_list=option_list) -(options, args) = parser.parse_args() +# Command line parsing. +def int_arg(v): + v = int(v) + if v < 0: + raise argparse.ArgumentTypeError("Argument must be a positive integer") + return v + +args = argparse.ArgumentParser() +args.add_argument("-k", "--vmlinux", + help="Set path to vmlinux file. Omit to autodetect if running on same machine") +args.add_argument("-d", "--objdump", nargs="?", const=default_objdump(), + help="Show disassembly. Can also be used to change the objdump path"), +args.add_argument("-v", "--verbose", action="store_true", help="Enable debugging log") +args.add_argument("--start-time", type=int_arg, help="Monotonic clock time of sample to start from. " + "See 'time' field on samples in -v mode.") +args.add_argument("--stop-time", type=int_arg, help="Monotonic clock time of sample to stop at. " + "See 'time' field on samples in -v mode.") +args.add_argument("--start-sample", type=int_arg, help="Index of sample to start from. " + "See 'index' field on samples in -v mode.") +args.add_argument("--stop-sample", type=int_arg, help="Index of sample to stop at. " + "See 'index' field on samples in -v mode.") + +options = args.parse_args() +if (options.start_time and options.stop_time and + options.start_time >= options.stop_time): + print("--start-time must less than --stop-time") + exit(2) +if (options.start_sample and options.stop_sample and + options.start_sample >= options.stop_sample): + print("--start-sample must less than --stop-sample") + exit(2) # Initialize global dicts and regular expression disasm_cache = dict() @@ -48,11 +86,23 @@ cpu_data = dict() disasm_re = re.compile(r"^\s*([0-9a-fA-F]+):") disasm_func_re = re.compile(r"^\s*([0-9a-fA-F]+)\s.*:") cache_size = 64*1024 +sample_idx = -1 glb_source_file_name = None glb_line_number = None glb_dso = None +kver = platform.release() +vmlinux_paths = [ + f"/usr/lib/debug/boot/vmlinux-{kver}.debug", + f"/usr/lib/debug/lib/modules/{kver}/vmlinux", + f"/lib/modules/{kver}/build/vmlinux", + f"/usr/lib/debug/boot/vmlinux-{kver}", + f"/boot/vmlinux-{kver}", + f"/boot/vmlinux", + f"vmlinux" +] + def get_optional(perf_dict, field): if field in perf_dict: return perf_dict[field] @@ -63,12 +113,25 @@ def get_offset(perf_dict, field): return "+%#x" % perf_dict[field] return "" +def find_vmlinux(): + if hasattr(find_vmlinux, "path"): + return find_vmlinux.path + + for v in vmlinux_paths: + if os.access(v, os.R_OK): + find_vmlinux.path = v + break + else: + find_vmlinux.path = None + + return find_vmlinux.path + def get_dso_file_path(dso_name, dso_build_id): if (dso_name == "[kernel.kallsyms]" or dso_name == "vmlinux"): - if (options.vmlinux_name): - return options.vmlinux_name; + if (options.vmlinux): + return options.vmlinux; else: - return dso_name + return find_vmlinux() if find_vmlinux() else dso_name if (dso_name == "[vdso]") : append = "/vdso" @@ -92,7 +155,7 @@ def read_disam(dso_fname, dso_start, start_addr, stop_addr): else: start_addr = start_addr - dso_start; stop_addr = stop_addr - dso_start; - disasm = [ options.objdump_name, "-d", "-z", + disasm = [ options.objdump, "-d", "-z", "--start-address="+format(start_addr,"#x"), "--stop-address="+format(stop_addr,"#x") ] disasm += [ dso_fname ] @@ -112,10 +175,10 @@ def print_disam(dso_fname, dso_start, start_addr, stop_addr): def print_sample(sample): print("Sample = { cpu: %04d addr: 0x%016x phys_addr: 0x%016x ip: 0x%016x " \ - "pid: %d tid: %d period: %d time: %d }" % \ + "pid: %d tid: %d period: %d time: %d index: %d}" % \ (sample['cpu'], sample['addr'], sample['phys_addr'], \ sample['ip'], sample['pid'], sample['tid'], \ - sample['period'], sample['time'])) + sample['period'], sample['time'], sample_idx)) def trace_begin(): print('ARM CoreSight Trace Data Assembler Dump') @@ -177,6 +240,7 @@ def print_srccode(comm, param_dict, sample, symbol, dso): def process_event(param_dict): global cache_size global options + global sample_idx sample = param_dict["sample"] comm = param_dict["comm"] @@ -187,11 +251,26 @@ def process_event(param_dict): dso_start = get_optional(param_dict, "dso_map_start") dso_end = get_optional(param_dict, "dso_map_end") symbol = get_optional(param_dict, "symbol") + map_pgoff = get_optional(param_dict, "map_pgoff") + # check for valid map offset + if (str(map_pgoff) == '[unknown]'): + map_pgoff = 0 cpu = sample["cpu"] ip = sample["ip"] addr = sample["addr"] + sample_idx += 1 + + if (options.start_time and sample["time"] < options.start_time): + return + if (options.stop_time and sample["time"] > options.stop_time): + exit(0) + if (options.start_sample and sample_idx < options.start_sample): + return + if (options.stop_sample and sample_idx > options.stop_sample): + exit(0) + if (options.verbose == True): print("Event type: %s" % name) print_sample(sample) @@ -243,9 +322,10 @@ def process_event(param_dict): # Record for previous sample packet cpu_data[str(cpu) + 'addr'] = addr - # Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4 - if (start_addr == 0 and stop_addr == 4): - print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu) + # Filter out zero start_address. Optionally identify CS_ETM_TRACE_ON packet + if (start_addr == 0): + if ((stop_addr == 4) and (options.verbose == True)): + print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu) return if (start_addr < int(dso_start) or start_addr > int(dso_end)): @@ -256,19 +336,20 @@ def process_event(param_dict): print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso)) return - if (options.objdump_name != None): + if (options.objdump != None): # It doesn't need to decrease virtual memory offset for disassembly # for kernel dso and executable file dso, so in this case we set # vm_start to zero. if (dso == "[kernel.kallsyms]" or dso_start == 0x400000): dso_vm_start = 0 + map_pgoff = 0 else: dso_vm_start = int(dso_start) dso_fname = get_dso_file_path(dso, dso_bid) if path.exists(dso_fname): - print_disam(dso_fname, dso_vm_start, start_addr, stop_addr) + print_disam(dso_fname, dso_vm_start, start_addr + map_pgoff, stop_addr + map_pgoff) else: - print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr)) + print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr + map_pgoff, stop_addr + map_pgoff)) print_srccode(comm, param_dict, sample, symbol, dso) diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build index 5671ee530019..ec4e1f034742 100644 --- a/tools/perf/tests/Build +++ b/tools/perf/tests/Build @@ -4,7 +4,6 @@ perf-test-y += builtin-test.o perf-test-y += tests-scripts.o perf-test-y += parse-events.o perf-test-y += dso-data.o -perf-test-y += attr.o perf-test-y += vmlinux-kallsyms.o perf-test-$(CONFIG_LIBTRACEEVENT) += openat-syscall.o perf-test-$(CONFIG_LIBTRACEEVENT) += openat-syscall-all-cpus.o @@ -67,12 +66,13 @@ perf-test-y += sigtrap.o perf-test-y += event_groups.o perf-test-y += symbols.o perf-test-y += util.o +perf-test-y += hwmon_pmu.o +perf-test-y += tool_pmu.o ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 arm arm64 powerpc)) perf-test-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o endif -CFLAGS_attr.o += -DBINDIR="BUILD_STR($(bindir_SQ))" -DPYTHON="BUILD_STR($(PYTHON_WORD))" CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))" CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c deleted file mode 100644 index 97e1bdd6ec0e..000000000000 --- a/tools/perf/tests/attr.c +++ /dev/null @@ -1,218 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * The struct perf_event_attr test support. - * - * This test is embedded inside into perf directly and is governed - * by the PERF_TEST_ATTR environment variable and hook inside - * sys_perf_event_open function. - * - * The general idea is to store 'struct perf_event_attr' details for - * each event created within single perf command. Each event details - * are stored into separate text file. Once perf command is finished - * these files can be checked for values we expect for command. - * - * Besides 'struct perf_event_attr' values we also store 'fd' and - * 'group_fd' values to allow checking for groups created. - * - * This all is triggered by setting PERF_TEST_ATTR environment variable. - * It must contain name of existing directory with access and write - * permissions. All the event text files are stored there. - */ - -#include <debug.h> -#include <errno.h> -#include <inttypes.h> -#include <stdlib.h> -#include <stdio.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <sys/param.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <unistd.h> -#include <subcmd/exec-cmd.h> -#include "event.h" -#include "util.h" -#include "tests.h" -#include "pmus.h" - -#define ENV "PERF_TEST_ATTR" - -static char *dir; -static bool ready; - -void test_attr__init(void) -{ - dir = getenv(ENV); - test_attr__enabled = (dir != NULL); -} - -#define BUFSIZE 1024 - -#define __WRITE_ASS(str, fmt, data) \ -do { \ - char buf[BUFSIZE]; \ - size_t size; \ - \ - size = snprintf(buf, BUFSIZE, #str "=%"fmt "\n", data); \ - if (1 != fwrite(buf, size, 1, file)) { \ - perror("test attr - failed to write event file"); \ - fclose(file); \ - return -1; \ - } \ - \ -} while (0) - -#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field) - -static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, - int fd, int group_fd, unsigned long flags) -{ - FILE *file; - char path[PATH_MAX]; - - if (!ready) - return 0; - - snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir, - attr->type, attr->config, fd); - - file = fopen(path, "w+"); - if (!file) { - perror("test attr - failed to open event file"); - return -1; - } - - if (fprintf(file, "[event-%d-%llu-%d]\n", - attr->type, attr->config, fd) < 0) { - perror("test attr - failed to write event file"); - fclose(file); - return -1; - } - - /* syscall arguments */ - __WRITE_ASS(fd, "d", fd); - __WRITE_ASS(group_fd, "d", group_fd); - __WRITE_ASS(cpu, "d", cpu.cpu); - __WRITE_ASS(pid, "d", pid); - __WRITE_ASS(flags, "lu", flags); - - /* struct perf_event_attr */ - WRITE_ASS(type, PRIu32); - WRITE_ASS(size, PRIu32); - WRITE_ASS(config, "llu"); - WRITE_ASS(sample_period, "llu"); - WRITE_ASS(sample_type, "llu"); - WRITE_ASS(read_format, "llu"); - WRITE_ASS(disabled, "d"); - WRITE_ASS(inherit, "d"); - WRITE_ASS(pinned, "d"); - WRITE_ASS(exclusive, "d"); - WRITE_ASS(exclude_user, "d"); - WRITE_ASS(exclude_kernel, "d"); - WRITE_ASS(exclude_hv, "d"); - WRITE_ASS(exclude_idle, "d"); - WRITE_ASS(mmap, "d"); - WRITE_ASS(comm, "d"); - WRITE_ASS(freq, "d"); - WRITE_ASS(inherit_stat, "d"); - WRITE_ASS(enable_on_exec, "d"); - WRITE_ASS(task, "d"); - WRITE_ASS(watermark, "d"); - WRITE_ASS(precise_ip, "d"); - WRITE_ASS(mmap_data, "d"); - WRITE_ASS(sample_id_all, "d"); - WRITE_ASS(exclude_host, "d"); - WRITE_ASS(exclude_guest, "d"); - WRITE_ASS(exclude_callchain_kernel, "d"); - WRITE_ASS(exclude_callchain_user, "d"); - WRITE_ASS(mmap2, "d"); - WRITE_ASS(comm_exec, "d"); - WRITE_ASS(context_switch, "d"); - WRITE_ASS(write_backward, "d"); - WRITE_ASS(namespaces, "d"); - WRITE_ASS(use_clockid, "d"); - WRITE_ASS(wakeup_events, PRIu32); - WRITE_ASS(bp_type, PRIu32); - WRITE_ASS(config1, "llu"); - WRITE_ASS(config2, "llu"); - WRITE_ASS(branch_sample_type, "llu"); - WRITE_ASS(sample_regs_user, "llu"); - WRITE_ASS(sample_stack_user, PRIu32); - - fclose(file); - return 0; -} - -void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, - int fd, int group_fd, unsigned long flags) -{ - int errno_saved = errno; - - if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) { - pr_err("test attr FAILED"); - exit(128); - } - - errno = errno_saved; -} - -void test_attr__ready(void) -{ - if (unlikely(test_attr__enabled) && !ready) - ready = true; -} - -static int run_dir(const char *d, const char *perf) -{ - char v[] = "-vvvvv"; - int vcnt = min(verbose, (int) sizeof(v) - 1); - char cmd[3*PATH_MAX]; - - if (verbose > 0) - vcnt++; - - scnprintf(cmd, 3*PATH_MAX, PYTHON " %s/attr.py -d %s/attr/ -p %s %.*s", - d, d, perf, vcnt, v); - - return system(cmd) ? TEST_FAIL : TEST_OK; -} - -static int test__attr(struct test_suite *test __maybe_unused, int subtest __maybe_unused) -{ - struct stat st; - char path_perf[PATH_MAX]; - char path_dir[PATH_MAX]; - char *exec_path; - - if (perf_pmus__num_core_pmus() > 1) { - /* - * TODO: Attribute tests hard code the PMU type. If there are >1 - * core PMU then each PMU will have a different type which - * requires additional support. - */ - pr_debug("Skip test on hybrid systems"); - return TEST_SKIP; - } - - /* First try development tree tests. */ - if (!lstat("./tests", &st)) - return run_dir("./tests", "./perf"); - - exec_path = get_argv_exec_path(); - if (exec_path == NULL) - return -1; - - /* Then installed path. */ - snprintf(path_dir, PATH_MAX, "%s/tests", exec_path); - snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR); - free(exec_path); - - if (!lstat(path_dir, &st) && - !lstat(path_perf, &st)) - return run_dir(path_dir, path_perf); - - return TEST_SKIP; -} - -DEFINE_SUITE("Setup struct perf_event_attr", attr); diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 470a9709427d..8dcf74d3c0a3 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -8,6 +8,7 @@ #include <errno.h> #include <poll.h> #include <unistd.h> +#include <setjmp.h> #include <string.h> #include <stdlib.h> #include <sys/types.h> @@ -39,11 +40,8 @@ * making them easier to debug. */ static bool dont_fork; -/* Don't fork the tests in parallel and wait for their completion. */ -static bool sequential = true; -/* Do it in parallel, lacks infrastructure to avoid running tests that clash for resources, - * So leave it as the developers choice to enable while working on the needed infra */ -static bool parallel; +/* Fork the tests in parallel and wait for their completion. */ +static bool sequential; const char *dso_to_test; const char *test_objdump_path = "objdump"; @@ -73,13 +71,14 @@ static struct test_suite *generic_tests[] = { &suite__PERF_RECORD, &suite__pmu, &suite__pmu_events, + &suite__hwmon_pmu, + &suite__tool_pmu, &suite__dso_data, &suite__perf_evsel__roundtrip_name_test, #ifdef HAVE_LIBTRACEEVENT &suite__perf_evsel__tp_sched_test, &suite__syscall_openat_tp_fields, #endif - &suite__attr, &suite__hists_link, &suite__python_use, &suite__bp_signal, @@ -139,12 +138,6 @@ static struct test_suite *generic_tests[] = { NULL, }; -static struct test_suite **tests[] = { - generic_tests, - arch_tests, - NULL, /* shell tests created at runtime. */ -}; - static struct test_workload *workloads[] = { &workload__noploop, &workload__thloop, @@ -155,6 +148,9 @@ static struct test_workload *workloads[] = { &workload__landlock, }; +#define workloads__for_each(workload) \ + for (unsigned i = 0; i < ARRAY_SIZE(workloads) && ({ workload = workloads[i]; 1; }); i++) + static int num_subtests(const struct test_suite *t) { int num; @@ -198,6 +194,14 @@ static test_fnptr test_function(const struct test_suite *t, int subtest) return t->test_cases[subtest].run_case; } +static bool test_exclusive(const struct test_suite *t, int subtest) +{ + if (subtest <= 0) + return t->test_cases[0].exclusive; + + return t->test_cases[subtest].exclusive; +} + static bool perf_test__matches(const char *desc, int curr, int argc, const char *argv[]) { int i; @@ -229,20 +233,47 @@ struct child_test { int subtest; }; +static jmp_buf run_test_jmp_buf; + +static void child_test_sig_handler(int sig) +{ + siglongjmp(run_test_jmp_buf, sig); +} + static int run_test_child(struct child_process *process) { + const int signals[] = { + SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGINT, SIGPIPE, SIGQUIT, SIGSEGV, SIGTERM, + }; struct child_test *child = container_of(process, struct child_test, process); int err; + err = sigsetjmp(run_test_jmp_buf, 1); + if (err) { + fprintf(stderr, "\n---- unexpected signal (%d) ----\n", err); + err = err > 0 ? -err : -1; + goto err_out; + } + + for (size_t i = 0; i < ARRAY_SIZE(signals); i++) + signal(signals[i], child_test_sig_handler); + pr_debug("--- start ---\n"); pr_debug("test child forked, pid %d\n", getpid()); err = test_function(child->test, child->subtest)(child->test, child->subtest); pr_debug("---- end(%d) ----\n", err); + +err_out: fflush(NULL); + for (size_t i = 0; i < ARRAY_SIZE(signals); i++) + signal(signals[i], SIG_DFL); return -err; } -static int print_test_result(struct test_suite *t, int i, int subtest, int result, int width) +#define TEST_RUNNING -3 + +static int print_test_result(struct test_suite *t, int i, int subtest, int result, int width, + int running) { if (has_subtests(t)) { int subw = width > 2 ? width - 2 : width; @@ -252,6 +283,9 @@ static int print_test_result(struct test_suite *t, int i, int subtest, int resul pr_info("%3d: %-*s:", i + 1, width, test_description(t, subtest)); switch (result) { + case TEST_RUNNING: + color_fprintf(stderr, PERF_COLOR_YELLOW, " Running (%d active)\n", running); + break; case TEST_OK: pr_info(" Ok\n"); break; @@ -273,16 +307,25 @@ static int print_test_result(struct test_suite *t, int i, int subtest, int resul return 0; } -static int finish_test(struct child_test *child_test, int width) +static void finish_test(struct child_test **child_tests, int running_test, int child_test_num, + int width) { - struct test_suite *t = child_test->test; - int i = child_test->test_num; - int subi = child_test->subtest; - int err = child_test->process.err; - bool err_done = err <= 0; + struct child_test *child_test = child_tests[running_test]; + struct test_suite *t; + int i, subi, err; + bool err_done = false; struct strbuf err_output = STRBUF_INIT; + int last_running = -1; int ret; + if (child_test == NULL) { + /* Test wasn't started. */ + return; + } + t = child_test->test; + i = child_test->test_num; + subi = child_test->subtest; + err = child_test->process.err; /* * For test suites with subtests, display the suite name ahead of the * sub test names. @@ -294,7 +337,7 @@ static int finish_test(struct child_test *child_test, int width) * Busy loop reading from the child's stdout/stderr that are set to be * non-blocking until EOF. */ - if (!err_done) + if (err > 0) fcntl(err, F_SETFL, O_NONBLOCK); if (verbose > 1) { if (has_subtests(t)) @@ -308,57 +351,90 @@ static int finish_test(struct child_test *child_test, int width) .events = POLLIN | POLLERR | POLLHUP | POLLNVAL, }, }; - char buf[512]; - ssize_t len; + if (perf_use_color_default) { + int running = 0; - /* Poll to avoid excessive spinning, timeout set for 100ms. */ - poll(pfds, ARRAY_SIZE(pfds), /*timeout=*/100); - if (!err_done && pfds[0].revents) { - errno = 0; - len = read(err, buf, sizeof(buf) - 1); + for (int y = running_test; y < child_test_num; y++) { + if (child_tests[y] == NULL) + continue; + if (check_if_command_finished(&child_tests[y]->process) == 0) + running++; + } + if (running != last_running) { + if (last_running != -1) { + /* + * Erase "Running (.. active)" line + * printed before poll/sleep. + */ + fprintf(debug_file(), PERF_COLOR_DELETE_LINE); + } + print_test_result(t, i, subi, TEST_RUNNING, width, running); + last_running = running; + } + } - if (len <= 0) { - err_done = errno != EAGAIN; - } else { - buf[len] = '\0'; - if (verbose > 1) - fprintf(stdout, "%s", buf); - else + err_done = true; + if (err <= 0) { + /* No child stderr to poll, sleep for 10ms for child to complete. */ + usleep(10 * 1000); + } else { + /* Poll to avoid excessive spinning, timeout set for 100ms. */ + poll(pfds, ARRAY_SIZE(pfds), /*timeout=*/100); + if (pfds[0].revents) { + char buf[512]; + ssize_t len; + + len = read(err, buf, sizeof(buf) - 1); + + if (len > 0) { + err_done = false; + buf[len] = '\0'; strbuf_addstr(&err_output, buf); + } } } + if (err_done) + err_done = check_if_command_finished(&child_test->process); + } + if (perf_use_color_default && last_running != -1) { + /* Erase "Running (.. active)" line printed before poll/sleep. */ + fprintf(debug_file(), PERF_COLOR_DELETE_LINE); } /* Clean up child process. */ ret = finish_command(&child_test->process); - if (verbose == 1 && ret == TEST_FAIL) { - /* Add header for test that was skipped above. */ - if (has_subtests(t)) - pr_info("%3d.%1d: %s:\n", i + 1, subi + 1, test_description(t, subi)); - else - pr_info("%3d: %s:\n", i + 1, test_description(t, -1)); + if (verbose > 1 || (verbose == 1 && ret == TEST_FAIL)) fprintf(stderr, "%s", err_output.buf); - } + strbuf_release(&err_output); - print_test_result(t, i, subi, ret, width); + print_test_result(t, i, subi, ret, width, /*running=*/0); if (err > 0) close(err); - return 0; + zfree(&child_tests[running_test]); } static int start_test(struct test_suite *test, int i, int subi, struct child_test **child, - int width) + int width, int pass) { int err; *child = NULL; if (dont_fork) { - pr_debug("--- start ---\n"); - err = test_function(test, subi)(test, subi); - pr_debug("---- end ----\n"); - print_test_result(test, i, subi, err, width); + if (pass == 1) { + pr_debug("--- start ---\n"); + err = test_function(test, subi)(test, subi); + pr_debug("---- end ----\n"); + print_test_result(test, i, subi, err, width, /*running=*/0); + } + return 0; + } + if (pass == 1 && !sequential && test_exclusive(test, subi)) { + /* When parallel, skip exclusive tests on the first pass. */ + return 0; + } + if (pass != 1 && (sequential || !test_exclusive(test, subi))) { + /* Sequential and non-exclusive tests were run on the first pass. */ return 0; } - *child = zalloc(sizeof(**child)); if (!*child) return -ENOMEM; @@ -377,35 +453,42 @@ static int start_test(struct test_suite *test, int i, int subi, struct child_tes (*child)->process.err = -1; } (*child)->process.no_exec_cmd = run_test_child; - err = start_command(&(*child)->process); - if (err || !sequential) - return err; - return finish_test(*child, width); + if (sequential || pass == 2) { + err = start_command(&(*child)->process); + if (err) + return err; + finish_test(child, /*running_test=*/0, /*child_test_num=*/1, width); + return 0; + } + return start_command(&(*child)->process); } -#define for_each_test(j, k, t) \ - for (j = 0, k = 0; j < ARRAY_SIZE(tests); j++, k = 0) \ - while ((t = tests[j][k++]) != NULL) +/* State outside of __cmd_test for the sake of the signal handler. */ + +static size_t num_tests; +static struct child_test **child_tests; +static jmp_buf cmd_test_jmp_buf; -static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) +static void cmd_test_sig_handler(int sig) { - struct test_suite *t; - unsigned int j, k; - int i = 0; - int width = 0; - size_t num_tests = 0; - struct child_test **child_tests; - int child_test_num = 0; + siglongjmp(cmd_test_jmp_buf, sig); +} + +static int __cmd_test(struct test_suite **suites, int argc, const char *argv[], + struct intlist *skiplist) +{ + static int width = 0; + int err = 0; - for_each_test(j, k, t) { - int len = strlen(test_description(t, -1)); + for (struct test_suite **t = suites; *t; t++) { + int len = strlen(test_description(*t, -1)); if (width < len) width = len; - if (has_subtests(t)) { - for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) { - len = strlen(test_description(t, subi)); + if (has_subtests(*t)) { + for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) { + len = strlen(test_description(*t, subi)); if (width < len) width = len; num_tests++; @@ -418,97 +501,137 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist) if (!child_tests) return -ENOMEM; - for_each_test(j, k, t) { - int curr = i++; - - if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) { - bool skip = true; + err = sigsetjmp(cmd_test_jmp_buf, 1); + if (err) { + pr_err("\nSignal (%d) while running tests.\nTerminating tests with the same signal\n", + err); + for (size_t x = 0; x < num_tests; x++) { + struct child_test *child_test = child_tests[x]; - for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) { - if (perf_test__matches(test_description(t, subi), - curr, argc, argv)) - skip = false; - } - - if (skip) + if (!child_test) continue; - } - if (intlist__find(skiplist, i)) { - pr_info("%3d: %-*s:", curr + 1, width, test_description(t, -1)); - color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); - continue; + pr_debug3("Killing %d pid %d\n", + child_test->test_num + 1, + child_test->process.pid); + kill(child_test->process.pid, err); } + goto err_out; + } + signal(SIGINT, cmd_test_sig_handler); + signal(SIGTERM, cmd_test_sig_handler); - if (!has_subtests(t)) { - int err = start_test(t, curr, -1, &child_tests[child_test_num++], width); + /* + * In parallel mode pass 1 runs non-exclusive tests in parallel, pass 2 + * runs the exclusive tests sequentially. In other modes all tests are + * run in pass 1. + */ + for (int pass = 1; pass <= 2; pass++) { + int child_test_num = 0; + int i = 0; + + for (struct test_suite **t = suites; *t; t++) { + int curr = i++; + + if (!perf_test__matches(test_description(*t, -1), curr, argc, argv)) { + /* + * Test suite shouldn't be run based on + * description. See if subtest should. + */ + bool skip = true; + + for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) { + if (perf_test__matches(test_description(*t, subi), + curr, argc, argv)) + skip = false; + } + + if (skip) + continue; + } - if (err) { - /* TODO: if !sequential waitpid the already forked children. */ - free(child_tests); - return err; + if (intlist__find(skiplist, i)) { + pr_info("%3d: %-*s:", curr + 1, width, test_description(*t, -1)); + color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (user override)\n"); + continue; } - } else { - for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) { - int err; - if (!perf_test__matches(test_description(t, subi), + if (!has_subtests(*t)) { + err = start_test(*t, curr, -1, &child_tests[child_test_num++], + width, pass); + if (err) + goto err_out; + continue; + } + for (int subi = 0, subn = num_subtests(*t); subi < subn; subi++) { + if (!perf_test__matches(test_description(*t, subi), curr, argc, argv)) continue; - err = start_test(t, curr, subi, &child_tests[child_test_num++], - width); + err = start_test(*t, curr, subi, &child_tests[child_test_num++], + width, pass); if (err) - return err; + goto err_out; } } - } - for (i = 0; i < child_test_num; i++) { if (!sequential) { - int ret = finish_test(child_tests[i], width); - - if (ret) - return ret; + /* Parallel mode starts tests but doesn't finish them. Do that now. */ + for (size_t x = 0; x < num_tests; x++) + finish_test(child_tests, x, num_tests, width); } - free(child_tests[i]); + } +err_out: + signal(SIGINT, SIG_DFL); + signal(SIGTERM, SIG_DFL); + if (err) { + pr_err("Internal test harness failure. Completing any started tests:\n:"); + for (size_t x = 0; x < num_tests; x++) + finish_test(child_tests, x, num_tests, width); } free(child_tests); - return 0; + return err; } -static int perf_test__list(int argc, const char **argv) +static int perf_test__list(struct test_suite **suites, int argc, const char **argv) { - unsigned int j, k; - struct test_suite *t; int i = 0; - for_each_test(j, k, t) { + for (struct test_suite **t = suites; *t; t++) { int curr = i++; - if (!perf_test__matches(test_description(t, -1), curr, argc, argv)) + if (!perf_test__matches(test_description(*t, -1), curr, argc, argv)) continue; - pr_info("%3d: %s\n", i, test_description(t, -1)); + pr_info("%3d: %s\n", i, test_description(*t, -1)); - if (has_subtests(t)) { - int subn = num_subtests(t); + if (has_subtests(*t)) { + int subn = num_subtests(*t); int subi; for (subi = 0; subi < subn; subi++) pr_info("%3d:%1d: %s\n", i, subi + 1, - test_description(t, subi)); + test_description(*t, subi)); } } return 0; } +static int workloads__fprintf_list(FILE *fp) +{ + struct test_workload *twl; + int printed = 0; + + workloads__for_each(twl) + printed += fprintf(fp, "%s\n", twl->name); + + return printed; +} + static int run_workload(const char *work, int argc, const char **argv) { - unsigned int i = 0; struct test_workload *twl; - for (i = 0; i < ARRAY_SIZE(workloads); i++) { - twl = workloads[i]; + workloads__for_each(twl) { if (!strcmp(twl->name, work)) return twl->func(argc, argv); } @@ -526,6 +649,55 @@ static int perf_test__config(const char *var, const char *value, return 0; } +static struct test_suite **build_suites(void) +{ + /* + * TODO: suites is static to avoid needing to clean up the scripts tests + * for leak sanitizer. + */ + static struct test_suite **suites[] = { + generic_tests, + arch_tests, + NULL, + }; + struct test_suite **result; + struct test_suite *t; + size_t n = 0, num_suites = 0; + + if (suites[2] == NULL) + suites[2] = create_script_test_suites(); + +#define for_each_test(t) \ + for (size_t i = 0, j = 0; i < ARRAY_SIZE(suites); i++, j = 0) \ + while ((t = suites[i][j++]) != NULL) + + for_each_test(t) + num_suites++; + + result = calloc(num_suites + 1, sizeof(struct test_suite *)); + + for (int pass = 1; pass <= 2; pass++) { + for_each_test(t) { + bool exclusive = false; + + if (!has_subtests(t)) { + exclusive = test_exclusive(t, -1); + } else { + for (int subi = 0, subn = num_subtests(t); subi < subn; subi++) { + if (test_exclusive(t, subi)) { + exclusive = true; + break; + } + } + } + if ((!exclusive && pass == 1) || (exclusive && pass == 2)) + result[n++] = t; + } + } + return result; +#undef for_each_test +} + int cmd_test(int argc, const char **argv) { const char *test_usage[] = { @@ -534,16 +706,17 @@ int cmd_test(int argc, const char **argv) }; const char *skip = NULL; const char *workload = NULL; + bool list_workloads = false; const struct option test_options[] = { OPT_STRING('s', "skip", &skip, "tests", "tests to skip"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('F', "dont-fork", &dont_fork, "Do not fork for testcase"), - OPT_BOOLEAN('p', "parallel", ¶llel, "Run the tests in parallel"), OPT_BOOLEAN('S', "sequential", &sequential, "Run the tests one after another rather than in parallel"), - OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"), + OPT_STRING('w', "workload", &workload, "work", "workload to run for testing, use '--list-workloads' to list the available ones."), + OPT_BOOLEAN(0, "list-workloads", &list_workloads, "List the available builtin workloads to use with -w/--workload"), OPT_STRING(0, "dso", &dso_to_test, "dso", "dso to test"), OPT_STRING(0, "objdump", &test_objdump_path, "path", "objdump binary to use for disassembly and annotations"), @@ -552,6 +725,7 @@ int cmd_test(int argc, const char **argv) const char * const test_subcommands[] = { "list", NULL }; struct intlist *skiplist = NULL; int ret = hists__init(); + struct test_suite **suites; if (ret < 0) return ret; @@ -561,22 +735,29 @@ int cmd_test(int argc, const char **argv) /* Unbuffered output */ setvbuf(stdout, NULL, _IONBF, 0); - tests[2] = create_script_test_suites(); argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0); - if (argc >= 1 && !strcmp(argv[0], "list")) - return perf_test__list(argc - 1, argv + 1); + if (argc >= 1 && !strcmp(argv[0], "list")) { + suites = build_suites(); + ret = perf_test__list(suites, argc - 1, argv + 1); + free(suites); + return ret; + } if (workload) return run_workload(workload, argc, argv); + if (list_workloads) { + workloads__fprintf_list(stdout); + return 0; + } + if (dont_fork) sequential = true; - else if (parallel) - sequential = false; symbol_conf.priv_size = sizeof(int); symbol_conf.try_vmlinux_path = true; + if (symbol__init(NULL) < 0) return -1; @@ -588,5 +769,8 @@ int cmd_test(int argc, const char **argv) */ rlimit__bump_memlock(); - return __cmd_test(argc, argv, skiplist); + suites = build_suites(); + ret = __cmd_test(suites, argc, argv, skiplist); + free(suites); + return ret; } diff --git a/tools/perf/tests/demangle-java-test.c b/tools/perf/tests/demangle-java-test.c index 44d1be303b67..93c94408bdc8 100644 --- a/tools/perf/tests/demangle-java-test.c +++ b/tools/perf/tests/demangle-java-test.c @@ -2,6 +2,7 @@ #include <string.h> #include <stdlib.h> #include <stdio.h> +#include <linux/kernel.h> #include "tests.h" #include "session.h" #include "debug.h" @@ -28,7 +29,7 @@ static int test__demangle_java(struct test_suite *test __maybe_unused, int subte "void java.lang.Object<init>()" }, }; - for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) { + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { buf = java_demangle_sym(test_cases[i].mangled, 0); if (strcmp(buf, test_cases[i].demangled)) { pr_debug("FAILED: %s: %s != %s\n", test_cases[i].mangled, diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c index e155f0e0e04d..deefe5003bfc 100644 --- a/tools/perf/tests/event-times.c +++ b/tools/perf/tests/event-times.c @@ -126,6 +126,7 @@ static int attach__cpu_disabled(struct evlist *evlist) evsel->core.attr.disabled = 1; err = evsel__open_per_cpu(evsel, cpus, -1); + perf_cpu_map__put(cpus); if (err) { if (err == -EACCES) return TEST_SKIP; @@ -134,7 +135,6 @@ static int attach__cpu_disabled(struct evlist *evlist) return err; } - perf_cpu_map__put(cpus); return evsel__enable(evsel); } @@ -153,10 +153,10 @@ static int attach__cpu_enabled(struct evlist *evlist) } err = evsel__open_per_cpu(evsel, cpus, -1); + perf_cpu_map__put(cpus); if (err == -EACCES) return TEST_SKIP; - perf_cpu_map__put(cpus); return err ? TEST_FAIL : TEST_OK; } @@ -188,6 +188,7 @@ static int test_times(int (attach)(struct evlist *), err = attach(evlist); if (err == TEST_SKIP) { pr_debug(" SKIP : not enough rights\n"); + evlist__delete(evlist); return err; } diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c index cf4da3d748c2..226196fb9677 100644 --- a/tools/perf/tests/evsel-tp-sched.c +++ b/tools/perf/tests/evsel-tp-sched.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/err.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include "evsel.h" #include "tests.h" #include "debug.h" @@ -36,33 +36,33 @@ static int test__perf_evsel__tp_sched_test(struct test_suite *test __maybe_unuse int subtest __maybe_unused) { struct evsel *evsel = evsel__newtp("sched", "sched_switch"); - int ret = 0; + int ret = TEST_OK; if (IS_ERR(evsel)) { pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel)); - return -1; + return PTR_ERR(evsel) == -EACCES ? TEST_SKIP : TEST_FAIL; } if (evsel__test_field(evsel, "prev_comm", 16, false)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "prev_pid", 4, true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "prev_prio", 4, true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "prev_state", sizeof(long), true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "next_comm", 16, false)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "next_pid", 4, true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "next_prio", 4, true)) - ret = -1; + ret = TEST_FAIL; evsel__delete(evsel); @@ -70,23 +70,33 @@ static int test__perf_evsel__tp_sched_test(struct test_suite *test __maybe_unuse if (IS_ERR(evsel)) { pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel)); - return -1; + return TEST_FAIL; } if (evsel__test_field(evsel, "comm", 16, false)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "pid", 4, true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "prio", 4, true)) - ret = -1; + ret = TEST_FAIL; if (evsel__test_field(evsel, "target_cpu", 4, true)) - ret = -1; + ret = TEST_FAIL; evsel__delete(evsel); return ret; } -DEFINE_SUITE("Parse sched tracepoints fields", perf_evsel__tp_sched_test); +static struct test_case tests__perf_evsel__tp_sched_test[] = { + TEST_CASE_REASON("Parse sched tracepoints fields", + perf_evsel__tp_sched_test, + "permissions"), + { .name = NULL, } +}; + +struct test_suite suite__perf_evsel__tp_sched_test = { + .desc = "Parse sched tracepoints fields", + .test_cases = tests__perf_evsel__tp_sched_test, +}; diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index e3aa9d4fcf3a..41ff1affdfcd 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -6,6 +6,7 @@ #include "util/header.h" #include "util/smt.h" #include "tests.h" +#include <perf/cpumap.h> #include <math.h> #include <stdlib.h> #include <string.h> @@ -76,8 +77,8 @@ static int test__expr(struct test_suite *t __maybe_unused, int subtest __maybe_u struct expr_parse_ctx *ctx; bool is_intel = false; char strcmp_cpuid_buf[256]; - struct perf_pmu *pmu = perf_pmus__find_core_pmu(); - char *cpuid = perf_pmu__getcpuid(pmu); + struct perf_cpu cpu = {-1}; + char *cpuid = get_cpuid_allow_env_override(cpu); char *escaped_cpuid1, *escaped_cpuid2; TEST_ASSERT_VAL("get_cpuid", cpuid); diff --git a/tools/perf/tests/hwmon_pmu.c b/tools/perf/tests/hwmon_pmu.c new file mode 100644 index 000000000000..f8bcee9660d5 --- /dev/null +++ b/tools/perf/tests/hwmon_pmu.c @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "debug.h" +#include "evlist.h" +#include "hwmon_pmu.h" +#include "parse-events.h" +#include "tests.h" +#include <fcntl.h> +#include <sys/stat.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/string.h> + +static const struct test_event { + const char *name; + const char *alias; + long config; +} test_events[] = { + { + "temp_test_hwmon_event1", + "temp1", + 0xA0001, + }, + { + "temp_test_hwmon_event2", + "temp2", + 0xA0002, + }, +}; + +/* Cleanup test PMU directory. */ +static int test_pmu_put(const char *dir, struct perf_pmu *hwm) +{ + char buf[PATH_MAX + 20]; + int ret; + + if (scnprintf(buf, sizeof(buf), "rm -fr %s", dir) < 0) { + pr_err("Failure to set up buffer for \"%s\"\n", dir); + return -EINVAL; + } + ret = system(buf); + if (ret) + pr_err("Failure to \"%s\"\n", buf); + + list_del(&hwm->list); + perf_pmu__delete(hwm); + return ret; +} + +/* + * Prepare test PMU directory data, normally exported by kernel at + * /sys/class/hwmon/hwmon<number>/. Give as input a buffer to hold the file + * path, the result is PMU loaded using that directory. + */ +static struct perf_pmu *test_pmu_get(char *dir, size_t sz) +{ + const char *test_hwmon_name_nl = "A test hwmon PMU\n"; + const char *test_hwmon_name = "A test hwmon PMU"; + /* Simulated hwmon items. */ + const struct test_item { + const char *name; + const char *value; + } test_items[] = { + { "temp1_label", "test hwmon event1\n", }, + { "temp1_input", "40000\n", }, + { "temp2_label", "test hwmon event2\n", }, + { "temp2_input", "50000\n", }, + }; + int dirfd, file; + struct perf_pmu *hwm = NULL; + ssize_t len; + + /* Create equivalent of sysfs mount point. */ + scnprintf(dir, sz, "/tmp/perf-hwmon-pmu-test-XXXXXX"); + if (!mkdtemp(dir)) { + pr_err("mkdtemp failed\n"); + dir[0] = '\0'; + return NULL; + } + dirfd = open(dir, O_DIRECTORY); + if (dirfd < 0) { + pr_err("Failed to open test directory \"%s\"\n", dir); + goto err_out; + } + + /* Create the test hwmon directory and give it a name. */ + if (mkdirat(dirfd, "hwmon1234", 0755) < 0) { + pr_err("Failed to mkdir hwmon directory\n"); + goto err_out; + } + file = openat(dirfd, "hwmon1234/name", O_WRONLY | O_CREAT, 0600); + if (!file) { + pr_err("Failed to open for writing file \"name\"\n"); + goto err_out; + } + len = strlen(test_hwmon_name_nl); + if (write(file, test_hwmon_name_nl, len) < len) { + close(file); + pr_err("Failed to write to 'name' file\n"); + goto err_out; + } + close(file); + + /* Create test hwmon files. */ + for (size_t i = 0; i < ARRAY_SIZE(test_items); i++) { + const struct test_item *item = &test_items[i]; + + file = openat(dirfd, item->name, O_WRONLY | O_CREAT, 0600); + if (!file) { + pr_err("Failed to open for writing file \"%s\"\n", item->name); + goto err_out; + } + + if (write(file, item->value, strlen(item->value)) < 0) { + pr_err("Failed to write to file \"%s\"\n", item->name); + close(file); + goto err_out; + } + close(file); + } + + /* Make the PMU reading the files created above. */ + hwm = perf_pmus__add_test_hwmon_pmu(dirfd, "hwmon1234", test_hwmon_name); + if (!hwm) + pr_err("Test hwmon creation failed\n"); + +err_out: + if (!hwm) { + test_pmu_put(dir, hwm); + if (dirfd >= 0) + close(dirfd); + } + return hwm; +} + +static int do_test(size_t i, bool with_pmu, bool with_alias) +{ + const char *test_event = with_alias ? test_events[i].alias : test_events[i].name; + struct evlist *evlist = evlist__new(); + struct evsel *evsel; + struct parse_events_error err; + int ret; + char str[128]; + bool found = false; + + if (!evlist) { + pr_err("evlist allocation failed\n"); + return TEST_FAIL; + } + + if (with_pmu) + snprintf(str, sizeof(str), "hwmon_a_test_hwmon_pmu/%s/", test_event); + else + strlcpy(str, test_event, sizeof(str)); + + pr_debug("Testing '%s'\n", str); + parse_events_error__init(&err); + ret = parse_events(evlist, str, &err); + if (ret) { + pr_debug("FAILED %s:%d failed to parse event '%s', err %d\n", + __FILE__, __LINE__, str, ret); + parse_events_error__print(&err, str); + ret = TEST_FAIL; + goto out; + } + + ret = TEST_OK; + if (with_pmu ? (evlist->core.nr_entries != 1) : (evlist->core.nr_entries < 1)) { + pr_debug("FAILED %s:%d Unexpected number of events for '%s' of %d\n", + __FILE__, __LINE__, str, evlist->core.nr_entries); + ret = TEST_FAIL; + goto out; + } + + evlist__for_each_entry(evlist, evsel) { + if (!evsel->pmu || !evsel->pmu->name || + strcmp(evsel->pmu->name, "hwmon_a_test_hwmon_pmu")) + continue; + + if (evsel->core.attr.config != (u64)test_events[i].config) { + pr_debug("FAILED %s:%d Unexpected config for '%s', %lld != %ld\n", + __FILE__, __LINE__, str, + evsel->core.attr.config, + test_events[i].config); + ret = TEST_FAIL; + goto out; + } + found = true; + } + + if (!found) { + pr_debug("FAILED %s:%d Didn't find hwmon event '%s' in parsed evsels\n", + __FILE__, __LINE__, str); + ret = TEST_FAIL; + } + +out: + parse_events_error__exit(&err); + evlist__delete(evlist); + return ret; +} + +static int test__hwmon_pmu(bool with_pmu) +{ + char dir[PATH_MAX]; + struct perf_pmu *pmu = test_pmu_get(dir, sizeof(dir)); + int ret = TEST_OK; + + if (!pmu) + return TEST_FAIL; + + for (size_t i = 0; i < ARRAY_SIZE(test_events); i++) { + ret = do_test(i, with_pmu, /*with_alias=*/false); + + if (ret != TEST_OK) + break; + + ret = do_test(i, with_pmu, /*with_alias=*/true); + + if (ret != TEST_OK) + break; + } + test_pmu_put(dir, pmu); + return ret; +} + +static int test__hwmon_pmu_without_pmu(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + return test__hwmon_pmu(/*with_pmu=*/false); +} + +static int test__hwmon_pmu_with_pmu(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + return test__hwmon_pmu(/*with_pmu=*/true); +} + +static int test__parse_hwmon_filename(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + const struct hwmon_parse_test { + const char *filename; + enum hwmon_type type; + int number; + enum hwmon_item item; + bool alarm; + bool parse_ok; + } tests[] = { + { + .filename = "cpu0_accuracy", + .type = HWMON_TYPE_CPU, + .number = 0, + .item = HWMON_ITEM_ACCURACY, + .alarm = false, + .parse_ok = true, + }, + { + .filename = "temp1_input", + .type = HWMON_TYPE_TEMP, + .number = 1, + .item = HWMON_ITEM_INPUT, + .alarm = false, + .parse_ok = true, + }, + { + .filename = "fan2_vid", + .type = HWMON_TYPE_FAN, + .number = 2, + .item = HWMON_ITEM_VID, + .alarm = false, + .parse_ok = true, + }, + { + .filename = "power3_crit_alarm", + .type = HWMON_TYPE_POWER, + .number = 3, + .item = HWMON_ITEM_CRIT, + .alarm = true, + .parse_ok = true, + }, + { + .filename = "intrusion4_average_interval_min_alarm", + .type = HWMON_TYPE_INTRUSION, + .number = 4, + .item = HWMON_ITEM_AVERAGE_INTERVAL_MIN, + .alarm = true, + .parse_ok = true, + }, + { + .filename = "badtype5_baditem", + .type = HWMON_TYPE_NONE, + .number = 5, + .item = HWMON_ITEM_NONE, + .alarm = false, + .parse_ok = false, + }, + { + .filename = "humidity6_baditem", + .type = HWMON_TYPE_NONE, + .number = 6, + .item = HWMON_ITEM_NONE, + .alarm = false, + .parse_ok = false, + }, + }; + + for (size_t i = 0; i < ARRAY_SIZE(tests); i++) { + enum hwmon_type type; + int number; + enum hwmon_item item; + bool alarm; + + TEST_ASSERT_EQUAL("parse_hwmon_filename", + parse_hwmon_filename( + tests[i].filename, + &type, + &number, + &item, + &alarm), + tests[i].parse_ok + ); + if (tests[i].parse_ok) { + TEST_ASSERT_EQUAL("parse_hwmon_filename type", type, tests[i].type); + TEST_ASSERT_EQUAL("parse_hwmon_filename number", number, tests[i].number); + TEST_ASSERT_EQUAL("parse_hwmon_filename item", item, tests[i].item); + TEST_ASSERT_EQUAL("parse_hwmon_filename alarm", alarm, tests[i].alarm); + } + } + return TEST_OK; +} + +static struct test_case tests__hwmon_pmu[] = { + TEST_CASE("Basic parsing test", parse_hwmon_filename), + TEST_CASE("Parsing without PMU name", hwmon_pmu_without_pmu), + TEST_CASE("Parsing with PMU name", hwmon_pmu_with_pmu), + { .name = NULL, } +}; + +struct test_suite suite__hwmon_pmu = { + .desc = "Hwmon PMU", + .test_cases = tests__hwmon_pmu, +}; diff --git a/tools/perf/tests/make b/tools/perf/tests/make index a5040772043f..a7fcbd589752 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -81,7 +81,7 @@ make_no_gtk2 := NO_GTK2=1 make_no_ui := NO_SLANG=1 NO_GTK2=1 make_no_demangle := NO_DEMANGLE=1 make_no_libelf := NO_LIBELF=1 -make_no_libunwind := NO_LIBUNWIND=1 +make_libunwind := LIBUNWIND=1 make_no_libdw_dwarf_unwind := NO_LIBDW_DWARF_UNWIND=1 make_no_backtrace := NO_BACKTRACE=1 make_no_libcapstone := NO_CAPSTONE=1 @@ -121,7 +121,7 @@ make_static := LDFLAGS=-static NO_PERF_READ_VDSO32=1 NO_PERF_READ_VDSOX3 # all the NO_* variable combined make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_GTK2=1 -make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 +make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1 make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1 @@ -153,7 +153,7 @@ run += make_no_gtk2 run += make_no_ui run += make_no_demangle run += make_no_libelf -run += make_no_libunwind +run += make_libunwind run += make_no_libdw_dwarf_unwind run += make_no_backtrace run += make_no_libcapstone diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 888df8eca981..3943da441979 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -40,7 +40,7 @@ static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused int flags = O_RDONLY | O_DIRECTORY; struct evlist *evlist = evlist__new(); struct evsel *evsel; - int err = -1, i, nr_events = 0, nr_polls = 0; + int ret = TEST_FAIL, err, i, nr_events = 0, nr_polls = 0; char sbuf[STRERR_BUFSIZE]; if (evlist == NULL) { @@ -51,6 +51,7 @@ static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused evsel = evsel__newtp("syscalls", "sys_enter_openat"); if (IS_ERR(evsel)) { pr_debug("%s: evsel__newtp\n", __func__); + ret = PTR_ERR(evsel) == -EACCES ? TEST_SKIP : TEST_FAIL; goto out_delete_evlist; } @@ -138,11 +139,21 @@ static int test__syscall_openat_tp_fields(struct test_suite *test __maybe_unused } } out_ok: - err = 0; + ret = TEST_OK; out_delete_evlist: evlist__delete(evlist); out: - return err; + return ret; } -DEFINE_SUITE("syscalls:sys_enter_openat event fields", syscall_openat_tp_fields); +static struct test_case tests__syscall_openat_tp_fields[] = { + TEST_CASE_REASON("syscalls:sys_enter_openat event fields", + syscall_openat_tp_fields, + "permissions"), + { .name = NULL, } +}; + +struct test_suite suite__syscall_openat_tp_fields = { + .desc = "syscalls:sys_enter_openat event fields", + .test_cases = tests__syscall_openat_tp_fields, +}; diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 9e3086d02150..82a19674a38f 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -730,7 +730,7 @@ static int test__checkevent_pmu_events(struct evlist *evlist) TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries); TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type || - strcmp(evsel->pmu_name, "cpu")); + strcmp(evsel->pmu->name, "cpu")); TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", @@ -898,8 +898,7 @@ static int test__group1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - /* use of precise requires exclude_guest */ - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); @@ -932,7 +931,7 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); @@ -947,7 +946,7 @@ static int test__group2(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); if (evsel__has_leader(evsel, leader)) @@ -1016,9 +1015,8 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - /* use of precise requires exclude_guest */ TEST_ASSERT_VAL("wrong exclude guest", - evsel->core.attr.exclude_guest); + !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", @@ -1072,7 +1070,7 @@ static int test__group3(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); @@ -1103,8 +1101,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - /* use of precise requires exclude_guest */ - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 1); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); @@ -1122,8 +1119,7 @@ static int test__group4(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - /* use of precise requires exclude_guest */ - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); @@ -1222,7 +1218,7 @@ static int test__group5(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel)); @@ -1437,7 +1433,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); @@ -1453,7 +1449,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong leader", evsel__has_leader(evsel, leader)); @@ -1468,7 +1464,7 @@ static int test__leader_sample1(struct evlist *evlist) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); @@ -1497,7 +1493,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); @@ -1513,7 +1509,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused) TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user); TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel); TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv); - TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest); TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host); TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip); TEST_ASSERT_VAL("wrong group name", !evsel->group_name); diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index be18506f6a24..6a681e3fb552 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -176,7 +176,8 @@ static int test__pmu_format(struct test_suite *test __maybe_unused, int subtest } memset(&attr, 0, sizeof(attr)); - ret = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/false, /*err=*/NULL); + ret = perf_pmu__config_terms(pmu, &attr, &terms, /*zero=*/false, + /*apply_hardcoded=*/false, /*err=*/NULL); if (ret) { pr_err("perf_pmu__config_terms failed"); goto err_out; diff --git a/tools/perf/tests/shell/annotate.sh b/tools/perf/tests/shell/annotate.sh index 2ccf4f1d46b6..1590a37363de 100755 --- a/tools/perf/tests/shell/annotate.sh +++ b/tools/perf/tests/shell/annotate.sh @@ -44,7 +44,7 @@ test_basic() { fi # Generate the annotated output file - perf annotate -i "${perfdata}" --stdio 2> /dev/null > "${perfout}" + perf annotate --no-demangle -i "${perfdata}" --stdio 2> /dev/null | head -250 > "${perfout}" # check if it has the target symbol if ! grep "${testsym}" "${perfout}" @@ -63,8 +63,8 @@ test_basic() { fi # check again with a target symbol name - if ! perf annotate -i "${perfdata}" "${testsym}" 2> /dev/null | \ - grep -m 3 "${disasm_regex}" + if ! perf annotate --no-demangle -i "${perfdata}" "${testsym}" 2> /dev/null | \ + head -250 | grep -m 3 "${disasm_regex}" then echo "Basic annotate [Failed: missing disasm output when specifying the target symbol]" err=1 @@ -72,8 +72,8 @@ test_basic() { fi # check one more with external objdump tool (forced by --objdump option) - if ! perf annotate -i "${perfdata}" --objdump=objdump 2> /dev/null | \ - grep -m 3 "${disasm_regex}" + if ! perf annotate --no-demangle -i "${perfdata}" --objdump=objdump 2> /dev/null | \ + head -250 | grep -m 3 "${disasm_regex}" then echo "Basic annotate [Failed: missing disasm output from non default disassembler (using --objdump)]" err=1 diff --git a/tools/perf/tests/shell/attr.sh b/tools/perf/tests/shell/attr.sh new file mode 100755 index 000000000000..5a4e43b2471d --- /dev/null +++ b/tools/perf/tests/shell/attr.sh @@ -0,0 +1,22 @@ +#!/bin/bash +# Perf attribute expectations test +# SPDX-License-Identifier: GPL-2.0 + +err=0 + +cleanup() { + trap - EXIT TERM INT +} + +trap_cleanup() { + echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup + exit 1 +} +trap trap_cleanup EXIT TERM INT + +shelldir=$(dirname "$0") +perf_path=$(which perf) +python "${shelldir}"/lib/attr.py -d "${shelldir}"/attr -v -p "$perf_path" +cleanup +exit $err diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/shell/attr/README index 4066fec7180a..67c4ca76b85d 100644 --- a/tools/perf/tests/attr/README +++ b/tools/perf/tests/shell/attr/README @@ -51,6 +51,8 @@ Following tests are defined (with perf commands): perf record --call-graph fp kill (test-record-graph-fp-aarch64) perf record -e '{cycles,instructions}' kill (test-record-group1) perf record -e '{cycles/period=1/,instructions/period=2/}:S' kill (test-record-group2) + perf record -e '{cycles,cache-misses}:S' kill (test-record-group-sampling1) + perf record -c 10000 -e '{cycles,cache-misses}:S' kill (test-record-group-sampling2) perf record -D kill (test-record-no-delay) perf record -i kill (test-record-no-inherit) perf record -n kill (test-record-no-samples) diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/shell/attr/base-record index b44e4e6e4443..b44e4e6e4443 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/shell/attr/base-record diff --git a/tools/perf/tests/attr/base-record-spe b/tools/perf/tests/shell/attr/base-record-spe index 08fa96b59240..08fa96b59240 100644 --- a/tools/perf/tests/attr/base-record-spe +++ b/tools/perf/tests/shell/attr/base-record-spe diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/shell/attr/base-stat index fccd8ec4d1b0..fccd8ec4d1b0 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/shell/attr/base-stat diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/shell/attr/system-wide-dummy index a1e1d6a263bf..a1e1d6a263bf 100644 --- a/tools/perf/tests/attr/system-wide-dummy +++ b/tools/perf/tests/shell/attr/system-wide-dummy diff --git a/tools/perf/tests/attr/test-record-C0 b/tools/perf/tests/shell/attr/test-record-C0 index 198e8429a1bf..1049ac8b52f2 100644 --- a/tools/perf/tests/attr/test-record-C0 +++ b/tools/perf/tests/shell/attr/test-record-C0 @@ -18,5 +18,7 @@ sample_type=65927 mmap=0 comm=0 task=0 +inherit=0 [event:system-wide-dummy] +inherit=0 diff --git a/tools/perf/tests/attr/test-record-basic b/tools/perf/tests/shell/attr/test-record-basic index b0ca42a5ecc9..b0ca42a5ecc9 100644 --- a/tools/perf/tests/attr/test-record-basic +++ b/tools/perf/tests/shell/attr/test-record-basic diff --git a/tools/perf/tests/attr/test-record-branch-any b/tools/perf/tests/shell/attr/test-record-branch-any index 1a99b3ce6b89..1a99b3ce6b89 100644 --- a/tools/perf/tests/attr/test-record-branch-any +++ b/tools/perf/tests/shell/attr/test-record-branch-any diff --git a/tools/perf/tests/attr/test-record-branch-filter-any b/tools/perf/tests/shell/attr/test-record-branch-filter-any index 709768b508c6..709768b508c6 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-any diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_call b/tools/perf/tests/shell/attr/test-record-branch-filter-any_call index f943221f7825..f943221f7825 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any_call +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-any_call diff --git a/tools/perf/tests/attr/test-record-branch-filter-any_ret b/tools/perf/tests/shell/attr/test-record-branch-filter-any_ret index fd4f5b4154a9..fd4f5b4154a9 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-any_ret +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-any_ret diff --git a/tools/perf/tests/attr/test-record-branch-filter-hv b/tools/perf/tests/shell/attr/test-record-branch-filter-hv index 4e52d685ebe1..4e52d685ebe1 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-hv +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-hv diff --git a/tools/perf/tests/attr/test-record-branch-filter-ind_call b/tools/perf/tests/shell/attr/test-record-branch-filter-ind_call index e08c6ab3796e..e08c6ab3796e 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-ind_call +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-ind_call diff --git a/tools/perf/tests/attr/test-record-branch-filter-k b/tools/perf/tests/shell/attr/test-record-branch-filter-k index b4b98f84fc2f..b4b98f84fc2f 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-k +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-k diff --git a/tools/perf/tests/attr/test-record-branch-filter-u b/tools/perf/tests/shell/attr/test-record-branch-filter-u index fb9610edbb0d..fb9610edbb0d 100644 --- a/tools/perf/tests/attr/test-record-branch-filter-u +++ b/tools/perf/tests/shell/attr/test-record-branch-filter-u diff --git a/tools/perf/tests/attr/test-record-count b/tools/perf/tests/shell/attr/test-record-count index 5e9b9019d786..5e9b9019d786 100644 --- a/tools/perf/tests/attr/test-record-count +++ b/tools/perf/tests/shell/attr/test-record-count diff --git a/tools/perf/tests/attr/test-record-data b/tools/perf/tests/shell/attr/test-record-data index a99bb13149c2..a99bb13149c2 100644 --- a/tools/perf/tests/attr/test-record-data +++ b/tools/perf/tests/shell/attr/test-record-data diff --git a/tools/perf/tests/attr/test-record-dummy-C0 b/tools/perf/tests/shell/attr/test-record-dummy-C0 index 576ec48b3aaf..91499405fff4 100644 --- a/tools/perf/tests/attr/test-record-dummy-C0 +++ b/tools/perf/tests/shell/attr/test-record-dummy-C0 @@ -19,7 +19,7 @@ sample_period=4000 sample_type=391 read_format=4|20 disabled=0 -inherit=1 +inherit=0 pinned=0 exclusive=0 exclude_user=0 @@ -37,7 +37,7 @@ precise_ip=0 mmap_data=0 sample_id_all=1 exclude_host=0 -exclude_guest=1 +exclude_guest=0 exclude_callchain_kernel=0 exclude_callchain_user=0 mmap2=1 diff --git a/tools/perf/tests/attr/test-record-freq b/tools/perf/tests/shell/attr/test-record-freq index 89e29f6b2ae0..89e29f6b2ae0 100644 --- a/tools/perf/tests/attr/test-record-freq +++ b/tools/perf/tests/shell/attr/test-record-freq diff --git a/tools/perf/tests/attr/test-record-graph-default b/tools/perf/tests/shell/attr/test-record-graph-default index f0a18b4ea4f5..f0a18b4ea4f5 100644 --- a/tools/perf/tests/attr/test-record-graph-default +++ b/tools/perf/tests/shell/attr/test-record-graph-default diff --git a/tools/perf/tests/attr/test-record-graph-default-aarch64 b/tools/perf/tests/shell/attr/test-record-graph-default-aarch64 index e98d62efb6f7..e98d62efb6f7 100644 --- a/tools/perf/tests/attr/test-record-graph-default-aarch64 +++ b/tools/perf/tests/shell/attr/test-record-graph-default-aarch64 diff --git a/tools/perf/tests/attr/test-record-graph-dwarf b/tools/perf/tests/shell/attr/test-record-graph-dwarf index ae92061d611d..ae92061d611d 100644 --- a/tools/perf/tests/attr/test-record-graph-dwarf +++ b/tools/perf/tests/shell/attr/test-record-graph-dwarf diff --git a/tools/perf/tests/attr/test-record-graph-fp b/tools/perf/tests/shell/attr/test-record-graph-fp index a6e60e839205..a6e60e839205 100644 --- a/tools/perf/tests/attr/test-record-graph-fp +++ b/tools/perf/tests/shell/attr/test-record-graph-fp diff --git a/tools/perf/tests/attr/test-record-graph-fp-aarch64 b/tools/perf/tests/shell/attr/test-record-graph-fp-aarch64 index cbeea9971285..cbeea9971285 100644 --- a/tools/perf/tests/attr/test-record-graph-fp-aarch64 +++ b/tools/perf/tests/shell/attr/test-record-graph-fp-aarch64 diff --git a/tools/perf/tests/attr/test-record-group-sampling b/tools/perf/tests/shell/attr/test-record-group-sampling index 97e7e64a38f0..86a940d7895d 100644 --- a/tools/perf/tests/attr/test-record-group-sampling +++ b/tools/perf/tests/shell/attr/test-record-group-sampling @@ -2,6 +2,7 @@ command = record args = --no-bpf-event -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1 ret = 1 +kernel_until = 6.12 [event-1:base-record] fd=1 @@ -18,7 +19,7 @@ group_fd=1 type=0 config=3 -# default | PERF_SAMPLE_READ +# default | PERF_SAMPLE_READ | PERF_SAMPLE_PERIOD sample_type=343 # PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST diff --git a/tools/perf/tests/shell/attr/test-record-group-sampling1 b/tools/perf/tests/shell/attr/test-record-group-sampling1 new file mode 100644 index 000000000000..4748ab7bf684 --- /dev/null +++ b/tools/perf/tests/shell/attr/test-record-group-sampling1 @@ -0,0 +1,50 @@ +[config] +command = record +args = --no-bpf-event -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1 +ret = 1 +kernel_since = 6.12 + +[event-1:base-record] +fd=1 +group_fd=-1 + +# cycles +type=0 +config=0 + +# default | PERF_SAMPLE_READ | PERF_SAMPLE_PERIOD +sample_type=343 + +# PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING +read_format=28|31 +task=1 +mmap=1 +comm=1 +enable_on_exec=1 +disabled=1 + +# inherit is enabled for group sampling +inherit=1 + +[event-2:base-record] +fd=2 +group_fd=1 + +# cache-misses +type=0 +config=3 + +# default | PERF_SAMPLE_READ | PERF_SAMPLE_PERIOD +sample_type=343 + +# PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING +read_format=28|31 +task=0 +mmap=0 +comm=0 +enable_on_exec=0 +disabled=0 +freq=0 + +# inherit is enabled for group sampling +inherit=1 diff --git a/tools/perf/tests/shell/attr/test-record-group-sampling2 b/tools/perf/tests/shell/attr/test-record-group-sampling2 new file mode 100644 index 000000000000..e0432244a0eb --- /dev/null +++ b/tools/perf/tests/shell/attr/test-record-group-sampling2 @@ -0,0 +1,61 @@ +[config] +command = record +args = --no-bpf-event -c 10000 -e '{cycles,cache-misses}:S' kill >/dev/null 2>&1 +ret = 1 +kernel_since = 6.12 + +[event-1:base-record] +fd=1 +group_fd=-1 + +# cycles +type=0 +config=0 + +# default | PERF_SAMPLE_READ +sample_type=87 + +# PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING +read_format=28|31 +task=1 +mmap=1 +comm=1 +enable_on_exec=1 +disabled=1 + +# inherit is enabled for group sampling +inherit=1 + +# sampling disabled +sample_freq=0 +sample_period=10000 +freq=0 +write_backward=0 + +[event-2:base-record] +fd=2 +group_fd=1 + +# cache-misses +type=0 +config=3 + +# default | PERF_SAMPLE_READ +sample_type=87 + +# PERF_FORMAT_ID | PERF_FORMAT_GROUP | PERF_FORMAT_LOST | PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING +read_format=28|31 +task=0 +mmap=0 +comm=0 +enable_on_exec=0 +disabled=0 + +# inherit is enabled for group sampling +inherit=1 + +# sampling disabled +sample_freq=0 +sample_period=0 +freq=0 +write_backward=0 diff --git a/tools/perf/tests/attr/test-record-group1 b/tools/perf/tests/shell/attr/test-record-group1 index eeb1db392bc9..eeb1db392bc9 100644 --- a/tools/perf/tests/attr/test-record-group1 +++ b/tools/perf/tests/shell/attr/test-record-group1 diff --git a/tools/perf/tests/attr/test-record-group2 b/tools/perf/tests/shell/attr/test-record-group2 index cebdaa8e64e4..891d41a7bddf 100644 --- a/tools/perf/tests/attr/test-record-group2 +++ b/tools/perf/tests/shell/attr/test-record-group2 @@ -2,6 +2,7 @@ command = record args = --no-bpf-event -e '{cycles/period=1234000/,instructions/period=6789000/}:S' kill >/dev/null 2>&1 ret = 1 +kernel_until = 6.12 [event-1:base-record] fd=1 diff --git a/tools/perf/tests/shell/attr/test-record-group3 b/tools/perf/tests/shell/attr/test-record-group3 new file mode 100644 index 000000000000..249be884959e --- /dev/null +++ b/tools/perf/tests/shell/attr/test-record-group3 @@ -0,0 +1,31 @@ +[config] +command = record +args = --no-bpf-event -e '{cycles/period=1234000/,instructions/period=6789000/}:S' kill >/dev/null 2>&1 +ret = 1 +kernel_since = 6.12 + +[event-1:base-record] +fd=1 +group_fd=-1 +config=0|1 +sample_period=1234000 +sample_type=87 +read_format=28|31 +disabled=1 +inherit=1 +freq=0 + +[event-2:base-record] +fd=2 +group_fd=1 +config=0|1 +sample_period=6789000 +sample_type=87 +read_format=28|31 +disabled=0 +inherit=1 +mmap=0 +comm=0 +freq=0 +enable_on_exec=0 +task=0 diff --git a/tools/perf/tests/attr/test-record-no-buffering b/tools/perf/tests/shell/attr/test-record-no-buffering index 583dcbb078ba..583dcbb078ba 100644 --- a/tools/perf/tests/attr/test-record-no-buffering +++ b/tools/perf/tests/shell/attr/test-record-no-buffering diff --git a/tools/perf/tests/attr/test-record-no-inherit b/tools/perf/tests/shell/attr/test-record-no-inherit index 15d1dc162e1c..15d1dc162e1c 100644 --- a/tools/perf/tests/attr/test-record-no-inherit +++ b/tools/perf/tests/shell/attr/test-record-no-inherit diff --git a/tools/perf/tests/attr/test-record-no-samples b/tools/perf/tests/shell/attr/test-record-no-samples index 596fbd6d5a2c..596fbd6d5a2c 100644 --- a/tools/perf/tests/attr/test-record-no-samples +++ b/tools/perf/tests/shell/attr/test-record-no-samples diff --git a/tools/perf/tests/attr/test-record-period b/tools/perf/tests/shell/attr/test-record-period index 119101154c5e..119101154c5e 100644 --- a/tools/perf/tests/attr/test-record-period +++ b/tools/perf/tests/shell/attr/test-record-period diff --git a/tools/perf/tests/attr/test-record-pfm-period b/tools/perf/tests/shell/attr/test-record-pfm-period index 368f5b814094..368f5b814094 100644 --- a/tools/perf/tests/attr/test-record-pfm-period +++ b/tools/perf/tests/shell/attr/test-record-pfm-period diff --git a/tools/perf/tests/attr/test-record-raw b/tools/perf/tests/shell/attr/test-record-raw index 13a5f7860c78..13a5f7860c78 100644 --- a/tools/perf/tests/attr/test-record-raw +++ b/tools/perf/tests/shell/attr/test-record-raw diff --git a/tools/perf/tests/attr/test-record-spe-period b/tools/perf/tests/shell/attr/test-record-spe-period index 75f8c9cd8e3f..75f8c9cd8e3f 100644 --- a/tools/perf/tests/attr/test-record-spe-period +++ b/tools/perf/tests/shell/attr/test-record-spe-period diff --git a/tools/perf/tests/attr/test-record-spe-period-term b/tools/perf/tests/shell/attr/test-record-spe-period-term index 8f60a4fec657..8f60a4fec657 100644 --- a/tools/perf/tests/attr/test-record-spe-period-term +++ b/tools/perf/tests/shell/attr/test-record-spe-period-term diff --git a/tools/perf/tests/attr/test-record-spe-physical-address b/tools/perf/tests/shell/attr/test-record-spe-physical-address index 7ebcf5012ce3..7ebcf5012ce3 100644 --- a/tools/perf/tests/attr/test-record-spe-physical-address +++ b/tools/perf/tests/shell/attr/test-record-spe-physical-address diff --git a/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 b/tools/perf/tests/shell/attr/test-record-user-regs-no-sve-aarch64 index bed765450ca9..bed765450ca9 100644 --- a/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 +++ b/tools/perf/tests/shell/attr/test-record-user-regs-no-sve-aarch64 diff --git a/tools/perf/tests/attr/test-record-user-regs-old-sve-aarch64 b/tools/perf/tests/shell/attr/test-record-user-regs-old-sve-aarch64 index 15ebfc3418e3..15ebfc3418e3 100644 --- a/tools/perf/tests/attr/test-record-user-regs-old-sve-aarch64 +++ b/tools/perf/tests/shell/attr/test-record-user-regs-old-sve-aarch64 diff --git a/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 b/tools/perf/tests/shell/attr/test-record-user-regs-sve-aarch64 index a65113cd7311..a65113cd7311 100644 --- a/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 +++ b/tools/perf/tests/shell/attr/test-record-user-regs-sve-aarch64 diff --git a/tools/perf/tests/attr/test-stat-C0 b/tools/perf/tests/shell/attr/test-stat-C0 index a2c76d10b2bb..a2c76d10b2bb 100644 --- a/tools/perf/tests/attr/test-stat-C0 +++ b/tools/perf/tests/shell/attr/test-stat-C0 diff --git a/tools/perf/tests/attr/test-stat-basic b/tools/perf/tests/shell/attr/test-stat-basic index 69867d049fda..69867d049fda 100644 --- a/tools/perf/tests/attr/test-stat-basic +++ b/tools/perf/tests/shell/attr/test-stat-basic diff --git a/tools/perf/tests/attr/test-stat-default b/tools/perf/tests/shell/attr/test-stat-default index a1e2da0a9a6d..e47fb4944679 100644 --- a/tools/perf/tests/attr/test-stat-default +++ b/tools/perf/tests/shell/attr/test-stat-default @@ -88,98 +88,142 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33280 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33536 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33024 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) [event16:base-stat] fd=16 +group_fd=11 type=4 -config=4109 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) [event17:base-stat] fd=17 +group_fd=11 type=4 -config=17039629 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) [event18:base-stat] fd=18 +group_fd=11 type=4 -config=60 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) [event19:base-stat] fd=19 +group_fd=11 type=4 -config=2097421 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event20:base-stat] fd=20 type=4 -config=316 +config=4109 optional=1 -# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event21:base-stat] fd=21 type=4 -config=412 +config=17039629 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event22:base-stat] fd=22 type=4 -config=572 +config=60 optional=1 -# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event23:base-stat] fd=23 type=4 -config=706 +config=2097421 optional=1 -# PERF_TYPE_RAW / UOPS_ISSUED.ANY +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK [event24:base-stat] fd=24 type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event25:base-stat] +fd=25 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event26:base-stat] +fd=26 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event27:base-stat] +fd=27 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event28:base-stat] +fd=28 +type=4 config=270 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-1 b/tools/perf/tests/shell/attr/test-stat-detailed-1 index 1c52cb05c900..3d500d3e0c5c 100644 --- a/tools/perf/tests/attr/test-stat-detailed-1 +++ b/tools/perf/tests/shell/attr/test-stat-detailed-1 @@ -90,99 +90,143 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33280 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33536 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33024 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) [event16:base-stat] fd=16 +group_fd=11 type=4 -config=4109 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) [event17:base-stat] fd=17 +group_fd=11 type=4 -config=17039629 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) [event18:base-stat] fd=18 +group_fd=11 type=4 -config=60 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) [event19:base-stat] fd=19 +group_fd=11 type=4 -config=2097421 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event20:base-stat] fd=20 type=4 -config=316 +config=4109 optional=1 -# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event21:base-stat] fd=21 type=4 -config=412 +config=17039629 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event22:base-stat] fd=22 type=4 -config=572 +config=60 optional=1 -# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event23:base-stat] fd=23 type=4 -config=706 +config=2097421 optional=1 -# PERF_TYPE_RAW / UOPS_ISSUED.ANY +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK [event24:base-stat] fd=24 type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event25:base-stat] +fd=25 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event26:base-stat] +fd=26 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event27:base-stat] +fd=27 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event28:base-stat] +fd=28 +type=4 config=270 optional=1 @@ -190,8 +234,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event25:base-stat] -fd=25 +[event29:base-stat] +fd=29 type=3 config=0 optional=1 @@ -200,8 +244,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event26:base-stat] -fd=26 +[event30:base-stat] +fd=30 type=3 config=65536 optional=1 @@ -210,8 +254,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event27:base-stat] -fd=27 +[event31:base-stat] +fd=31 type=3 config=2 optional=1 @@ -220,8 +264,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event28:base-stat] -fd=28 +[event32:base-stat] +fd=32 type=3 config=65538 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-2 b/tools/perf/tests/shell/attr/test-stat-detailed-2 index 7e961d24a885..01777a63752f 100644 --- a/tools/perf/tests/attr/test-stat-detailed-2 +++ b/tools/perf/tests/shell/attr/test-stat-detailed-2 @@ -90,99 +90,143 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33280 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33536 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33024 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) [event16:base-stat] fd=16 +group_fd=11 type=4 -config=4109 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) [event17:base-stat] fd=17 +group_fd=11 type=4 -config=17039629 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) [event18:base-stat] fd=18 +group_fd=11 type=4 -config=60 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) [event19:base-stat] fd=19 +group_fd=11 type=4 -config=2097421 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event20:base-stat] fd=20 type=4 -config=316 +config=4109 optional=1 -# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event21:base-stat] fd=21 type=4 -config=412 +config=17039629 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event22:base-stat] fd=22 type=4 -config=572 +config=60 optional=1 -# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event23:base-stat] fd=23 type=4 -config=706 +config=2097421 optional=1 -# PERF_TYPE_RAW / UOPS_ISSUED.ANY +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK [event24:base-stat] fd=24 type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event25:base-stat] +fd=25 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event26:base-stat] +fd=26 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event27:base-stat] +fd=27 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event28:base-stat] +fd=28 +type=4 config=270 optional=1 @@ -190,8 +234,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event25:base-stat] -fd=25 +[event29:base-stat] +fd=29 type=3 config=0 optional=1 @@ -200,8 +244,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event26:base-stat] -fd=26 +[event30:base-stat] +fd=30 type=3 config=65536 optional=1 @@ -210,8 +254,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event27:base-stat] -fd=27 +[event31:base-stat] +fd=31 type=3 config=2 optional=1 @@ -220,8 +264,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event28:base-stat] -fd=28 +[event32:base-stat] +fd=32 type=3 config=65538 optional=1 @@ -230,8 +274,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event29:base-stat] -fd=29 +[event33:base-stat] +fd=33 type=3 config=1 optional=1 @@ -240,8 +284,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event30:base-stat] -fd=30 +[event34:base-stat] +fd=34 type=3 config=65537 optional=1 @@ -250,8 +294,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event31:base-stat] -fd=31 +[event35:base-stat] +fd=35 type=3 config=3 optional=1 @@ -260,8 +304,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event32:base-stat] -fd=32 +[event36:base-stat] +fd=36 type=3 config=65539 optional=1 @@ -270,8 +314,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event33:base-stat] -fd=33 +[event37:base-stat] +fd=37 type=3 config=4 optional=1 @@ -280,8 +324,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event34:base-stat] -fd=34 +[event38:base-stat] +fd=38 type=3 config=65540 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-3 b/tools/perf/tests/shell/attr/test-stat-detailed-3 index e50535f45977..8400abd7e1e4 100644 --- a/tools/perf/tests/attr/test-stat-detailed-3 +++ b/tools/perf/tests/shell/attr/test-stat-detailed-3 @@ -90,99 +90,143 @@ enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) [event13:base-stat] fd=13 group_fd=11 type=4 -config=33280 +config=33024 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-be-bound (0x8300) +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) [event14:base-stat] fd=14 group_fd=11 type=4 -config=33536 +config=33280 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +# PERF_TYPE_RAW / topdown-be-bound (0x8300) [event15:base-stat] fd=15 group_fd=11 type=4 -config=33024 +config=33536 disabled=0 enable_on_exec=0 read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) [event16:base-stat] fd=16 +group_fd=11 type=4 -config=4109 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) [event17:base-stat] fd=17 +group_fd=11 type=4 -config=17039629 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) [event18:base-stat] fd=18 +group_fd=11 type=4 -config=60 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) [event19:base-stat] fd=19 +group_fd=11 type=4 -config=2097421 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK +# PERF_TYPE_RAW / INT_MISC.UOP_DROPPING [event20:base-stat] fd=20 type=4 -config=316 +config=4109 optional=1 -# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +# PERF_TYPE_RAW / cpu/INT_MISC.RECOVERY_CYCLES,cmask=1,edge/ [event21:base-stat] fd=21 type=4 -config=412 +config=17039629 optional=1 -# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.THREAD [event22:base-stat] fd=22 type=4 -config=572 +config=60 optional=1 -# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +# PERF_TYPE_RAW / INT_MISC.RECOVERY_CYCLES_ANY [event23:base-stat] fd=23 type=4 -config=706 +config=2097421 optional=1 -# PERF_TYPE_RAW / UOPS_ISSUED.ANY +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.REF_XCLK [event24:base-stat] fd=24 type=4 +config=316 +optional=1 + +# PERF_TYPE_RAW / IDQ_UOPS_NOT_DELIVERED.CORE +[event25:base-stat] +fd=25 +type=4 +config=412 +optional=1 + +# PERF_TYPE_RAW / CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE +[event26:base-stat] +fd=26 +type=4 +config=572 +optional=1 + +# PERF_TYPE_RAW / UOPS_RETIRED.RETIRE_SLOTS +[event27:base-stat] +fd=27 +type=4 +config=706 +optional=1 + +# PERF_TYPE_RAW / UOPS_ISSUED.ANY +[event28:base-stat] +fd=28 +type=4 config=270 optional=1 @@ -190,8 +234,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event25:base-stat] -fd=25 +[event29:base-stat] +fd=29 type=3 config=0 optional=1 @@ -200,8 +244,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event26:base-stat] -fd=26 +[event30:base-stat] +fd=30 type=3 config=65536 optional=1 @@ -210,8 +254,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event27:base-stat] -fd=27 +[event31:base-stat] +fd=31 type=3 config=2 optional=1 @@ -220,8 +264,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event28:base-stat] -fd=28 +[event32:base-stat] +fd=32 type=3 config=65538 optional=1 @@ -230,8 +274,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event29:base-stat] -fd=29 +[event33:base-stat] +fd=33 type=3 config=1 optional=1 @@ -240,8 +284,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event30:base-stat] -fd=30 +[event34:base-stat] +fd=34 type=3 config=65537 optional=1 @@ -250,8 +294,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event31:base-stat] -fd=31 +[event35:base-stat] +fd=35 type=3 config=3 optional=1 @@ -260,8 +304,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event32:base-stat] -fd=32 +[event36:base-stat] +fd=36 type=3 config=65539 optional=1 @@ -270,8 +314,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event33:base-stat] -fd=33 +[event37:base-stat] +fd=37 type=3 config=4 optional=1 @@ -280,8 +324,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event34:base-stat] -fd=34 +[event38:base-stat] +fd=38 type=3 config=65540 optional=1 @@ -290,8 +334,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event35:base-stat] -fd=35 +[event39:base-stat] +fd=39 type=3 config=512 optional=1 @@ -300,8 +344,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event36:base-stat] -fd=36 +[event40:base-stat] +fd=40 type=3 config=66048 optional=1 diff --git a/tools/perf/tests/attr/test-stat-group1 b/tools/perf/tests/shell/attr/test-stat-group1 index 1746751123dc..1746751123dc 100644 --- a/tools/perf/tests/attr/test-stat-group1 +++ b/tools/perf/tests/shell/attr/test-stat-group1 diff --git a/tools/perf/tests/attr/test-stat-no-inherit b/tools/perf/tests/shell/attr/test-stat-no-inherit index 924fbb9300d1..924fbb9300d1 100644 --- a/tools/perf/tests/attr/test-stat-no-inherit +++ b/tools/perf/tests/shell/attr/test-stat-no-inherit diff --git a/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh b/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh index b5dc10b2a738..bead723e34af 100755 --- a/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh +++ b/tools/perf/tests/shell/base_probe/test_adding_blacklisted.sh @@ -19,35 +19,74 @@ TEST_RESULT=0 # skip if not supported -BLACKFUNC=`head -n 1 /sys/kernel/debug/kprobes/blacklist 2> /dev/null | cut -f2` -if [ -z "$BLACKFUNC" ]; then +BLACKFUNC_LIST=`head -n 5 /sys/kernel/debug/kprobes/blacklist 2> /dev/null | cut -f2` +if [ -z "$BLACKFUNC_LIST" ]; then print_overall_skipped exit 0 fi +# try to find vmlinux with DWARF debug info +VMLINUX_FILE=$(perf probe -v random_probe |& grep "Using.*for symbols" | sed -r 's/^Using (.*) for symbols$/\1/') + # remove all previously added probes clear_all_probes ### adding blacklisted function - -# functions from blacklist should be skipped by perf probe -! $CMD_PERF probe $BLACKFUNC > $LOGS_DIR/adding_blacklisted.log 2> $LOGS_DIR/adding_blacklisted.err -PERF_EXIT_CODE=$? - REGEX_SCOPE_FAIL="Failed to find scope of probe point" REGEX_SKIP_MESSAGE=" is blacklisted function, skip it\." -REGEX_NOT_FOUND_MESSAGE="Probe point \'$BLACKFUNC\' not found." +REGEX_NOT_FOUND_MESSAGE="Probe point \'$RE_EVENT\' not found." REGEX_ERROR_MESSAGE="Error: Failed to add events." REGEX_INVALID_ARGUMENT="Failed to write event: Invalid argument" REGEX_SYMBOL_FAIL="Failed to find symbol at $RE_ADDRESS" -REGEX_OUT_SECTION="$BLACKFUNC is out of \.\w+, skip it" -../common/check_all_lines_matched.pl "$REGEX_SKIP_MESSAGE" "$REGEX_NOT_FOUND_MESSAGE" "$REGEX_ERROR_MESSAGE" "$REGEX_SCOPE_FAIL" "$REGEX_INVALID_ARGUMENT" "$REGEX_SYMBOL_FAIL" "$REGEX_OUT_SECTION" < $LOGS_DIR/adding_blacklisted.err -CHECK_EXIT_CODE=$? - -print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "adding blacklisted function $BLACKFUNC" -(( TEST_RESULT += $? )) - +REGEX_OUT_SECTION="$RE_EVENT is out of \.\w+, skip it" +REGEX_MISSING_DECL_LINE="A function DIE doesn't have decl_line. Maybe broken DWARF?" + +BLACKFUNC="" +SKIP_DWARF=0 + +for BLACKFUNC in $BLACKFUNC_LIST; do + echo "Probing $BLACKFUNC" + + # functions from blacklist should be skipped by perf probe + ! $CMD_PERF probe $BLACKFUNC > $LOGS_DIR/adding_blacklisted.log 2> $LOGS_DIR/adding_blacklisted.err + PERF_EXIT_CODE=$? + + # check for bad DWARF polluting the result + ../common/check_all_patterns_found.pl "$REGEX_MISSING_DECL_LINE" >/dev/null < $LOGS_DIR/adding_blacklisted.err + + if [ $? -eq 0 ]; then + SKIP_DWARF=1 + echo "Result polluted by broken DWARF, trying another probe" + + # confirm that the broken DWARF comes from assembler + if [ -n "$VMLINUX_FILE" ]; then + readelf -wi "$VMLINUX_FILE" | + awk -v probe="$BLACKFUNC" '/DW_AT_language/ { comp_lang = $0 } + $0 ~ probe { if (comp_lang) { print comp_lang }; exit }' | + grep -q "MIPS assembler" + + CHECK_EXIT_CODE=$? + if [ $CHECK_EXIT_CODE -ne 0 ]; then + SKIP_DWARF=0 # broken DWARF while available + break + fi + fi + else + ../common/check_all_lines_matched.pl "$REGEX_SKIP_MESSAGE" "$REGEX_NOT_FOUND_MESSAGE" "$REGEX_ERROR_MESSAGE" "$REGEX_SCOPE_FAIL" "$REGEX_INVALID_ARGUMENT" "$REGEX_SYMBOL_FAIL" "$REGEX_OUT_SECTION" < $LOGS_DIR/adding_blacklisted.err + CHECK_EXIT_CODE=$? + + SKIP_DWARF=0 + break + fi +done + +if [ $SKIP_DWARF -eq 1 ]; then + print_testcase_skipped "adding blacklisted function $BLACKFUNC" +else + print_results $PERF_EXIT_CODE $CHECK_EXIT_CODE "adding blacklisted function $BLACKFUNC" + (( TEST_RESULT += $? )) +fi ### listing not-added probe diff --git a/tools/perf/tests/shell/coresight/asm_pure_loop.sh b/tools/perf/tests/shell/coresight/asm_pure_loop.sh index 2d65defb7e0f..c63bc8c73e26 100755 --- a/tools/perf/tests/shell/coresight/asm_pure_loop.sh +++ b/tools/perf/tests/shell/coresight/asm_pure_loop.sh @@ -1,5 +1,5 @@ #!/bin/sh -e -# CoreSight / ASM Pure Loop +# CoreSight / ASM Pure Loop (exclusive) # SPDX-License-Identifier: GPL-2.0 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021 diff --git a/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh b/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh index ddcc9bb850f5..8e29630957c8 100755 --- a/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh +++ b/tools/perf/tests/shell/coresight/memcpy_thread_16k_10.sh @@ -1,5 +1,5 @@ #!/bin/sh -e -# CoreSight / Memcpy 16k 10 Threads +# CoreSight / Memcpy 16k 10 Threads (exclusive) # SPDX-License-Identifier: GPL-2.0 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021 diff --git a/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh b/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh index 2ce5e139b2fd..0c4c82a1c8e1 100755 --- a/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh +++ b/tools/perf/tests/shell/coresight/thread_loop_check_tid_10.sh @@ -1,5 +1,5 @@ #!/bin/sh -e -# CoreSight / Thread Loop 10 Threads - Check TID +# CoreSight / Thread Loop 10 Threads - Check TID (exclusive) # SPDX-License-Identifier: GPL-2.0 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021 diff --git a/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh b/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh index 3ad9498753d7..d3aea9fc6ced 100755 --- a/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh +++ b/tools/perf/tests/shell/coresight/thread_loop_check_tid_2.sh @@ -1,5 +1,5 @@ #!/bin/sh -e -# CoreSight / Thread Loop 2 Threads - Check TID +# CoreSight / Thread Loop 2 Threads - Check TID (exclusive) # SPDX-License-Identifier: GPL-2.0 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021 diff --git a/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh b/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh index 4fbb4a29aad3..7429d3a2ae43 100755 --- a/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh +++ b/tools/perf/tests/shell/coresight/unroll_loop_thread_10.sh @@ -1,5 +1,5 @@ #!/bin/sh -e -# CoreSight / Unroll Loop Thread 10 +# CoreSight / Unroll Loop Thread 10 (exclusive) # SPDX-License-Identifier: GPL-2.0 # Carsten Haitzler <carsten.haitzler@arm.com>, 2021 diff --git a/tools/perf/tests/shell/ftrace.sh b/tools/perf/tests/shell/ftrace.sh index a6ee740f0d7e..2df05052c324 100755 --- a/tools/perf/tests/shell/ftrace.sh +++ b/tools/perf/tests/shell/ftrace.sh @@ -67,12 +67,12 @@ test_ftrace_latency() { test_ftrace_profile() { echo "perf ftrace profile test" - perf ftrace profile sleep 0.1 > "${output}" + perf ftrace profile -m 16M sleep 0.1 > "${output}" grep ^# "${output}" grep sleep "${output}" grep schedule "${output}" grep execve "${output}" - time_re="[[:space:]]+10[[:digit:]]{4}\.[[:digit:]]{3}" + time_re="[[:space:]]+1[[:digit:]]{5}\.[[:digit:]]{3}" # 100283.000 100283.000 100283.000 1 __x64_sys_clock_nanosleep # Check for one *clock_nanosleep line with a Count of just 1 that takes a bit more than 0.1 seconds # Strip the _x64_sys part to work with other architectures diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/shell/lib/attr.py index e890c261ad26..3db9a7d78715 100644 --- a/tools/perf/tests/attr.py +++ b/tools/perf/tests/shell/lib/attr.py @@ -246,6 +246,23 @@ class Test(object): return False return True + def restore_sample_rate(self, value=10000): + try: + # Check value of sample_rate + with open("/proc/sys/kernel/perf_event_max_sample_rate", "r") as fIn: + curr_value = fIn.readline() + # If too low restore to reasonable value + if not curr_value or int(curr_value) < int(value): + with open("/proc/sys/kernel/perf_event_max_sample_rate", "w") as fOut: + fOut.write(str(value)) + + except IOError as e: + log.warning("couldn't restore sample_rate value: I/O error %s" % e) + except ValueError as e: + log.warning("couldn't restore sample_rate value: Value error %s" % e) + except TypeError as e: + log.warning("couldn't restore sample_rate value: Type error %s" % e) + def load_events(self, path, events): parser_event = configparser.ConfigParser() parser_event.read(path) @@ -283,6 +300,7 @@ class Test(object): if self.skip_test_kernel_until(): raise Notest(self, "new kernel skip") + self.restore_sample_rate() cmd = "PERF_TEST_ATTR=%s %s %s -o %s/perf.data %s" % (tempdir, self.perf, self.command, tempdir, self.args) ret = os.WEXITSTATUS(os.system(cmd)) diff --git a/tools/perf/tests/shell/lib/coresight.sh b/tools/perf/tests/shell/lib/coresight.sh index 11ed2c25ed91..184d62e7e5bd 100644 --- a/tools/perf/tests/shell/lib/coresight.sh +++ b/tools/perf/tests/shell/lib/coresight.sh @@ -18,7 +18,7 @@ BIN="$DIR/$TEST" # If the test tool/binary does not exist and is executable then skip the test if ! test -x "$BIN"; then exit 2; fi # If CoreSight is not available, skip the test -perf list cs_etm | grep -q cs_etm || exit 2 +perf list pmu | grep -q cs_etm || exit 2 DATD="." # If the data dir env is set then make the data dir use that instead of ./ if test -n "$PERF_TEST_CORESIGHT_DATADIR"; then diff --git a/tools/perf/tests/shell/lib/perf_json_output_lint.py b/tools/perf/tests/shell/lib/perf_json_output_lint.py index abc1fd737782..8ddb85586131 100644 --- a/tools/perf/tests/shell/lib/perf_json_output_lint.py +++ b/tools/perf/tests/shell/lib/perf_json_output_lint.py @@ -57,6 +57,7 @@ def check_json_output(expected_items): 'interval': lambda x: isfloat(x), 'metric-unit': lambda x: True, 'metric-value': lambda x: isfloat(x), + 'metric-threshold': lambda x: x in ['unknown', 'good', 'less good', 'nearly bad', 'bad'], 'metricgroup': lambda x: True, 'node': lambda x: True, 'pcnt-running': lambda x: isfloat(x), @@ -68,13 +69,15 @@ def check_json_output(expected_items): for item in json.loads(input): if expected_items != -1: count = len(item) - if count != expected_items and count >= 1 and count <= 6 and 'metric-value' in item: + if count != expected_items and count >= 1 and count <= 7 and 'metric-value' in item: # Events that generate >1 metric may have isolated metric # values and possibly other prefixes like interval, core, # aggregate-number, or event-runtime/pcnt-running from multiplexing. pass elif count != expected_items and count >= 1 and count <= 5 and 'metricgroup' in item: pass + elif count == expected_items + 1 and 'metric-threshold' in item: + pass elif count != expected_items: raise RuntimeError(f'wrong number of fields. counted {count} expected {expected_items}' f' in \'{item}\'') diff --git a/tools/perf/tests/shell/list.sh b/tools/perf/tests/shell/list.sh index 8a868ae64560..76a9846cff22 100755 --- a/tools/perf/tests/shell/list.sh +++ b/tools/perf/tests/shell/list.sh @@ -24,8 +24,11 @@ trap trap_cleanup EXIT TERM INT test_list_json() { echo "Json output test" + # Generate perf list json output into list_output file. perf list -j -o "${list_output}" - $PYTHON -m json.tool "${list_output}" + # Validate the json using python, redirect the json copy to /dev/null as + # otherwise the test may block writing to stdout. + $PYTHON -m json.tool "${list_output}" /dev/null echo "Json output test [Success]" } diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh index c1ec5762215b..30d195d4c62f 100755 --- a/tools/perf/tests/shell/lock_contention.sh +++ b/tools/perf/tests/shell/lock_contention.sh @@ -27,7 +27,7 @@ check() { exit fi - if ! perf list | grep -q lock:contention_begin; then + if ! perf list tracepoint | grep -q lock:contention_begin; then echo "[Skip] No lock contention tracepoints" err=2 exit diff --git a/tools/perf/tests/shell/perftool-testsuite_report.sh b/tools/perf/tests/shell/perftool-testsuite_report.sh index 973012ce92a7..a8cf75b4e77e 100755 --- a/tools/perf/tests/shell/perftool-testsuite_report.sh +++ b/tools/perf/tests/shell/perftool-testsuite_report.sh @@ -1,5 +1,5 @@ #!/bin/bash -# perftool-testsuite_report +# perftool-testsuite_report (exclusive) # SPDX-License-Identifier: GPL-2.0 test -d "$(dirname "$0")/base_report" || exit 2 diff --git a/tools/perf/tests/shell/pipe_test.sh b/tools/perf/tests/shell/pipe_test.sh index d4c8005ce9b9..e459aa99a951 100755 --- a/tools/perf/tests/shell/pipe_test.sh +++ b/tools/perf/tests/shell/pipe_test.sh @@ -13,6 +13,7 @@ skip_test_missing_symbol ${sym} data=$(mktemp /tmp/perf.data.XXXXXX) data2=$(mktemp /tmp/perf.data2.XXXXXX) prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" err=0 set -e diff --git a/tools/perf/tests/shell/probe_vfs_getname.sh b/tools/perf/tests/shell/probe_vfs_getname.sh index 554e12e83c55..0c5aacc446b3 100755 --- a/tools/perf/tests/shell/probe_vfs_getname.sh +++ b/tools/perf/tests/shell/probe_vfs_getname.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Add vfs_getname probe to get syscall args filenames +# Add vfs_getname probe to get syscall args filenames (exclusive) # SPDX-License-Identifier: GPL-2.0 # Arnaldo Carvalho de Melo <acme@kernel.org>, 2017 diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index f38c8ead0b03..47a26f25db9f 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -40,8 +40,8 @@ trace_libc_inet_pton_backtrace() { case "$(uname -m)" in s390x) eventattr='call-graph=dwarf,max-stack=4' - echo "(__GI_)?getaddrinfo\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected - echo "main\+0x[[:xdigit:]]+[[:space:]]\(.*/bin/ping.*\)$" >> $expected + echo "((__GI_)?getaddrinfo|text_to_binary_address)\+0x[[:xdigit:]]+[[:space:]]\($libc|inlined\)$" >> $expected + echo "(gaih_inet|main)\+0x[[:xdigit:]]+[[:space:]]\(inlined|.*/bin/ping.*\)$" >> $expected ;; ppc64|ppc64le) eventattr='max-stack=4' diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh index 9a61928e3c9a..5940fdc1df37 100755 --- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh +++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Use vfs_getname probe to get syscall args filenames +# Use vfs_getname probe to get syscall args filenames (exclusive) # Uses the 'perf test shell' library to add probe:vfs_getname to the system # then use it with 'perf record' using 'touch' to write to a temp file, then diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 048078ee2eca..0fc7a909ae9b 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -1,5 +1,5 @@ #!/bin/bash -# perf record tests +# perf record tests (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e @@ -17,6 +17,7 @@ skip_test_missing_symbol ${testsym} err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +script_output=$(mktemp /tmp/__perf_test.perf.data.XXXXX.script) testprog="perf test -w thloop" cpu_pmu_dir="/sys/bus/event_source/devices/cpu*" br_cntr_file="/caps/branch_counter_nr" @@ -93,7 +94,7 @@ test_per_thread() { test_register_capture() { echo "Register capture test" - if ! perf list | grep -q 'br_inst_retired.near_call' + if ! perf list pmu | grep -q 'br_inst_retired.near_call' then echo "Register capture test [Skipped missing event]" return @@ -228,6 +229,73 @@ test_cgroup() { echo "Cgroup sampling test [Success]" } +test_leader_sampling() { + echo "Basic leader sampling test" + if ! perf record -o "${perfdata}" -e "{instructions,instructions}:Su" -- \ + perf test -w brstack 2> /dev/null + then + echo "Leader sampling [Failed record]" + err=1 + return + fi + index=0 + perf script -i "${perfdata}" > $script_output + while IFS= read -r line + do + # Check if the two instruction counts are equal in each record + instructions=$(echo $line | awk '{for(i=1;i<=NF;i++) if($i=="instructions:") print $(i-1)}') + if [ $(($index%2)) -ne 0 ] && [ ${instructions}x != ${prev_instructions}x ] + then + echo "Leader sampling [Failed inconsistent instructions count]" + err=1 + return + fi + index=$(($index+1)) + prev_instructions=$instructions + done < $script_output + echo "Basic leader sampling test [Success]" +} + +test_topdown_leader_sampling() { + echo "Topdown leader sampling test" + if ! perf stat -e "{slots,topdown-retiring}" true 2> /dev/null + then + echo "Topdown leader sampling [Skipped event parsing failed]" + return + fi + if ! perf record -o "${perfdata}" -e "{instructions,slots,topdown-retiring}:S" true 2> /dev/null + then + echo "Topdown leader sampling [Failed topdown events not reordered correctly]" + err=1 + return + fi + echo "Topdown leader sampling test [Success]" +} + +test_precise_max() { + echo "precise_max attribute test" + if ! perf stat -e "cycles,instructions" true 2> /dev/null + then + echo "precise_max attribute [Skipped no hardware events]" + return + fi + # Just to make sure it doesn't fail + if ! perf record -o "${perfdata}" -e "cycles:P" true 2> /dev/null + then + echo "precise_max attribute [Failed cycles:P event]" + err=1 + return + fi + # On AMD, cycles and instructions events are treated differently + if ! perf record -o "${perfdata}" -e "instructions:P" true 2> /dev/null + then + echo "precise_max attribute [Failed instructions:P event]" + err=1 + return + fi + echo "precise_max attribute test [Success]" +} + # raise the limit of file descriptors to minimum if [[ $default_fd_limit -lt $min_fd_limit ]]; then ulimit -Sn $min_fd_limit @@ -239,6 +307,9 @@ test_system_wide test_workload test_branch_counter test_cgroup +test_leader_sampling +test_topdown_leader_sampling +test_precise_max # restore the default value ulimit -Sn $default_fd_limit diff --git a/tools/perf/tests/shell/record_lbr.sh b/tools/perf/tests/shell/record_lbr.sh index 32314641217e..8d750ee631f8 100755 --- a/tools/perf/tests/shell/record_lbr.sh +++ b/tools/perf/tests/shell/record_lbr.sh @@ -1,5 +1,5 @@ #!/bin/bash -# perf record LBR tests +# perf record LBR tests (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e diff --git a/tools/perf/tests/shell/record_offcpu.sh b/tools/perf/tests/shell/record_offcpu.sh index 67c925f3a15a..678947fe69ee 100755 --- a/tools/perf/tests/shell/record_offcpu.sh +++ b/tools/perf/tests/shell/record_offcpu.sh @@ -1,5 +1,5 @@ #!/bin/sh -# perf record offcpu profiling tests +# perf record offcpu profiling tests (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh index 3f1e67795490..7a8adf81e4b3 100755 --- a/tools/perf/tests/shell/stat.sh +++ b/tools/perf/tests/shell/stat.sh @@ -73,9 +73,33 @@ test_topdown_groups() { err=1 return fi - if perf stat -e '{topdown-retiring,slots}' true 2>&1 | grep -E -q "<not supported>" + if perf stat -e 'instructions,topdown-retiring,slots' true 2>&1 | grep -E -q "<not supported>" then - echo "Topdown event group test [Failed slots not reordered first]" + echo "Topdown event group test [Failed slots not reordered first in no-group case]" + err=1 + return + fi + if perf stat -e '{instructions,topdown-retiring,slots}' true 2>&1 | grep -E -q "<not supported>" + then + echo "Topdown event group test [Failed slots not reordered first in single group case]" + err=1 + return + fi + if perf stat -e '{instructions,slots},topdown-retiring' true 2>&1 | grep -E -q "<not supported>" + then + echo "Topdown event group test [Failed topdown metrics event not move into slots group]" + err=1 + return + fi + if perf stat -e '{instructions,slots},{topdown-retiring}' true 2>&1 | grep -E -q "<not supported>" + then + echo "Topdown event group test [Failed topdown metrics group not merge into slots group]" + err=1 + return + fi + if perf stat -e '{instructions,r400,r8000}' true 2>&1 | grep -E -q "<not supported>" + then + echo "Topdown event group test [Failed raw format slots not reordered first]" err=1 return fi @@ -117,16 +141,18 @@ test_cputype() { # Find a known PMU for cputype. pmu="" - for i in cpu cpu_atom armv8_pmuv3_0 + devs="/sys/bus/event_source/devices" + for i in $devs/cpu $devs/cpu_atom $devs/armv8_pmuv3_0 $devs/armv8_cortex_* do - if test -d "/sys/devices/$i" + i_base=$(basename "$i") + if test -d "$i" then - pmu="$i" + pmu="$i_base" break fi - if perf stat -e "$i/instructions/" true > /dev/null 2>&1 + if perf stat -e "$i_base/instructions/" true > /dev/null 2>&1 then - pmu="$i" + pmu="$i_base" break fi done @@ -146,6 +172,30 @@ test_cputype() { echo "cputype test [Success]" } +test_hybrid() { + # Test the default stat command on hybrid devices opens one cycles event for + # each CPU type. + echo "hybrid test" + + # Count the number of core PMUs, assume minimum of 1 + pmus=$(ls /sys/bus/event_source/devices/*/cpus 2>/dev/null | wc -l) + if [ "$pmus" -lt 1 ] + then + pmus=1 + fi + + # Run default Perf stat + cycles_events=$(perf stat -- true 2>&1 | grep -E "/cycles/[uH]*| cycles[:uH]* " -c) + + if [ "$pmus" -ne "$cycles_events" ] + then + echo "hybrid test [Found $pmus PMUs but $cycles_events cycles events. Failed]" + err=1 + return + fi + echo "hybrid test [Success]" +} + test_default_stat test_stat_record_report test_stat_record_script @@ -153,4 +203,5 @@ test_stat_repeat_weak_groups test_topdown_groups test_topdown_weak_groups test_cputype +test_hybrid exit $err diff --git a/tools/perf/tests/shell/stat_all_metricgroups.sh b/tools/perf/tests/shell/stat_all_metricgroups.sh index 55ef9c9ded2d..c6d61a4ac3e7 100755 --- a/tools/perf/tests/shell/stat_all_metricgroups.sh +++ b/tools/perf/tests/shell/stat_all_metricgroups.sh @@ -1,9 +1,7 @@ -#!/bin/sh +#!/bin/bash # perf all metricgroups test # SPDX-License-Identifier: GPL-2.0 -set -e - ParanoidAndNotRoot() { [ "$(id -u)" != 0 ] && [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ] @@ -14,11 +12,37 @@ if ParanoidAndNotRoot 0 then system_wide_flag="" fi - +err=0 for m in $(perf list --raw-dump metricgroups) do echo "Testing $m" - perf stat -M "$m" $system_wide_flag sleep 0.01 + result=$(perf stat -M "$m" $system_wide_flag sleep 0.01 2>&1) + result_err=$? + if [[ $result_err -gt 0 ]] + then + if [[ "$result" =~ \ + "Access to performance monitoring and observability operations is limited" ]] + then + echo "Permission failure" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + elif [[ "$result" =~ "in per-thread mode, enable system wide" ]] + then + echo "Permissions - need system wide mode" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + else + echo "Metric group $m failed" + echo $result + err=1 # Fail + fi + fi done -exit 0 +exit $err diff --git a/tools/perf/tests/shell/stat_all_metrics.sh b/tools/perf/tests/shell/stat_all_metrics.sh index 54774525e18a..73e9347e88a9 100755 --- a/tools/perf/tests/shell/stat_all_metrics.sh +++ b/tools/perf/tests/shell/stat_all_metrics.sh @@ -2,42 +2,87 @@ # perf all metrics test # SPDX-License-Identifier: GPL-2.0 +ParanoidAndNotRoot() +{ + [ "$(id -u)" != 0 ] && [ "$(cat /proc/sys/kernel/perf_event_paranoid)" -gt $1 ] +} + +system_wide_flag="-a" +if ParanoidAndNotRoot 0 +then + system_wide_flag="" +fi + err=0 for m in $(perf list --raw-dump metrics); do echo "Testing $m" - result=$(perf stat -M "$m" true 2>&1) - if [[ "$result" =~ ${m:0:50} ]] || [[ "$result" =~ "<not supported>" ]] + result=$(perf stat -M "$m" $system_wide_flag -- sleep 0.01 2>&1) + result_err=$? + if [[ $result_err -gt 0 ]] then - continue + if [[ "$result" =~ \ + "Access to performance monitoring and observability operations is limited" ]] + then + echo "Permission failure" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + continue + elif [[ "$result" =~ "in per-thread mode, enable system wide" ]] + then + echo "Permissions - need system wide mode" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + continue + elif [[ "$result" =~ "<not supported>" ]] + then + echo "Not supported events" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + continue + elif [[ "$result" =~ "FP_ARITH" || "$result" =~ "AMX" ]] + then + echo "FP issues" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + continue + elif [[ "$result" =~ "PMM" ]] + then + echo "Optane memory issues" + echo $result + if [[ $err -eq 0 ]] + then + err=2 # Skip + fi + continue + fi fi - # Failed so try system wide. - result=$(perf stat -M "$m" -a sleep 0.01 2>&1) + if [[ "$result" =~ ${m:0:50} ]] then continue fi - # Failed again, possibly the workload was too small so retry with something - # longer. - result=$(perf stat -M "$m" perf bench internals synthesize 2>&1) + + # Failed, possibly the workload was too small so retry with something longer. + result=$(perf stat -M "$m" $system_wide_flag -- perf bench internals synthesize 2>&1) if [[ "$result" =~ ${m:0:50} ]] then continue fi echo "Metric '$m' not printed in:" echo "$result" - if [[ "$err" != "1" ]] - then - err=2 - if [[ "$result" =~ "FP_ARITH" || "$result" =~ "AMX" ]] - then - echo "Skip, not fail, for FP issues" - elif [[ "$result" =~ "PMM" ]] - then - echo "Skip, not fail, for Optane memory issues" - else - err=1 - fi - fi + err=1 done exit "$err" diff --git a/tools/perf/tests/shell/stat_all_pmu.sh b/tools/perf/tests/shell/stat_all_pmu.sh index d2a3506e0d19..8b148b300be1 100755 --- a/tools/perf/tests/shell/stat_all_pmu.sh +++ b/tools/perf/tests/shell/stat_all_pmu.sh @@ -1,23 +1,51 @@ -#!/bin/sh -# perf all PMU test +#!/bin/bash +# perf all PMU test (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e +err=0 +result="" + +trap_cleanup() { + echo "Unexpected signal in ${FUNCNAME[1]}" + echo "$result" + exit 1 +} +trap trap_cleanup EXIT TERM INT # Test all PMU events; however exclude parameterized ones (name contains '?') -for p in $(perf list --raw-dump pmu | sed 's/[[:graph:]]\+?[[:graph:]]\+[[:space:]]//g'); do +for p in $(perf list --raw-dump pmu | sed 's/[[:graph:]]\+?[[:graph:]]\+[[:space:]]//g') +do echo "Testing $p" result=$(perf stat -e "$p" true 2>&1) - if ! echo "$result" | grep -q "$p" && ! echo "$result" | grep -q "<not supported>" ; then - # We failed to see the event and it is supported. Possibly the workload was - # too small so retry with something longer. - result=$(perf stat -e "$p" perf bench internals synthesize 2>&1) - if ! echo "$result" | grep -q "$p" ; then - echo "Event '$p' not printed in:" - echo "$result" - exit 1 - fi + if echo "$result" | grep -q "$p" + then + # Event seen in output. + continue + fi + if echo "$result" | grep -q "<not supported>" + then + # Event not supported, so ignore. + continue + fi + if echo "$result" | grep -q "Access to performance monitoring and observability operations is limited." + then + # Access is limited, so ignore. + continue + fi + + # We failed to see the event and it is supported. Possibly the workload was + # too small so retry with something longer. + result=$(perf stat -e "$p" perf bench internals synthesize 2>&1) + if echo "$result" | grep -q "$p" + then + # Event seen in output. + continue fi + echo "Error: event '$p' not printed in:" + echo "$result" + err=1 done -exit 0 +trap - EXIT TERM INT +exit $err diff --git a/tools/perf/tests/shell/stat_bpf_counters.sh b/tools/perf/tests/shell/stat_bpf_counters.sh index f250b7d6f773..95d2ad5d17c6 100755 --- a/tools/perf/tests/shell/stat_bpf_counters.sh +++ b/tools/perf/tests/shell/stat_bpf_counters.sh @@ -1,10 +1,10 @@ #!/bin/sh -# perf stat --bpf-counters test +# perf stat --bpf-counters test (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e -workload="perf test -w brstack" +workload="perf test -w sqrtloop" # check whether $2 is within +/- 20% of $1 compare_number() diff --git a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh index e75d0780dc78..2ec69060c42f 100755 --- a/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh +++ b/tools/perf/tests/shell/stat_bpf_counters_cgrp.sh @@ -58,22 +58,9 @@ check_system_wide_counted() fi } -check_cpu_list_counted() -{ - check_cpu_list_counted_output=$(perf stat -C 0,1 --bpf-counters --for-each-cgroup ${test_cgroups} -e cpu-clock -x, taskset -c 1 sleep 1 2>&1) - if echo ${check_cpu_list_counted_output} | grep -q -F "<not "; then - echo "Some CPU events are not counted" - if [ "${verbose}" = "1" ]; then - echo ${check_cpu_list_counted_output} - fi - exit 1 - fi -} - check_bpf_counter find_cgroups check_system_wide_counted -check_cpu_list_counted exit 0 diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh index 3302ea0b9672..573af9235b72 100755 --- a/tools/perf/tests/shell/test_arm_coresight.sh +++ b/tools/perf/tests/shell/test_arm_coresight.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Check Arm CoreSight trace data recording and synthesized samples +# Check Arm CoreSight trace data recording and synthesized samples (exclusive) # Uses the 'perf record' to record trace data with Arm CoreSight sinks; # then verify if there have any branch samples and instruction samples @@ -12,7 +12,7 @@ glb_err=0 skip_if_no_cs_etm_event() { - perf list | grep -q 'cs_etm//' && return 0 + perf list pmu | grep -q 'cs_etm//' && return 0 # cs_etm event doesn't exist return 2 diff --git a/tools/perf/tests/shell/test_arm_coresight_disasm.sh b/tools/perf/tests/shell/test_arm_coresight_disasm.sh new file mode 100755 index 000000000000..be2d26303f94 --- /dev/null +++ b/tools/perf/tests/shell/test_arm_coresight_disasm.sh @@ -0,0 +1,65 @@ +#!/bin/sh +# Check Arm CoreSight disassembly script completes without errors (exclusive) +# SPDX-License-Identifier: GPL-2.0 + +# The disassembly script reconstructs ranges of instructions and gives these to objdump to +# decode. objdump doesn't like ranges that go backwards, but these are a good indication +# that decoding has gone wrong either in OpenCSD, Perf or in the range reconstruction in +# the script. Test all 3 parts are working correctly by running the script. + +skip_if_no_cs_etm_event() { + perf list pmu | grep -q 'cs_etm//' && return 0 + + # cs_etm event doesn't exist + return 2 +} + +skip_if_no_cs_etm_event || exit 2 + +# Assume an error unless we reach the very end +set -e +glb_err=1 + +perfdata_dir=$(mktemp -d /tmp/__perf_test.perf.data.XXXXX) +perfdata=${perfdata_dir}/perf.data +file=$(mktemp /tmp/temporary_file.XXXXX) +# Relative path works whether it's installed or running from repo +script_path=$(dirname "$0")/../../scripts/python/arm-cs-trace-disasm.py + +cleanup_files() +{ + set +e + rm -rf ${perfdata_dir} + rm -f ${file} + trap - EXIT TERM INT + exit $glb_err +} + +trap cleanup_files EXIT TERM INT + +# Ranges start and end on branches, so check for some likely branch instructions +sep="\s\|\s" +branch_search="\sbl${sep}b${sep}b.ne${sep}b.eq${sep}cbz\s" + +## Test kernel ## +if [ -e /proc/kcore ]; then + echo "Testing kernel disassembly" + perf record -o ${perfdata} -e cs_etm//k --kcore -- touch $file > /dev/null 2>&1 + perf script -i ${perfdata} -s python:${script_path} -- \ + -d --stop-sample=30 2> /dev/null > ${file} + grep -q -e ${branch_search} ${file} + echo "Found kernel branches" +else + # kcore is required for correct kernel decode due to runtime code patching + echo "No kcore, skipping kernel test" +fi + +## Test user ## +echo "Testing userspace disassembly" +perf record -o ${perfdata} -e cs_etm//u -- touch $file > /dev/null 2>&1 +perf script -i ${perfdata} -s python:${script_path} -- \ + -d --stop-sample=30 2> /dev/null > ${file} +grep -q -e ${branch_search} ${file} +echo "Found userspace branches" + +glb_err=0 diff --git a/tools/perf/tests/shell/test_arm_spe.sh b/tools/perf/tests/shell/test_arm_spe.sh index 03d5c7d12ee5..3258368634f7 100755 --- a/tools/perf/tests/shell/test_arm_spe.sh +++ b/tools/perf/tests/shell/test_arm_spe.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Check Arm SPE trace data recording and synthesized samples +# Check Arm SPE trace data recording and synthesized samples (exclusive) # Uses the 'perf record' to record trace data of Arm SPE events; # then verify if any SPE event samples are generated by SPE with @@ -9,7 +9,7 @@ # German Gomez <german.gomez@arm.com>, 2021 skip_if_no_arm_spe_event() { - perf list | grep -E -q 'arm_spe_[0-9]+//' && return 0 + perf list pmu | grep -E -q 'arm_spe_[0-9]+//' && return 0 # arm_spe event doesn't exist return 2 diff --git a/tools/perf/tests/shell/test_arm_spe_fork.sh b/tools/perf/tests/shell/test_arm_spe_fork.sh index 1a7e6a82d0e3..8efeef9fb956 100755 --- a/tools/perf/tests/shell/test_arm_spe_fork.sh +++ b/tools/perf/tests/shell/test_arm_spe_fork.sh @@ -5,7 +5,7 @@ # German Gomez <german.gomez@arm.com>, 2022 skip_if_no_arm_spe_event() { - perf list | grep -E -q 'arm_spe_[0-9]+//' && return 0 + perf list pmu | grep -E -q 'arm_spe_[0-9]+//' && return 0 return 2 } diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh index 3dfa91832aa8..c86da0235059 100755 --- a/tools/perf/tests/shell/test_data_symbol.sh +++ b/tools/perf/tests/shell/test_data_symbol.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Test data symbol +# Test data symbol (exclusive) # SPDX-License-Identifier: GPL-2.0 # Leo Yan <leo.yan@linaro.org>, 2022 diff --git a/tools/perf/tests/shell/test_intel_pt.sh b/tools/perf/tests/shell/test_intel_pt.sh index 723ec501f99a..e6f0070975f6 100755 --- a/tools/perf/tests/shell/test_intel_pt.sh +++ b/tools/perf/tests/shell/test_intel_pt.sh @@ -1,11 +1,11 @@ #!/bin/sh -# Miscellaneous Intel PT testing +# Miscellaneous Intel PT testing (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e # Skip if no Intel PT -perf list | grep -q 'intel_pt//' || exit 2 +perf list pmu | grep -q 'intel_pt//' || exit 2 shelldir=$(dirname "$0") # shellcheck source=lib/waiting.sh diff --git a/tools/perf/tests/shell/test_stat_intel_tpebs.sh b/tools/perf/tests/shell/test_stat_intel_tpebs.sh index c60b29add980..f95fc64bf0a7 100755 --- a/tools/perf/tests/shell/test_stat_intel_tpebs.sh +++ b/tools/perf/tests/shell/test_stat_intel_tpebs.sh @@ -1,5 +1,5 @@ #!/bin/bash -# test Intel TPEBS counting mode +# test Intel TPEBS counting mode (exclusive) # SPDX-License-Identifier: GPL-2.0 set -e @@ -8,12 +8,15 @@ grep -q GenuineIntel /proc/cpuinfo || { echo Skipping non-Intel; exit 2; } # Use this event for testing because it should exist in all platforms event=cache-misses:R +# Hybrid platforms output like "cpu_atom/cache-misses/R", rather than as above +alt_name=/cache-misses/R + # Without this cmd option, default value or zero is returned -echo "Testing without --record-tpebs" -result=$(perf stat -e "$event" true 2>&1) -[[ "$result" =~ $event ]] || exit 1 +#echo "Testing without --record-tpebs" +#result=$(perf stat -e "$event" true 2>&1) +#[[ "$result" =~ $event || "$result" =~ $alt_name ]] || exit 1 # In platforms that do not support TPEBS, it should execute without error. echo "Testing with --record-tpebs" result=$(perf stat -e "$event" --record-tpebs -a sleep 0.01 2>&1) -[[ "$result" =~ "perf record" && "$result" =~ $event ]] || exit 1 +[[ "$result" =~ "perf record" && "$result" =~ $event || "$result" =~ $alt_name ]] || exit 1 diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 3146a1eece07..708a13f00635 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -1,5 +1,5 @@ #!/bin/sh -# Check open filename arg using perf trace + vfs_getname +# Check open filename arg using perf trace + vfs_getname (exclusive) # Uses the 'perf test shell' library to add probe:vfs_getname to the system # then use it with 'perf trace' using 'touch' to write to a temp file, then @@ -19,7 +19,7 @@ skip_if_no_perf_trace || exit 2 . "$(dirname $0)"/lib/probe_vfs_getname.sh trace_open_vfs_getname() { - evts="$(echo "$(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/')" | sed ':a;N;s:\n:,:g')" + evts="$(echo "$(perf list tracepoint 2>/dev/null | grep -E 'syscalls:sys_enter_open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/')" | sed ':a;N;s:\n:,:g')" perf trace -e $evts touch $file 2>&1 | \ grep -E " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +\"?${file}\"?, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } diff --git a/tools/perf/tests/shell/trace_exit_race.sh b/tools/perf/tests/shell/trace_exit_race.sh new file mode 100755 index 000000000000..fbb0adc33a88 --- /dev/null +++ b/tools/perf/tests/shell/trace_exit_race.sh @@ -0,0 +1,51 @@ +#!/bin/sh +# perf trace exit race +# SPDX-License-Identifier: GPL-2.0 + +# Check that the last events of a perf trace'd subprocess are not +# lost. Specifically, trace the exiting syscall of "true" 10 times and ensure +# the output contains 10 correct lines. + +# shellcheck source=lib/probe.sh +. "$(dirname $0)"/lib/probe.sh + +skip_if_no_perf_trace || exit 2 + +if [ "$1" = "-v" ]; then + verbose="1" +fi + +iter=10 +regexp=" +[0-9]+\.[0-9]+ [0-9]+ syscalls:sys_enter_exit_group\(\)$" + +trace_shutdown_race() { + for _ in $(seq $iter); do + perf trace --no-comm -e syscalls:sys_enter_exit_group true 2>>$file + done + result="$(grep -c -E "$regexp" $file)" + [ $result = $iter ] +} + + +file=$(mktemp /tmp/temporary_file.XXXXX) + +# Do not use whatever ~/.perfconfig file, it may change the output +# via trace.{show_timestamp,show_prefix,etc} +export PERF_CONFIG=/dev/null + +trace_shutdown_race +err=$? + +if [ $err != 0 ] && [ "${verbose}" = "1" ]; then + lines_not_matching=$(mktemp /tmp/temporary_file.XXXXX) + if grep -v -E "$regexp" $file > $lines_not_matching ; then + echo "Lines not matching the expected regexp: '$regexp':" + cat $lines_not_matching + else + echo "Missing output, expected $iter but only got $result" + fi + rm -f $lines_not_matching +fi + +rm -f ${file} +exit $err diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index d33d0952025c..8e328bbd509d 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -152,4 +152,11 @@ out_delete_evlist: return err; } -DEFINE_SUITE("Number of exit events of a simple workload", task_exit); +struct test_case tests__task_exit[] = { + TEST_CASE_EXCLUSIVE("Number of exit events of a simple workload", task_exit), + { .name = NULL, } +}; +struct test_suite suite__task_exit = { + .desc = "Number of exit events of a simple workload", + .test_cases = tests__task_exit, +}; diff --git a/tools/perf/tests/tests-scripts.c b/tools/perf/tests/tests-scripts.c index ed114b044293..cf3ae0c1d871 100644 --- a/tools/perf/tests/tests-scripts.c +++ b/tools/perf/tests/tests-scripts.c @@ -175,6 +175,7 @@ static void append_script(int dir_fd, const char *name, char *desc, struct test_suite *test_suite, **result_tmp; struct test_case *tests; size_t len; + char *exclusive; snprintf(link, sizeof(link), "/proc/%d/fd/%d", getpid(), dir_fd); len = readlink(link, filename, sizeof(filename)); @@ -191,9 +192,13 @@ static void append_script(int dir_fd, const char *name, char *desc, return; } tests[0].name = strdup_check(name); + exclusive = strstr(desc, " (exclusive)"); + if (exclusive != NULL) { + tests[0].exclusive = true; + exclusive[0] = '\0'; + } tests[0].desc = strdup_check(desc); tests[0].run_case = shell_test__run; - test_suite = zalloc(sizeof(*test_suite)); if (!test_suite) { pr_err("Out of memory while building script test suite list\n"); diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 6ea2be86b7bf..cb58b43aa063 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -36,6 +36,7 @@ struct test_case { const char *desc; const char *skip_reason; test_fnptr run_case; + bool exclusive; }; struct test_suite { @@ -62,6 +63,14 @@ struct test_suite { .skip_reason = _reason, \ } +#define TEST_CASE_EXCLUSIVE(description, _name) \ + { \ + .name = #_name, \ + .desc = description, \ + .run_case = test__##_name, \ + .exclusive = true, \ + } + #define DEFINE_SUITE(description, _name) \ struct test_case tests__##_name[] = { \ TEST_CASE(description, _name), \ @@ -83,6 +92,8 @@ DECLARE_SUITE(perf_evsel__tp_sched_test); DECLARE_SUITE(syscall_openat_tp_fields); DECLARE_SUITE(pmu); DECLARE_SUITE(pmu_events); +DECLARE_SUITE(hwmon_pmu); +DECLARE_SUITE(tool_pmu); DECLARE_SUITE(attr); DECLARE_SUITE(dso_data); DECLARE_SUITE(dso_data_cache); diff --git a/tools/perf/tests/tool_pmu.c b/tools/perf/tests/tool_pmu.c new file mode 100644 index 000000000000..187942b749b7 --- /dev/null +++ b/tools/perf/tests/tool_pmu.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "debug.h" +#include "evlist.h" +#include "parse-events.h" +#include "tests.h" +#include "tool_pmu.h" + +static int do_test(enum tool_pmu_event ev, bool with_pmu) +{ + struct evlist *evlist = evlist__new(); + struct evsel *evsel; + struct parse_events_error err; + int ret; + char str[128]; + bool found = false; + + if (!evlist) { + pr_err("evlist allocation failed\n"); + return TEST_FAIL; + } + + if (with_pmu) + snprintf(str, sizeof(str), "tool/%s/", tool_pmu__event_to_str(ev)); + else + snprintf(str, sizeof(str), "%s", tool_pmu__event_to_str(ev)); + + parse_events_error__init(&err); + ret = parse_events(evlist, str, &err); + if (ret) { + if (tool_pmu__skip_event(tool_pmu__event_to_str(ev))) { + ret = TEST_OK; + goto out; + } + + pr_debug("FAILED %s:%d failed to parse event '%s', err %d\n", + __FILE__, __LINE__, str, ret); + parse_events_error__print(&err, str); + ret = TEST_FAIL; + goto out; + } + + ret = TEST_OK; + if (with_pmu ? (evlist->core.nr_entries != 1) : (evlist->core.nr_entries < 1)) { + pr_debug("FAILED %s:%d Unexpected number of events for '%s' of %d\n", + __FILE__, __LINE__, str, evlist->core.nr_entries); + ret = TEST_FAIL; + goto out; + } + + evlist__for_each_entry(evlist, evsel) { + if (perf_pmu__is_tool(evsel->pmu)) { + if (evsel->core.attr.config != ev) { + pr_debug("FAILED %s:%d Unexpected config for '%s', %lld != %d\n", + __FILE__, __LINE__, str, evsel->core.attr.config, ev); + ret = TEST_FAIL; + goto out; + } + found = true; + } + } + + if (!found && !tool_pmu__skip_event(tool_pmu__event_to_str(ev))) { + pr_debug("FAILED %s:%d Didn't find tool event '%s' in parsed evsels\n", + __FILE__, __LINE__, str); + ret = TEST_FAIL; + } + +out: + parse_events_error__exit(&err); + evlist__delete(evlist); + return ret; +} + +static int test__tool_pmu_without_pmu(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + int i; + + tool_pmu__for_each_event(i) { + int ret = do_test(i, /*with_pmu=*/false); + + if (ret != TEST_OK) + return ret; + } + return TEST_OK; +} + +static int test__tool_pmu_with_pmu(struct test_suite *test __maybe_unused, + int subtest __maybe_unused) +{ + int i; + + tool_pmu__for_each_event(i) { + int ret = do_test(i, /*with_pmu=*/true); + + if (ret != TEST_OK) + return ret; + } + return TEST_OK; +} + +static struct test_case tests__tool_pmu[] = { + TEST_CASE("Parsing without PMU name", tool_pmu_without_pmu), + TEST_CASE("Parsing with PMU name", tool_pmu_with_pmu), + { .name = NULL, } +}; + +struct test_suite suite__tool_pmu = { + .desc = "Tool PMU", + .test_cases = tests__tool_pmu, +}; diff --git a/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h index 13aea8fc3d45..47051871b436 100644 --- a/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h +++ b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h @@ -18,8 +18,8 @@ * Vectors 0 ... 31 : system traps and exceptions - hardcoded events * Vectors 32 ... 127 : device interrupts * Vector 128 : legacy int80 syscall interface - * Vectors 129 ... LOCAL_TIMER_VECTOR-1 - * Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts + * Vectors 129 ... FIRST_SYSTEM_VECTOR-1 : device interrupts + * Vectors FIRST_SYSTEM_VECTOR ... 255 : special interrupts * * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. * diff --git a/tools/perf/trace/beauty/fs_at_flags.sh b/tools/perf/trace/beauty/fs_at_flags.sh index 456f59addf74..e3f13f96a27c 100755 --- a/tools/perf/trace/beauty/fs_at_flags.sh +++ b/tools/perf/trace/beauty/fs_at_flags.sh @@ -13,9 +13,14 @@ printf "static const char *fs_at_flags[] = {\n" regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+AT_([^_]+[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' # AT_EACCESS is only meaningful to faccessat, so we will special case it there... # AT_STATX_SYNC_TYPE is not a bit, its a mask of AT_STATX_SYNC_AS_STAT, AT_STATX_FORCE_SYNC and AT_STATX_DONT_SYNC +# AT_HANDLE_FID and AT_HANDLE_MNT_ID_UNIQUE are reusing values and are valid only for name_to_handle_at() +# AT_RENAME_NOREPLACE reuses 0x1 and is valid only for renameat2() grep -E $regex ${linux_fcntl} | \ grep -v AT_EACCESS | \ grep -v AT_STATX_SYNC_TYPE | \ + grep -v AT_HANDLE_FID | \ + grep -v AT_HANDLE_MNT_ID_UNIQUE | \ + grep -v AT_RENAME_NOREPLACE | \ sed -r "s/$regex/\2 \1/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n" printf "};\n" diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index df9cdb8bbfb8..d18cc47e89bd 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -327,6 +327,7 @@ struct ucred { * plain text and require encryption */ +#define MSG_SOCK_DEVMEM 0x2000000 /* Receive devmem skbs as cmsg */ #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h index c0bcc185fa48..87e2dec79fea 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h @@ -16,6 +16,9 @@ #define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3) +/* Was the file just created? */ +#define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4) + /* * Cancel a blocking posix lock; internal use only until we expose an * asynchronous lock api to userspace: @@ -87,37 +90,70 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +#define AT_FDCWD -100 /* Special value for dirfd used to + indicate openat should use the + current working directory. */ + + +/* Generic flags for the *at(2) family of syscalls. */ + +/* Reserved for per-syscall flags 0xff. */ +#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic + links. */ +/* Reserved for per-syscall flags 0x200 */ +#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ +#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount + traversal. */ +#define AT_EMPTY_PATH 0x1000 /* Allow empty relative + pathname to operate on dirfd + directly. */ +/* + * These flags are currently statx(2)-specific, but they could be made generic + * in the future and so they should not be used for other per-syscall flags. + */ +#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ +#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ +#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ +#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ + +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ + /* - * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is - * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to - * unlinkat. The two functions do completely different things and therefore, - * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to - * faccessat would be undefined behavior and thus treating it equivalent to - * AT_EACCESS is valid undefined behavior. + * Per-syscall flags for the *at(2) family of syscalls. + * + * These are flags that are so syscall-specific that a user passing these flags + * to the wrong syscall is so "clearly wrong" that we can safely call such + * usage "undefined behaviour". + * + * For example, the constants AT_REMOVEDIR and AT_EACCESS have the same value. + * AT_EACCESS is meaningful only to faccessat, while AT_REMOVEDIR is meaningful + * only to unlinkat. The two functions do completely different things and + * therefore, the flags can be allowed to overlap. For example, passing + * AT_REMOVEDIR to faccessat would be undefined behavior and thus treating it + * equivalent to AT_EACCESS is valid undefined behavior. + * + * Note for implementers: When picking a new per-syscall AT_* flag, try to + * reuse already existing flags first. This leaves us with as many unused bits + * as possible, so we can use them for generic bits in the future if necessary. */ -#define AT_FDCWD -100 /* Special value used to indicate - openat should use the current - working directory. */ -#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ + +/* Flags for renameat2(2) (must match legacy RENAME_* flags). */ +#define AT_RENAME_NOREPLACE 0x0001 +#define AT_RENAME_EXCHANGE 0x0002 +#define AT_RENAME_WHITEOUT 0x0004 + +/* Flag for faccessat(2). */ #define AT_EACCESS 0x200 /* Test access permitted for effective IDs, not real IDs. */ +/* Flag for unlinkat(2). */ #define AT_REMOVEDIR 0x200 /* Remove directory instead of unlinking file. */ -#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ -#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ -#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ - -#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ -#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ -#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ -#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ - -#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +/* Flags for name_to_handle_at(2). */ +#define AT_HANDLE_FID 0x200 /* File handle is needed to compare + object identity and may not be + usable with open_by_handle_at(2). */ +#define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ -/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */ -#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to - compare object identity and may not - be usable to open_by_handle_at(2) */ #if defined(__KERNEL__) #define AT_GETATTR_NOSEC 0x80000000 #endif diff --git a/tools/perf/trace/beauty/include/uapi/linux/sched.h b/tools/perf/trace/beauty/include/uapi/linux/sched.h index 3bac0a8ceab2..359a14cc76a4 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/sched.h +++ b/tools/perf/trace/beauty/include/uapi/linux/sched.h @@ -118,6 +118,7 @@ struct clone_args { /* SCHED_ISO: reserved but not implemented yet */ #define SCHED_IDLE 5 #define SCHED_DEADLINE 6 +#define SCHED_EXT 7 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ #define SCHED_RESET_ON_FORK 0x40000000 diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h index 8bf7e8a0eb6f..4cd513215bcd 100644 --- a/tools/perf/trace/beauty/include/uapi/sound/asound.h +++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h @@ -869,7 +869,7 @@ struct snd_ump_block_info { * Timer section - /dev/snd/timer */ -#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 7) +#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 8) enum { SNDRV_TIMER_CLASS_NONE = -1, @@ -894,6 +894,7 @@ enum { #define SNDRV_TIMER_GLOBAL_RTC 1 /* unused */ #define SNDRV_TIMER_GLOBAL_HPET 2 #define SNDRV_TIMER_GLOBAL_HRTIMER 3 +#define SNDRV_TIMER_GLOBAL_UDRIVEN 4 /* info flags */ #define SNDRV_TIMER_FLG_SLAVE (1<<0) /* cannot be controlled */ @@ -974,6 +975,18 @@ struct snd_timer_status { }; #endif +/* + * This structure describes the userspace-driven timer. Such timers are purely virtual, + * and can only be triggered from software (for instance, by userspace application). + */ +struct snd_timer_uinfo { + /* To pretend being a normal timer, we need to know the resolution in ns. */ + __u64 resolution; + int fd; + unsigned int id; + unsigned char reserved[16]; +}; + #define SNDRV_TIMER_IOCTL_PVERSION _IOR('T', 0x00, int) #define SNDRV_TIMER_IOCTL_NEXT_DEVICE _IOWR('T', 0x01, struct snd_timer_id) #define SNDRV_TIMER_IOCTL_TREAD_OLD _IOW('T', 0x02, int) @@ -990,6 +1003,8 @@ struct snd_timer_status { #define SNDRV_TIMER_IOCTL_CONTINUE _IO('T', 0xa2) #define SNDRV_TIMER_IOCTL_PAUSE _IO('T', 0xa3) #define SNDRV_TIMER_IOCTL_TREAD64 _IOW('T', 0xa4, int) +#define SNDRV_TIMER_IOCTL_CREATE _IOWR('T', 0xa5, struct snd_timer_uinfo) +#define SNDRV_TIMER_IOCTL_TRIGGER _IO('T', 0xa6) #if __BITS_PER_LONG == 64 #define SNDRV_TIMER_IOCTL_TREAD SNDRV_TIMER_IOCTL_TREAD_OLD diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c index ed3ff969b546..2da581ff0c80 100644 --- a/tools/perf/trace/beauty/msg_flags.c +++ b/tools/perf/trace/beauty/msg_flags.c @@ -11,6 +11,9 @@ #ifndef MSG_BATCH #define MSG_BATCH 0x40000 #endif +#ifndef MSG_SOCK_DEVMEM +#define MSG_SOCK_DEVMEM 0x2000000 +#endif #ifndef MSG_ZEROCOPY #define MSG_ZEROCOPY 0x4000000 #endif @@ -57,6 +60,7 @@ static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, P_MSG_FLAG(MORE); P_MSG_FLAG(WAITFORONE); P_MSG_FLAG(BATCH); + P_MSG_FLAG(SOCK_DEVMEM); P_MSG_FLAG(ZEROCOPY); P_MSG_FLAG(SPLICE_PAGES); P_MSG_FLAG(FASTOPEN); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index dc616292b2dd..c06d2ee9024c 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -83,6 +83,8 @@ perf-util-y += pmu.o perf-util-y += pmus.o perf-util-y += pmu-flex.o perf-util-y += pmu-bison.o +perf-util-y += hwmon_pmu.o +perf-util-y += tool_pmu.o perf-util-y += svghelper.o perf-util-$(CONFIG_LIBTRACEEVENT) += trace-event-info.o perf-util-y += trace-event-scripting.o @@ -199,11 +201,14 @@ ifndef CONFIG_SETNS perf-util-y += setns.o endif -perf-util-$(CONFIG_DWARF) += probe-finder.o -perf-util-$(CONFIG_DWARF) += dwarf-aux.o -perf-util-$(CONFIG_DWARF) += dwarf-regs.o -perf-util-$(CONFIG_DWARF) += debuginfo.o -perf-util-$(CONFIG_DWARF) += annotate-data.o +perf-util-$(CONFIG_LIBDW) += probe-finder.o +perf-util-$(CONFIG_LIBDW) += dwarf-aux.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs-csky.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs-powerpc.o +perf-util-$(CONFIG_LIBDW) += dwarf-regs-x86.o +perf-util-$(CONFIG_LIBDW) += debuginfo.o +perf-util-$(CONFIG_LIBDW) += annotate-data.o perf-util-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o perf-util-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind-local.o @@ -234,7 +239,7 @@ perf-util-$(CONFIG_LIBLLVM) += llvm-c-helpers.o ifdef CONFIG_JITDUMP perf-util-$(CONFIG_LIBELF) += jitdump.o perf-util-$(CONFIG_LIBELF) += genelf.o -perf-util-$(CONFIG_DWARF) += genelf_debug.o +perf-util-$(CONFIG_LIBDW) += genelf_debug.o endif perf-util-y += perf-hooks.o diff --git a/tools/perf/util/annotate-data.h b/tools/perf/util/annotate-data.h index 8ac0fd94a0ba..98c80b2268dd 100644 --- a/tools/perf/util/annotate-data.h +++ b/tools/perf/util/annotate-data.h @@ -9,7 +9,7 @@ #include "dwarf-regs.h" #include "annotate.h" -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include "debuginfo.h" #endif @@ -165,7 +165,7 @@ struct annotated_data_stat { }; extern struct annotated_data_stat ann_data_stat; -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT /* * Type information in a register, valid when @ok is true. * The @caller_saved registers are invalidated after a function call. @@ -244,7 +244,7 @@ bool get_global_var_info(struct data_loc_info *dloc, u64 addr, const char **var_name, int *var_offset); void pr_debug_type_name(Dwarf_Die *die, enum type_state_kind kind); -#else /* HAVE_DWARF_SUPPORT */ +#else /* HAVE_LIBDW_SUPPORT */ static inline struct annotated_data_type * find_data_type(struct data_loc_info *dloc __maybe_unused) @@ -276,7 +276,7 @@ static inline int hist_entry__annotate_data_tty(struct hist_entry *he __maybe_un return -1; } -#endif /* HAVE_DWARF_SUPPORT */ +#endif /* HAVE_LIBDW_SUPPORT */ #ifdef HAVE_SLANG_SUPPORT int hist_entry__annotate_data_tui(struct hist_entry *he, struct evsel *evsel, diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 37ce43c4eb8f..32e15c9f53f3 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2116,6 +2116,12 @@ static int annotation__config(const char *var, const char *value, void *data) opt->offset_level = ANNOTATION__MAX_OFFSET_LEVEL; else if (opt->offset_level < ANNOTATION__MIN_OFFSET_LEVEL) opt->offset_level = ANNOTATION__MIN_OFFSET_LEVEL; + } else if (!strcmp(var, "annotate.disassemblers")) { + opt->disassemblers_str = strdup(value); + if (!opt->disassemblers_str) { + pr_err("Not enough memory for annotate.disassemblers\n"); + return -1; + } } else if (!strcmp(var, "annotate.hide_src_code")) { opt->hide_src_code = perf_config_bool("hide_src_code", value); } else if (!strcmp(var, "annotate.jump_arrows")) { @@ -2292,7 +2298,7 @@ static int extract_reg_offset(struct arch *arch, const char *str, if (regname == NULL) return -1; - op_loc->reg1 = get_dwarf_regnum(regname, 0); + op_loc->reg1 = get_dwarf_regnum(regname, arch->e_machine, arch->e_flags); free(regname); /* Get the second register */ @@ -2305,7 +2311,7 @@ static int extract_reg_offset(struct arch *arch, const char *str, if (regname == NULL) return -1; - op_loc->reg2 = get_dwarf_regnum(regname, 0); + op_loc->reg2 = get_dwarf_regnum(regname, arch->e_machine, arch->e_flags); free(regname); } return 0; @@ -2405,7 +2411,7 @@ int annotate_get_insn_location(struct arch *arch, struct disasm_line *dl, return -1; if (*s == arch->objdump.register_char) - op_loc->reg1 = get_dwarf_regnum(s, 0); + op_loc->reg1 = get_dwarf_regnum(s, arch->e_machine, arch->e_flags); else if (*s == arch->objdump.imm_char) { op_loc->offset = strtol(s + 1, &p, 0); if (p && p != s + 1) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 8b9e05a1932f..194a05cbc506 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -34,6 +34,9 @@ struct annotated_data_type; #define ANNOTATION__BR_CNTR_WIDTH 30 #define ANNOTATION_DUMMY_LEN 256 +// llvm, capstone, objdump +#define MAX_DISASSEMBLERS 3 + struct annotation_options { bool hide_src_code, use_offset, @@ -49,11 +52,14 @@ struct annotation_options { annotate_src, full_addr; u8 offset_level; + u8 nr_disassemblers; int min_pcnt; int max_lines; int context; char *objdump_path; char *disassembler_style; + const char *disassemblers_str; + const char *disassemblers[MAX_DISASSEMBLERS]; const char *prefix; const char *prefix_strip; unsigned int percent_type; diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h index 1443c28545a9..358c611eeddb 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h +++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h @@ -56,15 +56,15 @@ enum arm_spe_op_type { ARM_SPE_OP_BR_INDIRECT = 1 << 17, }; -enum arm_spe_neoverse_data_source { - ARM_SPE_NV_L1D = 0x0, - ARM_SPE_NV_L2 = 0x8, - ARM_SPE_NV_PEER_CORE = 0x9, - ARM_SPE_NV_LOCAL_CLUSTER = 0xa, - ARM_SPE_NV_SYS_CACHE = 0xb, - ARM_SPE_NV_PEER_CLUSTER = 0xc, - ARM_SPE_NV_REMOTE = 0xd, - ARM_SPE_NV_DRAM = 0xe, +enum arm_spe_common_data_source { + ARM_SPE_COMMON_DS_L1D = 0x0, + ARM_SPE_COMMON_DS_L2 = 0x8, + ARM_SPE_COMMON_DS_PEER_CORE = 0x9, + ARM_SPE_COMMON_DS_LOCAL_CLUSTER = 0xa, + ARM_SPE_COMMON_DS_SYS_CACHE = 0xb, + ARM_SPE_COMMON_DS_PEER_CLUSTER = 0xc, + ARM_SPE_COMMON_DS_REMOTE = 0xd, + ARM_SPE_COMMON_DS_DRAM = 0xe, }; struct arm_spe_record { diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 7bf607d0f6d8..4cef10a83962 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -11,7 +11,7 @@ #include <linux/bitops.h> #include <stdarg.h> #include <linux/kernel.h> -#include <asm-generic/unaligned.h> +#include <linux/unaligned.h> #include "arm-spe-pkt-decoder.h" diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c index 138ffc71b32d..dbf13f47879c 100644 --- a/tools/perf/util/arm-spe.c +++ b/tools/perf/util/arm-spe.c @@ -46,7 +46,6 @@ struct arm_spe { struct perf_session *session; struct machine *machine; u32 pmu_type; - u64 midr; struct perf_tsc_conversion tc; @@ -69,7 +68,7 @@ struct arm_spe { u64 llc_access_id; u64 tlb_miss_id; u64 tlb_access_id; - u64 branch_miss_id; + u64 branch_id; u64 remote_access_id; u64 memory_id; u64 instructions_id; @@ -78,6 +77,11 @@ struct arm_spe { unsigned long num_events; u8 use_ctx_pkt_for_pid; + + u64 **metadata; + u64 metadata_ver; + u64 metadata_nr_cpu; + bool is_homogeneous; }; struct arm_spe_queue { @@ -96,6 +100,7 @@ struct arm_spe_queue { u64 timestamp; struct thread *thread; u64 period_instructions; + u32 flags; }; static void arm_spe_dump(struct arm_spe *spe __maybe_unused, @@ -118,7 +123,7 @@ static void arm_spe_dump(struct arm_spe *spe __maybe_unused, else pkt_len = 1; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < pkt_len; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < 16; i++) @@ -273,6 +278,20 @@ static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid) return 0; } +static u64 *arm_spe__get_metadata_by_cpu(struct arm_spe *spe, u64 cpu) +{ + u64 i; + + if (!spe->metadata) + return NULL; + + for (i = 0; i < spe->metadata_nr_cpu; i++) + if (spe->metadata[i][ARM_SPE_CPU] == cpu) + return spe->metadata[i]; + + return NULL; +} + static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record) { struct simd_flags simd_flags = {}; @@ -376,6 +395,7 @@ static int arm_spe__synth_branch_sample(struct arm_spe_queue *speq, sample.stream_id = spe_events_id; sample.addr = record->to_ip; sample.weight = record->latency; + sample.flags = speq->flags; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } @@ -400,24 +420,44 @@ static int arm_spe__synth_instruction_sample(struct arm_spe_queue *speq, sample.id = spe_events_id; sample.stream_id = spe_events_id; - sample.addr = record->virt_addr; + sample.addr = record->to_ip; sample.phys_addr = record->phys_addr; sample.data_src = data_src; sample.period = spe->instructions_sample_period; sample.weight = record->latency; + sample.flags = speq->flags; return arm_spe_deliver_synth_event(spe, speq, event, &sample); } -static const struct midr_range neoverse_spe[] = { +static const struct midr_range common_ds_encoding_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), {}, }; -static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *record, - union perf_mem_data_src *data_src) +static void arm_spe__sample_flags(struct arm_spe_queue *speq) +{ + const struct arm_spe_record *record = &speq->decoder->record; + + speq->flags = 0; + if (record->op & ARM_SPE_OP_BRANCH_ERET) { + speq->flags = PERF_IP_FLAG_BRANCH; + + if (record->type & ARM_SPE_BRANCH_MISS) + speq->flags |= PERF_IP_FLAG_BRANCH_MISS; + } +} + +static void arm_spe__synth_data_source_common(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) { /* * Even though four levels of cache hierarchy are possible, no known @@ -439,17 +479,17 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec } switch (record->source) { - case ARM_SPE_NV_L1D: + case ARM_SPE_COMMON_DS_L1D: data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L1; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; break; - case ARM_SPE_NV_L2: + case ARM_SPE_COMMON_DS_L2: data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; break; - case ARM_SPE_NV_PEER_CORE: + case ARM_SPE_COMMON_DS_PEER_CORE: data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L2; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; @@ -458,8 +498,8 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec * We don't know if this is L1, L2 but we do know it was a cache-2-cache * transfer, so set SNOOPX_PEER */ - case ARM_SPE_NV_LOCAL_CLUSTER: - case ARM_SPE_NV_PEER_CLUSTER: + case ARM_SPE_COMMON_DS_LOCAL_CLUSTER: + case ARM_SPE_COMMON_DS_PEER_CLUSTER: data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; @@ -467,7 +507,7 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec /* * System cache is assumed to be L3 */ - case ARM_SPE_NV_SYS_CACHE: + case ARM_SPE_COMMON_DS_SYS_CACHE: data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_L3; data_src->mem_snoop = PERF_MEM_SNOOP_HIT; @@ -476,13 +516,13 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec * We don't know what level it hit in, except it came from the other * socket */ - case ARM_SPE_NV_REMOTE: + case ARM_SPE_COMMON_DS_REMOTE: data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1; data_src->mem_lvl_num = PERF_MEM_LVLNUM_ANY_CACHE; data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; data_src->mem_snoopx = PERF_MEM_SNOOPX_PEER; break; - case ARM_SPE_NV_DRAM: + case ARM_SPE_COMMON_DS_DRAM: data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; data_src->mem_lvl_num = PERF_MEM_LVLNUM_RAM; data_src->mem_snoop = PERF_MEM_SNOOP_NONE; @@ -492,8 +532,8 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec } } -static void arm_spe__synth_data_source_generic(const struct arm_spe_record *record, - union perf_mem_data_src *data_src) +static void arm_spe__synth_memory_level(const struct arm_spe_record *record, + union perf_mem_data_src *data_src) { if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) { data_src->mem_lvl = PERF_MEM_LVL_L3; @@ -515,10 +555,55 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco data_src->mem_lvl |= PERF_MEM_LVL_REM_CCE1; } -static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr) +static bool arm_spe__is_common_ds_encoding(struct arm_spe_queue *speq) +{ + struct arm_spe *spe = speq->spe; + bool is_in_cpu_list; + u64 *metadata = NULL; + u64 midr = 0; + + /* Metadata version 1 assumes all CPUs are the same (old behavior) */ + if (spe->metadata_ver == 1) { + const char *cpuid; + + pr_warning_once("Old SPE metadata, re-record to improve decode accuracy\n"); + cpuid = perf_env__cpuid(spe->session->evlist->env); + midr = strtol(cpuid, NULL, 16); + } else { + /* CPU ID is -1 for per-thread mode */ + if (speq->cpu < 0) { + /* + * On the heterogeneous system, due to CPU ID is -1, + * cannot confirm the data source packet is supported. + */ + if (!spe->is_homogeneous) + return false; + + /* In homogeneous system, simply use CPU0's metadata */ + if (spe->metadata) + metadata = spe->metadata[0]; + } else { + metadata = arm_spe__get_metadata_by_cpu(spe, speq->cpu); + } + + if (!metadata) + return false; + + midr = metadata[ARM_SPE_CPU_MIDR]; + } + + is_in_cpu_list = is_midr_in_range_list(midr, common_ds_encoding_cpus); + if (is_in_cpu_list) + return true; + else + return false; +} + +static u64 arm_spe__synth_data_source(struct arm_spe_queue *speq, + const struct arm_spe_record *record) { union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA }; - bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe); + bool is_common = arm_spe__is_common_ds_encoding(speq); if (record->op & ARM_SPE_OP_LD) data_src.mem_op = PERF_MEM_OP_LOAD; @@ -527,10 +612,10 @@ static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 m else return 0; - if (is_neoverse) - arm_spe__synth_data_source_neoverse(record, &data_src); + if (is_common) + arm_spe__synth_data_source_common(record, &data_src); else - arm_spe__synth_data_source_generic(record, &data_src); + arm_spe__synth_memory_level(record, &data_src); if (record->type & (ARM_SPE_TLB_ACCESS | ARM_SPE_TLB_MISS)) { data_src.mem_dtlb = PERF_MEM_TLB_WK; @@ -551,7 +636,8 @@ static int arm_spe_sample(struct arm_spe_queue *speq) u64 data_src; int err; - data_src = arm_spe__synth_data_source(record, spe->midr); + arm_spe__sample_flags(speq); + data_src = arm_spe__synth_data_source(speq, record); if (spe->sample_flc) { if (record->type & ARM_SPE_L1D_MISS) { @@ -601,8 +687,8 @@ static int arm_spe_sample(struct arm_spe_queue *speq) } } - if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) { - err = arm_spe__synth_branch_sample(speq, spe->branch_miss_id); + if (spe->sample_branch && (record->op & ARM_SPE_OP_BRANCH_ERET)) { + err = arm_spe__synth_branch_sample(speq, spe->branch_id); if (err) return err; } @@ -1016,6 +1102,73 @@ static int arm_spe_flush(struct perf_session *session __maybe_unused, return 0; } +static u64 *arm_spe__alloc_per_cpu_metadata(u64 *buf, int per_cpu_size) +{ + u64 *metadata; + + metadata = zalloc(per_cpu_size); + if (!metadata) + return NULL; + + memcpy(metadata, buf, per_cpu_size); + return metadata; +} + +static void arm_spe__free_metadata(u64 **metadata, int nr_cpu) +{ + int i; + + for (i = 0; i < nr_cpu; i++) + zfree(&metadata[i]); + free(metadata); +} + +static u64 **arm_spe__alloc_metadata(struct perf_record_auxtrace_info *info, + u64 *ver, int *nr_cpu) +{ + u64 *ptr = (u64 *)info->priv; + u64 metadata_size; + u64 **metadata = NULL; + int hdr_sz, per_cpu_sz, i; + + metadata_size = info->header.size - + sizeof(struct perf_record_auxtrace_info); + + /* Metadata version 1 */ + if (metadata_size == ARM_SPE_AUXTRACE_V1_PRIV_SIZE) { + *ver = 1; + *nr_cpu = 0; + /* No per CPU metadata */ + return NULL; + } + + *ver = ptr[ARM_SPE_HEADER_VERSION]; + hdr_sz = ptr[ARM_SPE_HEADER_SIZE]; + *nr_cpu = ptr[ARM_SPE_CPUS_NUM]; + + metadata = calloc(*nr_cpu, sizeof(*metadata)); + if (!metadata) + return NULL; + + /* Locate the start address of per CPU metadata */ + ptr += hdr_sz; + per_cpu_sz = (metadata_size - (hdr_sz * sizeof(u64))) / (*nr_cpu); + + for (i = 0; i < *nr_cpu; i++) { + metadata[i] = arm_spe__alloc_per_cpu_metadata(ptr, per_cpu_sz); + if (!metadata[i]) + goto err_per_cpu_metadata; + + ptr += per_cpu_sz / sizeof(u64); + } + + return metadata; + +err_per_cpu_metadata: + arm_spe__free_metadata(metadata, *nr_cpu); + return NULL; +} + static void arm_spe_free_queue(void *priv) { struct arm_spe_queue *speq = priv; @@ -1050,6 +1203,7 @@ static void arm_spe_free(struct perf_session *session) auxtrace_heap__free(&spe->heap); arm_spe_free_events(session); session->auxtrace = NULL; + arm_spe__free_metadata(spe->metadata, spe->metadata_nr_cpu); free(spe); } @@ -1061,16 +1215,60 @@ static bool arm_spe_evsel_is_auxtrace(struct perf_session *session, return evsel->core.attr.type == spe->pmu_type; } -static const char * const arm_spe_info_fmts[] = { - [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n", +static const char * const metadata_hdr_v1_fmts[] = { + [ARM_SPE_PMU_TYPE] = " PMU Type :%"PRId64"\n", + [ARM_SPE_PER_CPU_MMAPS] = " Per CPU mmaps :%"PRId64"\n", +}; + +static const char * const metadata_hdr_fmts[] = { + [ARM_SPE_HEADER_VERSION] = " Header version :%"PRId64"\n", + [ARM_SPE_HEADER_SIZE] = " Header size :%"PRId64"\n", + [ARM_SPE_PMU_TYPE_V2] = " PMU type v2 :%"PRId64"\n", + [ARM_SPE_CPUS_NUM] = " CPU number :%"PRId64"\n", +}; + +static const char * const metadata_per_cpu_fmts[] = { + [ARM_SPE_MAGIC] = " Magic :0x%"PRIx64"\n", + [ARM_SPE_CPU] = " CPU # :%"PRId64"\n", + [ARM_SPE_CPU_NR_PARAMS] = " Num of params :%"PRId64"\n", + [ARM_SPE_CPU_MIDR] = " MIDR :0x%"PRIx64"\n", + [ARM_SPE_CPU_PMU_TYPE] = " PMU Type :%"PRId64"\n", + [ARM_SPE_CAP_MIN_IVAL] = " Min Interval :%"PRId64"\n", }; -static void arm_spe_print_info(__u64 *arr) +static void arm_spe_print_info(struct arm_spe *spe, __u64 *arr) { + unsigned int i, cpu, hdr_size, cpu_num, cpu_size; + const char * const *hdr_fmts; + if (!dump_trace) return; - fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]); + if (spe->metadata_ver == 1) { + cpu_num = 0; + hdr_size = ARM_SPE_AUXTRACE_V1_PRIV_MAX; + hdr_fmts = metadata_hdr_v1_fmts; + } else { + cpu_num = arr[ARM_SPE_CPUS_NUM]; + hdr_size = arr[ARM_SPE_HEADER_SIZE]; + hdr_fmts = metadata_hdr_fmts; + } + + for (i = 0; i < hdr_size; i++) + fprintf(stdout, hdr_fmts[i], arr[i]); + + arr += hdr_size; + for (cpu = 0; cpu < cpu_num; cpu++) { + /* + * The parameters from ARM_SPE_MAGIC to ARM_SPE_CPU_NR_PARAMS + * are fixed. The sequential parameter size is decided by the + * field 'ARM_SPE_CPU_NR_PARAMS'. + */ + cpu_size = (ARM_SPE_CPU_NR_PARAMS + 1) + arr[ARM_SPE_CPU_NR_PARAMS]; + for (i = 0; i < cpu_size; i++) + fprintf(stdout, metadata_per_cpu_fmts[i], arr[i]); + arr += cpu_size; + } } static void arm_spe_set_event_name(struct evlist *evlist, u64 id, @@ -1202,12 +1400,12 @@ arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session) if (spe->synth_opts.branches) { spe->sample_branch = true; - /* Branch miss */ + /* Branch */ err = perf_session__deliver_synth_attr_event(session, &attr, id); if (err) return err; - spe->branch_miss_id = id; - arm_spe_set_event_name(evlist, id, "branch-miss"); + spe->branch_id = id; + arm_spe_set_event_name(evlist, id, "branch"); id += 1; } @@ -1258,24 +1456,57 @@ synth_instructions_out: return 0; } +static bool arm_spe__is_homogeneous(u64 **metadata, int nr_cpu) +{ + u64 midr; + int i; + + if (!nr_cpu) + return false; + + for (i = 0; i < nr_cpu; i++) { + if (!metadata[i]) + return false; + + if (i == 0) { + midr = metadata[i][ARM_SPE_CPU_MIDR]; + continue; + } + + if (midr != metadata[i][ARM_SPE_CPU_MIDR]) + return false; + } + + return true; +} + int arm_spe_process_auxtrace_info(union perf_event *event, struct perf_session *session) { struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info; - size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX; + size_t min_sz = ARM_SPE_AUXTRACE_V1_PRIV_SIZE; struct perf_record_time_conv *tc = &session->time_conv; - const char *cpuid = perf_env__cpuid(session->evlist->env); - u64 midr = strtol(cpuid, NULL, 16); struct arm_spe *spe; - int err; + u64 **metadata = NULL; + u64 metadata_ver; + int nr_cpu, err; if (auxtrace_info->header.size < sizeof(struct perf_record_auxtrace_info) + min_sz) return -EINVAL; + metadata = arm_spe__alloc_metadata(auxtrace_info, &metadata_ver, + &nr_cpu); + if (!metadata && metadata_ver != 1) { + pr_err("Failed to parse Arm SPE metadata.\n"); + return -EINVAL; + } + spe = zalloc(sizeof(struct arm_spe)); - if (!spe) - return -ENOMEM; + if (!spe) { + err = -ENOMEM; + goto err_free_metadata; + } err = auxtrace_queues__init(&spe->queues); if (err) @@ -1284,8 +1515,14 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->session = session; spe->machine = &session->machines.host; /* No kvm support */ spe->auxtrace_type = auxtrace_info->type; - spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; - spe->midr = midr; + if (metadata_ver == 1) + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; + else + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE_V2]; + spe->metadata = metadata; + spe->metadata_ver = metadata_ver; + spe->metadata_nr_cpu = nr_cpu; + spe->is_homogeneous = arm_spe__is_homogeneous(metadata, nr_cpu); spe->timeless_decoding = arm_spe__is_timeless_decoding(spe); @@ -1318,7 +1555,7 @@ int arm_spe_process_auxtrace_info(union perf_event *event, spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace; session->auxtrace = &spe->auxtrace; - arm_spe_print_info(&auxtrace_info->priv[0]); + arm_spe_print_info(spe, &auxtrace_info->priv[0]); if (dump_trace) return 0; @@ -1346,5 +1583,7 @@ err_free_queues: session->auxtrace = NULL; err_free: free(spe); +err_free_metadata: + arm_spe__free_metadata(metadata, nr_cpu); return err; } diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h index 4f4900c18f3e..390679a4af2f 100644 --- a/tools/perf/util/arm-spe.h +++ b/tools/perf/util/arm-spe.h @@ -12,10 +12,46 @@ enum { ARM_SPE_PMU_TYPE, ARM_SPE_PER_CPU_MMAPS, + ARM_SPE_AUXTRACE_V1_PRIV_MAX, +}; + +#define ARM_SPE_AUXTRACE_V1_PRIV_SIZE \ + (ARM_SPE_AUXTRACE_V1_PRIV_MAX * sizeof(u64)) + +enum { + /* + * The old metadata format (defined above) does not include a + * field for version number. Version 1 is reserved and starts + * from version 2. + */ + ARM_SPE_HEADER_VERSION, + /* Number of sizeof(u64) */ + ARM_SPE_HEADER_SIZE, + /* PMU type shared by CPUs */ + ARM_SPE_PMU_TYPE_V2, + /* Number of CPUs */ + ARM_SPE_CPUS_NUM, ARM_SPE_AUXTRACE_PRIV_MAX, }; -#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64)) +enum { + /* Magic number */ + ARM_SPE_MAGIC, + /* CPU logical number in system */ + ARM_SPE_CPU, + /* Number of parameters */ + ARM_SPE_CPU_NR_PARAMS, + /* CPU MIDR */ + ARM_SPE_CPU_MIDR, + /* Associated PMU type */ + ARM_SPE_CPU_PMU_TYPE, + /* Minimal interval */ + ARM_SPE_CAP_MIN_IVAL, + ARM_SPE_CPU_PRIV_MAX, +}; + +#define ARM_SPE_HEADER_CURRENT_VERSION 2 + union perf_event; struct perf_session; diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index a1895a4f530b..dddaf4f3ffed 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -75,7 +75,6 @@ enum itrace_period_type { * (not fully accurate, since CYC packets are only emitted * together with other events, such as branches) * @branches: whether to synthesize 'branches' events - * (branch misses only for Arm SPE) * @transactions: whether to synthesize events for transactions * @ptwrites: whether to synthesize events for ptwrites * @pwr_events: whether to synthesize power events @@ -650,7 +649,7 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session, #define ITRACE_HELP \ " i[period]: synthesize instructions events\n" \ " y[period]: synthesize cycles events (same period as i)\n" \ -" b: synthesize branches events (branch misses for Arm SPE)\n" \ +" b: synthesize branches events\n" \ " c: synthesize branches events (calls only)\n" \ " r: synthesize branches events (returns only)\n" \ " x: synthesize transactions events\n" \ diff --git a/tools/perf/util/bpf-filter.c b/tools/perf/util/bpf-filter.c index e87b6789eb9e..a4fdf6911ec1 100644 --- a/tools/perf/util/bpf-filter.c +++ b/tools/perf/util/bpf-filter.c @@ -375,7 +375,7 @@ static int create_idx_hash(struct evsel *evsel, struct perf_bpf_filter_entry *en pfi = zalloc(sizeof(*pfi)); if (pfi == NULL) { pr_err("Cannot save pinned filter index\n"); - goto err; + return -ENOMEM; } pfi->evsel = evsel; diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h deleted file mode 100644 index 66dcf751ef65..000000000000 --- a/tools/perf/util/bpf-prologue.h +++ /dev/null @@ -1,37 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2015, He Kuang <hekuang@huawei.com> - * Copyright (C) 2015, Huawei Inc. - */ -#ifndef __BPF_PROLOGUE_H -#define __BPF_PROLOGUE_H - -struct probe_trace_arg; -struct bpf_insn; - -#define BPF_PROLOGUE_MAX_ARGS 3 -#define BPF_PROLOGUE_START_ARG_REG BPF_REG_3 -#define BPF_PROLOGUE_FETCH_RESULT_REG BPF_REG_2 - -#ifdef HAVE_BPF_PROLOGUE -int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, - struct bpf_insn *new_prog, size_t *new_cnt, - size_t cnt_space); -#else -#include <linux/compiler.h> -#include <errno.h> - -static inline int -bpf__gen_prologue(struct probe_trace_arg *args __maybe_unused, - int nargs __maybe_unused, - struct bpf_insn *new_prog __maybe_unused, - size_t *new_cnt, - size_t cnt_space __maybe_unused) -{ - if (!new_cnt) - return -EINVAL; - *new_cnt = 0; - return -ENOTSUP; -} -#endif -#endif /* __BPF_PROLOGUE_H */ diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index 7a8af60e0f51..73fcafbffc6a 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -394,6 +394,7 @@ static int bperf_check_target(struct evsel *evsel, } static struct perf_cpu_map *all_cpu_map; +static __u32 filter_entry_cnt; static int bperf_reload_leader_program(struct evsel *evsel, int attr_map_fd, struct perf_event_attr_map_entry *entry) @@ -444,12 +445,32 @@ out: return err; } +static int bperf_attach_follower_program(struct bperf_follower_bpf *skel, + enum bperf_filter_type filter_type, + bool inherit) +{ + struct bpf_link *link; + int err = 0; + + if ((filter_type == BPERF_FILTER_PID || + filter_type == BPERF_FILTER_TGID) && inherit) + /* attach all follower bpf progs to enable event inheritance */ + err = bperf_follower_bpf__attach(skel); + else { + link = bpf_program__attach(skel->progs.fexit_XXX); + if (IS_ERR(link)) + err = PTR_ERR(link); + } + + return err; +} + static int bperf__load(struct evsel *evsel, struct target *target) { struct perf_event_attr_map_entry entry = {0xffffffff, 0xffffffff}; int attr_map_fd, diff_map_fd = -1, err; enum bperf_filter_type filter_type; - __u32 filter_entry_cnt, i; + __u32 i; if (bperf_check_target(evsel, target, &filter_type, &filter_entry_cnt)) return -1; @@ -529,9 +550,6 @@ static int bperf__load(struct evsel *evsel, struct target *target) /* set up reading map */ bpf_map__set_max_entries(evsel->follower_skel->maps.accum_readings, filter_entry_cnt); - /* set up follower filter based on target */ - bpf_map__set_max_entries(evsel->follower_skel->maps.filter, - filter_entry_cnt); err = bperf_follower_bpf__load(evsel->follower_skel); if (err) { pr_err("Failed to load follower skeleton\n"); @@ -543,6 +561,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) for (i = 0; i < filter_entry_cnt; i++) { int filter_map_fd; __u32 key; + struct bperf_filter_value fval = { i, 0 }; if (filter_type == BPERF_FILTER_PID || filter_type == BPERF_FILTER_TGID) @@ -553,12 +572,14 @@ static int bperf__load(struct evsel *evsel, struct target *target) break; filter_map_fd = bpf_map__fd(evsel->follower_skel->maps.filter); - bpf_map_update_elem(filter_map_fd, &key, &i, BPF_ANY); + bpf_map_update_elem(filter_map_fd, &key, &fval, BPF_ANY); } evsel->follower_skel->bss->type = filter_type; + evsel->follower_skel->bss->inherit = target->inherit; - err = bperf_follower_bpf__attach(evsel->follower_skel); + err = bperf_attach_follower_program(evsel->follower_skel, filter_type, + target->inherit); out: if (err && evsel->bperf_leader_link_fd >= 0) @@ -623,7 +644,7 @@ static int bperf__read(struct evsel *evsel) bperf_sync_counters(evsel); reading_map_fd = bpf_map__fd(skel->maps.accum_readings); - for (i = 0; i < bpf_map__max_entries(skel->maps.accum_readings); i++) { + for (i = 0; i < filter_entry_cnt; i++) { struct perf_cpu entry; __u32 cpu; diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c index b2f17cca014b..4a62ed593e84 100644 --- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c +++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c @@ -288,6 +288,10 @@ int sys_enter_rename(struct syscall_enter_args *args) augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64)); len += augmented_args->arg.size; + /* Every read from userspace is limited to value size */ + if (augmented_args->arg.size > sizeof(augmented_args->arg.value)) + return 1; /* Failure: don't filter */ + struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size; newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value)); @@ -315,6 +319,10 @@ int sys_enter_renameat2(struct syscall_enter_args *args) augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64)); len += augmented_args->arg.size; + /* Every read from userspace is limited to value size */ + if (augmented_args->arg.size > sizeof(augmented_args->arg.value)) + return 1; /* Failure: don't filter */ + struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size; newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value)); @@ -423,8 +431,9 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) { bool augmented, do_output = false; - int zero = 0, size, aug_size, index, output = 0, + int zero = 0, size, aug_size, index, value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value); + u64 output = 0; /* has to be u64, otherwise it won't pass the verifier */ unsigned int nr, *beauty_map; struct beauty_payload_enter *payload; void *arg, *payload_offset; @@ -477,6 +486,8 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) augmented = true; } else if (size < 0 && size >= -6) { /* buffer */ index = -(size + 1); + barrier_var(index); // Prevent clang (noticed with v18) from removing the &= 7 trick. + index &= 7; // Satisfy the bounds checking with the verifier in some kernels. aug_size = args->args[index]; if (aug_size > TRACE_AUG_MAX_BUF) @@ -488,10 +499,17 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) } } + /* Augmented data size is limited to sizeof(augmented_arg->unnamed union with value field) */ + if (aug_size > value_size) + aug_size = value_size; + /* write data to payload */ if (augmented) { int written = offsetof(struct augmented_arg, value) + aug_size; + if (written < 0 || written > sizeof(struct augmented_arg)) + return 1; + ((struct augmented_arg *)payload_offset)->size = aug_size; output += written; payload_offset += written; @@ -499,7 +517,7 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args) } } - if (!do_output) + if (!do_output || (sizeof(struct syscall_enter_args) + output) > sizeof(struct beauty_payload_enter)) return 1; return augmented__beauty_output(ctx, payload, sizeof(struct syscall_enter_args) + output); diff --git a/tools/perf/util/bpf_skel/bperf_follower.bpf.c b/tools/perf/util/bpf_skel/bperf_follower.bpf.c index f193998530d4..0595063139a3 100644 --- a/tools/perf/util/bpf_skel/bperf_follower.bpf.c +++ b/tools/perf/util/bpf_skel/bperf_follower.bpf.c @@ -5,6 +5,8 @@ #include <bpf/bpf_tracing.h> #include "bperf_u.h" +#define MAX_ENTRIES 102400 + struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); __uint(key_size, sizeof(__u32)); @@ -22,25 +24,29 @@ struct { struct { __uint(type, BPF_MAP_TYPE_HASH); __uint(key_size, sizeof(__u32)); - __uint(value_size, sizeof(__u32)); + __uint(value_size, sizeof(struct bperf_filter_value)); + __uint(max_entries, MAX_ENTRIES); + __uint(map_flags, BPF_F_NO_PREALLOC); } filter SEC(".maps"); enum bperf_filter_type type = 0; int enabled = 0; +int inherit; SEC("fexit/XXX") int BPF_PROG(fexit_XXX) { struct bpf_perf_event_value *diff_val, *accum_val; __u32 filter_key, zero = 0; - __u32 *accum_key; + __u32 accum_key; + struct bperf_filter_value *fval; if (!enabled) return 0; switch (type) { case BPERF_FILTER_GLOBAL: - accum_key = &zero; + accum_key = zero; goto do_add; case BPERF_FILTER_CPU: filter_key = bpf_get_smp_processor_id(); @@ -49,22 +55,34 @@ int BPF_PROG(fexit_XXX) filter_key = bpf_get_current_pid_tgid() & 0xffffffff; break; case BPERF_FILTER_TGID: - filter_key = bpf_get_current_pid_tgid() >> 32; + /* Use pid as the filter_key to exclude new task counts + * when inherit is disabled. Don't worry about the existing + * children in TGID losing their counts, bpf_counter has + * already added them to the filter map via perf_thread_map + * before this bpf prog runs. + */ + filter_key = inherit ? + bpf_get_current_pid_tgid() >> 32 : + bpf_get_current_pid_tgid() & 0xffffffff; break; default: return 0; } - accum_key = bpf_map_lookup_elem(&filter, &filter_key); - if (!accum_key) + fval = bpf_map_lookup_elem(&filter, &filter_key); + if (!fval) return 0; + accum_key = fval->accum_key; + if (fval->exited) + bpf_map_delete_elem(&filter, &filter_key); + do_add: diff_val = bpf_map_lookup_elem(&diff_readings, &zero); if (!diff_val) return 0; - accum_val = bpf_map_lookup_elem(&accum_readings, accum_key); + accum_val = bpf_map_lookup_elem(&accum_readings, &accum_key); if (!accum_val) return 0; @@ -75,4 +93,70 @@ do_add: return 0; } +/* The program is only used for PID or TGID filter types. */ +SEC("tp_btf/task_newtask") +int BPF_PROG(on_newtask, struct task_struct *task, __u64 clone_flags) +{ + __u32 parent_key, child_key; + struct bperf_filter_value *parent_fval; + struct bperf_filter_value child_fval = { 0 }; + + if (!enabled) + return 0; + + switch (type) { + case BPERF_FILTER_PID: + parent_key = bpf_get_current_pid_tgid() & 0xffffffff; + child_key = task->pid; + break; + case BPERF_FILTER_TGID: + parent_key = bpf_get_current_pid_tgid() >> 32; + child_key = task->tgid; + if (child_key == parent_key) + return 0; + break; + default: + return 0; + } + + /* Check if the current task is one of the target tasks to be counted */ + parent_fval = bpf_map_lookup_elem(&filter, &parent_key); + if (!parent_fval) + return 0; + + /* Start counting for the new task by adding it into filter map, + * inherit the accum key of its parent task so that they can be + * counted together. + */ + child_fval.accum_key = parent_fval->accum_key; + child_fval.exited = 0; + bpf_map_update_elem(&filter, &child_key, &child_fval, BPF_NOEXIST); + + return 0; +} + +/* The program is only used for PID or TGID filter types. */ +SEC("tp_btf/sched_process_exit") +int BPF_PROG(on_exittask, struct task_struct *task) +{ + __u32 pid; + struct bperf_filter_value *fval; + + if (!enabled) + return 0; + + /* Stop counting for this task by removing it from filter map. + * For TGID type, if the pid can be found in the map, it means that + * this pid belongs to the leader task. After the task exits, the + * tgid of its child tasks (if any) will be 1, so the pid can be + * safely removed. + */ + pid = task->pid; + fval = bpf_map_lookup_elem(&filter, &pid); + if (fval) + fval->exited = 1; + + return 0; +} + char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/perf/util/bpf_skel/bperf_u.h b/tools/perf/util/bpf_skel/bperf_u.h index 1ce0c2c905c1..4a4a753980be 100644 --- a/tools/perf/util/bpf_skel/bperf_u.h +++ b/tools/perf/util/bpf_skel/bperf_u.h @@ -11,4 +11,9 @@ enum bperf_filter_type { BPERF_FILTER_TGID, }; +struct bperf_filter_value { + __u32 accum_key; + __u8 exited; +}; + #endif /* __BPERF_STAT_U_H */ diff --git a/tools/perf/util/cap.c b/tools/perf/util/cap.c index 7574a67651bc..69d9a2bcd40b 100644 --- a/tools/perf/util/cap.c +++ b/tools/perf/util/cap.c @@ -7,13 +7,9 @@ #include "debug.h" #include <errno.h> #include <string.h> -#include <unistd.h> #include <linux/capability.h> #include <sys/syscall.h> - -#ifndef SYS_capget -#define SYS_capget 90 -#endif +#include <unistd.h> #define MAX_LINUX_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 @@ -21,9 +17,9 @@ bool perf_cap__capable(int cap, bool *used_root) { struct __user_cap_header_struct header = { .version = _LINUX_CAPABILITY_VERSION_3, - .pid = getpid(), + .pid = 0, }; - struct __user_cap_data_struct data[MAX_LINUX_CAPABILITY_U32S]; + struct __user_cap_data_struct data[MAX_LINUX_CAPABILITY_U32S] = {}; __u32 cap_val; *used_root = false; diff --git a/tools/perf/util/color.c b/tools/perf/util/color.c index bffbdd216a6a..e51f0a676a22 100644 --- a/tools/perf/util/color.c +++ b/tools/perf/util/color.c @@ -93,34 +93,6 @@ int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) return r; } -/* - * This function splits the buffer by newlines and colors the lines individually. - * - * Returns 0 on success. - */ -int color_fwrite_lines(FILE *fp, const char *color, - size_t count, const char *buf) -{ - if (!*color) - return fwrite(buf, count, 1, fp) != 1; - - while (count) { - char *p = memchr(buf, '\n', count); - - if (p != buf && (fputs(color, fp) < 0 || - fwrite(buf, p ? (size_t)(p - buf) : count, 1, fp) != 1 || - fputs(PERF_COLOR_RESET, fp) < 0)) - return -1; - if (!p) - return 0; - if (fputc('\n', fp) < 0) - return -1; - count -= p + 1 - buf; - buf = p + 1; - } - return 0; -} - const char *get_percent_color(double percent) { const char *color = PERF_COLOR_NORMAL; diff --git a/tools/perf/util/color.h b/tools/perf/util/color.h index 01f7bed21c9b..9a7248dbe2d7 100644 --- a/tools/perf/util/color.h +++ b/tools/perf/util/color.h @@ -2,6 +2,7 @@ #ifndef __PERF_COLOR_H #define __PERF_COLOR_H +#include <linux/compiler.h> #include <stdio.h> #include <stdarg.h> @@ -22,6 +23,7 @@ #define MIN_GREEN 0.5 #define MIN_RED 5.0 +#define PERF_COLOR_DELETE_LINE "\033[A\33[2K\r" /* * This variable stores the value of color.ui */ @@ -37,12 +39,11 @@ int perf_config_colorbool(const char *var, const char *value, int stdout_is_tty) int color_vsnprintf(char *bf, size_t size, const char *color, const char *fmt, va_list args); int color_vfprintf(FILE *fp, const char *color, const char *fmt, va_list args); -int color_fprintf(FILE *fp, const char *color, const char *fmt, ...); -int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...); -int color_fwrite_lines(FILE *fp, const char *color, size_t count, const char *buf); +int color_fprintf(FILE *fp, const char *color, const char *fmt, ...) __printf(3, 4); +int color_snprintf(char *bf, size_t size, const char *color, const char *fmt, ...) __printf(4, 5); int value_color_snprintf(char *bf, size_t size, const char *fmt, double value); -int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...); -int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...); +int percent_color_snprintf(char *bf, size_t size, const char *fmt, ...) __printf(3, 4); +int percent_color_len_snprintf(char *bf, size_t size, const char *fmt, ...) __printf(3, 4); int percent_color_fprintf(FILE *fp, const char *fmt, double percent); const char *get_percent_color(double percent); diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 7a650de0db83..68f9407ca74b 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -912,6 +912,7 @@ void set_buildid_dir(const char *dir) struct perf_config_scan_data { const char *name; const char *fmt; + const char *value; va_list args; int ret; }; @@ -939,3 +940,24 @@ int perf_config_scan(const char *name, const char *fmt, ...) return d.ret; } + +static int perf_config_get_cb(const char *var, const char *value, void *data) +{ + struct perf_config_scan_data *d = data; + + if (!strcmp(var, d->name)) + d->value = value; + + return 0; +} + +const char *perf_config_get(const char *name) +{ + struct perf_config_scan_data d = { + .name = name, + .value = NULL, + }; + + perf_config(perf_config_get_cb, &d); + return d.value; +} diff --git a/tools/perf/util/config.h b/tools/perf/util/config.h index 2e5e808928a5..9971313d61c1 100644 --- a/tools/perf/util/config.h +++ b/tools/perf/util/config.h @@ -30,6 +30,7 @@ typedef int (*config_fn_t)(const char *, const char *, void *); int perf_default_config(const char *, const char *, void *); int perf_config(config_fn_t fn, void *); int perf_config_scan(const char *name, const char *fmt, ...) __scanf(2, 3); +const char *perf_config_get(const char *name); int perf_config_set(struct perf_config_set *set, config_fn_t fn, void *data); int perf_config_int(int *dest, const char *, const char *); diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index b78ef0262135..b85a8837bddc 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -685,9 +685,14 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, } if (d_params->operation == CS_ETM_OPERATION_DECODE) { + int decode_flags = OCSD_CREATE_FLG_FULL_DECODER; +#ifdef OCSD_OPFLG_N_UNCOND_DIR_BR_CHK + decode_flags |= OCSD_OPFLG_N_UNCOND_DIR_BR_CHK | OCSD_OPFLG_CHK_RANGE_CONTINUE | + ETM4_OPFLG_PKTDEC_AA64_OPCODE_CHK; +#endif if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder->decoder_name, - OCSD_CREATE_FLG_FULL_DECODER, + decode_flags, trace_config, &csid)) return -1; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 90f32f327b9b..0bf9e5c27b59 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -2490,12 +2490,6 @@ static void cs_etm__clear_all_traceid_queues(struct cs_etm_queue *etmq) /* Ignore return value */ cs_etm__process_traceid_queue(etmq, tidq); - - /* - * Generate an instruction sample with the remaining - * branchstack entries. - */ - cs_etm__flush(etmq, tidq); } } @@ -2638,7 +2632,7 @@ static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm) while (1) { if (!etm->heap.heap_cnt) - goto out; + break; /* Take the entry at the top of the min heap */ cs_queue_nr = etm->heap.heap_array[0].queue_nr; @@ -2721,6 +2715,23 @@ refetch: ret = auxtrace_heap__add(&etm->heap, cs_queue_nr, cs_timestamp); } + for (i = 0; i < etm->queues.nr_queues; i++) { + struct int_node *inode; + + etmq = etm->queues.queue_array[i].priv; + if (!etmq) + continue; + + intlist__for_each_entry(inode, etmq->traceid_queues_list) { + int idx = (int)(intptr_t)inode->priv; + + /* Flush any remaining branch stack entries */ + tidq = etmq->traceid_queues[idx]; + ret = cs_etm__end_block(etmq, tidq); + if (ret) + return ret; + } + } out: return ret; } @@ -3323,7 +3334,7 @@ static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm) * Don't create decoders for empty queues, mainly because * etmq->format is unknown for empty queues. */ - assert(empty == (etmq->format == UNSET)); + assert(empty || etmq->format != UNSET); if (empty) continue; diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 021e9b1d5cc5..f0599c61fab4 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -36,7 +36,7 @@ #include "util/sample.h" #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #define pr_N(n, fmt, ...) \ diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c index 20bfb0884e9e..8304cd2d4a9c 100644 --- a/tools/perf/util/data-convert-json.c +++ b/tools/perf/util/data-convert-json.c @@ -28,7 +28,7 @@ #include "util/tool.h" #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif struct convert_json { diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index d633d15329fa..995f6bb05b5f 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -27,7 +27,7 @@ #include <linux/ctype.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #else #define LIBTRACEEVENT_VERSION 0 #endif diff --git a/tools/perf/util/debuginfo.h b/tools/perf/util/debuginfo.h index ad6422c3f8ca..a52d69932815 100644 --- a/tools/perf/util/debuginfo.h +++ b/tools/perf/util/debuginfo.h @@ -5,7 +5,7 @@ #include <errno.h> #include <linux/compiler.h> -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include "dwarf-aux.h" @@ -25,7 +25,7 @@ void debuginfo__delete(struct debuginfo *dbg); int debuginfo__get_text_offset(struct debuginfo *dbg, Dwarf_Addr *offs, bool adjust_offset); -#else /* HAVE_DWARF_SUPPORT */ +#else /* HAVE_LIBDW_SUPPORT */ /* dummy debug information structure */ struct debuginfo { @@ -49,7 +49,7 @@ static inline int debuginfo__get_text_offset(struct debuginfo *dbg __maybe_unuse return -EINVAL; } -#endif /* HAVE_DWARF_SUPPORT */ +#endif /* HAVE_LIBDW_SUPPORT */ #ifdef HAVE_DEBUGINFOD_SUPPORT int get_source_from_debuginfod(const char *raw_path, const char *sbuild_id, diff --git a/tools/perf/util/disasm.c b/tools/perf/util/disasm.c index f05ba7739c1e..41a2b08670dc 100644 --- a/tools/perf/util/disasm.c +++ b/tools/perf/util/disasm.c @@ -18,6 +18,7 @@ #include "disasm.h" #include "disasm_bpf.h" #include "dso.h" +#include "dwarf-regs.h" #include "env.h" #include "evsel.h" #include "map.h" @@ -151,14 +152,14 @@ static struct arch architectures[] = { .memory_ref_char = '(', .imm_char = '$', }, -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT .update_insn_state = update_insn_state_x86, #endif }, { .name = "powerpc", .init = powerpc__annotate_init, -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT .update_insn_state = update_insn_state_powerpc, #endif }, @@ -1423,6 +1424,15 @@ err: } #endif +#if !defined(HAVE_LIBCAPSTONE_SUPPORT) || !defined(HAVE_LIBLLVM_SUPPORT) +static void symbol__disassembler_missing(const char *disassembler, const char *filename, + struct symbol *sym) +{ + pr_debug("The %s disassembler isn't linked in for %s in %s\n", + disassembler, sym->name, filename); +} +#endif + #ifdef HAVE_LIBCAPSTONE_SUPPORT static void print_capstone_detail(cs_insn *insn, char *buf, size_t len, struct annotate_args *args, u64 addr) @@ -1573,7 +1583,7 @@ static int symbol__disassemble_capstone_powerpc(char *filename, struct symbol *s dl = disasm_line__new(args); if (dl == NULL) - goto err; + break; annotation_line__add(&dl->al, ¬es->src->source); @@ -1603,18 +1613,6 @@ out: err: if (fd >= 0) close(fd); - if (needs_cs_close) { - struct disasm_line *tmp; - - /* - * It probably failed in the middle of the above loop. - * Release any resources it might add. - */ - list_for_each_entry_safe(dl, tmp, ¬es->src->source, al.node) { - list_del(&dl->al.node); - free(dl); - } - } count = -1; goto out; } @@ -1627,12 +1625,12 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym, u64 start = map__rip_2objdump(map, sym->start); u64 len; u64 offset; - int i, count; + int i, count, free_count; bool is_64bit = false; bool needs_cs_close = false; u8 *buf = NULL; csh handle; - cs_insn *insn; + cs_insn *insn = NULL; char disasm_buf[512]; struct disasm_line *dl; @@ -1664,7 +1662,7 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym, needs_cs_close = true; - count = cs_disasm(handle, buf, len, start, len, &insn); + free_count = count = cs_disasm(handle, buf, len, start, len, &insn); for (i = 0, offset = 0; i < count; i++) { int printed; @@ -1702,8 +1700,11 @@ static int symbol__disassemble_capstone(char *filename, struct symbol *sym, } out: - if (needs_cs_close) + if (needs_cs_close) { cs_close(&handle); + if (free_count > 0) + cs_free(insn, free_count); + } free(buf); return count < 0 ? count : 0; @@ -1717,13 +1718,27 @@ err: */ list_for_each_entry_safe(dl, tmp, ¬es->src->source, al.node) { list_del(&dl->al.node); - free(dl); + disasm_line__free(dl); } } count = -1; goto out; } -#endif +#else // HAVE_LIBCAPSTONE_SUPPORT +static int symbol__disassemble_capstone(char *filename, struct symbol *sym, + struct annotate_args *args __maybe_unused) +{ + symbol__disassembler_missing("capstone", filename, sym); + return -1; +} + +static int symbol__disassemble_capstone_powerpc(char *filename, struct symbol *sym, + struct annotate_args *args __maybe_unused) +{ + symbol__disassembler_missing("capstone powerpc", filename, sym); + return -1; +} +#endif // HAVE_LIBCAPSTONE_SUPPORT static int symbol__disassemble_raw(char *filename, struct symbol *sym, struct annotate_args *args) @@ -1782,7 +1797,7 @@ static int symbol__disassemble_raw(char *filename, struct symbol *sym, sprintf(args->line, "%x", line[i]); dl = disasm_line__new(args); if (dl == NULL) - goto err; + break; annotation_line__add(&dl->al, ¬es->src->source); offset += 4; @@ -1991,7 +2006,14 @@ err: free(line_storage); return ret; } -#endif +#else // HAVE_LIBLLVM_SUPPORT +static int symbol__disassemble_llvm(char *filename, struct symbol *sym, + struct annotate_args *args __maybe_unused) +{ + symbol__disassembler_missing("LLVM", filename, sym); + return -1; +} +#endif // HAVE_LIBLLVM_SUPPORT /* * Possibly create a new version of line with tabs expanded. Returns the @@ -2053,17 +2075,14 @@ static char *expand_tabs(char *line, char **storage, size_t *storage_len) return new_line; } -int symbol__disassemble(struct symbol *sym, struct annotate_args *args) +static int symbol__disassemble_objdump(const char *filename, struct symbol *sym, + struct annotate_args *args) { struct annotation_options *opts = &annotate_opts; struct map *map = args->ms.map; struct dso *dso = map__dso(map); char *command; FILE *file; - char symfs_filename[PATH_MAX]; - struct kcore_extract kce; - bool delete_extract = false; - bool decomp = false; int lineno = 0; char *fileloc = NULL; int nline; @@ -2078,77 +2097,7 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) NULL, }; struct child_process objdump_process; - int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); - - if (err) - return err; - - pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, - symfs_filename, sym->name, map__unmap_ip(map, sym->start), - map__unmap_ip(map, sym->end)); - - pr_debug("annotating [%p] %30s : [%p] %30s\n", - dso, dso__long_name(dso), sym, sym->name); - - if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) { - return symbol__disassemble_bpf(sym, args); - } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) { - return symbol__disassemble_bpf_image(sym, args); - } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { - return -1; - } else if (dso__is_kcore(dso)) { - kce.kcore_filename = symfs_filename; - kce.addr = map__rip_2objdump(map, sym->start); - kce.offs = sym->start; - kce.len = sym->end - sym->start; - if (!kcore_extract__create(&kce)) { - delete_extract = true; - strlcpy(symfs_filename, kce.extract_filename, - sizeof(symfs_filename)); - } - } else if (dso__needs_decompress(dso)) { - char tmp[KMOD_DECOMP_LEN]; - - if (dso__decompress_kmodule_path(dso, symfs_filename, - tmp, sizeof(tmp)) < 0) - return -1; - - decomp = true; - strcpy(symfs_filename, tmp); - } - - /* - * For powerpc data type profiling, use the dso__data_read_offset - * to read raw instruction directly and interpret the binary code - * to understand instructions and register fields. For sort keys as - * type and typeoff, disassemble to mnemonic notation is - * not required in case of powerpc. - */ - if (arch__is(args->arch, "powerpc")) { - extern const char *sort_order; - - if (sort_order && !strstr(sort_order, "sym")) { - err = symbol__disassemble_raw(symfs_filename, sym, args); - if (err == 0) - goto out_remove_tmp; -#ifdef HAVE_LIBCAPSTONE_SUPPORT - err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args); - if (err == 0) - goto out_remove_tmp; -#endif - } - } - -#ifdef HAVE_LIBLLVM_SUPPORT - err = symbol__disassemble_llvm(symfs_filename, sym, args); - if (err == 0) - goto out_remove_tmp; -#endif -#ifdef HAVE_LIBCAPSTONE_SUPPORT - err = symbol__disassemble_capstone(symfs_filename, sym, args); - if (err == 0) - goto out_remove_tmp; -#endif + int err; err = asprintf(&command, "%s %s%s --start-address=0x%016" PRIx64 @@ -2171,13 +2120,13 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) if (err < 0) { pr_err("Failure allocating memory for the command to run\n"); - goto out_remove_tmp; + return err; } pr_debug("Executing: %s\n", command); objdump_argv[2] = command; - objdump_argv[4] = symfs_filename; + objdump_argv[4] = filename; /* Create a pipe to read from for stdout */ memset(&objdump_process, 0, sizeof(objdump_process)); @@ -2215,8 +2164,8 @@ int symbol__disassemble(struct symbol *sym, struct annotate_args *args) break; /* Skip lines containing "filename:" */ - match = strstr(line, symfs_filename); - if (match && match[strlen(symfs_filename)] == ':') + match = strstr(line, filename); + if (match && match[strlen(filename)] == ':') continue; expanded_line = strim(line); @@ -2261,7 +2210,150 @@ out_close_stdout: out_free_command: free(command); + return err; +} + +static int annotation_options__init_disassemblers(struct annotation_options *options) +{ + char *disassembler; + + if (options->disassemblers_str == NULL) { + const char *default_disassemblers_str = +#ifdef HAVE_LIBLLVM_SUPPORT + "llvm," +#endif +#ifdef HAVE_LIBCAPSTONE_SUPPORT + "capstone," +#endif + "objdump"; + + options->disassemblers_str = strdup(default_disassemblers_str); + if (!options->disassemblers_str) + goto out_enomem; + } + + disassembler = strdup(options->disassemblers_str); + if (disassembler == NULL) + goto out_enomem; + + while (1) { + char *comma = strchr(disassembler, ','); + + if (comma != NULL) + *comma = '\0'; + + options->disassemblers[options->nr_disassemblers++] = strim(disassembler); + + if (comma == NULL) + break; + + disassembler = comma + 1; + + if (options->nr_disassemblers >= MAX_DISASSEMBLERS) { + pr_debug("annotate.disassemblers can have at most %d entries, ignoring \"%s\"\n", + MAX_DISASSEMBLERS, disassembler); + break; + } + } + + return 0; + +out_enomem: + pr_err("Not enough memory for annotate.disassemblers\n"); + return -1; +} + +int symbol__disassemble(struct symbol *sym, struct annotate_args *args) +{ + struct annotation_options *options = args->options; + struct map *map = args->ms.map; + struct dso *dso = map__dso(map); + char symfs_filename[PATH_MAX]; + bool delete_extract = false; + struct kcore_extract kce; + const char *disassembler; + bool decomp = false; + int err = dso__disassemble_filename(dso, symfs_filename, sizeof(symfs_filename)); + + if (err) + return err; + + pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, + symfs_filename, sym->name, map__unmap_ip(map, sym->start), + map__unmap_ip(map, sym->end)); + + pr_debug("annotating [%p] %30s : [%p] %30s\n", dso, dso__long_name(dso), sym, sym->name); + if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_PROG_INFO) { + return symbol__disassemble_bpf(sym, args); + } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__BPF_IMAGE) { + return symbol__disassemble_bpf_image(sym, args); + } else if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) { + return -1; + } else if (dso__is_kcore(dso)) { + kce.addr = map__rip_2objdump(map, sym->start); + kce.kcore_filename = symfs_filename; + kce.len = sym->end - sym->start; + kce.offs = sym->start; + + if (!kcore_extract__create(&kce)) { + delete_extract = true; + strlcpy(symfs_filename, kce.extract_filename, sizeof(symfs_filename)); + } + } else if (dso__needs_decompress(dso)) { + char tmp[KMOD_DECOMP_LEN]; + + if (dso__decompress_kmodule_path(dso, symfs_filename, tmp, sizeof(tmp)) < 0) + return -1; + + decomp = true; + strcpy(symfs_filename, tmp); + } + + /* + * For powerpc data type profiling, use the dso__data_read_offset to + * read raw instruction directly and interpret the binary code to + * understand instructions and register fields. For sort keys as type + * and typeoff, disassemble to mnemonic notation is not required in + * case of powerpc. + */ + if (arch__is(args->arch, "powerpc")) { + extern const char *sort_order; + + if (sort_order && !strstr(sort_order, "sym")) { + err = symbol__disassemble_raw(symfs_filename, sym, args); + if (err == 0) + goto out_remove_tmp; + + err = symbol__disassemble_capstone_powerpc(symfs_filename, sym, args); + if (err == 0) + goto out_remove_tmp; + } + } + + err = annotation_options__init_disassemblers(options); + if (err) + goto out_remove_tmp; + + err = -1; + + for (int i = 0; i < options->nr_disassemblers && err != 0; ++i) { + disassembler = options->disassemblers[i]; + + if (!strcmp(disassembler, "llvm")) + err = symbol__disassemble_llvm(symfs_filename, sym, args); + else if (!strcmp(disassembler, "capstone")) + err = symbol__disassemble_capstone(symfs_filename, sym, args); + else if (!strcmp(disassembler, "objdump")) + err = symbol__disassemble_objdump(symfs_filename, sym, args); + else + pr_debug("Unknown disassembler %s, skipping...\n", disassembler); + } + + if (err == 0) { + pr_debug("Disassembled with %s\nannotate.disassemblers=%s\n", + disassembler, options->disassemblers_str); + } out_remove_tmp: if (decomp) unlink(symfs_filename); diff --git a/tools/perf/util/disasm.h b/tools/perf/util/disasm.h index f56beedeb9da..c135db2416b5 100644 --- a/tools/perf/util/disasm.h +++ b/tools/perf/util/disasm.h @@ -4,7 +4,7 @@ #include "map_symbol.h" -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include "dwarf-aux.h" #endif @@ -39,11 +39,15 @@ struct arch { char memory_ref_char; char imm_char; } objdump; -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT void (*update_insn_state)(struct type_state *state, struct data_loc_info *dloc, Dwarf_Die *cu_die, struct disasm_line *dl); #endif + /** @e_machine: ELF machine associated with arch. */ + unsigned int e_machine; + /** @e_flags: Optional ELF flags associated with arch. */ + unsigned int e_flags; }; struct ins { diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 92eb9c8dc3e5..559c953ca172 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -1182,7 +1182,6 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) return ret < 0 ? ret : strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); } -#if defined(HAVE_DWARF_GETLOCATIONS_SUPPORT) || defined(HAVE_DWARF_CFI_SUPPORT) static int reg_from_dwarf_op(Dwarf_Op *op) { switch (op->atom) { @@ -1245,9 +1244,7 @@ static bool check_allowed_ops(Dwarf_Op *ops, size_t nops) } return true; } -#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT || HAVE_DWARF_CFI_SUPPORT */ -#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT /** * die_get_var_innermost_scope - Get innermost scope range of given variable DIE * @sp_die: a subprogram DIE @@ -1697,9 +1694,7 @@ void die_collect_global_vars(Dwarf_Die *cu_die, struct die_var_type **var_types) die_find_child(cu_die, __die_collect_global_vars_cb, (void *)var_types, &die_mem); } -#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ -#ifdef HAVE_DWARF_CFI_SUPPORT /** * die_get_cfa - Get frame base information * @dwarf: a Dwarf info @@ -1732,7 +1727,6 @@ int die_get_cfa(Dwarf *dwarf, u64 pc, int *preg, int *poffset) } return -1; } -#endif /* HAVE_DWARF_CFI_SUPPORT */ /* * die_has_loclist - Check if DW_AT_location of @vr_die is a location list diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 336a3a183a78..892c8c5c23fc 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -156,8 +156,6 @@ Dwarf_Die *die_get_member_type(Dwarf_Die *type_die, int offset, Dwarf_Die *die_m /* Return type info where the pointer and offset point to */ Dwarf_Die *die_deref_ptr_type(Dwarf_Die *ptr_die, int offset, Dwarf_Die *die_mem); -#ifdef HAVE_DWARF_GETLOCATIONS_SUPPORT - /* Get byte offset range of given variable DIE */ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf); @@ -176,58 +174,7 @@ void die_collect_vars(Dwarf_Die *sc_die, struct die_var_type **var_types); /* Save all global variables in this CU */ void die_collect_global_vars(Dwarf_Die *cu_die, struct die_var_type **var_types); -#else /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ - -static inline int die_get_var_range(Dwarf_Die *sp_die __maybe_unused, - Dwarf_Die *vr_die __maybe_unused, - struct strbuf *buf __maybe_unused) -{ - return -ENOTSUP; -} - -static inline Dwarf_Die *die_find_variable_by_reg(Dwarf_Die *sc_die __maybe_unused, - Dwarf_Addr pc __maybe_unused, - int reg __maybe_unused, - int *poffset __maybe_unused, - bool is_fbreg __maybe_unused, - Dwarf_Die *die_mem __maybe_unused) -{ - return NULL; -} - -static inline Dwarf_Die *die_find_variable_by_addr(Dwarf_Die *sc_die __maybe_unused, - Dwarf_Addr addr __maybe_unused, - Dwarf_Die *die_mem __maybe_unused, - int *offset __maybe_unused) -{ - return NULL; -} - -static inline void die_collect_vars(Dwarf_Die *sc_die __maybe_unused, - struct die_var_type **var_types __maybe_unused) -{ -} - -static inline void die_collect_global_vars(Dwarf_Die *cu_die __maybe_unused, - struct die_var_type **var_types __maybe_unused) -{ -} - -#endif /* HAVE_DWARF_GETLOCATIONS_SUPPORT */ - -#ifdef HAVE_DWARF_CFI_SUPPORT - /* Get the frame base information from CFA */ int die_get_cfa(Dwarf *dwarf, u64 pc, int *preg, int *poffset); -#else /* HAVE_DWARF_CFI_SUPPORT */ - -static inline int die_get_cfa(Dwarf *dwarf __maybe_unused, u64 pc __maybe_unused, - int *preg __maybe_unused, int *poffset __maybe_unused) -{ - return -1; -} - -#endif /* HAVE_DWARF_CFI_SUPPORT */ - #endif /* _DWARF_AUX_H */ diff --git a/tools/perf/arch/csky/util/dwarf-regs.c b/tools/perf/util/dwarf-regs-csky.c index ca86ecaeacbb..d38ef1f07f3e 100644 --- a/tools/perf/arch/csky/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs-csky.c @@ -5,9 +5,8 @@ #include <stddef.h> #include <dwarf-regs.h> -#if defined(__CSKYABIV2__) -#define CSKY_MAX_REGS 73 -const char *csky_dwarf_regs_table[CSKY_MAX_REGS] = { +#define CSKY_ABIV2_MAX_REGS 73 +const char *csky_dwarf_regs_table_abiv2[CSKY_ABIV2_MAX_REGS] = { /* r0 ~ r8 */ "%a0", "%a1", "%a2", "%a3", "%regs0", "%regs1", "%regs2", "%regs3", /* r9 ~ r15 */ @@ -26,9 +25,9 @@ const char *csky_dwarf_regs_table[CSKY_MAX_REGS] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "%epc", }; -#else -#define CSKY_MAX_REGS 57 -const char *csky_dwarf_regs_table[CSKY_MAX_REGS] = { + +#define CSKY_ABIV1_MAX_REGS 57 +const char *csky_dwarf_regs_table_abiv1[CSKY_ABIV1_MAX_REGS] = { /* r0 ~ r8 */ "%sp", "%regs9", "%a0", "%a1", "%a2", "%a3", "%regs0", "%regs1", /* r9 ~ r15 */ @@ -41,9 +40,11 @@ const char *csky_dwarf_regs_table[CSKY_MAX_REGS] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "%epc", }; -#endif -const char *get_arch_regstr(unsigned int n) +const char *get_csky_regstr(unsigned int n, unsigned int flags) { - return (n < CSKY_MAX_REGS) ? csky_dwarf_regs_table[n] : NULL; + if (flags & EF_CSKY_ABIV2) + return (n < CSKY_ABIV2_MAX_REGS) ? csky_dwarf_regs_table_abiv2[n] : NULL; + + return (n < CSKY_ABIV1_MAX_REGS) ? csky_dwarf_regs_table_abiv1[n] : NULL; } diff --git a/tools/perf/util/dwarf-regs-powerpc.c b/tools/perf/util/dwarf-regs-powerpc.c new file mode 100644 index 000000000000..caf77a234c78 --- /dev/null +++ b/tools/perf/util/dwarf-regs-powerpc.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Mapping of DWARF debug register numbers into register names. + * + * Copyright (C) 2010 Ian Munsie, IBM Corporation. + */ + +#include <dwarf-regs.h> + +#define PPC_OP(op) (((op) >> 26) & 0x3F) +#define PPC_RA(a) (((a) >> 16) & 0x1f) +#define PPC_RT(t) (((t) >> 21) & 0x1f) +#define PPC_RB(b) (((b) >> 11) & 0x1f) +#define PPC_D(D) ((D) & 0xfffe) +#define PPC_DS(DS) ((DS) & 0xfffc) +#define OP_LD 58 +#define OP_STD 62 + +static int get_source_reg(u32 raw_insn) +{ + return PPC_RA(raw_insn); +} + +static int get_target_reg(u32 raw_insn) +{ + return PPC_RT(raw_insn); +} + +static int get_offset_opcode(u32 raw_insn) +{ + int opcode = PPC_OP(raw_insn); + + /* DS- form */ + if ((opcode == OP_LD) || (opcode == OP_STD)) + return PPC_DS(raw_insn); + else + return PPC_D(raw_insn); +} + +/* + * Fills the required fields for op_loc depending on if it + * is a source or target. + * D form: ins RT,D(RA) -> src_reg1 = RA, offset = D, dst_reg1 = RT + * DS form: ins RT,DS(RA) -> src_reg1 = RA, offset = DS, dst_reg1 = RT + * X form: ins RT,RA,RB -> src_reg1 = RA, src_reg2 = RB, dst_reg1 = RT + */ +void get_powerpc_regs(u32 raw_insn, int is_source, + struct annotated_op_loc *op_loc) +{ + if (is_source) + op_loc->reg1 = get_source_reg(raw_insn); + else + op_loc->reg1 = get_target_reg(raw_insn); + + if (op_loc->multi_regs) + op_loc->reg2 = PPC_RB(raw_insn); + + /* TODO: Implement offset handling for X Form */ + if ((op_loc->mem_ref) && (PPC_OP(raw_insn) != 31)) + op_loc->offset = get_offset_opcode(raw_insn); +} diff --git a/tools/perf/util/dwarf-regs-x86.c b/tools/perf/util/dwarf-regs-x86.c new file mode 100644 index 000000000000..7a55c65e8da6 --- /dev/null +++ b/tools/perf/util/dwarf-regs-x86.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * dwarf-regs.c : Mapping of DWARF debug register numbers into register names. + * Extracted from probe-finder.c + * + * Written by Masami Hiramatsu <mhiramat@redhat.com> + */ + +#include <errno.h> /* for EINVAL */ +#include <string.h> /* for strcmp */ +#include <linux/kernel.h> /* for ARRAY_SIZE */ +#include <dwarf-regs.h> + +struct dwarf_regs_idx { + const char *name; + int idx; +}; + +static const struct dwarf_regs_idx x86_regidx_table[] = { + { "rax", 0 }, { "eax", 0 }, { "ax", 0 }, { "al", 0 }, + { "rdx", 1 }, { "edx", 1 }, { "dx", 1 }, { "dl", 1 }, + { "rcx", 2 }, { "ecx", 2 }, { "cx", 2 }, { "cl", 2 }, + { "rbx", 3 }, { "edx", 3 }, { "bx", 3 }, { "bl", 3 }, + { "rsi", 4 }, { "esi", 4 }, { "si", 4 }, { "sil", 4 }, + { "rdi", 5 }, { "edi", 5 }, { "di", 5 }, { "dil", 5 }, + { "rbp", 6 }, { "ebp", 6 }, { "bp", 6 }, { "bpl", 6 }, + { "rsp", 7 }, { "esp", 7 }, { "sp", 7 }, { "spl", 7 }, + { "r8", 8 }, { "r8d", 8 }, { "r8w", 8 }, { "r8b", 8 }, + { "r9", 9 }, { "r9d", 9 }, { "r9w", 9 }, { "r9b", 9 }, + { "r10", 10 }, { "r10d", 10 }, { "r10w", 10 }, { "r10b", 10 }, + { "r11", 11 }, { "r11d", 11 }, { "r11w", 11 }, { "r11b", 11 }, + { "r12", 12 }, { "r12d", 12 }, { "r12w", 12 }, { "r12b", 12 }, + { "r13", 13 }, { "r13d", 13 }, { "r13w", 13 }, { "r13b", 13 }, + { "r14", 14 }, { "r14d", 14 }, { "r14w", 14 }, { "r14b", 14 }, + { "r15", 15 }, { "r15d", 15 }, { "r15w", 15 }, { "r15b", 15 }, + { "rip", DWARF_REG_PC }, +}; + +int get_x86_regnum(const char *name) +{ + unsigned int i; + + if (*name != '%') + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(x86_regidx_table); i++) + if (!strcmp(x86_regidx_table[i].name, name + 1)) + return x86_regidx_table[i].idx; + return -ENOENT; +} diff --git a/tools/perf/util/dwarf-regs.c b/tools/perf/util/dwarf-regs.c index 5b7f86c0063f..28a1cfdf26d4 100644 --- a/tools/perf/util/dwarf-regs.c +++ b/tools/perf/util/dwarf-regs.c @@ -13,14 +13,6 @@ #include <errno.h> #include <linux/kernel.h> -#ifndef EM_AARCH64 -#define EM_AARCH64 183 /* ARM 64 bit */ -#endif - -#ifndef EM_LOONGARCH -#define EM_LOONGARCH 258 /* LoongArch */ -#endif - /* Define const char * {arch}_register_tbl[] */ #define DEFINE_DWARF_REGSTR_TABLE #include "../arch/x86/include/dwarf-regs-table.h" @@ -28,6 +20,7 @@ #include "../arch/arm64/include/dwarf-regs-table.h" #include "../arch/sh/include/dwarf-regs-table.h" #include "../arch/powerpc/include/dwarf-regs-table.h" +#include "../arch/riscv/include/dwarf-regs-table.h" #include "../arch/s390/include/dwarf-regs-table.h" #include "../arch/sparc/include/dwarf-regs-table.h" #include "../arch/xtensa/include/dwarf-regs-table.h" @@ -37,11 +30,13 @@ #define __get_dwarf_regstr(tbl, n) (((n) < ARRAY_SIZE(tbl)) ? (tbl)[(n)] : NULL) /* Return architecture dependent register string (for kprobe-tracer) */ -const char *get_dwarf_regstr(unsigned int n, unsigned int machine) +const char *get_dwarf_regstr(unsigned int n, unsigned int machine, unsigned int flags) { + if (machine == EM_NONE) { + /* Generic arch - use host arch */ + machine = EM_HOST; + } switch (machine) { - case EM_NONE: /* Generic arch - use host arch */ - return get_arch_regstr(n); case EM_386: return __get_dwarf_regstr(x86_32_regstr_tbl, n); case EM_X86_64: @@ -50,6 +45,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) return __get_dwarf_regstr(arm_regstr_tbl, n); case EM_AARCH64: return __get_dwarf_regstr(aarch64_regstr_tbl, n); + case EM_CSKY: + return get_csky_regstr(n, flags); case EM_SH: return __get_dwarf_regstr(sh_regstr_tbl, n); case EM_S390: @@ -57,6 +54,8 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) case EM_PPC: case EM_PPC64: return __get_dwarf_regstr(powerpc_regstr_tbl, n); + case EM_RISCV: + return __get_dwarf_regstr(riscv_regstr_tbl, n); case EM_SPARC: case EM_SPARCV9: return __get_dwarf_regstr(sparc_regstr_tbl, n); @@ -72,13 +71,15 @@ const char *get_dwarf_regstr(unsigned int n, unsigned int machine) return NULL; } +#if EM_HOST != EM_X86_64 && EM_HOST != EM_386 __weak int get_arch_regnum(const char *name __maybe_unused) { return -ENOTSUP; } +#endif /* Return DWARF register number from architecture register name */ -int get_dwarf_regnum(const char *name, unsigned int machine) +int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags __maybe_unused) { char *regname = strdup(name); int reg = -1; @@ -92,10 +93,21 @@ int get_dwarf_regnum(const char *name, unsigned int machine) if (p) *p = '\0'; + if (machine == EM_NONE) { + /* Generic arch - use host arch */ + machine = EM_HOST; + } switch (machine) { - case EM_NONE: /* Generic arch - use host arch */ +#if EM_HOST != EM_X86_64 && EM_HOST != EM_386 + case EM_HOST: reg = get_arch_regnum(regname); break; +#endif + case EM_X86_64: + fallthrough; + case EM_386: + reg = get_x86_regnum(regname); + break; default: pr_err("ELF MACHINE %x is not supported.\n", machine); } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 1edbccfc3281..e2843ca2edd9 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -5,12 +5,14 @@ #include "util/header.h" #include "linux/compiler.h" #include <linux/ctype.h> +#include <linux/string.h> #include <linux/zalloc.h> #include "cgroup.h" #include <errno.h> #include <sys/utsname.h> #include <stdlib.h> #include <string.h> +#include "pmu.h" #include "pmus.h" #include "strbuf.h" #include "trace/beauty/beauty.h" @@ -372,7 +374,8 @@ error: int perf_env__read_cpuid(struct perf_env *env) { char cpuid[128]; - int err = get_cpuid(cpuid, sizeof(cpuid)); + struct perf_cpu cpu = {-1}; + int err = get_cpuid(cpuid, sizeof(cpuid), cpu); if (err) return err; @@ -639,3 +642,25 @@ void perf_env__find_br_cntr_info(struct perf_env *env, env->pmu_caps->br_cntr_width; } } + +bool perf_env__is_x86_amd_cpu(struct perf_env *env) +{ + static int is_amd; /* 0: Uninitialized, 1: Yes, -1: No */ + + if (is_amd == 0) + is_amd = env->cpuid && strstarts(env->cpuid, "AuthenticAMD") ? 1 : -1; + + return is_amd >= 1 ? true : false; +} + +bool x86__is_amd_cpu(void) +{ + struct perf_env env = { .total_mem = 0, }; + bool is_amd; + + perf_env__cpuid(&env); + is_amd = perf_env__is_x86_amd_cpu(&env); + perf_env__exit(&env); + + return is_amd; +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 51b36c36019b..ae604c4edbb7 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -195,4 +195,8 @@ bool perf_env__has_pmu_mapping(struct perf_env *env, const char *pmu_name); void perf_env__find_br_cntr_info(struct perf_env *env, unsigned int *nr, unsigned int *width); + +bool x86__is_amd_cpu(void); +bool perf_env__is_x86_amd_cpu(struct perf_env *env); + #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index f8742e6230a5..2744c54f404e 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -66,6 +66,7 @@ enum { PERF_IP_FLAG_VMEXIT = 1ULL << 12, PERF_IP_FLAG_INTR_DISABLE = 1ULL << 13, PERF_IP_FLAG_INTR_TOGGLE = 1ULL << 14, + PERF_IP_FLAG_BRANCH_MISS = 1ULL << 15, }; #define PERF_IP_FLAG_CHARS "bcrosyiABExghDt" diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f14b7e6ff1dc..f0dd174e2deb 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -48,6 +48,7 @@ #include <sys/mman.h> #include <sys/prctl.h> #include <sys/timerfd.h> +#include <sys/wait.h> #include <linux/bitops.h> #include <linux/hash.h> @@ -319,62 +320,6 @@ struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide) } #endif -int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) -{ - struct evsel *evsel, *n; - LIST_HEAD(head); - size_t i; - - for (i = 0; i < nr_attrs; i++) { - evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i); - if (evsel == NULL) - goto out_delete_partial_list; - list_add_tail(&evsel->core.node, &head); - } - - evlist__splice_list_tail(evlist, &head); - - return 0; - -out_delete_partial_list: - __evlist__for_each_entry_safe(&head, n, evsel) - evsel__delete(evsel); - return -1; -} - -int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs) -{ - size_t i; - - for (i = 0; i < nr_attrs; i++) - event_attr_init(attrs + i); - - return evlist__add_attrs(evlist, attrs, nr_attrs); -} - -__weak int arch_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs) -{ - if (!nr_attrs) - return 0; - - return __evlist__add_default_attrs(evlist, attrs, nr_attrs); -} - -struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id) -{ - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) { - if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT && - (int)evsel->core.attr.config == id) - return evsel; - } - - return NULL; -} - struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name) { struct evsel *evsel; @@ -1199,11 +1144,6 @@ int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) return ret; } -int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid) -{ - return evlist__set_tp_filter_pids(evlist, 1, &pid); -} - int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids) { char *filter = asprintf__tp_filter_pids(npids, pids); @@ -1484,6 +1424,8 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const int child_ready_pipe[2], go_pipe[2]; char bf; + evlist->workload.cork_fd = -1; + if (pipe(child_ready_pipe) < 0) { perror("failed to create 'ready' pipe"); return -1; @@ -1536,7 +1478,7 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const * For cancelling the workload without actually running it, * the parent will just close workload.cork_fd, without writing * anything, i.e. read will return zero and we just exit() - * here. + * here (See evlist__cancel_workload()). */ if (ret != 1) { if (ret == -1) @@ -1600,7 +1542,7 @@ out_close_ready_pipe: int evlist__start_workload(struct evlist *evlist) { - if (evlist->workload.cork_fd > 0) { + if (evlist->workload.cork_fd >= 0) { char bf = 0; int ret; /* @@ -1611,12 +1553,24 @@ int evlist__start_workload(struct evlist *evlist) perror("unable to write to pipe"); close(evlist->workload.cork_fd); + evlist->workload.cork_fd = -1; return ret; } return 0; } +void evlist__cancel_workload(struct evlist *evlist) +{ + int status; + + if (evlist->workload.cork_fd >= 0) { + close(evlist->workload.cork_fd); + evlist->workload.cork_fd = -1; + waitpid(evlist->workload.pid, &status, WNOHANG); + } +} + int evlist__parse_sample(struct evlist *evlist, union perf_event *event, struct perf_sample *sample) { struct evsel *evsel = evlist__event2evsel(evlist, event); @@ -2619,7 +2573,8 @@ void evlist__uniquify_name(struct evlist *evlist) else attributes = empty_attributes; - if (asprintf(&new_name, "%s/%s/%s", pos->pmu_name, pos->name, attributes + 1)) { + if (asprintf(&new_name, "%s/%s/%s", pos->pmu ? pos->pmu->name : "", + pos->name, attributes + 1)) { free(pos->name); pos->name = new_name; } else { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index bcc1c6984bb5..adddb1db1ad2 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -102,18 +102,6 @@ void evlist__delete(struct evlist *evlist); void evlist__add(struct evlist *evlist, struct evsel *entry); void evlist__remove(struct evlist *evlist, struct evsel *evsel); -int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); - -int __evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, size_t nr_attrs); - -int arch_evlist__add_default_attrs(struct evlist *evlist, - struct perf_event_attr *attrs, - size_t nr_attrs); - -#define evlist__add_default_attrs(evlist, array) \ - arch_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array)) - int arch_evlist__cmp(const struct evsel *lhs, const struct evsel *rhs); int evlist__add_dummy(struct evlist *evlist); @@ -144,7 +132,6 @@ int __evlist__set_tracepoints_handlers(struct evlist *evlist, __evlist__set_tracepoints_handlers(evlist, array, ARRAY_SIZE(array)) int evlist__set_tp_filter(struct evlist *evlist, const char *filter); -int evlist__set_tp_filter_pid(struct evlist *evlist, pid_t pid); int evlist__set_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); int evlist__append_tp_filter(struct evlist *evlist, const char *filter); @@ -152,7 +139,6 @@ int evlist__append_tp_filter(struct evlist *evlist, const char *filter); int evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid); int evlist__append_tp_filter_pids(struct evlist *evlist, size_t npids, pid_t *pids); -struct evsel *evlist__find_tracepoint_by_id(struct evlist *evlist, int id); struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char *name); int evlist__add_pollfd(struct evlist *evlist, int fd); @@ -186,6 +172,7 @@ int evlist__prepare_workload(struct evlist *evlist, struct target *target, const char *argv[], bool pipe_output, void (*exec_error)(int signo, siginfo_t *info, void *ucontext)); int evlist__start_workload(struct evlist *evlist); +void evlist__cancel_workload(struct evlist *evlist); struct option; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index dbf9c8cee3c5..f745723d486b 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -5,12 +5,16 @@ * Parts came from builtin-{top,stat,record}.c, see those files for further * copyright notes. */ +/* + * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select + * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu. + */ +#define __SANE_USERSPACE_TYPES__ #include <byteswap.h> #include <errno.h> #include <inttypes.h> #include <linux/bitops.h> -#include <api/io.h> #include <api/fs/fs.h> #include <api/fs/tracing_path.h> #include <linux/hw_breakpoint.h> @@ -20,6 +24,7 @@ #include <linux/zalloc.h> #include <sys/ioctl.h> #include <sys/resource.h> +#include <sys/syscall.h> #include <sys/types.h> #include <dirent.h> #include <stdlib.h> @@ -51,6 +56,8 @@ #include "off_cpu.h" #include "pmu.h" #include "pmus.h" +#include "hwmon_pmu.h" +#include "tool_pmu.h" #include "rlimit.h" #include "../perf-sys.h" #include "util/parse-branch-options.h" @@ -64,46 +71,135 @@ #include <linux/ctype.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif struct perf_missing_features perf_missing_features; static clockid_t clockid; -static const char *const perf_tool_event__tool_names[PERF_TOOL_MAX] = { - NULL, - "duration_time", - "user_time", - "system_time", -}; - -const char *perf_tool_event__to_str(enum perf_tool_event ev) +static int evsel__no_extra_init(struct evsel *evsel __maybe_unused) { - if (ev > PERF_TOOL_NONE && ev < PERF_TOOL_MAX) - return perf_tool_event__tool_names[ev]; - - return NULL; + return 0; } -enum perf_tool_event perf_tool_event__from_str(const char *str) +static bool test_attr__enabled(void) { - int i; + static bool test_attr__enabled; + static bool test_attr__enabled_tested; + + if (!test_attr__enabled_tested) { + char *dir = getenv("PERF_TEST_ATTR"); - perf_tool_event__for_each_event(i) { - if (!strcmp(str, perf_tool_event__tool_names[i])) - return i; + test_attr__enabled = (dir != NULL); + test_attr__enabled_tested = true; } - return PERF_TOOL_NONE; + return test_attr__enabled; } +#define __WRITE_ASS(str, fmt, data) \ +do { \ + if (fprintf(file, #str "=%"fmt "\n", data) < 0) { \ + perror("test attr - failed to write event file"); \ + fclose(file); \ + return -1; \ + } \ +} while (0) -static int evsel__no_extra_init(struct evsel *evsel __maybe_unused) +#define WRITE_ASS(field, fmt) __WRITE_ASS(field, fmt, attr->field) + +static int store_event(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, + int fd, int group_fd, unsigned long flags) { + FILE *file; + char path[PATH_MAX]; + char *dir = getenv("PERF_TEST_ATTR"); + + snprintf(path, PATH_MAX, "%s/event-%d-%llu-%d", dir, + attr->type, attr->config, fd); + + file = fopen(path, "w+"); + if (!file) { + perror("test attr - failed to open event file"); + return -1; + } + + if (fprintf(file, "[event-%d-%llu-%d]\n", + attr->type, attr->config, fd) < 0) { + perror("test attr - failed to write event file"); + fclose(file); + return -1; + } + + /* syscall arguments */ + __WRITE_ASS(fd, "d", fd); + __WRITE_ASS(group_fd, "d", group_fd); + __WRITE_ASS(cpu, "d", cpu.cpu); + __WRITE_ASS(pid, "d", pid); + __WRITE_ASS(flags, "lu", flags); + + /* struct perf_event_attr */ + WRITE_ASS(type, PRIu32); + WRITE_ASS(size, PRIu32); + WRITE_ASS(config, "llu"); + WRITE_ASS(sample_period, "llu"); + WRITE_ASS(sample_type, "llu"); + WRITE_ASS(read_format, "llu"); + WRITE_ASS(disabled, "d"); + WRITE_ASS(inherit, "d"); + WRITE_ASS(pinned, "d"); + WRITE_ASS(exclusive, "d"); + WRITE_ASS(exclude_user, "d"); + WRITE_ASS(exclude_kernel, "d"); + WRITE_ASS(exclude_hv, "d"); + WRITE_ASS(exclude_idle, "d"); + WRITE_ASS(mmap, "d"); + WRITE_ASS(comm, "d"); + WRITE_ASS(freq, "d"); + WRITE_ASS(inherit_stat, "d"); + WRITE_ASS(enable_on_exec, "d"); + WRITE_ASS(task, "d"); + WRITE_ASS(watermark, "d"); + WRITE_ASS(precise_ip, "d"); + WRITE_ASS(mmap_data, "d"); + WRITE_ASS(sample_id_all, "d"); + WRITE_ASS(exclude_host, "d"); + WRITE_ASS(exclude_guest, "d"); + WRITE_ASS(exclude_callchain_kernel, "d"); + WRITE_ASS(exclude_callchain_user, "d"); + WRITE_ASS(mmap2, "d"); + WRITE_ASS(comm_exec, "d"); + WRITE_ASS(context_switch, "d"); + WRITE_ASS(write_backward, "d"); + WRITE_ASS(namespaces, "d"); + WRITE_ASS(use_clockid, "d"); + WRITE_ASS(wakeup_events, PRIu32); + WRITE_ASS(bp_type, PRIu32); + WRITE_ASS(config1, "llu"); + WRITE_ASS(config2, "llu"); + WRITE_ASS(branch_sample_type, "llu"); + WRITE_ASS(sample_regs_user, "llu"); + WRITE_ASS(sample_stack_user, PRIu32); + + fclose(file); return 0; } -void __weak test_attr__ready(void) { } +#undef __WRITE_ASS +#undef WRITE_ASS + +static void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, + int fd, int group_fd, unsigned long flags) +{ + int errno_saved = errno; + + if ((fd != -1) && store_event(attr, pid, cpu, fd, group_fd, flags)) { + pr_err("test attr FAILED"); + exit(128); + } + + errno = errno_saved; +} static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused) { @@ -296,9 +392,9 @@ void evsel__init(struct evsel *evsel, evsel->metric_events = NULL; evsel->per_pkg_mask = NULL; evsel->collect_stat = false; - evsel->pmu_name = NULL; evsel->group_pmu_name = NULL; evsel->skippable = false; + evsel->alternate_hw_config = PERF_COUNT_HW_MAX; } struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx) @@ -393,11 +489,6 @@ struct evsel *evsel__clone(struct evsel *orig) if (evsel->group_name == NULL) goto out_err; } - if (orig->pmu_name) { - evsel->pmu_name = strdup(orig->pmu_name); - if (evsel->pmu_name == NULL) - goto out_err; - } if (orig->group_pmu_name) { evsel->group_pmu_name = strdup(orig->group_pmu_name); if (evsel->group_pmu_name == NULL) @@ -421,7 +512,6 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->core.leader = orig->core.leader; evsel->max_events = orig->max_events; - evsel->tool_event = orig->tool_event; free((char *)evsel->unit); evsel->unit = strdup(orig->unit); if (evsel->unit == NULL) @@ -445,6 +535,8 @@ struct evsel *evsel__clone(struct evsel *orig) if (evsel__copy_config_terms(evsel, orig) < 0) goto out_err; + evsel->alternate_hw_config = orig->alternate_hw_config; + return evsel; out_err: @@ -548,7 +640,6 @@ static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) { int colon = 0, r = 0; struct perf_event_attr *attr = &evsel->core.attr; - bool exclude_guest_default = false; #define MOD_PRINT(context, mod) do { \ if (!attr->exclude_##context) { \ @@ -560,17 +651,15 @@ static int evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size) MOD_PRINT(kernel, 'k'); MOD_PRINT(user, 'u'); MOD_PRINT(hv, 'h'); - exclude_guest_default = true; } if (attr->precise_ip) { if (!colon) colon = ++r; r += scnprintf(bf + r, size - r, "%.*s", attr->precise_ip, "ppp"); - exclude_guest_default = true; } - if (attr->exclude_host || attr->exclude_guest == exclude_guest_default) { + if (attr->exclude_host || attr->exclude_guest) { MOD_PRINT(host, 'H'); MOD_PRINT(guest, 'G'); } @@ -617,11 +706,6 @@ static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size) return r + evsel__add_modifiers(evsel, bf + r, size - r); } -static int evsel__tool_name(enum perf_tool_event ev, char *bf, size_t size) -{ - return scnprintf(bf, size, "%s", perf_tool_event__to_str(ev)); -} - static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type) { int r; @@ -772,10 +856,7 @@ const char *evsel__name(struct evsel *evsel) break; case PERF_TYPE_SOFTWARE: - if (evsel__is_tool(evsel)) - evsel__tool_name(evsel__tool_event(evsel), bf, sizeof(bf)); - else - evsel__sw_name(evsel, bf, sizeof(bf)); + evsel__sw_name(evsel, bf, sizeof(bf)); break; case PERF_TYPE_TRACEPOINT: @@ -786,6 +867,10 @@ const char *evsel__name(struct evsel *evsel) evsel__bp_name(evsel, bf, sizeof(bf)); break; + case PERF_PMU_TYPE_TOOL: + scnprintf(bf, sizeof(bf), "%s", evsel__tool_pmu_event_name(evsel)); + break; + default: scnprintf(bf, sizeof(bf), "unknown attr type: %d", evsel->core.attr.type); @@ -811,7 +896,7 @@ const char *evsel__metric_id(const struct evsel *evsel) return evsel->metric_id; if (evsel__is_tool(evsel)) - return perf_tool_event__to_str(evsel__tool_event(evsel)); + return evsel__tool_pmu_event_name(evsel); return "unknown"; } @@ -862,7 +947,6 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o { bool function = evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->core.attr; - const char *arch = perf_env__arch(evsel__env(evsel)); evsel__set_sample_bit(evsel, CALLCHAIN); @@ -893,6 +977,8 @@ static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *o if (param->record_mode == CALLCHAIN_DWARF) { if (!function) { + const char *arch = perf_env__arch(evsel__env(evsel)); + evsel__set_sample_bit(evsel, REGS_USER); evsel__set_sample_bit(evsel, STACK_USER); if (opts->sample_user_regs && @@ -1150,7 +1236,7 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, bool per_cpu = opts->target.default_per_cpu && !opts->target.per_thread; attr->sample_id_all = perf_missing_features.sample_id_all ? 0 : 1; - attr->inherit = !opts->no_inherit; + attr->inherit = target__has_cpu(&opts->target) ? 0 : !opts->no_inherit; attr->write_backward = opts->overwrite ? 1 : 0; attr->read_format = PERF_FORMAT_LOST; @@ -1172,7 +1258,15 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts, */ if (leader->core.nr_members > 1) { attr->read_format |= PERF_FORMAT_GROUP; - attr->inherit = 0; + } + + /* + * Inherit + SAMPLE_READ requires SAMPLE_TID in the read_format + */ + if (attr->inherit) { + evsel__set_sample_bit(evsel, TID); + evsel->core.attr.read_format |= + PERF_FORMAT_ID; } } @@ -1494,7 +1588,6 @@ void evsel__exit(struct evsel *evsel) zfree(&evsel->group_name); zfree(&evsel->name); zfree(&evsel->filter); - zfree(&evsel->pmu_name); zfree(&evsel->group_pmu_name); zfree(&evsel->unit); zfree(&evsel->metric_id); @@ -1503,8 +1596,8 @@ void evsel__exit(struct evsel *evsel) evsel->per_pkg_mask = NULL; zfree(&evsel->metric_events); perf_evsel__object.fini(evsel); - if (evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME || - evsel__tool_event(evsel) == PERF_TOOL_USER_TIME) + if (evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME || + evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) xyarray__delete(evsel->start_times); } @@ -1684,171 +1777,31 @@ static int evsel__read_group(struct evsel *leader, int cpu_map_idx, int thread) return evsel__process_group_data(leader, cpu_map_idx, thread, data); } -static bool read_until_char(struct io *io, char e) -{ - int c; - - do { - c = io__get_char(io); - if (c == -1) - return false; - } while (c != e); - return true; -} - -static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val) -{ - char buf[256]; - struct io io; - int i; - - io__init(&io, fd, buf, sizeof(buf)); - - /* Skip lines to relevant CPU. */ - for (i = -1; i < cpu.cpu; i++) { - if (!read_until_char(&io, '\n')) - return -EINVAL; - } - /* Skip to "cpu". */ - if (io__get_char(&io) != 'c') return -EINVAL; - if (io__get_char(&io) != 'p') return -EINVAL; - if (io__get_char(&io) != 'u') return -EINVAL; - - /* Skip N of cpuN. */ - if (!read_until_char(&io, ' ')) - return -EINVAL; - - i = 1; - while (true) { - if (io__get_dec(&io, val) != ' ') - break; - if (field == i) - return 0; - i++; - } - return -EINVAL; -} - -static int read_pid_stat_field(int fd, int field, __u64 *val) -{ - char buf[256]; - struct io io; - int c, i; - - io__init(&io, fd, buf, sizeof(buf)); - if (io__get_dec(&io, val) != ' ') - return -EINVAL; - if (field == 1) - return 0; - - /* Skip comm. */ - if (io__get_char(&io) != '(' || !read_until_char(&io, ')')) - return -EINVAL; - if (field == 2) - return -EINVAL; /* String can't be returned. */ - - /* Skip state */ - if (io__get_char(&io) != ' ' || io__get_char(&io) == -1) - return -EINVAL; - if (field == 3) - return -EINVAL; /* String can't be returned. */ - - /* Loop over numeric fields*/ - if (io__get_char(&io) != ' ') - return -EINVAL; - - i = 4; - while (true) { - c = io__get_dec(&io, val); - if (c == -1) - return -EINVAL; - if (c == -2) { - /* Assume a -ve was read */ - c = io__get_dec(&io, val); - *val *= -1; - } - if (c != ' ') - return -EINVAL; - if (field == i) - return 0; - i++; - } - return -EINVAL; -} - -static int evsel__read_tool(struct evsel *evsel, int cpu_map_idx, int thread) +bool __evsel__match(const struct evsel *evsel, u32 type, u64 config) { - __u64 *start_time, cur_time, delta_start; - int fd, err = 0; - struct perf_counts_values *count; - bool adjust = false; - count = perf_counts(evsel->counts, cpu_map_idx, thread); + u32 e_type = evsel->core.attr.type; + u64 e_config = evsel->core.attr.config; - switch (evsel__tool_event(evsel)) { - case PERF_TOOL_DURATION_TIME: - /* - * Pretend duration_time is only on the first CPU and thread, or - * else aggregation will scale duration_time by the number of - * CPUs/threads. - */ - start_time = &evsel->start_time; - if (cpu_map_idx == 0 && thread == 0) - cur_time = rdclock(); - else - cur_time = *start_time; - break; - case PERF_TOOL_USER_TIME: - case PERF_TOOL_SYSTEM_TIME: { - bool system = evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME; - - start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread); - fd = FD(evsel, cpu_map_idx, thread); - lseek(fd, SEEK_SET, 0); - if (evsel->pid_stat) { - /* The event exists solely on 1 CPU. */ - if (cpu_map_idx == 0) - err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time); - else - cur_time = 0; - } else { - /* The event is for all threads. */ - if (thread == 0) { - struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, - cpu_map_idx); - - err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time); - } else { - cur_time = 0; - } - } - adjust = true; - break; - } - case PERF_TOOL_NONE: - case PERF_TOOL_MAX: - default: - err = -EINVAL; + if (e_type != type) { + return type == PERF_TYPE_HARDWARE && evsel->pmu && evsel->pmu->is_core && + evsel->alternate_hw_config == config; } - if (err) - return err; - delta_start = cur_time - *start_time; - if (adjust) { - __u64 ticks_per_sec = sysconf(_SC_CLK_TCK); + if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) && + perf_pmus__supports_extended_type()) + e_config &= PERF_HW_EVENT_MASK; - delta_start *= 1000000000 / ticks_per_sec; - } - count->val = delta_start; - count->ena = count->run = delta_start; - count->lost = 0; - return 0; + return e_config == config; } int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread) { if (evsel__is_tool(evsel)) - return evsel__read_tool(evsel, cpu_map_idx, thread); + return evsel__tool_pmu_read(evsel, cpu_map_idx, thread); + + if (evsel__is_hwmon(evsel)) + return evsel__hwmon_pmu_read(evsel, cpu_map_idx, thread); if (evsel__is_retire_lat(evsel)) return evsel__read_retire_lat(evsel, cpu_map_idx, thread); @@ -2042,6 +1995,7 @@ static struct perf_thread_map *empty_thread_map; static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads) { + int ret = 0; int nthreads = perf_thread_map__nr(threads); if ((perf_missing_features.write_backward && evsel->core.attr.write_backward) || @@ -2072,23 +2026,21 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, perf_evsel__alloc_fd(&evsel->core, perf_cpu_map__nr(cpus), nthreads) < 0) return -ENOMEM; - if ((evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME || - evsel__tool_event(evsel) == PERF_TOOL_USER_TIME) && - !evsel->start_times) { - evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus), nthreads, sizeof(__u64)); - if (!evsel->start_times) - return -ENOMEM; - } + if (evsel__is_tool(evsel)) + ret = evsel__tool_pmu_prepare_open(evsel, cpus, nthreads); evsel->open_flags = PERF_FLAG_FD_CLOEXEC; if (evsel->cgrp) evsel->open_flags |= PERF_FLAG_PID_CGROUP; - return 0; + return ret; } static void evsel__disable_missing_features(struct evsel *evsel) { + if (perf_missing_features.inherit_sample_read && evsel->core.attr.inherit && + (evsel->core.attr.sample_type & PERF_SAMPLE_READ)) + evsel->core.attr.inherit = 0; if (perf_missing_features.branch_counters) evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS; if (perf_missing_features.read_lost) @@ -2138,120 +2090,346 @@ int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, return err; } -bool evsel__detect_missing_features(struct evsel *evsel) +static bool has_attr_feature(struct perf_event_attr *attr, unsigned long flags) +{ + int fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, + /*group_fd=*/-1, flags); + close(fd); + + if (fd < 0) { + attr->exclude_kernel = 1; + + fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, + /*group_fd=*/-1, flags); + close(fd); + } + + if (fd < 0) { + attr->exclude_hv = 1; + + fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, + /*group_fd=*/-1, flags); + close(fd); + } + + if (fd < 0) { + attr->exclude_guest = 1; + + fd = syscall(SYS_perf_event_open, attr, /*pid=*/0, /*cpu=*/-1, + /*group_fd=*/-1, flags); + close(fd); + } + + attr->exclude_kernel = 0; + attr->exclude_guest = 0; + attr->exclude_hv = 0; + + return fd >= 0; +} + +static void evsel__detect_missing_pmu_features(struct evsel *evsel) { + struct perf_event_attr attr = { + .type = evsel->core.attr.type, + .config = evsel->core.attr.config, + .disabled = 1, + }; + struct perf_pmu *pmu = evsel->pmu; + int old_errno; + + old_errno = errno; + + if (pmu == NULL) + pmu = evsel->pmu = evsel__find_pmu(evsel); + + if (pmu == NULL || pmu->missing_features.checked) + goto out; + + /* + * Must probe features in the order they were added to the + * perf_event_attr interface. These are kernel core limitation but + * specific to PMUs with branch stack. So we can detect with the given + * hardware event and stop on the first one succeeded. + */ + + /* Please add new feature detection here. */ + + attr.exclude_guest = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + pmu->missing_features.exclude_guest = true; + pr_debug2("switching off exclude_guest for PMU %s\n", pmu->name); + +found: + pmu->missing_features.checked = true; +out: + errno = old_errno; +} + +static void evsel__detect_missing_brstack_features(struct evsel *evsel) +{ + static bool detection_done = false; + struct perf_event_attr attr = { + .type = evsel->core.attr.type, + .config = evsel->core.attr.config, + .disabled = 1, + .sample_type = PERF_SAMPLE_BRANCH_STACK, + .sample_period = 1000, + }; + int old_errno; + + if (detection_done) + return; + + old_errno = errno; + + /* + * Must probe features in the order they were added to the + * perf_event_attr interface. These are PMU specific limitation + * so we can detect with the given hardware event and stop on the + * first one succeeded. + */ + + /* Please add new feature detection here. */ + + attr.branch_sample_type = PERF_SAMPLE_BRANCH_COUNTERS; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.branch_counters = true; + pr_debug2("switching off branch counters support\n"); + + attr.branch_sample_type = PERF_SAMPLE_BRANCH_HW_INDEX; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.branch_hw_idx = true; + pr_debug2("switching off branch HW index support\n"); + + attr.branch_sample_type = PERF_SAMPLE_BRANCH_NO_CYCLES | PERF_SAMPLE_BRANCH_NO_FLAGS; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.lbr_flags = true; + pr_debug2_peo("switching off branch sample type no (cycles/flags)\n"); + +found: + detection_done = true; + errno = old_errno; +} + +static bool evsel__detect_missing_features(struct evsel *evsel) +{ + static bool detection_done = false; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_TASK_CLOCK, + .disabled = 1, + }; + int old_errno; + + evsel__detect_missing_pmu_features(evsel); + + if (evsel__has_br_stack(evsel)) + evsel__detect_missing_brstack_features(evsel); + + if (detection_done) + goto check; + + old_errno = errno; + /* * Must probe features in the order they were added to the - * perf_event_attr interface. + * perf_event_attr interface. These are kernel core limitation + * not PMU-specific so we can detect with a software event and + * stop on the first one succeeded. */ - if (!perf_missing_features.branch_counters && - (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) { - perf_missing_features.branch_counters = true; - pr_debug2("switching off branch counters support\n"); + + /* Please add new feature detection here. */ + + attr.inherit = true; + attr.sample_type = PERF_SAMPLE_READ; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.inherit_sample_read = true; + pr_debug2("Using PERF_SAMPLE_READ / :S modifier is not compatible with inherit, falling back to no-inherit.\n"); + attr.inherit = false; + attr.sample_type = 0; + + attr.read_format = PERF_FORMAT_LOST; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.read_lost = true; + pr_debug2("switching off PERF_FORMAT_LOST support\n"); + attr.read_format = 0; + + attr.sample_type = PERF_SAMPLE_WEIGHT_STRUCT; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.weight_struct = true; + pr_debug2("switching off weight struct support\n"); + attr.sample_type = 0; + + attr.sample_type = PERF_SAMPLE_CODE_PAGE_SIZE; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.code_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support\n"); + attr.sample_type = 0; + + attr.sample_type = PERF_SAMPLE_DATA_PAGE_SIZE; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.data_page_size = true; + pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support\n"); + attr.sample_type = 0; + + attr.cgroup = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.cgroup = true; + pr_debug2_peo("Kernel has no cgroup sampling support\n"); + attr.cgroup = 0; + + attr.aux_output = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.aux_output = true; + pr_debug2_peo("Kernel has no attr.aux_output support\n"); + attr.aux_output = 0; + + attr.bpf_event = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.bpf = true; + pr_debug2_peo("switching off bpf_event\n"); + attr.bpf_event = 0; + + attr.ksymbol = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.ksymbol = true; + pr_debug2_peo("switching off ksymbol\n"); + attr.ksymbol = 0; + + attr.write_backward = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.write_backward = true; + pr_debug2_peo("switching off write_backward\n"); + attr.write_backward = 0; + + attr.use_clockid = 1; + attr.clockid = CLOCK_MONOTONIC; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.clockid = true; + pr_debug2_peo("switching off clockid\n"); + attr.use_clockid = 0; + attr.clockid = 0; + + if (has_attr_feature(&attr, /*flags=*/PERF_FLAG_FD_CLOEXEC)) + goto found; + perf_missing_features.cloexec = true; + pr_debug2_peo("switching off cloexec flag\n"); + + attr.mmap2 = 1; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.mmap2 = true; + pr_debug2_peo("switching off mmap2\n"); + attr.mmap2 = 0; + + /* set this unconditionally? */ + perf_missing_features.sample_id_all = true; + pr_debug2_peo("switching off sample_id_all\n"); + + attr.inherit = 1; + attr.read_format = PERF_FORMAT_GROUP; + if (has_attr_feature(&attr, /*flags=*/0)) + goto found; + perf_missing_features.group_read = true; + pr_debug2_peo("switching off group read\n"); + attr.inherit = 0; + attr.read_format = 0; + +found: + detection_done = true; + errno = old_errno; + +check: + if (evsel->core.attr.inherit && + (evsel->core.attr.sample_type & PERF_SAMPLE_READ) && + perf_missing_features.inherit_sample_read) return true; - } else if (!perf_missing_features.read_lost && - (evsel->core.attr.read_format & PERF_FORMAT_LOST)) { - perf_missing_features.read_lost = true; - pr_debug2("switching off PERF_FORMAT_LOST support\n"); + + if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) && + perf_missing_features.branch_counters) return true; - } else if (!perf_missing_features.weight_struct && - (evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT)) { - perf_missing_features.weight_struct = true; - pr_debug2("switching off weight struct support\n"); + + if ((evsel->core.attr.read_format & PERF_FORMAT_LOST) && + perf_missing_features.read_lost) return true; - } else if (!perf_missing_features.code_page_size && - (evsel->core.attr.sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)) { - perf_missing_features.code_page_size = true; - pr_debug2_peo("Kernel has no PERF_SAMPLE_CODE_PAGE_SIZE support, bailing out\n"); - return false; - } else if (!perf_missing_features.data_page_size && - (evsel->core.attr.sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)) { - perf_missing_features.data_page_size = true; - pr_debug2_peo("Kernel has no PERF_SAMPLE_DATA_PAGE_SIZE support, bailing out\n"); - return false; - } else if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) { - perf_missing_features.cgroup = true; - pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n"); - return false; - } else if (!perf_missing_features.branch_hw_idx && - (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) { - perf_missing_features.branch_hw_idx = true; - pr_debug2("switching off branch HW index support\n"); + + if ((evsel->core.attr.sample_type & PERF_SAMPLE_WEIGHT_STRUCT) && + perf_missing_features.weight_struct) return true; - } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) { - perf_missing_features.aux_output = true; - pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n"); - return false; - } else if (!perf_missing_features.bpf && evsel->core.attr.bpf_event) { - perf_missing_features.bpf = true; - pr_debug2_peo("switching off bpf_event\n"); + + if (evsel->core.attr.use_clockid && evsel->core.attr.clockid != CLOCK_MONOTONIC && + !perf_missing_features.clockid) { + perf_missing_features.clockid_wrong = true; return true; - } else if (!perf_missing_features.ksymbol && evsel->core.attr.ksymbol) { - perf_missing_features.ksymbol = true; - pr_debug2_peo("switching off ksymbol\n"); + } + + if (evsel->core.attr.use_clockid && perf_missing_features.clockid) return true; - } else if (!perf_missing_features.write_backward && evsel->core.attr.write_backward) { - perf_missing_features.write_backward = true; - pr_debug2_peo("switching off write_backward\n"); - return false; - } else if (!perf_missing_features.clockid_wrong && evsel->core.attr.use_clockid) { - perf_missing_features.clockid_wrong = true; - pr_debug2_peo("switching off clockid\n"); + + if ((evsel->open_flags & PERF_FLAG_FD_CLOEXEC) && + perf_missing_features.cloexec) return true; - } else if (!perf_missing_features.clockid && evsel->core.attr.use_clockid) { - perf_missing_features.clockid = true; - pr_debug2_peo("switching off use_clockid\n"); + + if (evsel->core.attr.mmap2 && perf_missing_features.mmap2) return true; - } else if (!perf_missing_features.cloexec && (evsel->open_flags & PERF_FLAG_FD_CLOEXEC)) { - perf_missing_features.cloexec = true; - pr_debug2_peo("switching off cloexec flag\n"); + + if ((evsel->core.attr.branch_sample_type & (PERF_SAMPLE_BRANCH_NO_FLAGS | + PERF_SAMPLE_BRANCH_NO_CYCLES)) && + perf_missing_features.lbr_flags) return true; - } else if (!perf_missing_features.mmap2 && evsel->core.attr.mmap2) { - perf_missing_features.mmap2 = true; - pr_debug2_peo("switching off mmap2\n"); + + if (evsel->core.attr.inherit && (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && + perf_missing_features.group_read) return true; - } else if (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) { - if (evsel->pmu == NULL) - evsel->pmu = evsel__find_pmu(evsel); - - if (evsel->pmu) - evsel->pmu->missing_features.exclude_guest = true; - else { - /* we cannot find PMU, disable attrs now */ - evsel->core.attr.exclude_host = false; - evsel->core.attr.exclude_guest = false; - } - if (evsel->exclude_GH) { - pr_debug2_peo("PMU has no exclude_host/guest support, bailing out\n"); - return false; - } - if (!perf_missing_features.exclude_guest) { - perf_missing_features.exclude_guest = true; - pr_debug2_peo("switching off exclude_guest, exclude_host\n"); - } + if (evsel->core.attr.ksymbol && perf_missing_features.ksymbol) return true; - } else if (!perf_missing_features.sample_id_all) { - perf_missing_features.sample_id_all = true; - pr_debug2_peo("switching off sample_id_all\n"); + + if (evsel->core.attr.bpf_event && perf_missing_features.bpf) return true; - } else if (!perf_missing_features.lbr_flags && - (evsel->core.attr.branch_sample_type & - (PERF_SAMPLE_BRANCH_NO_CYCLES | - PERF_SAMPLE_BRANCH_NO_FLAGS))) { - perf_missing_features.lbr_flags = true; - pr_debug2_peo("switching off branch sample type no (cycles/flags)\n"); + + if ((evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) && + perf_missing_features.branch_hw_idx) return true; - } else if (!perf_missing_features.group_read && - evsel->core.attr.inherit && - (evsel->core.attr.read_format & PERF_FORMAT_GROUP) && - evsel__is_group_leader(evsel)) { - perf_missing_features.group_read = true; - pr_debug2_peo("switching off group read\n"); + + if (evsel->core.attr.sample_id_all && perf_missing_features.sample_id_all) + return true; + + return false; +} + +static bool evsel__handle_error_quirks(struct evsel *evsel, int error) +{ + /* + * AMD core PMU tries to forward events with precise_ip to IBS PMU + * implicitly. But IBS PMU has more restrictions so it can fail with + * supported event attributes. Let's forward it back to the core PMU + * by clearing precise_ip only if it's from precise_max (:P). + */ + if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() && + evsel->core.attr.precise_ip && evsel->precise_max) { + evsel->core.attr.precise_ip = 0; + pr_debug2_peo("removing precise_ip on AMD\n"); + display_attr(&evsel->core.attr); return true; - } else { - return false; } + + return false; } static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, @@ -2262,13 +2440,6 @@ static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int pid = -1, err, old_errno; enum rlimit_action set_rlimit = NO_CHANGE; - if (evsel__tool_event(evsel) == PERF_TOOL_DURATION_TIME) { - if (evsel->core.attr.sample_period) /* no sampling */ - return -EINVAL; - evsel->start_time = rdclock(); - return 0; - } - if (evsel__is_retire_lat(evsel)) return tpebs_start(evsel->evlist); @@ -2293,6 +2464,17 @@ fallback_missing_features: pr_debug3("Opening: %s\n", evsel__name(evsel)); display_attr(&evsel->core.attr); + if (evsel__is_tool(evsel)) { + return evsel__tool_pmu_open(evsel, threads, + start_cpu_map_idx, + end_cpu_map_idx); + } + if (evsel__is_hwmon(evsel)) { + return evsel__hwmon_pmu_open(evsel, threads, + start_cpu_map_idx, + end_cpu_map_idx); + } + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { for (thread = 0; thread < nthreads; thread++) { @@ -2304,46 +2486,6 @@ retry_open: if (!evsel->cgrp && !evsel->core.system_wide) pid = perf_thread_map__pid(threads, thread); - if (evsel__tool_event(evsel) == PERF_TOOL_USER_TIME || - evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME) { - bool system = evsel__tool_event(evsel) == PERF_TOOL_SYSTEM_TIME; - __u64 *start_time = NULL; - - if (evsel->core.attr.sample_period) { - /* no sampling */ - err = -EINVAL; - goto out_close; - } - if (pid > -1) { - char buf[64]; - - snprintf(buf, sizeof(buf), "/proc/%d/stat", pid); - fd = open(buf, O_RDONLY); - evsel->pid_stat = true; - } else { - fd = open("/proc/stat", O_RDONLY); - } - FD(evsel, idx, thread) = fd; - if (fd < 0) { - err = -errno; - goto out_close; - } - start_time = xyarray__entry(evsel->start_times, idx, thread); - if (pid > -1) { - err = read_pid_stat_field(fd, system ? 15 : 14, - start_time); - } else { - struct perf_cpu cpu; - - cpu = perf_cpu_map__cpu(evsel->core.cpus, idx); - err = read_stat_field(fd, cpu, system ? 3 : 1, - start_time); - } - if (err) - goto out_close; - continue; - } - group_fd = get_group_fd(evsel, idx, thread); if (group_fd == -2) { @@ -2352,8 +2494,6 @@ retry_open: goto out_close; } - test_attr__ready(); - /* Debug message used by test scripts */ pr_debug2_peo("sys_perf_event_open: pid %d cpu %d group_fd %d flags %#lx", pid, perf_cpu_map__cpu(cpus, idx).cpu, group_fd, evsel->open_flags); @@ -2374,7 +2514,7 @@ retry_open: bpf_counter__install_pe(evsel, idx, fd); - if (unlikely(test_attr__enabled)) { + if (unlikely(test_attr__enabled())) { test_attr__open(&evsel->core.attr, pid, perf_cpu_map__cpu(cpus, idx), fd, group_fd, evsel->open_flags); @@ -2415,9 +2555,6 @@ retry_open: return 0; try_fallback: - if (evsel__precise_ip_fallback(evsel)) - goto retry_open; - if (evsel__ignore_missing_thread(evsel, perf_cpu_map__nr(cpus), idx, threads, thread, err)) { /* We just removed 1 thread, so lower the upper nthreads limit. */ @@ -2434,11 +2571,15 @@ try_fallback: if (err == -EMFILE && rlimit__increase_nofile(&set_rlimit)) goto retry_open; - if (err != -EINVAL || idx > 0 || thread > 0) - goto out_close; + if (err == -EOPNOTSUPP && evsel__precise_ip_fallback(evsel)) + goto retry_open; - if (evsel__detect_missing_features(evsel)) + if (err == -EINVAL && evsel__detect_missing_features(evsel)) goto fallback_missing_features; + + if (evsel__handle_error_quirks(evsel, err)) + goto retry_open; + out_close: if (err) threads->err_thread = thread; @@ -3245,6 +3386,27 @@ bool evsel__fallback(struct evsel *evsel, struct target *target, int err, evsel->core.attr.exclude_hv = 1; return true; + } else if (err == EOPNOTSUPP && !evsel->core.attr.exclude_guest && + !evsel->exclude_GH) { + const char *name = evsel__name(evsel); + char *new_name; + const char *sep = ":"; + + /* Is there already the separator in the name. */ + if (strchr(name, '/') || + (strchr(name, ':') && !evsel->is_libpfm_event)) + sep = ""; + + if (asprintf(&new_name, "%s%sH", name, sep) < 0) + return false; + + free(evsel->name); + evsel->name = new_name; + /* Apple M1 requires exclude_guest */ + scnprintf(msg, msgsize, "trying to fall back to excluding guest samples"); + evsel->core.attr.exclude_guest = 1; + + return true; } return false; @@ -3415,7 +3577,7 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target, return scnprintf(msg, size, "The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n" - "/bin/dmesg | grep -i perf may provide additional information.\n", + "\"dmesg | grep -i perf\" may provide additional information.\n", err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel)); } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 15e745a9a798..04934a7af174 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -11,6 +11,7 @@ #include <perf/evsel.h> #include "symbol_conf.h" #include "pmus.h" +#include "pmu.h" struct bpf_object; struct cgroup; @@ -22,25 +23,9 @@ struct target; struct hashmap; struct bperf_leader_bpf; struct bperf_follower_bpf; -struct perf_pmu; typedef int (evsel__sb_cb_t)(union perf_event *event, void *data); -enum perf_tool_event { - PERF_TOOL_NONE = 0, - PERF_TOOL_DURATION_TIME = 1, - PERF_TOOL_USER_TIME = 2, - PERF_TOOL_SYSTEM_TIME = 3, - - PERF_TOOL_MAX, -}; - -const char *perf_tool_event__to_str(enum perf_tool_event ev); -enum perf_tool_event perf_tool_event__from_str(const char *str); - -#define perf_tool_event__for_each_event(ev) \ - for ((ev) = PERF_TOOL_DURATION_TIME; (ev) < PERF_TOOL_MAX; ev++) - /** struct evsel - event selector * * @evlist - evlist this evsel is in, if it is in one. @@ -72,7 +57,6 @@ struct evsel { struct { char *name; char *group_name; - const char *pmu_name; const char *group_pmu_name; #ifdef HAVE_LIBTRACEEVENT struct tep_event *tp_format; @@ -83,7 +67,6 @@ struct evsel { const char *unit; struct cgroup *cgrp; const char *metric_id; - enum perf_tool_event tool_event; /* parse modifier helper */ int exclude_GH; int sample_read; @@ -102,6 +85,7 @@ struct evsel { int bpf_fd; struct bpf_object *bpf_obj; struct list_head config_terms; + u64 alternate_hw_config; }; /* @@ -183,7 +167,7 @@ struct evsel { unsigned long open_flags; int precise_ip_original; - /* for missing_features */ + /* The PMU the event is from. Used for missing_features, PMU name, etc. */ struct perf_pmu *pmu; /* For tool events */ @@ -221,6 +205,7 @@ struct perf_missing_features { bool weight_struct; bool read_lost; bool branch_counters; + bool inherit_sample_read; }; extern struct perf_missing_features perf_missing_features; @@ -320,21 +305,11 @@ const char *evsel__name(struct evsel *evsel); bool evsel__name_is(struct evsel *evsel, const char *name); const char *evsel__metric_id(const struct evsel *evsel); -static inline bool evsel__is_tool(const struct evsel *evsel) -{ - return evsel->tool_event != PERF_TOOL_NONE; -} - static inline bool evsel__is_retire_lat(const struct evsel *evsel) { return evsel->retire_lat; } -static inline enum perf_tool_event evsel__tool_event(const struct evsel *evsel) -{ - return evsel->tool_event; -} - const char *evsel__group_name(struct evsel *evsel); int evsel__group_desc(struct evsel *evsel, char *buf, size_t size); @@ -368,7 +343,6 @@ int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus, void evsel__close(struct evsel *evsel); int evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads); -bool evsel__detect_missing_features(struct evsel *evsel); bool evsel__precise_ip_fallback(struct evsel *evsel); @@ -393,26 +367,10 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam struct tep_format_field *evsel__field(struct evsel *evsel, const char *name); struct tep_format_field *evsel__common_field(struct evsel *evsel, const char *name); -static inline bool __evsel__match(const struct evsel *evsel, u32 type, u64 config) -{ - if (evsel->core.attr.type != type) - return false; - - if ((type == PERF_TYPE_HARDWARE || type == PERF_TYPE_HW_CACHE) && - perf_pmus__supports_extended_type()) - return (evsel->core.attr.config & PERF_HW_EVENT_MASK) == config; - - return evsel->core.attr.config == config; -} +bool __evsel__match(const struct evsel *evsel, u32 type, u64 config); #define evsel__match(evsel, t, c) __evsel__match(evsel, PERF_TYPE_##t, PERF_COUNT_##c) -static inline bool evsel__match2(struct evsel *e1, struct evsel *e2) -{ - return (e1->core.attr.type == e2->core.attr.type) && - (e1->core.attr.config == e2->core.attr.config); -} - int evsel__read_counter(struct evsel *evsel, int cpu_map_idx, int thread); int __evsel__read_on_cpu(struct evsel *evsel, int cpu_map_idx, int thread, bool scale); diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c index c2c0500d5da9..86b7f46f9e2a 100644 --- a/tools/perf/util/evsel_fprintf.c +++ b/tools/perf/util/evsel_fprintf.c @@ -14,7 +14,7 @@ #include "dso.h" #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index b2536a59c44e..f289044a1f7c 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -5,25 +5,22 @@ #include <stdlib.h> #include <string.h> #include "metricgroup.h" -#include "cpumap.h" -#include "cputopo.h" #include "debug.h" #include "evlist.h" #include "expr.h" +#include "smt.h" +#include "tool_pmu.h" #include <util/expr-bison.h> #include <util/expr-flex.h> #include "util/hashmap.h" #include "util/header.h" #include "util/pmu.h" -#include "smt.h" -#include "tsc.h" -#include <api/fs/fs.h> +#include <perf/cpumap.h> #include <linux/err.h> #include <linux/kernel.h> #include <linux/zalloc.h> #include <ctype.h> #include <math.h> -#include "pmu.h" struct expr_id_data { union { @@ -393,90 +390,26 @@ double expr_id_data__source_count(const struct expr_id_data *data) return data->val.source_count; } -#if !defined(__i386__) && !defined(__x86_64__) -double arch_get_tsc_freq(void) -{ - return 0.0; -} -#endif - -static double has_pmem(void) -{ - static bool has_pmem, cached; - const char *sysfs = sysfs__mountpoint(); - char path[PATH_MAX]; - - if (!cached) { - snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs); - has_pmem = access(path, F_OK) == 0; - cached = true; - } - return has_pmem ? 1.0 : 0.0; -} - double expr__get_literal(const char *literal, const struct expr_scanner_ctx *ctx) { - const struct cpu_topology *topology; double result = NAN; + enum tool_pmu_event ev = tool_pmu__str_to_event(literal + 1); - if (!strcmp("#num_cpus", literal)) { - result = cpu__max_present_cpu().cpu; - goto out; - } - if (!strcmp("#num_cpus_online", literal)) { - struct perf_cpu_map *online = cpu_map__online(); - - if (online) - result = perf_cpu_map__nr(online); - goto out; - } + if (ev != TOOL_PMU__EVENT_NONE) { + u64 count; - if (!strcasecmp("#system_tsc_freq", literal)) { - result = arch_get_tsc_freq(); - goto out; - } + if (tool_pmu__read_event(ev, &count)) + result = count; + else + pr_err("Failure to read '%s'", literal); - /* - * Assume that topology strings are consistent, such as CPUs "0-1" - * wouldn't be listed as "0,1", and so after deduplication the number of - * these strings gives an indication of the number of packages, dies, - * etc. - */ - if (!strcasecmp("#smt_on", literal)) { - result = smt_on() ? 1.0 : 0.0; - goto out; - } - if (!strcmp("#core_wide", literal)) { + } else if (!strcmp("#core_wide", literal)) { result = core_wide(ctx->system_wide, ctx->user_requested_cpu_list) ? 1.0 : 0.0; - goto out; - } - if (!strcmp("#num_packages", literal)) { - topology = online_topology(); - result = topology->package_cpus_lists; - goto out; - } - if (!strcmp("#num_dies", literal)) { - topology = online_topology(); - result = topology->die_cpus_lists; - goto out; - } - if (!strcmp("#num_cores", literal)) { - topology = online_topology(); - result = topology->core_cpus_lists; - goto out; - } - if (!strcmp("#slots", literal)) { - result = perf_pmu__cpu_slots_per_cycle(); - goto out; - } - if (!strcmp("#has_pmem", literal)) { - result = has_pmem(); - goto out; + } else { + pr_err("Unrecognized literal '%s'", literal); } - pr_err("Unrecognized literal '%s'", literal); -out: pr_debug2("literal: %s = %f\n", literal, result); return result; } @@ -523,8 +456,8 @@ double expr__strcmp_cpuid_str(const struct expr_parse_ctx *ctx __maybe_unused, bool compute_ids __maybe_unused, const char *test_id) { double ret; - struct perf_pmu *pmu = perf_pmus__find_core_pmu(); - char *cpuid = perf_pmu__getcpuid(pmu); + struct perf_cpu cpu = {-1}; + char *cpuid = get_cpuid_allow_env_override(cpu); if (!cpuid) return NAN; diff --git a/tools/perf/util/genelf.c b/tools/perf/util/genelf.c index c8f6bee1fa61..cdce7f173d00 100644 --- a/tools/perf/util/genelf.c +++ b/tools/perf/util/genelf.c @@ -16,7 +16,7 @@ #include <inttypes.h> #include <fcntl.h> #include <err.h> -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include <dwarf.h> #endif @@ -499,7 +499,7 @@ jit_write_elf(int fd, uint64_t load_addr, const char *sym, shdr->sh_size = sizeof(bnote); shdr->sh_entsize = 0; -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT if (debug && nr_debug_entries) { retval = jit_add_debug_info(e, load_addr, debug, nr_debug_entries); if (retval) diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h index 4e2e4f40e134..9f0b875d6548 100644 --- a/tools/perf/util/genelf.h +++ b/tools/perf/util/genelf.h @@ -8,7 +8,7 @@ int jit_write_elf(int fd, uint64_t code_addr, const char *sym, const void *code, int csize, void *debug, int nr_debug_entries, void *unwinding, uint64_t unwinding_header_size, uint64_t unwinding_size); -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT /* genelf_debug.c */ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries); #endif diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h index c12f8320e668..0c4f155e8eb7 100644 --- a/tools/perf/util/hashmap.h +++ b/tools/perf/util/hashmap.h @@ -166,8 +166,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry(map, cur, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; cur; cur = cur->next) + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; cur; cur = cur->next) /* * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe @@ -178,8 +178,8 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @bkt: integer used as a bucket loop cursor */ #define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \ - for (bkt = 0; bkt < map->cap; bkt++) \ - for (cur = map->buckets[bkt]; \ + for (bkt = 0; bkt < (map)->cap; bkt++) \ + for (cur = (map)->buckets[bkt]; \ cur && ({tmp = cur->next; true; }); \ cur = tmp) @@ -190,19 +190,19 @@ bool hashmap_find(const struct hashmap *map, long key, long *value); * @key: key to iterate entries for */ #define hashmap__for_each_key_entry(map, cur, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur; \ cur = cur->next) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \ - for (cur = map->buckets \ - ? map->buckets[hash_bits(map->hash_fn((_key), map->ctx), map->cap_bits)] \ + for (cur = (map)->buckets \ + ? (map)->buckets[hash_bits((map)->hash_fn((_key), (map)->ctx), (map)->cap_bits)] \ : NULL; \ cur && ({ tmp = cur->next; true; }); \ cur = tmp) \ - if (map->equal_fn(cur->key, (_key), map->ctx)) + if ((map)->equal_fn(cur->key, (_key), (map)->ctx)) #endif /* __LIBBPF_HASHMAP_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index a6386d12afd7..3451e542b69a 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -58,7 +58,7 @@ #include <internal/lib.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif /* @@ -819,11 +819,31 @@ static int write_group_desc(struct feat_fd *ff, * Each architecture should provide a more precise id string that * can be use to match the architecture's "mapfile". */ -char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +char * __weak get_cpuid_str(struct perf_cpu cpu __maybe_unused) { return NULL; } +char *get_cpuid_allow_env_override(struct perf_cpu cpu) +{ + char *cpuid; + static bool printed; + + cpuid = getenv("PERF_CPUID"); + if (cpuid) + cpuid = strdup(cpuid); + if (!cpuid) + cpuid = get_cpuid_str(cpu); + if (!cpuid) + return NULL; + + if (!printed) { + pr_debug("Using CPUID %s\n", cpuid); + printed = true; + } + return cpuid; +} + /* Return zero when the cpuid from the mapfile.csv matches the * cpuid string generated on this platform. * Otherwise return non-zero. @@ -856,18 +876,19 @@ int __weak strcmp_cpuid_str(const char *mapcpuid, const char *cpuid) * default get_cpuid(): nothing gets recorded * actual implementation must be in arch/$(SRCARCH)/util/header.c */ -int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused) +int __weak get_cpuid(char *buffer __maybe_unused, size_t sz __maybe_unused, + struct perf_cpu cpu __maybe_unused) { return ENOSYS; /* Not implemented */ } -static int write_cpuid(struct feat_fd *ff, - struct evlist *evlist __maybe_unused) +static int write_cpuid(struct feat_fd *ff, struct evlist *evlist) { + struct perf_cpu cpu = perf_cpu_map__min(evlist->core.all_cpus); char buffer[64]; int ret; - ret = get_cpuid(buffer, sizeof(buffer)); + ret = get_cpuid(buffer, sizeof(buffer), cpu); if (ret) return -1; @@ -987,57 +1008,6 @@ static int write_dir_format(struct feat_fd *ff, return do_write(ff, &data->dir.version, sizeof(data->dir.version)); } -/* - * Check whether a CPU is online - * - * Returns: - * 1 -> if CPU is online - * 0 -> if CPU is offline - * -1 -> error case - */ -int is_cpu_online(unsigned int cpu) -{ - char *str; - size_t strlen; - char buf[256]; - int status = -1; - struct stat statbuf; - - snprintf(buf, sizeof(buf), - "/sys/devices/system/cpu/cpu%d", cpu); - if (stat(buf, &statbuf) != 0) - return 0; - - /* - * Check if /sys/devices/system/cpu/cpux/online file - * exists. Some cases cpu0 won't have online file since - * it is not expected to be turned off generally. - * In kernels without CONFIG_HOTPLUG_CPU, this - * file won't exist - */ - snprintf(buf, sizeof(buf), - "/sys/devices/system/cpu/cpu%d/online", cpu); - if (stat(buf, &statbuf) != 0) - return 1; - - /* - * Read online file using sysfs__read_str. - * If read or open fails, return -1. - * If read succeeds, return value from file - * which gets stored in "str" - */ - snprintf(buf, sizeof(buf), - "devices/system/cpu/cpu%d/online", cpu); - - if (sysfs__read_str(buf, &str, &strlen) < 0) - return status; - - status = atoi(str); - - free(str); - return status; -} - #ifdef HAVE_LIBBPF_SUPPORT static int write_bpf_prog_info(struct feat_fd *ff, struct evlist *evlist __maybe_unused) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index a63a361f20f4..5201af6305f4 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -10,7 +10,13 @@ #include <linux/bitmap.h> #include <linux/types.h> #include "env.h" -#include "pmu.h" +#include <perf/cpumap.h> + +struct evlist; +union perf_event; +struct perf_header; +struct perf_session; +struct perf_tool; enum { HEADER_RESERVED = 0, /* always cleared */ @@ -91,8 +97,6 @@ struct perf_pipe_file_header { u64 size; }; -struct perf_header; - int perf_file_header__read(struct perf_file_header *header, struct perf_header *ph, int fd); @@ -124,11 +128,6 @@ struct perf_header_feature_ops { bool synthesize; }; -struct evlist; -struct perf_session; -struct perf_tool; -union perf_event; - extern const char perf_version_string[]; int perf_session__read_header(struct perf_session *session); @@ -196,14 +195,16 @@ int write_padded(struct feat_fd *fd, const void *bf, #define MAX_CACHE_LVL 4 -int is_cpu_online(unsigned int cpu); int build_caches_for_cpu(u32 cpu, struct cpu_cache_level caches[], u32 *cntp); /* * arch specific callback */ -int get_cpuid(char *buffer, size_t sz); +int get_cpuid(char *buffer, size_t sz, struct perf_cpu cpu); + +char *get_cpuid_str(struct perf_cpu cpu); + +char *get_cpuid_allow_env_override(struct perf_cpu cpu); -char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused); int strcmp_cpuid_str(const char *s1, const char *s2); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index f387e85a0087..fff134565801 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -218,6 +218,9 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h) hists__new_col_len(hists, HISTC_LOCAL_P_STAGE_CYC, 13); hists__new_col_len(hists, HISTC_GLOBAL_P_STAGE_CYC, 13); hists__new_col_len(hists, HISTC_ADDR, BITS_PER_LONG / 4 + 2); + hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_PREDICTED, 9); + hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_ABORT, 5); + hists__new_col_len(hists, HISTC_CALLCHAIN_BRANCH_CYCLES, 6); if (symbol_conf.nanosecs) hists__new_col_len(hists, HISTC_TIME, 16); diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 7d7ae94b4b31..1131056924d9 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -87,6 +87,9 @@ enum hist_column { HISTC_TYPE_OFFSET, HISTC_SYMBOL_OFFSET, HISTC_TYPE_CACHELINE, + HISTC_CALLCHAIN_BRANCH_PREDICTED, + HISTC_CALLCHAIN_BRANCH_ABORT, + HISTC_CALLCHAIN_BRANCH_CYCLES, HISTC_NR_COLS, /* Last entry */ }; diff --git a/tools/perf/util/hwmon_pmu.c b/tools/perf/util/hwmon_pmu.c new file mode 100644 index 000000000000..e61429b38ba7 --- /dev/null +++ b/tools/perf/util/hwmon_pmu.c @@ -0,0 +1,832 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +#include "counts.h" +#include "debug.h" +#include "evsel.h" +#include "hashmap.h" +#include "hwmon_pmu.h" +#include "pmu.h" +#include <internal/xyarray.h> +#include <internal/threadmap.h> +#include <perf/threadmap.h> +#include <sys/types.h> +#include <assert.h> +#include <ctype.h> +#include <dirent.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <api/fs/fs.h> +#include <api/io.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/zalloc.h> + +/** Strings that correspond to enum hwmon_type. */ +static const char * const hwmon_type_strs[HWMON_TYPE_MAX] = { + NULL, + "cpu", + "curr", + "energy", + "fan", + "humidity", + "in", + "intrusion", + "power", + "pwm", + "temp", +}; +#define LONGEST_HWMON_TYPE_STR "intrusion" + +/** Strings that correspond to enum hwmon_item. */ +static const char * const hwmon_item_strs[HWMON_ITEM__MAX] = { + NULL, + "accuracy", + "alarm", + "auto_channels_temp", + "average", + "average_highest", + "average_interval", + "average_interval_max", + "average_interval_min", + "average_lowest", + "average_max", + "average_min", + "beep", + "cap", + "cap_hyst", + "cap_max", + "cap_min", + "crit", + "crit_hyst", + "div", + "emergency", + "emergency_hist", + "enable", + "fault", + "freq", + "highest", + "input", + "label", + "lcrit", + "lcrit_hyst", + "lowest", + "max", + "max_hyst", + "min", + "min_hyst", + "mod", + "offset", + "pulses", + "rated_max", + "rated_min", + "reset_history", + "target", + "type", + "vid", +}; +#define LONGEST_HWMON_ITEM_STR "average_interval_max" + +static const char *const hwmon_units[HWMON_TYPE_MAX] = { + NULL, + "V", /* cpu */ + "A", /* curr */ + "J", /* energy */ + "rpm", /* fan */ + "%", /* humidity */ + "V", /* in */ + "", /* intrusion */ + "W", /* power */ + "Hz", /* pwm */ + "'C", /* temp */ +}; + +struct hwmon_pmu { + struct perf_pmu pmu; + struct hashmap events; + int hwmon_dir_fd; +}; + +/** + * union hwmon_pmu_event_key: Key for hwmon_pmu->events as such each key + * represents an event. + * + * Related hwmon files start <type><number> that this key represents. + */ +union hwmon_pmu_event_key { + long type_and_num; + struct { + int num :16; + enum hwmon_type type :8; + }; +}; + +/** + * struct hwmon_pmu_event_value: Value in hwmon_pmu->events. + * + * Hwmon files are of the form <type><number>_<item> and may have a suffix + * _alarm. + */ +struct hwmon_pmu_event_value { + /** @items: which item files are present. */ + DECLARE_BITMAP(items, HWMON_ITEM__MAX); + /** @alarm_items: which item files are present. */ + DECLARE_BITMAP(alarm_items, HWMON_ITEM__MAX); + /** @label: contents of <type><number>_label if present. */ + char *label; + /** @name: name computed from label of the form <type>_<label>. */ + char *name; +}; + +bool perf_pmu__is_hwmon(const struct perf_pmu *pmu) +{ + return pmu && pmu->type >= PERF_PMU_TYPE_HWMON_START && + pmu->type <= PERF_PMU_TYPE_HWMON_END; +} + +bool evsel__is_hwmon(const struct evsel *evsel) +{ + return perf_pmu__is_hwmon(evsel->pmu); +} + +static size_t hwmon_pmu__event_hashmap_hash(long key, void *ctx __maybe_unused) +{ + return ((union hwmon_pmu_event_key)key).type_and_num; +} + +static bool hwmon_pmu__event_hashmap_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return ((union hwmon_pmu_event_key)key1).type_and_num == + ((union hwmon_pmu_event_key)key2).type_and_num; +} + +static int hwmon_strcmp(const void *a, const void *b) +{ + const char *sa = a; + const char * const *sb = b; + + return strcmp(sa, *sb); +} + +bool parse_hwmon_filename(const char *filename, + enum hwmon_type *type, + int *number, + enum hwmon_item *item, + bool *alarm) +{ + char fn_type[24]; + const char **elem; + const char *fn_item = NULL; + size_t fn_item_len; + + assert(strlen(LONGEST_HWMON_TYPE_STR) < sizeof(fn_type)); + strlcpy(fn_type, filename, sizeof(fn_type)); + for (size_t i = 0; fn_type[i] != '\0'; i++) { + if (fn_type[i] >= '0' && fn_type[i] <= '9') { + fn_type[i] = '\0'; + *number = strtoul(&filename[i], (char **)&fn_item, 10); + if (*fn_item == '_') + fn_item++; + break; + } + if (fn_type[i] == '_') { + fn_type[i] = '\0'; + *number = -1; + fn_item = &filename[i + 1]; + break; + } + } + if (fn_item == NULL || fn_type[0] == '\0' || (item != NULL && fn_item[0] == '\0')) { + pr_debug3("hwmon_pmu: not a hwmon file '%s'\n", filename); + return false; + } + elem = bsearch(&fn_type, hwmon_type_strs + 1, ARRAY_SIZE(hwmon_type_strs) - 1, + sizeof(hwmon_type_strs[0]), hwmon_strcmp); + if (!elem) { + pr_debug3("hwmon_pmu: not a hwmon type '%s' in file name '%s'\n", + fn_type, filename); + return false; + } + + *type = elem - &hwmon_type_strs[0]; + if (!item) + return true; + + *alarm = false; + fn_item_len = strlen(fn_item); + if (fn_item_len > 6 && !strcmp(&fn_item[fn_item_len - 6], "_alarm")) { + assert(strlen(LONGEST_HWMON_ITEM_STR) < sizeof(fn_type)); + strlcpy(fn_type, fn_item, fn_item_len - 5); + fn_item = fn_type; + *alarm = true; + } + elem = bsearch(fn_item, hwmon_item_strs + 1, ARRAY_SIZE(hwmon_item_strs) - 1, + sizeof(hwmon_item_strs[0]), hwmon_strcmp); + if (!elem) { + pr_debug3("hwmon_pmu: not a hwmon item '%s' in file name '%s'\n", + fn_item, filename); + return false; + } + *item = elem - &hwmon_item_strs[0]; + return true; +} + +static void fix_name(char *p) +{ + char *s = strchr(p, '\n'); + + if (s) + *s = '\0'; + + while (*p != '\0') { + if (strchr(" :,/\n\t", *p)) + *p = '_'; + else + *p = tolower(*p); + p++; + } +} + +static int hwmon_pmu__read_events(struct hwmon_pmu *pmu) +{ + DIR *dir; + struct dirent *ent; + int dup_fd, err = 0; + struct hashmap_entry *cur, *tmp; + size_t bkt; + + if (pmu->pmu.sysfs_aliases_loaded) + return 0; + + /* Use a dup-ed fd as closedir will close it. */ + dup_fd = dup(pmu->hwmon_dir_fd); + if (dup_fd == -1) + return -ENOMEM; + + dir = fdopendir(dup_fd); + if (!dir) { + close(dup_fd); + return -ENOMEM; + } + + while ((ent = readdir(dir)) != NULL) { + enum hwmon_type type; + int number; + enum hwmon_item item; + bool alarm; + union hwmon_pmu_event_key key = { .type_and_num = 0 }; + struct hwmon_pmu_event_value *value; + + if (ent->d_type != DT_REG) + continue; + + if (!parse_hwmon_filename(ent->d_name, &type, &number, &item, &alarm)) { + pr_debug3("Not a hwmon file '%s'\n", ent->d_name); + continue; + } + key.num = number; + key.type = type; + if (!hashmap__find(&pmu->events, key.type_and_num, &value)) { + value = zalloc(sizeof(*value)); + if (!value) { + err = -ENOMEM; + goto err_out; + } + err = hashmap__add(&pmu->events, key.type_and_num, value); + if (err) { + free(value); + err = -ENOMEM; + goto err_out; + } + } + __set_bit(item, alarm ? value->alarm_items : value->items); + if (item == HWMON_ITEM_LABEL) { + char buf[128]; + int fd = openat(pmu->hwmon_dir_fd, ent->d_name, O_RDONLY); + ssize_t read_len; + + if (fd < 0) + continue; + + read_len = read(fd, buf, sizeof(buf)); + + while (read_len > 0 && buf[read_len - 1] == '\n') + read_len--; + + if (read_len > 0) + buf[read_len] = '\0'; + + if (buf[0] == '\0') { + pr_debug("hwmon_pmu: empty label file %s %s\n", + pmu->pmu.name, ent->d_name); + close(fd); + continue; + } + value->label = strdup(buf); + if (!value->label) { + pr_debug("hwmon_pmu: memory allocation failure\n"); + close(fd); + continue; + } + snprintf(buf, sizeof(buf), "%s_%s", hwmon_type_strs[type], value->label); + fix_name(buf); + value->name = strdup(buf); + if (!value->name) + pr_debug("hwmon_pmu: memory allocation failure\n"); + close(fd); + } + } + hashmap__for_each_entry_safe((&pmu->events), cur, tmp, bkt) { + union hwmon_pmu_event_key key = { + .type_and_num = cur->key, + }; + struct hwmon_pmu_event_value *value = cur->pvalue; + + if (!test_bit(HWMON_ITEM_INPUT, value->items)) { + pr_debug("hwmon_pmu: removing event '%s%d' that has no input file\n", + hwmon_type_strs[key.type], key.num); + hashmap__delete(&pmu->events, key.type_and_num, &key, &value); + zfree(&value->label); + zfree(&value->name); + free(value); + } + } + pmu->pmu.sysfs_aliases_loaded = true; + +err_out: + closedir(dir); + return err; +} + +struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, int hwmon_dir, const char *sysfs_name, const char *name) +{ + char buf[32]; + struct hwmon_pmu *hwm; + + hwm = zalloc(sizeof(*hwm)); + if (!hwm) + return NULL; + + hwm->hwmon_dir_fd = hwmon_dir; + hwm->pmu.type = PERF_PMU_TYPE_HWMON_START + strtoul(sysfs_name + 5, NULL, 10); + if (hwm->pmu.type > PERF_PMU_TYPE_HWMON_END) { + pr_err("Unable to encode hwmon type from %s in valid PMU type\n", sysfs_name); + goto err_out; + } + snprintf(buf, sizeof(buf), "hwmon_%s", name); + fix_name(buf + 6); + hwm->pmu.name = strdup(buf); + if (!hwm->pmu.name) + goto err_out; + hwm->pmu.alias_name = strdup(sysfs_name); + if (!hwm->pmu.alias_name) + goto err_out; + hwm->pmu.cpus = perf_cpu_map__new("0"); + if (!hwm->pmu.cpus) + goto err_out; + INIT_LIST_HEAD(&hwm->pmu.format); + INIT_LIST_HEAD(&hwm->pmu.aliases); + INIT_LIST_HEAD(&hwm->pmu.caps); + hashmap__init(&hwm->events, hwmon_pmu__event_hashmap_hash, + hwmon_pmu__event_hashmap_equal, /*ctx=*/NULL); + + list_add_tail(&hwm->pmu.list, pmus); + return &hwm->pmu; +err_out: + free((char *)hwm->pmu.name); + free(hwm->pmu.alias_name); + free(hwm); + close(hwmon_dir); + return NULL; +} + +void hwmon_pmu__exit(struct perf_pmu *pmu) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + struct hashmap_entry *cur, *tmp; + size_t bkt; + + hashmap__for_each_entry_safe((&hwm->events), cur, tmp, bkt) { + struct hwmon_pmu_event_value *value = cur->pvalue; + + zfree(&value->label); + zfree(&value->name); + free(value); + } + hashmap__clear(&hwm->events); + close(hwm->hwmon_dir_fd); +} + +static size_t hwmon_pmu__describe_items(struct hwmon_pmu *hwm, char *out_buf, size_t out_buf_len, + union hwmon_pmu_event_key key, + const unsigned long *items, bool is_alarm) +{ + size_t bit; + char buf[64]; + size_t len = 0; + + for_each_set_bit(bit, items, HWMON_ITEM__MAX) { + int fd; + + if (bit == HWMON_ITEM_LABEL || bit == HWMON_ITEM_INPUT) + continue; + + snprintf(buf, sizeof(buf), "%s%d_%s%s", + hwmon_type_strs[key.type], + key.num, + hwmon_item_strs[bit], + is_alarm ? "_alarm" : ""); + fd = openat(hwm->hwmon_dir_fd, buf, O_RDONLY); + if (fd > 0) { + ssize_t read_len = read(fd, buf, sizeof(buf)); + + while (read_len > 0 && buf[read_len - 1] == '\n') + read_len--; + + if (read_len > 0) { + long long val; + + buf[read_len] = '\0'; + val = strtoll(buf, /*endptr=*/NULL, 10); + len += snprintf(out_buf + len, out_buf_len - len, "%s%s%s=%g%s", + len == 0 ? " " : ", ", + hwmon_item_strs[bit], + is_alarm ? "_alarm" : "", + (double)val / 1000.0, + hwmon_units[key.type]); + } + close(fd); + } + } + return len; +} + +int hwmon_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + struct hashmap_entry *cur; + size_t bkt; + + if (hwmon_pmu__read_events(hwm)) + return false; + + hashmap__for_each_entry((&hwm->events), cur, bkt) { + static const char *const hwmon_scale_units[HWMON_TYPE_MAX] = { + NULL, + "0.001V", /* cpu */ + "0.001A", /* curr */ + "0.001J", /* energy */ + "1rpm", /* fan */ + "0.001%", /* humidity */ + "0.001V", /* in */ + NULL, /* intrusion */ + "0.001W", /* power */ + "1Hz", /* pwm */ + "0.001'C", /* temp */ + }; + static const char *const hwmon_desc[HWMON_TYPE_MAX] = { + NULL, + "CPU core reference voltage", /* cpu */ + "Current", /* curr */ + "Cumulative energy use", /* energy */ + "Fan", /* fan */ + "Humidity", /* humidity */ + "Voltage", /* in */ + "Chassis intrusion detection", /* intrusion */ + "Power use", /* power */ + "Pulse width modulation fan control", /* pwm */ + "Temperature", /* temp */ + }; + char alias_buf[64]; + char desc_buf[256]; + char encoding_buf[128]; + union hwmon_pmu_event_key key = { + .type_and_num = cur->key, + }; + struct hwmon_pmu_event_value *value = cur->pvalue; + struct pmu_event_info info = { + .pmu = pmu, + .name = value->name, + .alias = alias_buf, + .scale_unit = hwmon_scale_units[key.type], + .desc = desc_buf, + .long_desc = NULL, + .encoding_desc = encoding_buf, + .topic = "hwmon", + .pmu_name = pmu->name, + .event_type_desc = "Hwmon event", + }; + int ret; + size_t len; + + len = snprintf(alias_buf, sizeof(alias_buf), "%s%d", + hwmon_type_strs[key.type], key.num); + if (!info.name) { + info.name = info.alias; + info.alias = NULL; + } + + len = snprintf(desc_buf, sizeof(desc_buf), "%s in unit %s named %s.", + hwmon_desc[key.type], + pmu->name + 6, + value->label ?: info.name); + + len += hwmon_pmu__describe_items(hwm, desc_buf + len, sizeof(desc_buf) - len, + key, value->items, /*is_alarm=*/false); + + len += hwmon_pmu__describe_items(hwm, desc_buf + len, sizeof(desc_buf) - len, + key, value->alarm_items, /*is_alarm=*/true); + + snprintf(encoding_buf, sizeof(encoding_buf), "%s/config=0x%lx/", + pmu->name, cur->key); + + ret = cb(state, &info); + if (ret) + return ret; + } + return 0; +} + +size_t hwmon_pmu__num_events(struct perf_pmu *pmu) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + + hwmon_pmu__read_events(hwm); + return hashmap__size(&hwm->events); +} + +bool hwmon_pmu__have_event(struct perf_pmu *pmu, const char *name) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + enum hwmon_type type; + int number; + union hwmon_pmu_event_key key = { .type_and_num = 0 }; + struct hashmap_entry *cur; + size_t bkt; + + if (!parse_hwmon_filename(name, &type, &number, /*item=*/NULL, /*is_alarm=*/NULL)) + return false; + + if (hwmon_pmu__read_events(hwm)) + return false; + + key.type = type; + key.num = number; + if (hashmap_find(&hwm->events, key.type_and_num, /*value=*/NULL)) + return true; + if (key.num != -1) + return false; + /* Item is of form <type>_ which means we should match <type>_<label>. */ + hashmap__for_each_entry((&hwm->events), cur, bkt) { + struct hwmon_pmu_event_value *value = cur->pvalue; + + key.type_and_num = cur->key; + if (key.type == type && value->name && !strcasecmp(name, value->name)) + return true; + } + return false; +} + +static int hwmon_pmu__config_term(const struct hwmon_pmu *hwm, + struct perf_event_attr *attr, + struct parse_events_term *term, + struct parse_events_error *err) +{ + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) { + enum hwmon_type type; + int number; + + if (parse_hwmon_filename(term->config, &type, &number, + /*item=*/NULL, /*is_alarm=*/NULL)) { + if (number == -1) { + /* + * Item is of form <type>_ which means we should + * match <type>_<label>. + */ + struct hashmap_entry *cur; + size_t bkt; + + attr->config = 0; + hashmap__for_each_entry((&hwm->events), cur, bkt) { + union hwmon_pmu_event_key key = { + .type_and_num = cur->key, + }; + struct hwmon_pmu_event_value *value = cur->pvalue; + + if (key.type == type && value->name && + !strcasecmp(term->config, value->name)) { + attr->config = key.type_and_num; + break; + } + } + if (attr->config == 0) + return -EINVAL; + } else { + union hwmon_pmu_event_key key = { + .type_and_num = 0, + }; + + key.type = type; + key.num = number; + attr->config = key.type_and_num; + } + return 0; + } + } + if (err) { + char *err_str; + + parse_events_error__handle(err, term->err_val, + asprintf(&err_str, + "unexpected hwmon event term (%s) %s", + parse_events__term_type_str(term->type_term), + term->config) < 0 + ? strdup("unexpected hwmon event term") + : err_str, + NULL); + } + return -EINVAL; +} + +int hwmon_pmu__config_terms(const struct perf_pmu *pmu, + struct perf_event_attr *attr, + struct parse_events_terms *terms, + struct parse_events_error *err) +{ + struct hwmon_pmu *hwm = container_of(pmu, struct hwmon_pmu, pmu); + struct parse_events_term *term; + int ret; + + ret = hwmon_pmu__read_events(hwm); + if (ret) + return ret; + + list_for_each_entry(term, &terms->terms, list) { + if (hwmon_pmu__config_term(hwm, attr, term, err)) + return -EINVAL; + } + + return 0; + +} + +int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_info *info, + struct parse_events_error *err) +{ + struct parse_events_term *term = + list_first_entry(&terms->terms, struct parse_events_term, list); + + if (term->type_term == PARSE_EVENTS__TERM_TYPE_USER) { + enum hwmon_type type; + int number; + + if (parse_hwmon_filename(term->config, &type, &number, + /*item=*/NULL, /*is_alarm=*/NULL)) { + info->unit = hwmon_units[type]; + if (type == HWMON_TYPE_FAN || type == HWMON_TYPE_PWM || + type == HWMON_TYPE_INTRUSION) + info->scale = 1; + else + info->scale = 0.001; + } + return 0; + } + if (err) { + char *err_str; + + parse_events_error__handle(err, term->err_val, + asprintf(&err_str, + "unexpected hwmon event term (%s) %s", + parse_events__term_type_str(term->type_term), + term->config) < 0 + ? strdup("unexpected hwmon event term") + : err_str, + NULL); + } + return -EINVAL; +} + +int perf_pmus__read_hwmon_pmus(struct list_head *pmus) +{ + char *line = NULL; + DIR *class_hwmon_dir; + struct dirent *class_hwmon_ent; + char buf[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return 0; + + scnprintf(buf, sizeof(buf), "%s/class/hwmon/", sysfs); + class_hwmon_dir = opendir(buf); + if (!class_hwmon_dir) + return 0; + + while ((class_hwmon_ent = readdir(class_hwmon_dir)) != NULL) { + size_t line_len; + int hwmon_dir, name_fd; + struct io io; + + if (class_hwmon_ent->d_type != DT_LNK) + continue; + + scnprintf(buf, sizeof(buf), "%s/class/hwmon/%s", sysfs, class_hwmon_ent->d_name); + hwmon_dir = open(buf, O_DIRECTORY); + if (hwmon_dir == -1) { + pr_debug("hwmon_pmu: not a directory: '%s/class/hwmon/%s'\n", + sysfs, class_hwmon_ent->d_name); + continue; + } + name_fd = openat(hwmon_dir, "name", O_RDONLY); + if (name_fd == -1) { + pr_debug("hwmon_pmu: failure to open '%s/class/hwmon/%s/name'\n", + sysfs, class_hwmon_ent->d_name); + close(hwmon_dir); + continue; + } + io__init(&io, name_fd, buf, sizeof(buf)); + io__getline(&io, &line, &line_len); + if (line_len > 0 && line[line_len - 1] == '\n') + line[line_len - 1] = '\0'; + hwmon_pmu__new(pmus, hwmon_dir, class_hwmon_ent->d_name, line); + close(name_fd); + } + free(line); + closedir(class_hwmon_dir); + return 0; +} + +#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) + +int evsel__hwmon_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx) +{ + struct hwmon_pmu *hwm = container_of(evsel->pmu, struct hwmon_pmu, pmu); + union hwmon_pmu_event_key key = { + .type_and_num = evsel->core.attr.config, + }; + int idx = 0, thread = 0, nthreads, err = 0; + + nthreads = perf_thread_map__nr(threads); + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { + for (thread = 0; thread < nthreads; thread++) { + char buf[64]; + int fd; + + snprintf(buf, sizeof(buf), "%s%d_input", + hwmon_type_strs[key.type], key.num); + + fd = openat(hwm->hwmon_dir_fd, buf, O_RDONLY); + FD(evsel, idx, thread) = fd; + if (fd < 0) { + err = -errno; + goto out_close; + } + } + } + return 0; +out_close: + if (err) + threads->err_thread = thread; + + do { + while (--thread >= 0) { + if (FD(evsel, idx, thread) >= 0) + close(FD(evsel, idx, thread)); + FD(evsel, idx, thread) = -1; + } + thread = nthreads; + } while (--idx >= 0); + return err; +} + +int evsel__hwmon_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) +{ + char buf[32]; + int fd; + ssize_t len; + struct perf_counts_values *count, *old_count = NULL; + + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + + count = perf_counts(evsel->counts, cpu_map_idx, thread); + fd = FD(evsel, cpu_map_idx, thread); + len = pread(fd, buf, sizeof(buf), 0); + if (len <= 0) { + count->lost++; + return -EINVAL; + } + buf[len] = '\0'; + if (old_count) { + count->val = old_count->val + strtoll(buf, NULL, 10); + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + } else { + count->val = strtoll(buf, NULL, 10); + count->run++; + count->ena++; + } + return 0; +} diff --git a/tools/perf/util/hwmon_pmu.h b/tools/perf/util/hwmon_pmu.h new file mode 100644 index 000000000000..882566846df4 --- /dev/null +++ b/tools/perf/util/hwmon_pmu.h @@ -0,0 +1,151 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __HWMON_PMU_H +#define __HWMON_PMU_H + +#include "pmu.h" +#include <stdbool.h> + +struct list_head; +struct perf_thread_map; + +/** + * enum hwmon_type: + * + * As described in Documentation/hwmon/sysfs-interface.rst hwmon events are + * defined over multiple files of the form <type><num>_<item>. This enum + * captures potential <type> values. + * + * This enum is exposed for testing. + */ +enum hwmon_type { + HWMON_TYPE_NONE, + + HWMON_TYPE_CPU, + HWMON_TYPE_CURR, + HWMON_TYPE_ENERGY, + HWMON_TYPE_FAN, + HWMON_TYPE_HUMIDITY, + HWMON_TYPE_IN, + HWMON_TYPE_INTRUSION, + HWMON_TYPE_POWER, + HWMON_TYPE_PWM, + HWMON_TYPE_TEMP, + + HWMON_TYPE_MAX +}; + +/** + * enum hwmon_item: + * + * Similar to enum hwmon_type but describes the item part of a a sysfs filename. + * + * This enum is exposed for testing. + */ +enum hwmon_item { + HWMON_ITEM_NONE, + + HWMON_ITEM_ACCURACY, + HWMON_ITEM_ALARM, + HWMON_ITEM_AUTO_CHANNELS_TEMP, + HWMON_ITEM_AVERAGE, + HWMON_ITEM_AVERAGE_HIGHEST, + HWMON_ITEM_AVERAGE_INTERVAL, + HWMON_ITEM_AVERAGE_INTERVAL_MAX, + HWMON_ITEM_AVERAGE_INTERVAL_MIN, + HWMON_ITEM_AVERAGE_LOWEST, + HWMON_ITEM_AVERAGE_MAX, + HWMON_ITEM_AVERAGE_MIN, + HWMON_ITEM_BEEP, + HWMON_ITEM_CAP, + HWMON_ITEM_CAP_HYST, + HWMON_ITEM_CAP_MAX, + HWMON_ITEM_CAP_MIN, + HWMON_ITEM_CRIT, + HWMON_ITEM_CRIT_HYST, + HWMON_ITEM_DIV, + HWMON_ITEM_EMERGENCY, + HWMON_ITEM_EMERGENCY_HIST, + HWMON_ITEM_ENABLE, + HWMON_ITEM_FAULT, + HWMON_ITEM_FREQ, + HWMON_ITEM_HIGHEST, + HWMON_ITEM_INPUT, + HWMON_ITEM_LABEL, + HWMON_ITEM_LCRIT, + HWMON_ITEM_LCRIT_HYST, + HWMON_ITEM_LOWEST, + HWMON_ITEM_MAX, + HWMON_ITEM_MAX_HYST, + HWMON_ITEM_MIN, + HWMON_ITEM_MIN_HYST, + HWMON_ITEM_MOD, + HWMON_ITEM_OFFSET, + HWMON_ITEM_PULSES, + HWMON_ITEM_RATED_MAX, + HWMON_ITEM_RATED_MIN, + HWMON_ITEM_RESET_HISTORY, + HWMON_ITEM_TARGET, + HWMON_ITEM_TYPE, + HWMON_ITEM_VID, + + HWMON_ITEM__MAX, +}; + +bool perf_pmu__is_hwmon(const struct perf_pmu *pmu); +bool evsel__is_hwmon(const struct evsel *evsel); + +/** + * parse_hwmon_filename() - Parse filename into constituent parts. + * + * @filename: To be parsed, of the form <type><number>_<item>. + * @type: The type defined from the parsed file name. + * @number: The number of the type, for example there may be more than 1 fan. + * @item: A hwmon <type><number> may have multiple associated items. + * @alarm: Is the filename for an alarm value? + * + * An example of a hwmon filename is "temp1_input". The type is temp for a + * temperature value. The number is 1. The item within the file is an input + * value - the temperature itself. This file doesn't contain an alarm value. + * + * Exposed for testing. + */ +bool parse_hwmon_filename(const char *filename, + enum hwmon_type *type, + int *number, + enum hwmon_item *item, + bool *alarm); + +/** + * hwmon_pmu__new() - Allocate and construct a hwmon PMU. + * + * @pmus: The list of PMUs to be added to. + * @hwmon_dir: An O_DIRECTORY file descriptor for a hwmon directory. + * @sysfs_name: Name of the hwmon sysfs directory like hwmon0. + * @name: The contents of the "name" file in the hwmon directory. + * + * Exposed for testing. Regular construction should happen via + * perf_pmus__read_hwmon_pmus. + */ +struct perf_pmu *hwmon_pmu__new(struct list_head *pmus, int hwmon_dir, + const char *sysfs_name, const char *name); +void hwmon_pmu__exit(struct perf_pmu *pmu); + +int hwmon_pmu__for_each_event(struct perf_pmu *pmu, void *state, pmu_event_callback cb); +size_t hwmon_pmu__num_events(struct perf_pmu *pmu); +bool hwmon_pmu__have_event(struct perf_pmu *pmu, const char *name); +int hwmon_pmu__config_terms(const struct perf_pmu *pmu, + struct perf_event_attr *attr, + struct parse_events_terms *terms, + struct parse_events_error *err); +int hwmon_pmu__check_alias(struct parse_events_terms *terms, struct perf_pmu_info *info, + struct parse_events_error *err); + +int perf_pmus__read_hwmon_pmus(struct list_head *pmus); + + +int evsel__hwmon_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx); +int evsel__hwmon_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread); + +#endif /* __HWMON_PMU_H */ diff --git a/tools/perf/util/include/dwarf-regs.h b/tools/perf/util/include/dwarf-regs.h index 75b28dcc8317..6f1b9f6b2466 100644 --- a/tools/perf/util/include/dwarf-regs.h +++ b/tools/perf/util/include/dwarf-regs.h @@ -2,52 +2,132 @@ #ifndef _PERF_DWARF_REGS_H_ #define _PERF_DWARF_REGS_H_ #include "annotate.h" +#include <elf.h> + +#ifndef EM_AARCH64 +#define EM_AARCH64 183 /* ARM 64 bit */ +#endif + +#ifndef EM_CSKY +#define EM_CSKY 252 /* C-SKY */ +#endif +#ifndef EF_CSKY_ABIV1 +#define EF_CSKY_ABIV1 0X10000000 +#endif +#ifndef EF_CSKY_ABIV2 +#define EF_CSKY_ABIV2 0X20000000 +#endif + +#ifndef EM_LOONGARCH +#define EM_LOONGARCH 258 /* LoongArch */ +#endif + +/* EM_HOST gives the ELF machine for host, EF_HOST gives additional flags. */ +#if defined(__x86_64__) + #define EM_HOST EM_X86_64 +#elif defined(__i386__) + #define EM_HOST EM_386 +#elif defined(__aarch64__) + #define EM_HOST EM_AARCH64 +#elif defined(__arm__) + #define EM_HOST EM_ARM +#elif defined(__alpha__) + #define EM_HOST EM_ALPHA +#elif defined(__arc__) + #define EM_HOST EM_ARC +#elif defined(__AVR__) + #define EM_HOST EM_AVR +#elif defined(__AVR32__) + #define EM_HOST EM_AVR32 +#elif defined(__bfin__) + #define EM_HOST EM_BLACKFIN +#elif defined(__csky__) + #define EM_HOST EM_CSKY + #if defined(__CSKYABIV2__) + #define EF_HOST EF_CSKY_ABIV2 + #else + #define EF_HOST EF_CSKY_ABIV1 + #endif +#elif defined(__cris__) + #define EM_HOST EM_CRIS +#elif defined(__hppa__) // HP PA-RISC + #define EM_HOST EM_PARISC +#elif defined(__loongarch__) + #define EM_HOST EM_LOONGARCH +#elif defined(__mips__) + #define EM_HOST EM_MIPS +#elif defined(__m32r__) + #define EM_HOST EM_M32R +#elif defined(__microblaze__) + #define EM_HOST EM_MICROBLAZE +#elif defined(__MSP430__) + #define EM_HOST EM_MSP430 +#elif defined(__powerpc64__) + #define EM_HOST EM_PPC64 +#elif defined(__powerpc__) + #define EM_HOST EM_PPC +#elif defined(__riscv) + #define EM_HOST EM_RISCV +#elif defined(__s390x__) + #define EM_HOST EM_S390 +#elif defined(__sh__) + #define EM_HOST EM_SH +#elif defined(__sparc64__) || defined(__sparc__) + #define EM_HOST EM_SPARC +#elif defined(__xtensa__) + #define EM_HOST EM_XTENSA +#else + /* Unknown host ELF machine type. */ + #define EM_HOST EM_NONE +#endif + +#if !defined(EF_HOST) + #define EF_HOST 0 +#endif #define DWARF_REG_PC 0xd3af9c /* random number */ #define DWARF_REG_FB 0xd3affb /* random number */ -#ifdef HAVE_DWARF_SUPPORT -const char *get_arch_regstr(unsigned int n); -/* - * get_dwarf_regstr - Returns ftrace register string from DWARF regnum - * n: DWARF register number - * machine: ELF machine signature (EM_*) +#ifdef HAVE_LIBDW_SUPPORT +const char *get_csky_regstr(unsigned int n, unsigned int flags); + +/** + * get_dwarf_regstr() - Returns ftrace register string from DWARF regnum. + * @n: DWARF register number. + * @machine: ELF machine signature (EM_*). + * @flags: ELF flags for things like ABI differences. */ -const char *get_dwarf_regstr(unsigned int n, unsigned int machine); +const char *get_dwarf_regstr(unsigned int n, unsigned int machine, unsigned int flags); +int get_x86_regnum(const char *name); + +#if !defined(__x86_64__) && !defined(__i386__) int get_arch_regnum(const char *name); +#endif + /* * get_dwarf_regnum - Returns DWARF regnum from register name * name: architecture register name * machine: ELF machine signature (EM_*) */ -int get_dwarf_regnum(const char *name, unsigned int machine); +int get_dwarf_regnum(const char *name, unsigned int machine, unsigned int flags); + +void get_powerpc_regs(u32 raw_insn, int is_source, struct annotated_op_loc *op_loc); -#else /* HAVE_DWARF_SUPPORT */ +#else /* HAVE_LIBDW_SUPPORT */ static inline int get_dwarf_regnum(const char *name __maybe_unused, - unsigned int machine __maybe_unused) + unsigned int machine __maybe_unused, + unsigned int flags __maybe_unused) { return -1; } -#endif -#if !defined(__powerpc__) || !defined(HAVE_DWARF_SUPPORT) static inline void get_powerpc_regs(u32 raw_insn __maybe_unused, int is_source __maybe_unused, struct annotated_op_loc *op_loc __maybe_unused) { return; } -#else -void get_powerpc_regs(u32 raw_insn, int is_source, struct annotated_op_loc *op_loc); #endif -#ifdef HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET -/* - * Arch should support fetching the offset of a register in pt_regs - * by its name. See kernel's regs_query_register_offset in - * arch/xxx/kernel/ptrace.c. - */ -int regs_query_register_offset(const char *name); -#endif #endif diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 27d9b5c9fec8..a7c589fecb98 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -100,7 +100,7 @@ static void intel_bts_dump(struct intel_bts *bts __maybe_unused, else sz = len; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < sz; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < br_sz; i++) @@ -808,7 +808,7 @@ static int intel_bts_synth_events(struct intel_bts *bts, static const char * const intel_bts_info_fmts[] = { [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n", - [INTEL_BTS_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_BTS_TIME_MULT] = " Time Multiplier %"PRIu64"\n", [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n", [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n", diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index bccb988a7a44..94fb16cf9e0c 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -10,7 +10,7 @@ #include <byteswap.h> #include <linux/kernel.h> #include <linux/compiler.h> -#include <asm-generic/unaligned.h> +#include <linux/unaligned.h> #include "intel-pt-pkt-decoder.h" diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index fd2597613f3d..30be6dfe09eb 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -249,7 +249,7 @@ static void intel_pt_dump(struct intel_pt *pt __maybe_unused, else pkt_len = 1; printf("."); - color_fprintf(stdout, color, " %08x: ", pos); + color_fprintf(stdout, color, " %08zx: ", pos); for (i = 0; i < pkt_len; i++) color_fprintf(stdout, color, " %02x", buf[i]); for (; i < 16; i++) @@ -4110,7 +4110,7 @@ static int intel_pt_parse_vm_tm_corr_args(struct intel_pt *pt) static const char * const intel_pt_info_fmts[] = { [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n", [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n", - [INTEL_PT_TIME_MULT] = " Time Muliplier %"PRIu64"\n", + [INTEL_PT_TIME_MULT] = " Time Multiplier %"PRIu64"\n", [INTEL_PT_TIME_ZERO] = " Time Zero %"PRIu64"\n", [INTEL_PT_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n", [INTEL_PT_TSC_BIT] = " TSC bit %#"PRIx64"\n", diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index fad227b625d1..4f0ac998b0cc 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1343,7 +1343,7 @@ static int maps__set_module_path(struct maps *maps, const char *path, struct kmo * we need to update the symtab_type if needed. */ if (m->comp && is_kmod_dso(dso)) { - dso__set_symtab_type(dso, dso__symtab_type(dso)); + dso__set_symtab_type(dso, dso__symtab_type(dso)+1); dso__set_comp(dso, m->comp); } map__put(map); diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 051feb93ed8d..bf5090f5220b 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -366,6 +366,12 @@ static const char * const mem_lvl[] = { }; static const char * const mem_lvlnum[] = { + [PERF_MEM_LVLNUM_L1] = "L1", + [PERF_MEM_LVLNUM_L2] = "L2", + [PERF_MEM_LVLNUM_L3] = "L3", + [PERF_MEM_LVLNUM_L4] = "L4", + [PERF_MEM_LVLNUM_L2_MHB] = "L2 MHB", + [PERF_MEM_LVLNUM_MSC] = "Memory-side Cache", [PERF_MEM_LVLNUM_UNC] = "Uncached", [PERF_MEM_LVLNUM_CXL] = "CXL", [PERF_MEM_LVLNUM_IO] = "I/O", @@ -448,7 +454,7 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, const struct mem_info *mem_inf if (mem_lvlnum[lvl]) l += scnprintf(out + l, sz - l, mem_lvlnum[lvl]); else - l += scnprintf(out + l, sz - l, "L%d", lvl); + l += scnprintf(out + l, sz - l, "Unknown level %d", lvl); l += scnprintf(out + l, sz - l, " %s", hit_miss); return l; diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 4dff3e925a47..46920ebadfd1 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -14,6 +14,7 @@ #include "pmus.h" #include "print-events.h" #include "smt.h" +#include "tool_pmu.h" #include "expr.h" #include "rblist.h" #include <string.h> @@ -297,8 +298,8 @@ static int setup_metric_events(const char *pmu, struct hashmap *ids, struct expr_id_data *val_ptr; /* Don't match events for the wrong hybrid PMU. */ - if (!all_pmus && ev->pmu_name && evsel__is_hybrid(ev) && - strcmp(ev->pmu_name, pmu)) + if (!all_pmus && ev->pmu && evsel__is_hybrid(ev) && + strcmp(ev->pmu->name, pmu)) continue; /* * Check for duplicate events with the same name. For @@ -673,20 +674,20 @@ static int metricgroup__build_event_string(struct strbuf *events, struct hashmap_entry *cur; size_t bkt; bool no_group = true, has_tool_events = false; - bool tool_events[PERF_TOOL_MAX] = {false}; + bool tool_events[TOOL_PMU__EVENT_MAX] = {false}; int ret = 0; #define RETURN_IF_NON_ZERO(x) do { if (x) return x; } while (0) hashmap__for_each_entry(ctx->ids, cur, bkt) { const char *sep, *rsep, *id = cur->pkey; - enum perf_tool_event ev; + enum tool_pmu_event ev; pr_debug("found event %s\n", id); /* Always move tool events outside of the group. */ - ev = perf_tool_event__from_str(id); - if (ev != PERF_TOOL_NONE) { + ev = tool_pmu__str_to_event(id); + if (ev != TOOL_PMU__EVENT_NONE) { has_tool_events = true; tool_events[ev] = true; continue; @@ -754,14 +755,14 @@ static int metricgroup__build_event_string(struct strbuf *events, if (has_tool_events) { int i; - perf_tool_event__for_each_event(i) { + tool_pmu__for_each_event(i) { if (tool_events[i]) { if (!no_group) { ret = strbuf_addch(events, ','); RETURN_IF_NON_ZERO(ret); } no_group = false; - ret = strbuf_addstr(events, perf_tool_event__to_str(i)); + ret = strbuf_addstr(events, tool_pmu__event_to_str(i)); RETURN_IF_NON_ZERO(ret); } } @@ -1147,14 +1148,14 @@ static int metric_list_cmp(void *priv __maybe_unused, const struct list_head *l, int i, left_count, right_count; left_count = hashmap__size(left->pctx->ids); - perf_tool_event__for_each_event(i) { - if (!expr__get_id(left->pctx, perf_tool_event__to_str(i), &data)) + tool_pmu__for_each_event(i) { + if (!expr__get_id(left->pctx, tool_pmu__event_to_str(i), &data)) left_count--; } right_count = hashmap__size(right->pctx->ids); - perf_tool_event__for_each_event(i) { - if (!expr__get_id(right->pctx, perf_tool_event__to_str(i), &data)) + tool_pmu__for_each_event(i) { + if (!expr__get_id(right->pctx, tool_pmu__event_to_str(i), &data)) right_count--; } @@ -1374,18 +1375,18 @@ static void metricgroup__free_metrics(struct list_head *metric_list) * to true if tool event is found. */ static void find_tool_events(const struct list_head *metric_list, - bool tool_events[PERF_TOOL_MAX]) + bool tool_events[TOOL_PMU__EVENT_MAX]) { struct metric *m; list_for_each_entry(m, metric_list, nd) { int i; - perf_tool_event__for_each_event(i) { + tool_pmu__for_each_event(i) { struct expr_id_data *data; if (!tool_events[i] && - !expr__get_id(m->pctx, perf_tool_event__to_str(i), &data)) + !expr__get_id(m->pctx, tool_pmu__event_to_str(i), &data)) tool_events[i] = true; } } @@ -1446,7 +1447,7 @@ err_out: */ static int parse_ids(bool metric_no_merge, bool fake_pmu, struct expr_parse_ctx *ids, const char *modifier, - bool group_events, const bool tool_events[PERF_TOOL_MAX], + bool group_events, const bool tool_events[TOOL_PMU__EVENT_MAX], struct evlist **out_evlist) { struct parse_events_error parse_error; @@ -1471,9 +1472,9 @@ static int parse_ids(bool metric_no_merge, bool fake_pmu, * event1 if #smt_on else 0 * Add a tool event to avoid a parse error on an empty string. */ - perf_tool_event__for_each_event(i) { + tool_pmu__for_each_event(i) { if (tool_events[i]) { - char *tmp = strdup(perf_tool_event__to_str(i)); + char *tmp = strdup(tool_pmu__event_to_str(i)); if (!tmp) return -ENOMEM; @@ -1535,7 +1536,7 @@ static int parse_groups(struct evlist *perf_evlist, struct evlist *combined_evlist = NULL; LIST_HEAD(metric_list); struct metric *m; - bool tool_events[PERF_TOOL_MAX] = {false}; + bool tool_events[TOOL_PMU__EVENT_MAX] = {false}; bool is_default = !strcmp(str, "Default"); int ret; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 9a8be1e46d67..afeb8d815bbf 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -228,7 +228,7 @@ __add_event(struct list_head *list, int *idx, bool init_attr, const char *name, const char *metric_id, struct perf_pmu *pmu, struct list_head *config_terms, bool auto_merge_stats, - struct perf_cpu_map *cpu_list) + struct perf_cpu_map *cpu_list, u64 alternate_hw_config) { struct evsel *evsel; struct perf_cpu_map *cpus = perf_cpu_map__is_empty(cpu_list) && pmu ? pmu->cpus : cpu_list; @@ -263,7 +263,7 @@ __add_event(struct list_head *list, int *idx, evsel->core.is_pmu_core = pmu ? pmu->is_core : false; evsel->auto_merge_stats = auto_merge_stats; evsel->pmu = pmu; - evsel->pmu_name = pmu ? strdup(pmu->name) : NULL; + evsel->alternate_hw_config = alternate_hw_config; if (name) evsel->name = strdup(name); @@ -286,47 +286,19 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, { return __add_event(/*list=*/NULL, &idx, attr, /*init_attr=*/false, name, metric_id, pmu, /*config_terms=*/NULL, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL); + /*auto_merge_stats=*/false, /*cpu_list=*/NULL, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX); } static int add_event(struct list_head *list, int *idx, struct perf_event_attr *attr, const char *name, - const char *metric_id, struct list_head *config_terms) + const char *metric_id, struct list_head *config_terms, + u64 alternate_hw_config) { return __add_event(list, idx, attr, /*init_attr*/true, name, metric_id, /*pmu=*/NULL, config_terms, - /*auto_merge_stats=*/false, /*cpu_list=*/NULL) ? 0 : -ENOMEM; -} - -static int add_event_tool(struct list_head *list, int *idx, - enum perf_tool_event tool_event) -{ - struct evsel *evsel; - struct perf_event_attr attr = { - .type = PERF_TYPE_SOFTWARE, - .config = PERF_COUNT_SW_DUMMY, - }; - struct perf_cpu_map *cpu_list = NULL; - - if (tool_event == PERF_TOOL_DURATION_TIME) { - /* Duration time is gathered globally, pretend it is only on CPU0. */ - cpu_list = perf_cpu_map__new("0"); - } - evsel = __add_event(list, idx, &attr, /*init_attr=*/true, /*name=*/NULL, - /*metric_id=*/NULL, /*pmu=*/NULL, - /*config_terms=*/NULL, /*auto_merge_stats=*/false, - cpu_list); - perf_cpu_map__put(cpu_list); - if (!evsel) - return -ENOMEM; - evsel->tool_event = tool_event; - if (tool_event == PERF_TOOL_DURATION_TIME - || tool_event == PERF_TOOL_USER_TIME - || tool_event == PERF_TOOL_SYSTEM_TIME) { - free((char *)evsel->unit); - evsel->unit = strdup("ns"); - } - return 0; + /*auto_merge_stats=*/false, /*cpu_list=*/NULL, + alternate_hw_config) ? 0 : -ENOMEM; } /** @@ -450,7 +422,7 @@ bool parse_events__filter_pmu(const struct parse_events_state *parse_state, static int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, const struct parse_events_terms *const_parsed_terms, - bool auto_merge_stats); + bool auto_merge_stats, u64 alternate_hw_config); int parse_events_add_cache(struct list_head *list, int *idx, const char *name, struct parse_events_state *parse_state, @@ -476,7 +448,8 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, */ ret = parse_events_add_pmu(parse_state, list, pmu, parsed_terms, - perf_pmu__auto_merge_stats(pmu)); + perf_pmu__auto_merge_stats(pmu), + /*alternate_hw_config=*/PERF_COUNT_HW_MAX); if (ret) return ret; continue; @@ -507,7 +480,8 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name, if (__add_event(list, idx, &attr, /*init_attr*/true, config_name ?: name, metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL) == NULL) + /*cpu_list=*/NULL, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX) == NULL) return -ENOMEM; free_config_terms(&config_terms); @@ -772,7 +746,7 @@ int parse_events_add_breakpoint(struct parse_events_state *parse_state, name = get_config_name(head_config); return add_event(list, &parse_state->idx, &attr, name, /*mertic_id=*/NULL, - &config_terms); + &config_terms, /*alternate_hw_config=*/PERF_COUNT_HW_MAX); } static int check_type_val(struct parse_events_term *term, @@ -794,7 +768,7 @@ static int check_type_val(struct parse_events_term *term, static bool config_term_shrinked; -static const char *config_term_name(enum parse_events__term_type term_type) +const char *parse_events__term_type_str(enum parse_events__term_type term_type) { /* * Update according to parse-events.l @@ -880,7 +854,7 @@ config_term_avail(enum parse_events__term_type term_type, struct parse_events_er /* term_type is validated so indexing is safe */ if (asprintf(&err_str, "'%s' is not usable in 'perf stat'", - config_term_name(term_type)) >= 0) + parse_events__term_type_str(term_type)) >= 0) parse_events_error__handle(err, -1, err_str, NULL); return false; } @@ -1004,7 +978,7 @@ do { \ case PARSE_EVENTS__TERM_TYPE_HARDWARE: default: parse_events_error__handle(err, term->err_term, - strdup(config_term_name(term->type_term)), + strdup(parse_events__term_type_str(term->type_term)), parse_events_formats_error_string(NULL)); return -EINVAL; } @@ -1072,6 +1046,7 @@ static int config_term_pmu(struct perf_event_attr *attr, if (perf_pmu__have_event(pmu, term->config)) { term->type_term = PARSE_EVENTS__TERM_TYPE_USER; term->no_value = true; + term->alternate_hw_config = true; } else { attr->type = PERF_TYPE_HARDWARE; attr->config = term->val.num; @@ -1127,8 +1102,9 @@ static int config_term_tracepoint(struct perf_event_attr *attr, default: if (err) { parse_events_error__handle(err, term->err_term, - strdup(config_term_name(term->type_term)), - strdup("valid terms: call-graph,stack-size\n")); + strdup(parse_events__term_type_str(term->type_term)), + strdup("valid terms: call-graph,stack-size\n") + ); } return -EINVAL; } @@ -1384,8 +1360,9 @@ static int __parse_events_add_numeric(struct parse_events_state *parse_state, name = get_config_name(head_config); metric_id = get_config_metric_id(head_config); ret = __add_event(list, &parse_state->idx, &attr, /*init_attr*/true, name, - metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, - /*cpu_list=*/NULL) ? 0 : -ENOMEM; + metric_id, pmu, &config_terms, /*auto_merge_stats=*/false, + /*cpu_list=*/NULL, /*alternate_hw_config=*/PERF_COUNT_HW_MAX + ) == NULL ? -ENOMEM : 0; free_config_terms(&config_terms); return ret; } @@ -1421,13 +1398,6 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, type, /*extended_type=*/0, config, head_config); } -int parse_events_add_tool(struct parse_events_state *parse_state, - struct list_head *list, - int tool_event) -{ - return add_event_tool(list, &parse_state->idx, tool_event); -} - static bool config_term_percore(struct list_head *config_terms) { struct evsel_config_term *term; @@ -1443,7 +1413,7 @@ static bool config_term_percore(struct list_head *config_terms) static int parse_events_add_pmu(struct parse_events_state *parse_state, struct list_head *list, struct perf_pmu *pmu, const struct parse_events_terms *const_parsed_terms, - bool auto_merge_stats) + bool auto_merge_stats, u64 alternate_hw_config) { struct perf_event_attr attr; struct perf_pmu_info info; @@ -1480,7 +1450,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, /*init_attr=*/true, /*name=*/NULL, /*metric_id=*/NULL, pmu, /*config_terms=*/NULL, auto_merge_stats, - /*cpu_list=*/NULL); + /*cpu_list=*/NULL, alternate_hw_config); return evsel ? 0 : -ENOMEM; } @@ -1501,7 +1471,8 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, /* Look for event names in the terms and rewrite into format based terms. */ if (perf_pmu__check_alias(pmu, &parsed_terms, - &info, &alias_rewrote_terms, err)) { + &info, &alias_rewrote_terms, + &alternate_hw_config, err)) { parse_events_terms__exit(&parsed_terms); return -EINVAL; } @@ -1537,7 +1508,9 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, return -ENOMEM; } - if (perf_pmu__config(pmu, &attr, &parsed_terms, parse_state->error)) { + /* Skip configuring hard coded terms that were applied by config_attr. */ + if (perf_pmu__config(pmu, &attr, &parsed_terms, /*apply_hardcoded=*/false, + parse_state->error)) { free_config_terms(&config_terms); parse_events_terms__exit(&parsed_terms); return -EINVAL; @@ -1546,7 +1519,8 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, evsel = __add_event(list, &parse_state->idx, &attr, /*init_attr=*/true, get_config_name(&parsed_terms), get_config_metric_id(&parsed_terms), pmu, - &config_terms, auto_merge_stats, /*cpu_list=*/NULL); + &config_terms, auto_merge_stats, /*cpu_list=*/NULL, + alternate_hw_config); if (!evsel) { parse_events_terms__exit(&parsed_terms); return -ENOMEM; @@ -1567,7 +1541,7 @@ static int parse_events_add_pmu(struct parse_events_state *parse_state, } int parse_events_multi_pmu_add(struct parse_events_state *parse_state, - const char *event_name, + const char *event_name, u64 hw_config, const struct parse_events_terms *const_parsed_terms, struct list_head **listp, void *loc_) { @@ -1620,7 +1594,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, auto_merge_stats = perf_pmu__auto_merge_stats(pmu); if (!parse_events_add_pmu(parse_state, list, pmu, - &parsed_terms, auto_merge_stats)) { + &parsed_terms, auto_merge_stats, hw_config)) { struct strbuf sb; strbuf_init(&sb, /*hint=*/ 0); @@ -1633,7 +1607,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state, if (parse_state->fake_pmu) { if (!parse_events_add_pmu(parse_state, list, perf_pmus__fake_pmu(), &parsed_terms, - /*auto_merge_stats=*/true)) { + /*auto_merge_stats=*/true, hw_config)) { struct strbuf sb; strbuf_init(&sb, /*hint=*/ 0); @@ -1674,13 +1648,15 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state /* Attempt to add to list assuming event_or_pmu is a PMU name. */ pmu = perf_pmus__find(event_or_pmu); if (pmu && !parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms, - /*auto_merge_stats=*/false)) + /*auto_merge_stats=*/false, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) return 0; if (parse_state->fake_pmu) { if (!parse_events_add_pmu(parse_state, *listp, perf_pmus__fake_pmu(), const_parsed_terms, - /*auto_merge_stats=*/false)) + /*auto_merge_stats=*/false, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) return 0; } @@ -1693,7 +1669,8 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state if (!parse_events_add_pmu(parse_state, *listp, pmu, const_parsed_terms, - auto_merge_stats)) { + auto_merge_stats, + /*alternate_hw_config=*/PERF_COUNT_HW_MAX)) { ok++; parse_state->wild_card_pmus = true; } @@ -1704,7 +1681,8 @@ int parse_events_multi_pmu_add_or_add_pmu(struct parse_events_state *parse_state /* Failure to add, assume event_or_pmu is an event name. */ zfree(listp); - if (!parse_events_multi_pmu_add(parse_state, event_or_pmu, const_parsed_terms, listp, loc)) + if (!parse_events_multi_pmu_add(parse_state, event_or_pmu, PERF_COUNT_HW_MAX, + const_parsed_terms, listp, loc)) return 0; if (asprintf(&help, "Unable to find PMU or event on a PMU of '%s'", event_or_pmu) < 0) @@ -1755,14 +1733,10 @@ static int parse_events__modifier_list(struct parse_events_state *parse_state, int exclude = eu | ek | eh; int exclude_GH = group ? evsel->exclude_GH : 0; - if (mod.precise) { - /* use of precise requires exclude_guest */ - eG = 1; - } if (mod.user) { if (!exclude) exclude = eu = ek = eh = 1; - if (!exclude_GH && !perf_guest) + if (!exclude_GH && !perf_guest && exclude_GH_default) eG = 1; eu = 0; } @@ -2566,7 +2540,7 @@ int parse_events_term__num(struct parse_events_term **term, struct parse_events_term temp = { .type_val = PARSE_EVENTS__TERM_TYPE_NUM, .type_term = type_term, - .config = config ? : strdup(config_term_name(type_term)), + .config = config ? : strdup(parse_events__term_type_str(type_term)), .no_value = no_value, .err_term = loc_term ? loc_term->first_column : 0, .err_val = loc_val ? loc_val->first_column : 0, @@ -2600,7 +2574,7 @@ int parse_events_term__term(struct parse_events_term **term, void *loc_term, void *loc_val) { return parse_events_term__str(term, term_lhs, NULL, - strdup(config_term_name(term_rhs)), + strdup(parse_events__term_type_str(term_rhs)), loc_term, loc_val); } @@ -2707,7 +2681,8 @@ int parse_events_terms__to_strbuf(const struct parse_events_terms *terms, struct if (ret < 0) return ret; } else if ((unsigned int)term->type_term < __PARSE_EVENTS__TERM_TYPE_NR) { - ret = strbuf_addf(sb, "%s=", config_term_name(term->type_term)); + ret = strbuf_addf(sb, "%s=", + parse_events__term_type_str(term->type_term)); if (ret < 0) return ret; } @@ -2727,7 +2702,7 @@ static void config_terms_list(char *buf, size_t buf_sz) buf[0] = '\0'; for (i = 0; i < __PARSE_EVENTS__TERM_TYPE_NR; i++) { - const char *name = config_term_name(i); + const char *name = parse_events__term_type_str(i); if (!config_term_avail(i, NULL)) continue; diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 10cc9c433116..3f4334ec6231 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -127,6 +127,12 @@ struct parse_events_term { * value is assumed to be 1. An event name also has no value. */ bool no_value; + /** + * @alternate_hw_config: config is the event name but num is an + * alternate PERF_TYPE_HARDWARE config value which is often nice for the + * sake of quick matching. + */ + bool alternate_hw_config; }; struct parse_events_error { @@ -162,6 +168,8 @@ struct parse_events_state { bool wild_card_pmus; }; +const char *parse_events__term_type_str(enum parse_events__term_type term_type); + bool parse_events__filter_pmu(const struct parse_events_state *parse_state, const struct perf_pmu *pmu); void parse_events__shrink_config_terms(void); @@ -221,9 +229,6 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, u32 type, u64 config, const struct parse_events_terms *head_config, bool wildcard); -int parse_events_add_tool(struct parse_events_state *parse_state, - struct list_head *list, - int tool_event); int parse_events_add_cache(struct list_head *list, int *idx, const char *name, struct parse_events_state *parse_state, struct parse_events_terms *parsed_terms); @@ -238,7 +243,7 @@ struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr, struct perf_pmu *pmu); int parse_events_multi_pmu_add(struct parse_events_state *parse_state, - const char *event_name, + const char *event_name, u64 hw_config, const struct parse_events_terms *const_parsed_terms, struct list_head **listp, void *loc); diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 5a0bcd7f166a..14e5bd856a18 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -121,14 +121,6 @@ static int sym(yyscan_t scanner, int type, int config) return type == PERF_TYPE_HARDWARE ? PE_VALUE_SYM_HW : PE_VALUE_SYM_SW; } -static int tool(yyscan_t scanner, enum perf_tool_event event) -{ - YYSTYPE *yylval = parse_events_get_lval(scanner); - - yylval->num = event; - return PE_VALUE_SYM_TOOL; -} - static int term(yyscan_t scanner, enum parse_events__term_type type) { YYSTYPE *yylval = parse_events_get_lval(scanner); @@ -404,9 +396,6 @@ cpu-migrations|migrations { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COU alignment-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS); } emulation-faults { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); } dummy { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); } -duration_time { return tool(yyscanner, PERF_TOOL_DURATION_TIME); } -user_time { return tool(yyscanner, PERF_TOOL_USER_TIME); } -system_time { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); } bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); } cgroup-switches { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index b3c51f06cbdc..f888cbb076d6 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -56,7 +56,6 @@ static void free_list_evsel(struct list_head* list_evsel) %token PE_START_EVENTS PE_START_TERMS %token PE_VALUE PE_VALUE_SYM_HW PE_VALUE_SYM_SW PE_TERM -%token PE_VALUE_SYM_TOOL %token PE_EVENT_NAME %token PE_RAW PE_NAME %token PE_MODIFIER_EVENT PE_MODIFIER_BP PE_BP_COLON PE_BP_SLASH @@ -68,7 +67,6 @@ static void free_list_evsel(struct list_head* list_evsel) %type <num> PE_VALUE %type <num> PE_VALUE_SYM_HW %type <num> PE_VALUE_SYM_SW -%type <num> PE_VALUE_SYM_TOOL %type <mod> PE_MODIFIER_EVENT %type <term_type> PE_TERM %type <num> value_sym @@ -292,7 +290,7 @@ PE_NAME sep_dc struct list_head *list; int err; - err = parse_events_multi_pmu_add(_parse_state, $1, NULL, &list, &@1); + err = parse_events_multi_pmu_add(_parse_state, $1, PERF_COUNT_HW_MAX, NULL, &list, &@1); if (err < 0) { struct parse_events_state *parse_state = _parse_state; struct parse_events_error *error = parse_state->error; @@ -350,20 +348,6 @@ value_sym sep_slash_slash_dc PE_ABORT(err); $$ = list; } -| -PE_VALUE_SYM_TOOL sep_slash_slash_dc -{ - struct list_head *list; - int err; - - list = alloc_list(); - if (!list) - YYNOMEM; - err = parse_events_add_tool(_parse_state, list, $1); - if (err) - YYNOMEM; - $$ = list; -} event_legacy_cache: PE_LEGACY_CACHE opt_event_config diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c index 5ccfe4b64cdf..0dacc133ed39 100644 --- a/tools/perf/util/pfm.c +++ b/tools/perf/util/pfm.c @@ -233,7 +233,7 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, } if (is_libpfm_event_supported(name, cpus, threads)) { - print_cb->print_event(print_state, pinfo->name, topic, + print_cb->print_event(print_state, topic, pinfo->name, name, info->equiv, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", @@ -267,8 +267,8 @@ print_libpfm_event(const struct print_callbacks *print_cb, void *print_state, continue; print_cb->print_event(print_state, - pinfo->name, topic, + pinfo->name, name, /*alias=*/NULL, /*scale_unit=*/NULL, /*deprecated=*/NULL, "PFM event", diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 61bdda01a05a..08a9d0bd9301 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -18,7 +18,9 @@ #include "debug.h" #include "evsel.h" #include "pmu.h" +#include "hwmon_pmu.h" #include "pmus.h" +#include "tool_pmu.h" #include <util/pmu-bison.h> #include <util/pmu-flex.h> #include "parse-events.h" @@ -817,31 +819,6 @@ static int is_sysfs_pmu_core(const char *name) return file_available(path); } -char *perf_pmu__getcpuid(struct perf_pmu *pmu) -{ - char *cpuid; - static bool printed; - - cpuid = getenv("PERF_CPUID"); - if (cpuid) - cpuid = strdup(cpuid); - if (!cpuid) - cpuid = get_cpuid_str(pmu); - if (!cpuid) - return NULL; - - if (!printed) { - pr_debug("Using CPUID %s\n", cpuid); - printed = true; - } - return cpuid; -} - -__weak const struct pmu_metrics_table *pmu_metrics_table__find(void) -{ - return perf_pmu__find_metrics_table(NULL); -} - /** * Return the length of the PMU name not including the suffix for uncore PMUs. * @@ -1168,7 +1145,7 @@ struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pm return pmu; } -static bool perf_pmu__is_fake(const struct perf_pmu *pmu) +bool perf_pmu__is_fake(const struct perf_pmu *pmu) { return pmu->type == PERF_PMU_TYPE_FAKE; } @@ -1366,7 +1343,8 @@ static int pmu_config_term(const struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_term *term, struct parse_events_terms *head_terms, - bool zero, struct parse_events_error *err) + bool zero, bool apply_hardcoded, + struct parse_events_error *err) { struct perf_pmu_format *format; __u64 *vp; @@ -1380,11 +1358,46 @@ static int pmu_config_term(const struct perf_pmu *pmu, return 0; /* - * Hardcoded terms should be already in, so nothing - * to be done for them. + * Hardcoded terms are generally handled in event parsing, which + * traditionally have had to handle not having a PMU. An alias may + * have hard coded config values, optionally apply them below. */ - if (parse_events__is_hardcoded_term(term)) + if (parse_events__is_hardcoded_term(term)) { + /* Config terms set all bits in the config. */ + DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS); + + if (!apply_hardcoded) + return 0; + + bitmap_fill(bits, PERF_PMU_FORMAT_BITS); + + switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_CONFIG: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG1: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config1, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG2: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config2, zero); + break; + case PARSE_EVENTS__TERM_TYPE_CONFIG3: + assert(term->type_val == PARSE_EVENTS__TERM_TYPE_NUM); + pmu_format_value(bits, term->val.num, &attr->config3, zero); + break; + case PARSE_EVENTS__TERM_TYPE_USER: /* Not hardcoded. */ + return -EINVAL; + case PARSE_EVENTS__TERM_TYPE_NAME ... PARSE_EVENTS__TERM_TYPE_HARDWARE: + /* Skip non-config terms. */ + break; + default: + break; + } return 0; + } format = pmu_find_format(&pmu->format, term->config); if (!format) { @@ -1466,13 +1479,12 @@ static int pmu_config_term(const struct perf_pmu *pmu, if (err) { char *err_str; - parse_events_error__handle(err, term->err_val, - asprintf(&err_str, - "value too big for format (%s), maximum is %llu", - format->name, (unsigned long long)max_val) < 0 - ? strdup("value too big for format") - : err_str, - NULL); + if (asprintf(&err_str, + "value too big for format (%s), maximum is %llu", + format->name, (unsigned long long)max_val) < 0) { + err_str = strdup("value too big for format"); + } + parse_events_error__handle(err, term->err_val, err_str, /*help=*/NULL); return -EINVAL; } /* @@ -1488,12 +1500,16 @@ static int pmu_config_term(const struct perf_pmu *pmu, int perf_pmu__config_terms(const struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *terms, - bool zero, struct parse_events_error *err) + bool zero, bool apply_hardcoded, + struct parse_events_error *err) { struct parse_events_term *term; + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__config_terms(pmu, attr, terms, err); + list_for_each_entry(term, &terms->terms, list) { - if (pmu_config_term(pmu, attr, term, terms, zero, err)) + if (pmu_config_term(pmu, attr, term, terms, zero, apply_hardcoded, err)) return -EINVAL; } @@ -1507,6 +1523,7 @@ int perf_pmu__config_terms(const struct perf_pmu *pmu, */ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *head_terms, + bool apply_hardcoded, struct parse_events_error *err) { bool zero = !!pmu->perf_event_attr_init_default; @@ -1515,7 +1532,7 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, if (perf_pmu__is_fake(pmu)) return 0; - return perf_pmu__config_terms(pmu, attr, head_terms, zero, err); + return perf_pmu__config_terms(pmu, attr, head_terms, zero, apply_hardcoded, err); } static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, @@ -1606,7 +1623,7 @@ static int check_info_data(struct perf_pmu *pmu, */ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, struct perf_pmu_info *info, bool *rewrote_terms, - struct parse_events_error *err) + u64 *alternate_hw_config, struct parse_events_error *err) { struct parse_events_term *term, *h; struct perf_pmu_alias *alias; @@ -1623,6 +1640,11 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ info->scale = 0.0; info->snapshot = false; + if (perf_pmu__is_hwmon(pmu)) { + ret = hwmon_pmu__check_alias(head_terms, info, err); + goto out; + } + /* Fake PMU doesn't rewrite terms. */ if (perf_pmu__is_fake(pmu)) goto out; @@ -1638,6 +1660,7 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ NULL); return ret; } + *rewrote_terms = true; ret = check_info_data(pmu, alias, info, err, term->err_term); if (ret) @@ -1646,6 +1669,9 @@ int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_ if (alias->per_pkg) info->per_pkg = true; + if (term->alternate_hw_config) + *alternate_hw_config = term->val.num; + list_del_init(&term->list); parse_events_term__delete(term); } @@ -1790,6 +1816,10 @@ bool perf_pmu__have_event(struct perf_pmu *pmu, const char *name) { if (!name) return false; + if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(name)) + return false; + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__have_event(pmu, name); if (perf_pmu__find_alias(pmu, name, /*load=*/ true) != NULL) return true; if (pmu->cpu_aliases_added || !pmu->events_table) @@ -1801,6 +1831,9 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu) { size_t nr; + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__num_events(pmu); + pmu_aliases_parse(pmu); nr = pmu->sysfs_aliases + pmu->sys_json_aliases; @@ -1811,6 +1844,9 @@ size_t perf_pmu__num_events(struct perf_pmu *pmu) else assert(pmu->cpu_json_aliases == 0); + if (perf_pmu__is_tool(pmu)) + nr -= tool_pmu__num_skip_events(); + return pmu->selectable ? nr + 1 : nr; } @@ -1861,12 +1897,18 @@ int perf_pmu__for_each_event(struct perf_pmu *pmu, bool skip_duplicate_pmus, int ret = 0; struct strbuf sb; + if (perf_pmu__is_hwmon(pmu)) + return hwmon_pmu__for_each_event(pmu, state, cb); + strbuf_init(&sb, /*hint=*/ 0); pmu_aliases_parse(pmu); pmu_add_cpu_aliases(pmu); list_for_each_entry(event, &pmu->aliases, list) { size_t buf_used, pmu_name_len; + if (perf_pmu__is_tool(pmu) && tool_pmu__skip_event(event->name)) + continue; + info.pmu_name = event->pmu_name ?: pmu->name; pmu_name_len = pmu_deduped_name_len(pmu, info.pmu_name, skip_duplicate_pmus); @@ -1949,6 +1991,7 @@ bool perf_pmu__is_software(const struct perf_pmu *pmu) case PERF_TYPE_HW_CACHE: return false; case PERF_TYPE_RAW: return false; case PERF_TYPE_BREAKPOINT: return true; + case PERF_PMU_TYPE_TOOL: return true; default: break; } for (size_t i = 0; i < ARRAY_SIZE(known_sw_pmus); i++) { @@ -2200,11 +2243,6 @@ bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok) (need_fnmatch && !fnmatch(tok, name, 0)); } -double __weak perf_pmu__cpu_slots_per_cycle(void) -{ - return NAN; -} - int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size) { const char *sysfs = sysfs__mountpoint(); @@ -2257,6 +2295,9 @@ int perf_pmu__pathname_fd(int dirfd, const char *pmu_name, const char *filename, void perf_pmu__delete(struct perf_pmu *pmu) { + if (perf_pmu__is_hwmon(pmu)) + hwmon_pmu__exit(pmu); + perf_pmu__del_formats(&pmu->format); perf_pmu__del_aliases(pmu); perf_pmu__del_caps(pmu); @@ -2280,7 +2321,9 @@ const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config) pmu_add_cpu_aliases(pmu); list_for_each_entry(event, &pmu->aliases, list) { struct perf_event_attr attr = {.config = 0,}; - int ret = perf_pmu__config(pmu, &attr, &event->terms, NULL); + + int ret = perf_pmu__config(pmu, &attr, &event->terms, /*apply_hardcoded=*/true, + /*err=*/NULL); if (ret == 0 && config == attr.config) return event->name; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 4397c48ad569..dbed6c243a5e 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -37,6 +37,9 @@ struct perf_pmu_caps { }; enum { + PERF_PMU_TYPE_HWMON_START = 0xFFFF0000, + PERF_PMU_TYPE_HWMON_END = 0xFFFFFFFD, + PERF_PMU_TYPE_TOOL = 0xFFFFFFFE, PERF_PMU_TYPE_FAKE = 0xFFFFFFFF, }; @@ -169,6 +172,10 @@ struct perf_pmu { * exclude_host. */ bool exclude_guest; + /** + * @checked: Are the missing features checked? + */ + bool checked; } missing_features; /** @@ -206,16 +213,18 @@ typedef int (*pmu_format_callback)(void *state, const char *name, int config, void pmu_add_sys_aliases(struct perf_pmu *pmu); int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *head_terms, + bool apply_hardcoded, struct parse_events_error *error); int perf_pmu__config_terms(const struct perf_pmu *pmu, struct perf_event_attr *attr, struct parse_events_terms *terms, - bool zero, struct parse_events_error *error); + bool zero, bool apply_hardcoded, + struct parse_events_error *error); __u64 perf_pmu__format_bits(struct perf_pmu *pmu, const char *name); int perf_pmu__format_type(struct perf_pmu *pmu, const char *name); int perf_pmu__check_alias(struct perf_pmu *pmu, struct parse_events_terms *head_terms, struct perf_pmu_info *info, bool *rewrote_terms, - struct parse_events_error *err); + u64 *alternate_hw_config, struct parse_events_error *err); int perf_pmu__find_event(struct perf_pmu *pmu, const char *event, void *state, pmu_event_callback cb); void perf_pmu_format__set_value(void *format, int config, unsigned long *bits); @@ -253,8 +262,6 @@ void perf_pmu__arch_init(struct perf_pmu *pmu); void pmu_add_cpu_aliases_table(struct perf_pmu *pmu, const struct pmu_events_table *table); -char *perf_pmu__getcpuid(struct perf_pmu *pmu); -const struct pmu_metrics_table *pmu_metrics_table__find(void); bool pmu_uncore_identifier_match(const char *compat, const char *id); int perf_pmu__convert_scale(const char *scale, char **end, double *sval); @@ -268,7 +275,6 @@ void perf_pmu__warn_invalid_formats(struct perf_pmu *pmu); bool perf_pmu__match(const struct perf_pmu *pmu, const char *tok); -double perf_pmu__cpu_slots_per_cycle(void); int perf_pmu__event_source_devices_scnprintf(char *pathname, size_t size); int perf_pmu__pathname_scnprintf(char *buf, size_t size, const char *pmu_name, const char *filename); @@ -280,6 +286,8 @@ struct perf_pmu *perf_pmu__lookup(struct list_head *pmus, int dirfd, const char struct perf_pmu *perf_pmu__create_placeholder_core_pmu(struct list_head *core_pmus); void perf_pmu__delete(struct perf_pmu *pmu); struct perf_pmu *perf_pmus__find_core_pmu(void); + const char *perf_pmu__name_from_config(struct perf_pmu *pmu, u64 config); +bool perf_pmu__is_fake(const struct perf_pmu *pmu); #endif /* __PMU_H */ diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c index 52109af5f2f1..b493da0d22ef 100644 --- a/tools/perf/util/pmus.c +++ b/tools/perf/util/pmus.c @@ -15,6 +15,8 @@ #include "evsel.h" #include "pmus.h" #include "pmu.h" +#include "hwmon_pmu.h" +#include "tool_pmu.h" #include "print-events.h" #include "strbuf.h" @@ -200,6 +202,7 @@ static void pmu_read_sysfs(bool core_only) int fd; DIR *dir; struct dirent *dent; + struct perf_pmu *tool_pmu; if (read_sysfs_all_pmus || (core_only && read_sysfs_core_pmus)) return; @@ -229,6 +232,11 @@ static void pmu_read_sysfs(bool core_only) pr_err("Failure to set up any core PMUs\n"); } list_sort(NULL, &core_pmus, pmus_cmp); + if (!core_only) { + tool_pmu = perf_pmus__tool_pmu(); + list_add_tail(&tool_pmu->list, &other_pmus); + perf_pmus__read_hwmon_pmus(&other_pmus); + } list_sort(NULL, &other_pmus, pmus_cmp); if (!list_empty(&core_pmus)) { read_sysfs_core_pmus = true; @@ -434,6 +442,7 @@ static int perf_pmus__print_pmu_events__callback(void *vstate, pr_err("Unexpected event %s/%s/\n", info->pmu->name, info->name); return 1; } + assert(info->pmu != NULL || info->name != NULL); s = &state->aliases[state->index]; s->pmu = info->pmu; #define COPY_STR(str) s->str = info->str ? strdup(info->str) : NULL @@ -494,8 +503,8 @@ void perf_pmus__print_pmu_events(const struct print_callbacks *print_cb, void *p goto free; print_cb->print_event(print_state, - aliases[j].pmu_name, aliases[j].topic, + aliases[j].pmu_name, aliases[j].name, aliases[j].alias, aliases[j].scale_unit, @@ -724,6 +733,13 @@ struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name) return perf_pmu__lookup(&other_pmus, test_sysfs_dirfd, name, /*eager_load=*/true); } +struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir, + const char *sysfs_name, + const char *name) +{ + return hwmon_pmu__new(&other_pmus, hwmon_dir, sysfs_name, name); +} + struct perf_pmu *perf_pmus__fake_pmu(void) { static struct perf_pmu fake = { diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h index e1742b56eec7..a0cb0eb2ff97 100644 --- a/tools/perf/util/pmus.h +++ b/tools/perf/util/pmus.h @@ -30,6 +30,9 @@ bool perf_pmus__supports_extended_type(void); char *perf_pmus__default_pmu_name(void); struct perf_pmu *perf_pmus__add_test_pmu(int test_sysfs_dirfd, const char *name); +struct perf_pmu *perf_pmus__add_test_hwmon_pmu(int hwmon_dir, + const char *sysfs_name, + const char *name); struct perf_pmu *perf_pmus__fake_pmu(void); #endif /* __PMUS_H */ diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index 81e0135cddf0..a786cbfb0ff5 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -29,6 +29,7 @@ #include "tracepoint.h" #include "pfm.h" #include "thread_map.h" +#include "tool_pmu.h" #include "util.h" #define MAX_NAME_LEN 100 @@ -43,21 +44,6 @@ static const char * const event_type_descriptors[] = { "Hardware breakpoint", }; -static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = { - [PERF_TOOL_DURATION_TIME] = { - .symbol = "duration_time", - .alias = "", - }, - [PERF_TOOL_USER_TIME] = { - .symbol = "user_time", - .alias = "", - }, - [PERF_TOOL_SYSTEM_TIME] = { - .symbol = "system_time", - .alias = "", - }, -}; - /* * Print the events from <debugfs_mount_point>/tracing/events */ @@ -341,24 +327,6 @@ int print_hwcache_events(const struct print_callbacks *print_cb, void *print_sta return 0; } -void print_tool_events(const struct print_callbacks *print_cb, void *print_state) -{ - // Start at 1 because the first enum entry means no tool event. - for (int i = 1; i < PERF_TOOL_MAX; ++i) { - print_cb->print_event(print_state, - "tool", - /*pmu_name=*/NULL, - event_symbols_tool[i].symbol, - event_symbols_tool[i].alias, - /*scale_unit=*/NULL, - /*deprecated=*/false, - "Tool event", - /*desc=*/NULL, - /*long_desc=*/NULL, - /*encoding_desc=*/NULL); - } -} - void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, unsigned int type, const struct event_symbol *syms, unsigned int max) @@ -422,8 +390,6 @@ void print_events(const struct print_callbacks *print_cb, void *print_state) print_symbol_events(print_cb, print_state, PERF_TYPE_SOFTWARE, event_symbols_sw, PERF_COUNT_SW_MAX); - print_tool_events(print_cb, print_state); - print_hwcache_events(print_cb, print_state); perf_pmus__print_pmu_events(print_cb, print_state); diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h index bf4290bef0cd..445efa1636c1 100644 --- a/tools/perf/util/print-events.h +++ b/tools/perf/util/print-events.h @@ -36,7 +36,6 @@ void print_sdt_events(const struct print_callbacks *print_cb, void *print_state) void print_symbol_events(const struct print_callbacks *print_cb, void *print_state, unsigned int type, const struct event_symbol *syms, unsigned int max); -void print_tool_events(const struct print_callbacks *print_cb, void *print_state); void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state); bool is_event_supported(u8 type, u64 config); diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a17c9b8a7a79..6d51a4c98ad7 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -40,6 +40,7 @@ #include "session.h" #include "string2.h" #include "strbuf.h" +#include "parse-events.h" #include <subcmd/pager.h> #include <linux/ctype.h> @@ -51,6 +52,9 @@ #define PERFPROBE_GROUP "probe" +/* Defined in kernel/trace/trace.h */ +#define MAX_EVENT_NAME_LEN 64 + bool probe_event_dry_run; /* Dry run flag */ struct probe_conf probe_conf = { .magic_num = DEFAULT_PROBE_MAGIC_NUM }; @@ -342,7 +346,7 @@ elf_err: return mod_name; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT static int kernel_get_module_dso(const char *module, struct dso **pdso) { @@ -1036,6 +1040,17 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) return rv; } +static int sprint_line_description(char *sbuf, size_t size, struct line_range *lr) +{ + if (!lr->function) + return snprintf(sbuf, size, "file: %s, line: %d", lr->file, lr->start); + + if (lr->file) + return snprintf(sbuf, size, "function: %s, file:%s, line: %d", lr->function, lr->file, lr->start); + + return snprintf(sbuf, size, "function: %s, line:%d", lr->function, lr->start); +} + #define show_one_line_with_num(f,l) _show_one_line(f,l,false,true) #define show_one_line(f,l) _show_one_line(f,l,false,false) #define skip_one_line(f,l) _show_one_line(f,l,true,false) @@ -1065,9 +1080,12 @@ static int __show_line_range(struct line_range *lr, const char *module, ret = debuginfo__find_line_range(dinfo, lr); if (!ret) { /* Not found, retry with an alternative */ + pr_debug2("Failed to find line range in debuginfo. Fallback to alternative\n"); ret = get_alternative_line_range(dinfo, lr, module, user); if (!ret) ret = debuginfo__find_line_range(dinfo, lr); + else /* Ignore error, we just failed to find it. */ + ret = -ENOENT; } if (dinfo->build_id) { build_id__init(&bid, dinfo->build_id, BUILD_ID_SIZE); @@ -1075,7 +1093,8 @@ static int __show_line_range(struct line_range *lr, const char *module, } debuginfo__delete(dinfo); if (ret == 0 || ret == -ENOENT) { - pr_warning("Specified source line is not found.\n"); + sprint_line_description(sbuf, sizeof(sbuf), lr); + pr_warning("Specified source line(%s) is not found.\n", sbuf); return -ENOENT; } else if (ret < 0) { pr_warning("Debuginfo analysis failed.\n"); @@ -1250,7 +1269,7 @@ out: return ret; } -#else /* !HAVE_DWARF_SUPPORT */ +#else /* !HAVE_LIBDW_SUPPORT */ static void debuginfo_cache__exit(void) { @@ -1343,30 +1362,39 @@ static bool is_c_func_name(const char *name) * * SRC[:SLN[+NUM|-ELN]] * FNC[@SRC][:SLN[+NUM|-ELN]] + * + * FNC@SRC accepts `FNC@*` which forcibly specify FNC as function name. + * SRC and FUNC can be quoted by double/single quotes. */ int parse_line_range_desc(const char *arg, struct line_range *lr) { - char *range, *file, *name = strdup(arg); + char *buf = strdup(arg); + char *p; int err; - if (!name) + if (!buf) return -ENOMEM; lr->start = 0; lr->end = INT_MAX; - range = strchr(name, ':'); - if (range) { - *range++ = '\0'; + p = strpbrk_esq(buf, ":"); + if (p) { + if (p == buf) { + semantic_error("No file/function name in '%s'.\n", p); + err = -EINVAL; + goto err; + } + *(p++) = '\0'; - err = parse_line_num(&range, &lr->start, "start line"); + err = parse_line_num(&p, &lr->start, "start line"); if (err) goto err; - if (*range == '+' || *range == '-') { - const char c = *range++; + if (*p == '+' || *p == '-') { + const char c = *(p++); - err = parse_line_num(&range, &lr->end, "end line"); + err = parse_line_num(&p, &lr->end, "end line"); if (err) goto err; @@ -1390,34 +1418,41 @@ int parse_line_range_desc(const char *arg, struct line_range *lr) " than end line.\n"); goto err; } - if (*range != '\0') { - semantic_error("Tailing with invalid str '%s'.\n", range); + if (*p != '\0') { + semantic_error("Tailing with invalid str '%s'.\n", p); goto err; } } - file = strchr(name, '@'); - if (file) { - *file = '\0'; - lr->file = strdup(++file); - if (lr->file == NULL) { - err = -ENOMEM; + p = strpbrk_esq(buf, "@"); + if (p) { + *p++ = '\0'; + if (strcmp(p, "*")) { + lr->file = strdup_esq(p); + if (lr->file == NULL) { + err = -ENOMEM; + goto err; + } + } + if (*buf != '\0') + lr->function = strdup_esq(buf); + if (!lr->function && !lr->file) { + semantic_error("Only '@*' is not allowed.\n"); + err = -EINVAL; goto err; } - lr->function = name; - } else if (strchr(name, '/') || strchr(name, '.')) - lr->file = name; - else if (is_c_func_name(name))/* We reuse it for checking funcname */ - lr->function = name; + } else if (strpbrk_esq(buf, "/.")) + lr->file = strdup_esq(buf); + else if (is_c_func_name(buf))/* We reuse it for checking funcname */ + lr->function = strdup_esq(buf); else { /* Invalid name */ - semantic_error("'%s' is not a valid function name.\n", name); + semantic_error("'%s' is not a valid function name.\n", buf); err = -EINVAL; goto err; } - return 0; err: - free(name); + free(buf); return err; } @@ -1425,19 +1460,19 @@ static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev) { char *ptr; - ptr = strpbrk_esc(*arg, ":"); + ptr = strpbrk_esq(*arg, ":"); if (ptr) { *ptr = '\0'; if (!pev->sdt && !is_c_func_name(*arg)) goto ng_name; - pev->group = strdup_esc(*arg); + pev->group = strdup_esq(*arg); if (!pev->group) return -ENOMEM; *arg = ptr + 1; } else pev->group = NULL; - pev->event = strdup_esc(*arg); + pev->event = strdup_esq(*arg); if (pev->event == NULL) return -ENOMEM; @@ -1476,7 +1511,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg++; } - ptr = strpbrk_esc(arg, ";=@+%"); + ptr = strpbrk_esq(arg, ";=@+%"); if (pev->sdt) { if (ptr) { if (*ptr != '@') { @@ -1490,7 +1525,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) pev->target = build_id_cache__origname(tmp); free(tmp); } else - pev->target = strdup_esc(ptr + 1); + pev->target = strdup_esq(ptr + 1); if (!pev->target) return -ENOMEM; *ptr = '\0'; @@ -1531,7 +1566,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) file_spec = true; } - ptr = strpbrk_esc(arg, ";:+@%"); + ptr = strpbrk_esq(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1540,7 +1575,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (arg[0] == '\0') tmp = NULL; else { - tmp = strdup_esc(arg); + tmp = strdup_esq(arg); if (tmp == NULL) return -ENOMEM; } @@ -1578,7 +1613,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) return -ENOMEM; break; } - ptr = strpbrk_esc(arg, ";:+@%"); + ptr = strpbrk_esq(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1605,7 +1640,9 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) semantic_error("SRC@SRC is not allowed.\n"); return -EINVAL; } - pp->file = strdup_esc(arg); + if (!strcmp(arg, "*")) + break; + pp->file = strdup_esq(arg); if (pp->file == NULL) return -ENOMEM; break; @@ -2757,7 +2794,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base, /* Try no suffix number */ ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? "__return" : ""); if (ret < 0) { - pr_warning("snprintf() failed: %d; the event name nbase='%s' is too long\n", ret, nbase); + pr_warning("snprintf() failed: %d; the event name '%s' is too long\n" + " Hint: Set a shorter event with syntax \"EVENT=PROBEDEF\"\n" + " EVENT: Event name (max length: %d bytes).\n", + ret, nbase, MAX_EVENT_NAME_LEN); goto out; } if (!strlist__has_entry(namelist, buf)) @@ -2777,7 +2817,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base, for (i = 1; i < MAX_EVENT_INDEX; i++) { ret = e_snprintf(buf, len, "%s_%d", nbase, i); if (ret < 0) { - pr_debug("snprintf() failed: %d\n", ret); + pr_warning("Add suffix failed: %d; the event name '%s' is too long\n" + " Hint: Set a shorter event with syntax \"EVENT=PROBEDEF\"\n" + " EVENT: Event name (max length: %d bytes).\n", + ret, nbase, MAX_EVENT_NAME_LEN); goto out; } if (!strlist__has_entry(namelist, buf)) @@ -2841,7 +2884,7 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, bool allow_suffix) { const char *event, *group; - char buf[64]; + char buf[MAX_EVENT_NAME_LEN]; int ret; /* If probe_event or trace_event already have the name, reuse it */ @@ -2865,6 +2908,12 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, else group = PERFPROBE_GROUP; + if (strlen(group) >= MAX_EVENT_NAME_LEN) { + pr_err("Probe group string='%s' is too long (>= %d bytes)\n", + group, MAX_EVENT_NAME_LEN); + return -ENOMEM; + } + /* Get an unused new event name */ ret = get_new_event_name(buf, sizeof(buf), event, namelist, tev->point.retprobe, allow_suffix); @@ -3705,59 +3754,6 @@ void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs) } } -int add_perf_probe_events(struct perf_probe_event *pevs, int npevs) -{ - int ret; - - ret = init_probe_symbol_maps(pevs->uprobes); - if (ret < 0) - return ret; - - ret = convert_perf_probe_events(pevs, npevs); - if (ret == 0) - ret = apply_perf_probe_events(pevs, npevs); - - cleanup_perf_probe_events(pevs, npevs); - - exit_probe_symbol_maps(); - return ret; -} - -int del_perf_probe_events(struct strfilter *filter) -{ - int ret, ret2, ufd = -1, kfd = -1; - char *str = strfilter__string(filter); - - if (!str) - return -EINVAL; - - /* Get current event names */ - ret = probe_file__open_both(&kfd, &ufd, PF_FL_RW); - if (ret < 0) - goto out; - - ret = probe_file__del_events(kfd, filter); - if (ret < 0 && ret != -ENOENT) - goto error; - - ret2 = probe_file__del_events(ufd, filter); - if (ret2 < 0 && ret2 != -ENOENT) { - ret = ret2; - goto error; - } - ret = 0; - -error: - if (kfd >= 0) - close(kfd); - if (ufd >= 0) - close(ufd); -out: - free(str); - - return ret; -} - int show_available_funcs(const char *target, struct nsinfo *nsi, struct strfilter *_filter, bool user) { diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index 7e3b6c3d1f74..61a5f4ff4e9c 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -159,7 +159,6 @@ void line_range__clear(struct line_range *lr); /* Initialize line range */ int line_range__init(struct line_range *lr); -int add_perf_probe_events(struct perf_probe_event *pevs, int npevs); int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs); int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs); int show_probe_trace_events(struct perf_probe_event *pevs, int npevs); @@ -168,8 +167,6 @@ void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs); struct strfilter; -int del_perf_probe_events(struct strfilter *filter); - int show_perf_probe_event(const char *group, const char *event, struct perf_probe_event *pev, const char *module, bool use_stdout); diff --git a/tools/perf/util/probe-file.c b/tools/perf/util/probe-file.c index 3d50de3217d5..ec8ac242fedb 100644 --- a/tools/perf/util/probe-file.c +++ b/tools/perf/util/probe-file.c @@ -366,25 +366,6 @@ int probe_file__del_strlist(int fd, struct strlist *namelist) return ret; } -int probe_file__del_events(int fd, struct strfilter *filter) -{ - struct strlist *namelist; - int ret; - - namelist = strlist__new(NULL, NULL); - if (!namelist) - return -ENOMEM; - - ret = probe_file__get_events(fd, filter, namelist); - if (ret < 0) - goto out; - - ret = probe_file__del_strlist(fd, namelist); -out: - strlist__delete(namelist); - return ret; -} - /* Caller must ensure to remove this entry from list */ static void probe_cache_entry__delete(struct probe_cache_entry *entry) { diff --git a/tools/perf/util/probe-file.h b/tools/perf/util/probe-file.h index 0dba88c0f5f0..c2bb6a5b9dcc 100644 --- a/tools/perf/util/probe-file.h +++ b/tools/perf/util/probe-file.h @@ -44,7 +44,6 @@ struct strlist *probe_file__get_namelist(int fd); struct strlist *probe_file__get_rawlist(int fd); int probe_file__add_event(int fd, struct probe_trace_event *tev); -int probe_file__del_events(int fd, struct strfilter *filter); int probe_file__get_events(int fd, struct strfilter *filter, struct strlist *plist); int probe_file__del_strlist(int fd, struct strlist *namelist); diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 630e16c54ed5..7f2ee0cb43ca 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -56,7 +56,7 @@ static struct probe_trace_arg_ref *alloc_trace_arg_ref(long offs) */ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, Dwarf_Op *fb_ops, Dwarf_Die *sp_die, - unsigned int machine, + const struct probe_finder *pf, struct probe_trace_arg *tvar) { Dwarf_Attribute attr; @@ -166,7 +166,7 @@ static_var: if (!tvar) return ret2; - regs = get_dwarf_regstr(regn, machine); + regs = get_dwarf_regstr(regn, pf->e_machine, pf->e_flags); if (!regs) { /* This should be a bug in DWARF or this tool */ pr_warning("Mapping for the register number %u " @@ -451,7 +451,7 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) dwarf_diename(vr_die)); ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops, - &pf->sp_die, pf->machine, pf->tvar); + &pf->sp_die, pf, pf->tvar); if (ret == -ENOENT && pf->skip_empty_arg) /* This can be found in other place. skip it */ return 0; @@ -602,7 +602,6 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) ret = dwarf_getlocation_addr(&fb_attr, pf->addr, &pf->fb_ops, &nops, 1); if (ret <= 0 || nops == 0) { pf->fb_ops = NULL; -#ifdef HAVE_DWARF_CFI_SUPPORT } else if (nops == 1 && pf->fb_ops[0].atom == DW_OP_call_frame_cfa && (pf->cfi_eh != NULL || pf->cfi_dbg != NULL)) { if ((dwarf_cfi_addrframe(pf->cfi_eh, pf->addr, &frame) != 0 && @@ -613,7 +612,6 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) free(frame); return -ENOENT; } -#endif /* HAVE_DWARF_CFI_SUPPORT */ } /* Call finder's callback handler */ @@ -1136,9 +1134,9 @@ static int debuginfo__find_probes(struct debuginfo *dbg, if (gelf_getehdr(elf, &ehdr) == NULL) return -EINVAL; - pf->machine = ehdr.e_machine; + pf->e_machine = ehdr.e_machine; + pf->e_flags = ehdr.e_flags; -#ifdef HAVE_DWARF_CFI_SUPPORT do { GElf_Shdr shdr; @@ -1148,7 +1146,6 @@ static int debuginfo__find_probes(struct debuginfo *dbg, pf->cfi_dbg = dwarf_getcfi(dbg->dbg); } while (0); -#endif /* HAVE_DWARF_CFI_SUPPORT */ ret = debuginfo__find_probe_location(dbg, pf); return ret; @@ -1175,7 +1172,7 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data) (tag == DW_TAG_variable && vf->vars)) { if (convert_variable_location(die_mem, vf->pf->addr, vf->pf->fb_ops, &pf->sp_die, - pf->machine, NULL) == 0) { + pf, /*tvar=*/NULL) == 0) { vf->args[vf->nargs].var = (char *)dwarf_diename(die_mem); if (vf->args[vf->nargs].var == NULL) { vf->ret = -ENOMEM; @@ -1379,6 +1376,8 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, if (ret >= 0 && tf.pf.skip_empty_arg) ret = fill_empty_trace_arg(pev, tf.tevs, tf.ntevs); + dwarf_cfi_end(tf.pf.cfi_eh); + if (ret < 0 || tf.ntevs == 0) { for (i = 0; i < tf.ntevs; i++) clear_probe_trace_event(&tf.tevs[i]); @@ -1404,7 +1403,7 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) tag == DW_TAG_variable) { ret = convert_variable_location(die_mem, af->pf.addr, af->pf.fb_ops, &af->pf.sp_die, - af->pf.machine, NULL); + &af->pf, /*tvar=*/NULL); if (ret == 0 || ret == -ERANGE) { int ret2; bool externs = !af->child; @@ -1583,8 +1582,21 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, /* Find a corresponding function (name, baseline and baseaddr) */ if (die_find_realfunc(&cudie, (Dwarf_Addr)addr, &spdie)) { - /* Get function entry information */ - func = basefunc = dwarf_diename(&spdie); + /* + * Get function entry information. + * + * As described in the document DWARF Debugging Information + * Format Version 5, section 2.22 Linkage Names, "mangled names, + * are used in various ways, ... to distinguish multiple + * entities that have the same name". + * + * Firstly try to get distinct linkage name, if fail then + * rollback to get associated name in DIE. + */ + func = basefunc = die_get_linkage_name(&spdie); + if (!func) + func = basefunc = dwarf_diename(&spdie); + if (!func || die_entrypc(&spdie, &baseaddr) != 0 || dwarf_decl_line(&spdie, &baseline) != 0) { @@ -1863,7 +1875,11 @@ int find_source_path(const char *raw_path, const char *sbuild_id, const char *prefix = symbol_conf.source_prefix; if (sbuild_id && !prefix) { - if (!get_source_from_debuginfod(raw_path, sbuild_id, new_path)) + char prefixed_raw_path[PATH_MAX]; + + path__join(prefixed_raw_path, sizeof(prefixed_raw_path), comp_dir, raw_path); + + if (!get_source_from_debuginfod(prefixed_raw_path, sbuild_id, new_path)) return 0; } diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 3add5ff516e1..be7b46ea2460 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -21,7 +21,7 @@ static inline int is_c_varname(const char *name) return isalpha(name[0]) || name[0] == '_'; } -#ifdef HAVE_DWARF_SUPPORT +#ifdef HAVE_LIBDW_SUPPORT #include "dwarf-aux.h" #include "debuginfo.h" @@ -63,14 +63,13 @@ struct probe_finder { struct intlist *lcache; /* Line cache for lazy match */ /* For variable searching */ -#if _ELFUTILS_PREREQ(0, 142) - /* Call Frame Information from .eh_frame */ + /* Call Frame Information from .eh_frame. Owned by this struct. */ Dwarf_CFI *cfi_eh; - /* Call Frame Information from .debug_frame */ + /* Call Frame Information from .debug_frame. Not owned. */ Dwarf_CFI *cfi_dbg; -#endif Dwarf_Op *fb_ops; /* Frame base attribute */ - unsigned int machine; /* Target machine arch */ + unsigned int e_machine; /* ELF target machine arch */ + unsigned int e_flags; /* ELF target machine flags */ struct perf_probe_arg *pvar; /* Current target variable */ struct probe_trace_arg *tvar; /* Current result variable */ bool skip_empty_arg; /* Skip non-exist args */ @@ -104,6 +103,6 @@ struct line_finder { int found; }; -#endif /* HAVE_DWARF_SUPPORT */ +#endif /* HAVE_LIBDW_SUPPORT */ #endif /*_PROBE_FINDER_H */ diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 31a223eaf8e6..2096cdbaa53b 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -6,7 +6,7 @@ #include <linux/err.h> #include <perf/cpumap.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #include <perf/mmap.h> #include "evlist.h" @@ -19,6 +19,7 @@ #include "util/bpf-filter.h" #include "util/env.h" #include "util/kvm-stat.h" +#include "util/stat.h" #include "util/kwork.h" #include "util/sample.h" #include "util/lock-contention.h" @@ -1355,6 +1356,7 @@ error: unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH; +#ifdef HAVE_KVM_STAT_SUPPORT bool kvm_entry_event(struct evsel *evsel __maybe_unused) { return false; @@ -1384,6 +1386,7 @@ void exit_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused, char *decode __maybe_unused) { } +#endif // HAVE_KVM_STAT_SUPPORT int find_scripts(char **scripts_array __maybe_unused, char **scripts_path_array __maybe_unused, int num __maybe_unused, int pathlen __maybe_unused) diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index 73846b73d0cf..30638653ad2d 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -345,7 +345,7 @@ static bool s390_cpumsf_trailer_show(const char *color, size_t pos, } color_fprintf(stdout, color, " [%#08zx] Trailer %c%c%c bsdes:%d" " dsdes:%d Overflow:%lld Time:%#llx\n" - "\t\tC:%d TOD:%#lx\n", + "\t\tC:%d TOD:%#llx\n", pos, te->f ? 'F' : ' ', te->a ? 'A' : ' ', diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 53383e97ec9d..335217bb532b 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -98,12 +98,12 @@ static void s390_cpumcfdg_dumptrail(const char *color, size_t offset, te.res2 = be32_to_cpu(tep->res2); color_fprintf(stdout, color, " [%#08zx] Trailer:%c%c%c%c%c" - " Cfvn:%d Csvn:%d Speed:%d TOD:%#llx\n", + " Cfvn:%d Csvn:%d Speed:%d TOD:%#lx\n", offset, te.clock_base ? 'T' : ' ', te.speed ? 'S' : ' ', te.mtda ? 'M' : ' ', te.caca ? 'C' : ' ', te.lcda ? 'L' : ' ', te.cfvn, te.csvn, te.cpu_speed, te.timestamp); - color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#llx" + color_fprintf(stdout, color, "\t\t1:%lx 2:%lx 3:%lx TOD-Base:%#lx" " Type:%x\n\n", te.progusage1, te.progusage2, te.progusage3, te.tod_base, te.mach_type); @@ -205,7 +205,7 @@ static void s390_cpumcfdg_dump(struct perf_pmu *pmu, struct perf_sample *sample) char *ev_name = get_counter_name(ce.set, i, pmu); color_fprintf(stdout, color, - "\tCounter:%03d %s Value:%#018lx\n", i, + "\tCounter:%03zd %s Value:%#018"PRIx64"\n", i, ev_name ?: "<unknown>", be64_to_cpu(*p)); free(ev_name); } @@ -260,7 +260,7 @@ static void s390_pai_all_dump(struct evsel *evsel, struct perf_sample *sample) ev_name = get_counter_name(evsel->core.attr.config, pai_data.event_nr, evsel->pmu); - color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018lx\n", + color_fprintf(stdout, color, "\tCounter:%03d %s Value:%#018"PRIx64"\n", pai_data.event_nr, ev_name ?: "<unknown>", pai_data.event_val); free(ev_name); diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index e16257d5ab2c..85b7f188f729 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -27,7 +27,7 @@ #include <errno.h> #include <linux/bitmap.h> #include <linux/time64.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <stdbool.h> /* perl needs the following define, right after including stdbool.h */ diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index d7183134b669..8bdae066e839 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -31,7 +31,7 @@ #include <linux/compiler.h> #include <linux/time64.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #include "../build-id.h" @@ -793,7 +793,8 @@ static int set_regs_in_dict(PyObject *dict, static void set_sym_in_dict(PyObject *dict, struct addr_location *al, const char *dso_field, const char *dso_bid_field, const char *dso_map_start, const char *dso_map_end, - const char *sym_field, const char *symoff_field) + const char *sym_field, const char *symoff_field, + const char *map_pgoff) { char sbuild_id[SBUILD_ID_SIZE]; @@ -809,6 +810,8 @@ static void set_sym_in_dict(PyObject *dict, struct addr_location *al, PyLong_FromUnsignedLong(map__start(al->map))); pydict_set_item_string_decref(dict, dso_map_end, PyLong_FromUnsignedLong(map__end(al->map))); + pydict_set_item_string_decref(dict, map_pgoff, + PyLong_FromUnsignedLongLong(map__pgoff(al->map))); } if (al->sym) { pydict_set_item_string_decref(dict, sym_field, @@ -895,7 +898,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, pydict_set_item_string_decref(dict, "comm", _PyUnicode_FromString(thread__comm_str(al->thread))); set_sym_in_dict(dict, al, "dso", "dso_bid", "dso_map_start", "dso_map_end", - "symbol", "symoff"); + "symbol", "symoff", "map_pgoff"); pydict_set_item_string_decref(dict, "callchain", callchain); @@ -920,7 +923,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyBool_FromLong(1)); set_sym_in_dict(dict_sample, addr_al, "addr_dso", "addr_dso_bid", "addr_dso_map_start", "addr_dso_map_end", - "addr_symbol", "addr_symoff"); + "addr_symbol", "addr_symoff", "addr_map_pgoff"); } if (sample->flags) diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index dbaf07bf6c5f..507e6cba9545 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1171,18 +1171,24 @@ static int deliver_sample_value(struct evlist *evlist, union perf_event *event, struct perf_sample *sample, struct sample_read_value *v, - struct machine *machine) + struct machine *machine, + bool per_thread) { struct perf_sample_id *sid = evlist__id2sid(evlist, v->id); struct evsel *evsel; + u64 *storage = NULL; if (sid) { + storage = perf_sample_id__get_period_storage(sid, sample->tid, per_thread); + } + + if (storage) { sample->id = v->id; - sample->period = v->value - sid->period; - sid->period = v->value; + sample->period = v->value - *storage; + *storage = v->value; } - if (!sid || sid->evsel == NULL) { + if (!storage || sid->evsel == NULL) { ++evlist->stats.nr_unknown_id; return 0; } @@ -1203,17 +1209,19 @@ static int deliver_sample_group(struct evlist *evlist, union perf_event *event, struct perf_sample *sample, struct machine *machine, - u64 read_format) + u64 read_format, + bool per_thread) { int ret = -EINVAL; struct sample_read_value *v = sample->read.group.values; if (tool->dont_split_sample_group) - return deliver_sample_value(evlist, tool, event, sample, v, machine); + return deliver_sample_value(evlist, tool, event, sample, v, machine, + per_thread); sample_read_group__for_each(v, sample->read.group.nr, read_format) { ret = deliver_sample_value(evlist, tool, event, sample, v, - machine); + machine, per_thread); if (ret) break; } @@ -1228,6 +1236,7 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool /* We know evsel != NULL. */ u64 sample_type = evsel->core.attr.sample_type; u64 read_format = evsel->core.attr.read_format; + bool per_thread = perf_evsel__attr_has_per_thread_sample_period(&evsel->core); /* Standard sample delivery. */ if (!(sample_type & PERF_SAMPLE_READ)) @@ -1236,10 +1245,11 @@ static int evlist__deliver_sample(struct evlist *evlist, const struct perf_tool /* For PERF_SAMPLE_READ we have either single or group mode. */ if (read_format & PERF_FORMAT_GROUP) return deliver_sample_group(evlist, tool, event, sample, - machine, read_format); + machine, read_format, per_thread); else return deliver_sample_value(evlist, tool, event, sample, - &sample->read.one, machine); + &sample->read.one, machine, + per_thread); } static int machines__deliver_event(struct machines *machines, diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 013020f33ece..9dd60c7869a2 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -35,7 +35,7 @@ #include <linux/string.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif regex_t parent_regex; @@ -677,6 +677,102 @@ struct sort_entry sort_sym_ipc_null = { .se_width_idx = HISTC_SYMBOL_IPC, }; +/* --sort callchain_branch_predicted */ + +static int64_t +sort__callchain_branch_predicted_cmp(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int hist_entry__callchain_branch_predicted_snprintf( + struct hist_entry *he, char *bf, size_t size, unsigned int width) +{ + u64 branch_count, predicted_count; + double percent = 0.0; + char str[32]; + + callchain_branch_counts(he->callchain, &branch_count, + &predicted_count, NULL, NULL); + + if (branch_count) + percent = predicted_count * 100.0 / branch_count; + + snprintf(str, sizeof(str), "%.1f%%", percent); + return repsep_snprintf(bf, size, "%-*.*s", width, width, str); +} + +struct sort_entry sort_callchain_branch_predicted = { + .se_header = "Predicted", + .se_cmp = sort__callchain_branch_predicted_cmp, + .se_snprintf = hist_entry__callchain_branch_predicted_snprintf, + .se_width_idx = HISTC_CALLCHAIN_BRANCH_PREDICTED, +}; + +/* --sort callchain_branch_abort */ + +static int64_t +sort__callchain_branch_abort_cmp(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int hist_entry__callchain_branch_abort_snprintf(struct hist_entry *he, + char *bf, size_t size, + unsigned int width) +{ + u64 branch_count, abort_count; + char str[32]; + + callchain_branch_counts(he->callchain, &branch_count, + NULL, &abort_count, NULL); + + snprintf(str, sizeof(str), "%" PRId64, abort_count); + return repsep_snprintf(bf, size, "%-*.*s", width, width, str); +} + +struct sort_entry sort_callchain_branch_abort = { + .se_header = "Abort", + .se_cmp = sort__callchain_branch_abort_cmp, + .se_snprintf = hist_entry__callchain_branch_abort_snprintf, + .se_width_idx = HISTC_CALLCHAIN_BRANCH_ABORT, +}; + +/* --sort callchain_branch_cycles */ + +static int64_t +sort__callchain_branch_cycles_cmp(struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int hist_entry__callchain_branch_cycles_snprintf(struct hist_entry *he, + char *bf, size_t size, + unsigned int width) +{ + u64 branch_count, cycles_count, cycles = 0; + char str[32]; + + callchain_branch_counts(he->callchain, &branch_count, + NULL, NULL, &cycles_count); + + if (branch_count) + cycles = cycles_count / branch_count; + + snprintf(str, sizeof(str), "%" PRId64 "", cycles); + return repsep_snprintf(bf, size, "%-*.*s", width, width, str); +} + +struct sort_entry sort_callchain_branch_cycles = { + .se_header = "Cycles", + .se_cmp = sort__callchain_branch_cycles_cmp, + .se_snprintf = hist_entry__callchain_branch_cycles_snprintf, + .se_width_idx = HISTC_CALLCHAIN_BRANCH_CYCLES, +}; + /* --sort srcfile */ static char no_srcfile[1]; @@ -2456,6 +2552,15 @@ static struct sort_dimension bstack_sort_dimensions[] = { DIM(SORT_SYM_IPC, "ipc_lbr", sort_sym_ipc), DIM(SORT_ADDR_FROM, "addr_from", sort_addr_from), DIM(SORT_ADDR_TO, "addr_to", sort_addr_to), + DIM(SORT_CALLCHAIN_BRANCH_PREDICTED, + "callchain_branch_predicted", + sort_callchain_branch_predicted), + DIM(SORT_CALLCHAIN_BRANCH_ABORT, + "callchain_branch_abort", + sort_callchain_branch_abort), + DIM(SORT_CALLCHAIN_BRANCH_CYCLES, + "callchain_branch_cycles", + sort_callchain_branch_cycles) }; #undef DIM @@ -3484,7 +3589,13 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok, if (!sd->name || strncasecmp(tok, sd->name, strlen(tok))) continue; - if (sort__mode != SORT_MODE__BRANCH) + if ((sort__mode != SORT_MODE__BRANCH) && + strncasecmp(tok, "callchain_branch_predicted", + strlen(tok)) && + strncasecmp(tok, "callchain_branch_abort", + strlen(tok)) && + strncasecmp(tok, "callchain_branch_cycles", + strlen(tok))) return -EINVAL; if (sd->entry == &sort_sym_from || sd->entry == &sort_sym_to) diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 9ff68c6786e7..a8572574e168 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -88,6 +88,9 @@ enum sort_type { SORT_SYM_IPC, SORT_ADDR_FROM, SORT_ADDR_TO, + SORT_CALLCHAIN_BRANCH_PREDICTED, + SORT_CALLCHAIN_BRANCH_ABORT, + SORT_CALLCHAIN_BRANCH_CYCLES, /* memory mode specific sort keys */ __SORT_MEMORY_MODE, diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index ea96e4ebad8c..53dcdf07f5a2 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -7,6 +7,7 @@ #include <perf/cpumap.h> #include "color.h" #include "counts.h" +#include "debug.h" #include "evlist.h" #include "evsel.h" #include "stat.h" @@ -21,6 +22,7 @@ #include "iostat.h" #include "pmu.h" #include "pmus.h" +#include "tool_pmu.h" #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" @@ -71,6 +73,32 @@ static const char *aggr_header_std[] = { [AGGR_GLOBAL] = "" }; +const char *metric_threshold_classify__color(enum metric_threshold_classify thresh) +{ + const char * const colors[] = { + "", /* unknown */ + PERF_COLOR_RED, /* bad */ + PERF_COLOR_MAGENTA, /* nearly bad */ + PERF_COLOR_YELLOW, /* less good */ + PERF_COLOR_GREEN, /* good */ + }; + static_assert(ARRAY_SIZE(colors) - 1 == METRIC_THRESHOLD_GOOD, "missing enum value"); + return colors[thresh]; +} + +static const char *metric_threshold_classify__str(enum metric_threshold_classify thresh) +{ + const char * const strs[] = { + "unknown", + "bad", + "nearly bad", + "less good", + "good", + }; + static_assert(ARRAY_SIZE(strs) - 1 == METRIC_THRESHOLD_GOOD, "missing enum value"); + return strs[thresh]; +} + static void print_running_std(struct perf_stat_config *config, u64 run, u64 ena) { if (run != ena) @@ -403,13 +431,14 @@ static void do_new_line_std(struct perf_stat_config *config, } static void print_metric_std(struct perf_stat_config *config, - void *ctx, const char *color, const char *fmt, - const char *unit, double val) + void *ctx, enum metric_threshold_classify thresh, + const char *fmt, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; int n; bool newline = os->newline; + const char *color = metric_threshold_classify__color(thresh); os->newline = false; @@ -441,7 +470,7 @@ static void new_line_csv(struct perf_stat_config *config, void *ctx) static void print_metric_csv(struct perf_stat_config *config __maybe_unused, void *ctx, - const char *color __maybe_unused, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt, const char *unit, double val) { struct outstate *os = ctx; @@ -462,15 +491,20 @@ static void print_metric_csv(struct perf_stat_config *config __maybe_unused, static void print_metric_json(struct perf_stat_config *config __maybe_unused, void *ctx, - const char *color __maybe_unused, + enum metric_threshold_classify thresh, const char *fmt __maybe_unused, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; - fprintf(out, "\"metric-value\" : \"%f\", ", val); - fprintf(out, "\"metric-unit\" : \"%s\"", unit); + if (unit) { + fprintf(out, "\"metric-value\" : \"%f\", \"metric-unit\" : \"%s\"", val, unit); + if (thresh != METRIC_THRESHOLD_UNKNOWN) { + fprintf(out, ", \"metric-threshold\" : \"%s\"", + metric_threshold_classify__str(thresh)); + } + } if (!config->metric_only) fprintf(out, "}"); } @@ -557,13 +591,14 @@ static const char *fixunit(char *buf, struct evsel *evsel, } static void print_metric_only(struct perf_stat_config *config, - void *ctx, const char *color, const char *fmt, - const char *unit, double val) + void *ctx, enum metric_threshold_classify thresh, + const char *fmt, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; char buf[1024], str[1024]; unsigned mlen = config->metric_only_len; + const char *color = metric_threshold_classify__color(thresh); if (!valid_only_metric(unit)) return; @@ -580,7 +615,8 @@ static void print_metric_only(struct perf_stat_config *config, } static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused, - void *ctx, const char *color __maybe_unused, + void *ctx, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt, const char *unit, double val) { @@ -602,25 +638,29 @@ static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused } static void print_metric_only_json(struct perf_stat_config *config __maybe_unused, - void *ctx, const char *color __maybe_unused, + void *ctx, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt, const char *unit, double val) { struct outstate *os = ctx; FILE *out = os->fh; - char buf[64], *vals, *ends; + char buf[64], *ends; char tbuf[1024]; + const char *vals; if (!valid_only_metric(unit)) return; unit = fixunit(tbuf, os->evsel, unit); + if (!unit[0]) + return; snprintf(buf, sizeof(buf), fmt ?: "", val); - ends = vals = skip_spaces(buf); + vals = ends = skip_spaces(buf); while (isdigit(*ends) || *ends == '.') ends++; *ends = 0; - if (!unit[0] || !vals[0]) - return; + if (!vals[0]) + vals = "none"; fprintf(out, "%s\"%s\" : \"%s\"", os->first ? "" : ", ", unit, vals); os->first = false; } @@ -631,7 +671,8 @@ static void new_line_metric(struct perf_stat_config *config __maybe_unused, } static void print_metric_header(struct perf_stat_config *config, - void *ctx, const char *color __maybe_unused, + void *ctx, + enum metric_threshold_classify thresh __maybe_unused, const char *fmt __maybe_unused, const char *unit, double val __maybe_unused) { @@ -805,7 +846,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os, if (run == 0 || ena == 0 || counter->counts->scaled == -1) { if (config->metric_only) { - pm(config, os, NULL, "", "", 0); + pm(config, os, METRIC_THRESHOLD_UNKNOWN, "", "", 0); return; } @@ -860,7 +901,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os, perf_stat__print_shadow_stats(config, counter, uval, aggr_idx, &out, &config->metric_events); } else { - pm(config, os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0); + pm(config, os, METRIC_THRESHOLD_UNKNOWN, /*format=*/NULL, /*unit=*/"", /*val=*/0); } if (!config->metric_only) { @@ -871,38 +912,66 @@ static void printout(struct perf_stat_config *config, struct outstate *os, static void uniquify_event_name(struct evsel *counter) { - char *new_name; - char *config; - int ret = 0; + const char *name, *pmu_name; + char *new_name, *config; + int ret; + + /* The evsel was already uniquified. */ + if (counter->uniquified_name) + return; + + /* Avoid checking to uniquify twice. */ + counter->uniquified_name = true; + + /* The evsel has a "name=" config term or is from libpfm. */ + if (counter->use_config_name || counter->is_libpfm_event) + return; + + /* Legacy no PMU event, don't uniquify. */ + if (!counter->pmu || + (counter->pmu->type < PERF_TYPE_MAX && counter->pmu->type != PERF_TYPE_RAW)) + return; - if (counter->uniquified_name || counter->use_config_name || - !counter->pmu_name || !strncmp(evsel__name(counter), counter->pmu_name, - strlen(counter->pmu_name))) + /* A sysfs or json event replacing a legacy event, don't uniquify. */ + if (counter->pmu->is_core && counter->alternate_hw_config != PERF_COUNT_HW_MAX) return; - config = strchr(counter->name, '/'); + name = evsel__name(counter); + pmu_name = counter->pmu->name; + /* Already prefixed by the PMU name. */ + if (!strncmp(name, pmu_name, strlen(pmu_name))) + return; + + config = strchr(name, '/'); if (config) { - if (asprintf(&new_name, - "%s%s", counter->pmu_name, config) > 0) { - free(counter->name); - counter->name = new_name; - } - } else { - if (evsel__is_hybrid(counter)) { - ret = asprintf(&new_name, "%s/%s/", - counter->pmu_name, counter->name); + int len = config - name; + + if (config[1] == '/') { + /* case: event// */ + ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 2); } else { - ret = asprintf(&new_name, "%s [%s]", - counter->name, counter->pmu_name); + /* case: event/.../ */ + ret = asprintf(&new_name, "%s/%.*s,%s", pmu_name, len, name, config + 1); } + } else { + config = strchr(name, ':'); + if (config) { + /* case: event:.. */ + int len = config - name; - if (ret) { - free(counter->name); - counter->name = new_name; + ret = asprintf(&new_name, "%s/%.*s/%s", pmu_name, len, name, config + 1); + } else { + /* case: event */ + ret = asprintf(&new_name, "%s/%s/", pmu_name, name); } } - - counter->uniquified_name = true; + if (ret > 0) { + free(counter->name); + counter->name = new_name; + } else { + /* ENOMEM from asprintf. */ + counter->uniquified_name = false; + } } static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config) @@ -940,15 +1009,29 @@ static bool should_skip_zero_counter(struct perf_stat_config *config, int idx; /* + * Skip unsupported default events when not verbose. (default events + * are all marked 'skippable'). + */ + if (verbose == 0 && counter->skippable && !counter->supported) + return true; + + /* * Skip value 0 when enabling --per-thread globally, * otherwise it will have too many 0 output. */ if (config->aggr_mode == AGGR_THREAD && config->system_wide) return true; - /* Tool events have the software PMU but are only gathered on 1. */ - if (evsel__is_tool(counter)) - return true; + /* + * Many tool events are only gathered on the first index, skip other + * zero values. + */ + if (evsel__is_tool(counter)) { + struct aggr_cpu_id own_id = + config->aggr_get_id(config, (struct perf_cpu){ .cpu = 0 }); + + return !aggr_cpu_id__equal(id, &own_id); + } /* * Skip value 0 when it's an uncore event and the given aggr id @@ -1559,6 +1642,31 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist print_metric_end(config, os); } +static void disable_uniquify(struct evlist *evlist) +{ + struct evsel *counter; + struct perf_pmu *last_pmu = NULL; + bool first = true; + + evlist__for_each_entry(evlist, counter) { + /* If PMUs vary then uniquify can be useful. */ + if (!first && counter->pmu != last_pmu) + return; + first = false; + if (counter->pmu) { + /* Allow uniquify for uncore PMUs. */ + if (!counter->pmu->is_core) + return; + /* Keep hybrid event names uniquified for clarity. */ + if (perf_pmus__num_core_pmus() > 1) + return; + } + } + evlist__for_each_entry_continue(evlist, counter) { + counter->uniquified_name = true; + } +} + void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, struct target *_target, struct timespec *ts, int argc, const char **argv) @@ -1572,6 +1680,8 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf .first = true, }; + disable_uniquify(evlist); + if (config->iostat_run) evlist->selected = evlist__first(evlist); diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 99376c12dd8e..47718610d5d8 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -15,6 +15,7 @@ #include <linux/zalloc.h> #include "iostat.h" #include "util/hashmap.h" +#include "tool_pmu.h" struct stats walltime_nsecs_stats; struct rusage_stats ru_stats; @@ -76,7 +77,7 @@ void perf_stat__reset_shadow_stats(void) memset(&ru_stats, 0, sizeof(ru_stats)); } -static enum stat_type evsel__stat_type(const struct evsel *evsel) +static enum stat_type evsel__stat_type(struct evsel *evsel) { /* Fake perf_hw_cache_op_id values for use with evsel__match. */ u64 PERF_COUNT_hw_cache_l1d_miss = PERF_COUNT_HW_CACHE_L1D | @@ -136,23 +137,19 @@ static enum stat_type evsel__stat_type(const struct evsel *evsel) return STAT_NONE; } -static const char *get_ratio_color(const double ratios[3], double val) +static enum metric_threshold_classify get_ratio_thresh(const double ratios[3], double val) { - const char *color = PERF_COLOR_NORMAL; + assert(ratios[0] > ratios[1]); + assert(ratios[1] > ratios[2]); - if (val > ratios[0]) - color = PERF_COLOR_RED; - else if (val > ratios[1]) - color = PERF_COLOR_MAGENTA; - else if (val > ratios[2]) - color = PERF_COLOR_YELLOW; - - return color; + return val > ratios[1] + ? (val > ratios[0] ? METRIC_THRESHOLD_BAD : METRIC_THRESHOLD_NEARLY_BAD) + : (val > ratios[2] ? METRIC_THRESHOLD_LESS_GOOD : METRIC_THRESHOLD_GOOD); } static double find_stat(const struct evsel *evsel, int aggr_idx, enum stat_type type) { - const struct evsel *cur; + struct evsel *cur; int evsel_ctx = evsel_context(evsel); evlist__for_each_entry(evsel->evlist, cur) { @@ -195,17 +192,21 @@ static void print_ratio(struct perf_stat_config *config, const struct evsel *evsel, int aggr_idx, double numerator, struct perf_stat_output_ctx *out, enum stat_type denominator_type, - const double color_ratios[3], const char *unit) + const double thresh_ratios[3], const char *_unit) { double denominator = find_stat(evsel, aggr_idx, denominator_type); + double ratio = 0; + enum metric_threshold_classify thresh = METRIC_THRESHOLD_UNKNOWN; + const char *fmt = NULL; + const char *unit = NULL; if (numerator && denominator) { - double ratio = numerator / denominator * 100.0; - const char *color = get_ratio_color(color_ratios, ratio); - - out->print_metric(config, out->ctx, color, "%7.2f%%", unit, ratio); - } else - out->print_metric(config, out->ctx, NULL, NULL, unit, 0); + ratio = numerator / denominator * 100.0; + thresh = get_ratio_thresh(thresh_ratios, ratio); + fmt = "%7.2f%%"; + unit = _unit; + } + out->print_metric(config, out->ctx, thresh, fmt, unit, ratio); } static void print_stalled_cycles_front(struct perf_stat_config *config, @@ -213,9 +214,9 @@ static void print_stalled_cycles_front(struct perf_stat_config *config, int aggr_idx, double stalled, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {50.0, 30.0, 10.0}; + const double thresh_ratios[3] = {50.0, 30.0, 10.0}; - print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios, + print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, thresh_ratios, "frontend cycles idle"); } @@ -224,9 +225,9 @@ static void print_stalled_cycles_back(struct perf_stat_config *config, int aggr_idx, double stalled, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {75.0, 50.0, 20.0}; + const double thresh_ratios[3] = {75.0, 50.0, 20.0}; - print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, color_ratios, + print_ratio(config, evsel, aggr_idx, stalled, out, STAT_CYCLES, thresh_ratios, "backend cycles idle"); } @@ -235,9 +236,9 @@ static void print_branch_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_BRANCHES, thresh_ratios, "of all branches"); } @@ -246,9 +247,9 @@ static void print_l1d_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_DCACHE, thresh_ratios, "of all L1-dcache accesses"); } @@ -257,9 +258,9 @@ static void print_l1i_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_L1_ICACHE, thresh_ratios, "of all L1-icache accesses"); } @@ -268,9 +269,9 @@ static void print_ll_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_LL_CACHE, thresh_ratios, "of all LL-cache accesses"); } @@ -279,9 +280,9 @@ static void print_dtlb_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_DTLB_CACHE, thresh_ratios, "of all dTLB cache accesses"); } @@ -290,9 +291,9 @@ static void print_itlb_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_ITLB_CACHE, thresh_ratios, "of all iTLB cache accesses"); } @@ -301,9 +302,9 @@ static void print_cache_miss(struct perf_stat_config *config, int aggr_idx, double misses, struct perf_stat_output_ctx *out) { - static const double color_ratios[3] = {20.0, 10.0, 5.0}; + const double thresh_ratios[3] = {20.0, 10.0, 5.0}; - print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, color_ratios, + print_ratio(config, evsel, aggr_idx, misses, out, STAT_CACHE_REFS, thresh_ratios, "of all cache refs"); } @@ -319,15 +320,16 @@ static void print_instructions(struct perf_stat_config *config, find_stat(evsel, aggr_idx, STAT_STALLED_CYCLES_BACK)); if (cycles) { - print_metric(config, ctxp, NULL, "%7.2f ", "insn per cycle", - instructions / cycles); - } else - print_metric(config, ctxp, NULL, NULL, "insn per cycle", 0); - + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%7.2f ", + "insn per cycle", instructions / cycles); + } else { + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL, + "insn per cycle", 0); + } if (max_stalled && instructions) { out->new_line(config, ctxp); - print_metric(config, ctxp, NULL, "%7.2f ", "stalled cycles per insn", - max_stalled / instructions); + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%7.2f ", + "stalled cycles per insn", max_stalled / instructions); } } @@ -341,9 +343,12 @@ static void print_cycles(struct perf_stat_config *config, if (cycles && nsecs) { double ratio = cycles / nsecs; - out->print_metric(config, out->ctx, NULL, "%8.3f", "GHz", ratio); - } else - out->print_metric(config, out->ctx, NULL, NULL, "GHz", 0); + out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, "%8.3f", + "GHz", ratio); + } else { + out->print_metric(config, out->ctx, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL, + "GHz", 0); + } } static void print_nsecs(struct perf_stat_config *config, @@ -356,10 +361,12 @@ static void print_nsecs(struct perf_stat_config *config, double wall_time = avg_stats(&walltime_nsecs_stats); if (wall_time) { - print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized", + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%8.3f", "CPUs utilized", nsecs / (wall_time * evsel->scale)); - } else - print_metric(config, ctxp, NULL, NULL, "CPUs utilized", 0); + } else { + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, /*fmt=*/NULL, + "CPUs utilized", 0); + } } static int prepare_metric(const struct metric_expr *mexp, @@ -381,26 +388,35 @@ static int prepare_metric(const struct metric_expr *mexp, double scale; switch (evsel__tool_event(metric_events[i])) { - case PERF_TOOL_DURATION_TIME: + case TOOL_PMU__EVENT_DURATION_TIME: stats = &walltime_nsecs_stats; scale = 1e-9; break; - case PERF_TOOL_USER_TIME: + case TOOL_PMU__EVENT_USER_TIME: stats = &ru_stats.ru_utime_usec_stat; scale = 1e-6; break; - case PERF_TOOL_SYSTEM_TIME: + case TOOL_PMU__EVENT_SYSTEM_TIME: stats = &ru_stats.ru_stime_usec_stat; scale = 1e-6; break; - case PERF_TOOL_NONE: + case TOOL_PMU__EVENT_NONE: pr_err("Invalid tool event 'none'"); abort(); - case PERF_TOOL_MAX: + case TOOL_PMU__EVENT_MAX: pr_err("Invalid tool event 'max'"); abort(); + case TOOL_PMU__EVENT_HAS_PMEM: + case TOOL_PMU__EVENT_NUM_CORES: + case TOOL_PMU__EVENT_NUM_CPUS: + case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: + case TOOL_PMU__EVENT_NUM_DIES: + case TOOL_PMU__EVENT_NUM_PACKAGES: + case TOOL_PMU__EVENT_SLOTS: + case TOOL_PMU__EVENT_SMT_ON: + case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: default: - pr_err("Unknown tool event '%s'", evsel__name(metric_events[i])); + pr_err("Unexpected tool event '%s'", evsel__name(metric_events[i])); abort(); } val = avg_stats(stats) * scale; @@ -483,7 +499,7 @@ static void generic_metric(struct perf_stat_config *config, double ratio, scale, threshold; int i; void *ctxp = out->ctx; - const char *color = NULL; + enum metric_threshold_classify thresh = METRIC_THRESHOLD_UNKNOWN; pctx = expr__ctx_new(); if (!pctx) @@ -501,13 +517,13 @@ static void generic_metric(struct perf_stat_config *config, if (!metric_events[i]) { if (expr__parse(&ratio, pctx, metric_expr) == 0) { char *unit; - char metric_bf[64]; + char metric_bf[128]; if (metric_threshold && expr__parse(&threshold, pctx, metric_threshold) == 0 && !isnan(threshold)) { - color = fpclassify(threshold) == FP_ZERO - ? PERF_COLOR_GREEN : PERF_COLOR_RED; + thresh = fpclassify(threshold) == FP_ZERO + ? METRIC_THRESHOLD_GOOD : METRIC_THRESHOLD_BAD; } if (metric_unit && metric_name) { @@ -522,22 +538,22 @@ static void generic_metric(struct perf_stat_config *config, scnprintf(metric_bf, sizeof(metric_bf), "%s %s", unit, metric_name); - print_metric(config, ctxp, color, "%8.1f", + print_metric(config, ctxp, thresh, "%8.1f", metric_bf, ratio); } else { - print_metric(config, ctxp, color, "%8.2f", + print_metric(config, ctxp, thresh, "%8.2f", metric_name ? metric_name : out->force_header ? evsel->name : "", ratio); } } else { - print_metric(config, ctxp, color, /*unit=*/NULL, + print_metric(config, ctxp, thresh, /*fmt=*/NULL, out->force_header ? (metric_name ?: evsel->name) : "", 0); } } else { - print_metric(config, ctxp, color, /*unit=*/NULL, + print_metric(config, ctxp, thresh, /*fmt=*/NULL, out->force_header ? (metric_name ?: evsel->name) : "", 0); } @@ -573,7 +589,7 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config, { bool need_full_name = perf_pmus__num_core_pmus() > 1; static const char *last_name; - static const char *last_pmu; + static const struct perf_pmu *last_pmu; char full_name[64]; /* @@ -584,21 +600,21 @@ static void perf_stat__print_metricgroup_header(struct perf_stat_config *config, * different metric events. */ if (last_name && !strcmp(last_name, name)) { - if (!need_full_name || !strcmp(last_pmu, evsel->pmu_name)) { + if (!need_full_name || last_pmu != evsel->pmu) { out->print_metricgroup_header(config, ctxp, NULL); return; } } - if (need_full_name) - scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu_name); + if (need_full_name && evsel->pmu) + scnprintf(full_name, sizeof(full_name), "%s (%s)", name, evsel->pmu->name); else scnprintf(full_name, sizeof(full_name), "%s", name); out->print_metricgroup_header(config, ctxp, full_name); last_name = name; - last_pmu = evsel->pmu_name; + last_pmu = evsel->pmu; } /** @@ -708,17 +724,21 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, if (unit != ' ') snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); - print_metric(config, ctxp, NULL, "%8.3f", unit_buf, ratio); - } else + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, "%8.3f", + unit_buf, ratio); + } else { num = 0; + } } } perf_stat__print_shadow_stats_metricgroup(config, evsel, aggr_idx, &num, NULL, out, metric_events); - if (num == 0) - print_metric(config, ctxp, NULL, NULL, NULL, 0); + if (num == 0) { + print_metric(config, ctxp, METRIC_THRESHOLD_UNKNOWN, + /*fmt=*/NULL, /*unit=*/NULL, 0); + } } /** diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 0bd5467389e4..7c2ccdcc3fdb 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -553,7 +553,7 @@ static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b) if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b)) return false; - return !!strcmp(evsel_a->pmu_name, evsel_b->pmu_name); + return evsel_a->pmu != evsel_b->pmu; } static void evsel__merge_aliases(struct evsel *evsel) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index fd7a187551bd..6f8cff3cd39a 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -154,9 +154,21 @@ struct evlist; extern struct stats walltime_nsecs_stats; extern struct rusage_stats ru_stats; +enum metric_threshold_classify { + METRIC_THRESHOLD_UNKNOWN, + METRIC_THRESHOLD_BAD, + METRIC_THRESHOLD_NEARLY_BAD, + METRIC_THRESHOLD_LESS_GOOD, + METRIC_THRESHOLD_GOOD, +}; +const char *metric_threshold_classify__color(enum metric_threshold_classify thresh); + typedef void (*print_metric_t)(struct perf_stat_config *config, - void *ctx, const char *color, const char *unit, - const char *fmt, double val); + void *ctx, + enum metric_threshold_classify thresh, + const char *fmt, + const char *unit, + double val); typedef void (*new_line_t)(struct perf_stat_config *config, void *ctx); /* Used to print the display name of the Default metricgroup for now. */ diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index 116a642ad99d..308fc7ec88cc 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -263,6 +263,34 @@ char *strpbrk_esc(char *str, const char *stopset) return ptr; } +/* Like strpbrk_esc(), but not break if it is quoted with single/double quotes */ +char *strpbrk_esq(char *str, const char *stopset) +{ + char *_stopset = NULL; + char *ptr; + const char *squote = "'"; + const char *dquote = "\""; + + if (asprintf(&_stopset, "%s%c%c", stopset, *squote, *dquote) < 0) + return NULL; + + do { + ptr = strpbrk_esc(str, _stopset); + if (!ptr) + break; + if (*ptr == *squote) + ptr = strpbrk_esc(ptr + 1, squote); + else if (*ptr == *dquote) + ptr = strpbrk_esc(ptr + 1, dquote); + else + break; + str = ptr + 1; + } while (ptr); + + free(_stopset); + return ptr; +} + /* Like strdup, but do not copy a single backslash */ char *strdup_esc(const char *str) { @@ -293,6 +321,78 @@ char *strdup_esc(const char *str) return ret; } +/* Remove backslash right before quote and return next quote address. */ +static char *remove_consumed_esc(char *str, int len, int quote) +{ + char *ptr = str, *end = str + len; + + while (*ptr != quote && ptr < end) { + if (*ptr == '\\' && *(ptr + 1) == quote) { + memmove(ptr, ptr + 1, end - (ptr + 1)); + /* now *ptr is `quote`. */ + end--; + } + ptr++; + } + + return *ptr == quote ? ptr : NULL; +} + +/* + * Like strdup_esc, but keep quoted string as it is (and single backslash + * before quote is removed). If there is no closed quote, return NULL. + */ +char *strdup_esq(const char *str) +{ + char *d, *ret; + + /* If there is no quote, return normal strdup_esc() */ + d = strpbrk_esc((char *)str, "\"'"); + if (!d) + return strdup_esc(str); + + ret = strdup(str); + if (!ret) + return NULL; + + d = ret; + do { + d = strpbrk(d, "\\\"\'"); + if (!d) + break; + + if (*d == '"' || *d == '\'') { + /* This is non-escaped quote */ + int quote = *d; + int len = strlen(d + 1) + 1; + + /* + * Remove the start quote and remove consumed escape (backslash + * before quote) and remove the end quote. If there is no end + * quote, it is the input error. + */ + memmove(d, d + 1, len); + d = remove_consumed_esc(d, len, quote); + if (!d) + goto error; + memmove(d, d + 1, strlen(d + 1) + 1); + } + if (*d == '\\') { + memmove(d, d + 1, strlen(d + 1) + 1); + if (*d == '\\') { + /* double backslash -- keep the second one. */ + d++; + } + } + } while (*d != '\0'); + + return ret; + +error: + free(ret); + return NULL; +} + unsigned int hex(char c) { if (c >= '0' && c <= '9') diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index 52cb8ba057c7..4c8bff47cfd3 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -37,6 +37,8 @@ char *asprintf__tp_filter_pids(size_t npids, pid_t *pids); char *strpbrk_esc(char *str, const char *stopset); char *strdup_esc(const char *str); +char *strpbrk_esq(char *str, const char *stopset); +char *strdup_esq(const char *str); unsigned int hex(char c); char *strreplace_chars(char needle, const char *haystack, const char *replace); diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a18927d792af..0037f1163919 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -257,7 +257,7 @@ void symbols__fixup_end(struct rb_root_cached *symbols, bool is_kallsyms) * like in: * ffffffffc1937000 T hdmi_driver_init [snd_hda_codec_hdmi] */ - if (prev->end == prev->start) { + if (prev->end == prev->start && prev->type != STT_NOTYPE) { const char *prev_mod; const char *curr_mod; @@ -1931,6 +1931,9 @@ int dso__load(struct dso *dso, struct map *map) if (next_slot) { ss_pos++; + if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) + dso__set_binary_type(dso, symtab_type); + if (syms_ss && runtime_ss) break; } else { diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 7c15dec6900d..69d8dcf5cf28 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -46,6 +46,15 @@ static const char *const *syscalltbl_native = syscalltbl_mips_n64; #include <asm/syscalls.c> const int syscalltbl_native_max_id = SYSCALLTBL_LOONGARCH_MAX_ID; static const char *const *syscalltbl_native = syscalltbl_loongarch; +#elif defined(__riscv) +#include <asm/syscalls.c> +const int syscalltbl_native_max_id = SYSCALLTBL_RISCV_MAX_ID; +static const char *const *syscalltbl_native = syscalltbl_riscv; +#else +const int syscalltbl_native_max_id = 0; +static const char *const syscalltbl_native[] = { + [0] = "unknown", +}; #endif struct syscall { @@ -182,6 +191,11 @@ int syscalltbl__id(struct syscalltbl *tbl, const char *name) return audit_name_to_syscall(name, tbl->audit_machine); } +int syscalltbl__id_at_idx(struct syscalltbl *tbl __maybe_unused, int idx) +{ + return idx; +} + int syscalltbl__strglobmatch_next(struct syscalltbl *tbl __maybe_unused, const char *syscall_glob __maybe_unused, int *idx __maybe_unused) { diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index d582cae8e105..2ee2cc30340f 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -17,6 +17,7 @@ struct target { bool default_per_cpu; bool per_thread; bool use_bpf; + bool inherit; int initial_delay; const char *attr_map; }; diff --git a/tools/perf/util/threads.c b/tools/perf/util/threads.c index ff2b169e0085..6ca0b178fb6c 100644 --- a/tools/perf/util/threads.c +++ b/tools/perf/util/threads.c @@ -141,7 +141,7 @@ void threads__remove_all_threads(struct threads *threads) down_write(&table->lock); __threads_table_entry__set_last_match(table, NULL); - hashmap__for_each_entry_safe((&table->shard), cur, tmp, bkt) { + hashmap__for_each_entry_safe(&table->shard, cur, tmp, bkt) { struct thread *old_value; hashmap__delete(&table->shard, cur->key, /*old_key=*/NULL, &old_value); @@ -175,7 +175,7 @@ int threads__for_each_thread(struct threads *threads, size_t bkt; down_read(&table->lock); - hashmap__for_each_entry((&table->shard), cur, bkt) { + hashmap__for_each_entry(&table->shard, cur, bkt) { int rc = fn((struct thread *)cur->pvalue, data); if (rc != 0) { diff --git a/tools/perf/util/tool_pmu.c b/tools/perf/util/tool_pmu.c new file mode 100644 index 000000000000..4fb097578479 --- /dev/null +++ b/tools/perf/util/tool_pmu.c @@ -0,0 +1,505 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "cgroup.h" +#include "counts.h" +#include "cputopo.h" +#include "evsel.h" +#include "pmu.h" +#include "print-events.h" +#include "smt.h" +#include "time-utils.h" +#include "tool_pmu.h" +#include "tsc.h" +#include <api/fs/fs.h> +#include <api/io.h> +#include <internal/threadmap.h> +#include <perf/threadmap.h> +#include <fcntl.h> +#include <strings.h> + +static const char *const tool_pmu__event_names[TOOL_PMU__EVENT_MAX] = { + NULL, + "duration_time", + "user_time", + "system_time", + "has_pmem", + "num_cores", + "num_cpus", + "num_cpus_online", + "num_dies", + "num_packages", + "slots", + "smt_on", + "system_tsc_freq", +}; + +bool tool_pmu__skip_event(const char *name __maybe_unused) +{ +#if !defined(__aarch64__) + /* The slots event should only appear on arm64. */ + if (strcasecmp(name, "slots") == 0) + return true; +#endif +#if !defined(__i386__) && !defined(__x86_64__) + /* The system_tsc_freq event should only appear on x86. */ + if (strcasecmp(name, "system_tsc_freq") == 0) + return true; +#endif + return false; +} + +int tool_pmu__num_skip_events(void) +{ + int num = 0; + +#if !defined(__aarch64__) + num++; +#endif +#if !defined(__i386__) && !defined(__x86_64__) + num++; +#endif + return num; +} + +const char *tool_pmu__event_to_str(enum tool_pmu_event ev) +{ + if (ev > TOOL_PMU__EVENT_NONE && ev < TOOL_PMU__EVENT_MAX) + return tool_pmu__event_names[ev]; + + return NULL; +} + +enum tool_pmu_event tool_pmu__str_to_event(const char *str) +{ + int i; + + if (tool_pmu__skip_event(str)) + return TOOL_PMU__EVENT_NONE; + + tool_pmu__for_each_event(i) { + if (!strcasecmp(str, tool_pmu__event_names[i])) + return i; + } + return TOOL_PMU__EVENT_NONE; +} + +bool perf_pmu__is_tool(const struct perf_pmu *pmu) +{ + return pmu && pmu->type == PERF_PMU_TYPE_TOOL; +} + +bool evsel__is_tool(const struct evsel *evsel) +{ + return perf_pmu__is_tool(evsel->pmu); +} + +enum tool_pmu_event evsel__tool_event(const struct evsel *evsel) +{ + if (!evsel__is_tool(evsel)) + return TOOL_PMU__EVENT_NONE; + + return (enum tool_pmu_event)evsel->core.attr.config; +} + +const char *evsel__tool_pmu_event_name(const struct evsel *evsel) +{ + return tool_pmu__event_to_str(evsel->core.attr.config); +} + +static bool read_until_char(struct io *io, char e) +{ + int c; + + do { + c = io__get_char(io); + if (c == -1) + return false; + } while (c != e); + return true; +} + +static int read_stat_field(int fd, struct perf_cpu cpu, int field, __u64 *val) +{ + char buf[256]; + struct io io; + int i; + + io__init(&io, fd, buf, sizeof(buf)); + + /* Skip lines to relevant CPU. */ + for (i = -1; i < cpu.cpu; i++) { + if (!read_until_char(&io, '\n')) + return -EINVAL; + } + /* Skip to "cpu". */ + if (io__get_char(&io) != 'c') return -EINVAL; + if (io__get_char(&io) != 'p') return -EINVAL; + if (io__get_char(&io) != 'u') return -EINVAL; + + /* Skip N of cpuN. */ + if (!read_until_char(&io, ' ')) + return -EINVAL; + + i = 1; + while (true) { + if (io__get_dec(&io, val) != ' ') + break; + if (field == i) + return 0; + i++; + } + return -EINVAL; +} + +static int read_pid_stat_field(int fd, int field, __u64 *val) +{ + char buf[256]; + struct io io; + int c, i; + + io__init(&io, fd, buf, sizeof(buf)); + if (io__get_dec(&io, val) != ' ') + return -EINVAL; + if (field == 1) + return 0; + + /* Skip comm. */ + if (io__get_char(&io) != '(' || !read_until_char(&io, ')')) + return -EINVAL; + if (field == 2) + return -EINVAL; /* String can't be returned. */ + + /* Skip state */ + if (io__get_char(&io) != ' ' || io__get_char(&io) == -1) + return -EINVAL; + if (field == 3) + return -EINVAL; /* String can't be returned. */ + + /* Loop over numeric fields*/ + if (io__get_char(&io) != ' ') + return -EINVAL; + + i = 4; + while (true) { + c = io__get_dec(&io, val); + if (c == -1) + return -EINVAL; + if (c == -2) { + /* Assume a -ve was read */ + c = io__get_dec(&io, val); + *val *= -1; + } + if (c != ' ') + return -EINVAL; + if (field == i) + return 0; + i++; + } + return -EINVAL; +} + +int evsel__tool_pmu_prepare_open(struct evsel *evsel, + struct perf_cpu_map *cpus, + int nthreads) +{ + if ((evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME || + evsel__tool_event(evsel) == TOOL_PMU__EVENT_USER_TIME) && + !evsel->start_times) { + evsel->start_times = xyarray__new(perf_cpu_map__nr(cpus), + nthreads, + sizeof(__u64)); + if (!evsel->start_times) + return -ENOMEM; + } + return 0; +} + +#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y)) + +int evsel__tool_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx) +{ + enum tool_pmu_event ev = evsel__tool_event(evsel); + int pid = -1, idx = 0, thread = 0, nthreads, err = 0, old_errno; + + if (ev == TOOL_PMU__EVENT_NUM_CPUS) + return 0; + + if (ev == TOOL_PMU__EVENT_DURATION_TIME) { + if (evsel->core.attr.sample_period) /* no sampling */ + return -EINVAL; + evsel->start_time = rdclock(); + return 0; + } + + if (evsel->cgrp) + pid = evsel->cgrp->fd; + + nthreads = perf_thread_map__nr(threads); + for (idx = start_cpu_map_idx; idx < end_cpu_map_idx; idx++) { + for (thread = 0; thread < nthreads; thread++) { + if (thread >= nthreads) + break; + + if (!evsel->cgrp && !evsel->core.system_wide) + pid = perf_thread_map__pid(threads, thread); + + if (ev == TOOL_PMU__EVENT_USER_TIME || ev == TOOL_PMU__EVENT_SYSTEM_TIME) { + bool system = ev == TOOL_PMU__EVENT_SYSTEM_TIME; + __u64 *start_time = NULL; + int fd; + + if (evsel->core.attr.sample_period) { + /* no sampling */ + err = -EINVAL; + goto out_close; + } + if (pid > -1) { + char buf[64]; + + snprintf(buf, sizeof(buf), "/proc/%d/stat", pid); + fd = open(buf, O_RDONLY); + evsel->pid_stat = true; + } else { + fd = open("/proc/stat", O_RDONLY); + } + FD(evsel, idx, thread) = fd; + if (fd < 0) { + err = -errno; + goto out_close; + } + start_time = xyarray__entry(evsel->start_times, idx, thread); + if (pid > -1) { + err = read_pid_stat_field(fd, system ? 15 : 14, + start_time); + } else { + struct perf_cpu cpu; + + cpu = perf_cpu_map__cpu(evsel->core.cpus, idx); + err = read_stat_field(fd, cpu, system ? 3 : 1, + start_time); + } + if (err) + goto out_close; + } + + } + } + return 0; +out_close: + if (err) + threads->err_thread = thread; + + old_errno = errno; + do { + while (--thread >= 0) { + if (FD(evsel, idx, thread) >= 0) + close(FD(evsel, idx, thread)); + FD(evsel, idx, thread) = -1; + } + thread = nthreads; + } while (--idx >= 0); + errno = old_errno; + return err; +} + +#if !defined(__i386__) && !defined(__x86_64__) +u64 arch_get_tsc_freq(void) +{ + return 0; +} +#endif + +#if !defined(__aarch64__) +u64 tool_pmu__cpu_slots_per_cycle(void) +{ + return 0; +} +#endif + +static bool has_pmem(void) +{ + static bool has_pmem, cached; + const char *sysfs = sysfs__mountpoint(); + char path[PATH_MAX]; + + if (!cached) { + snprintf(path, sizeof(path), "%s/firmware/acpi/tables/NFIT", sysfs); + has_pmem = access(path, F_OK) == 0; + cached = true; + } + return has_pmem; +} + +bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result) +{ + const struct cpu_topology *topology; + + switch (ev) { + case TOOL_PMU__EVENT_HAS_PMEM: + *result = has_pmem() ? 1 : 0; + return true; + + case TOOL_PMU__EVENT_NUM_CORES: + topology = online_topology(); + *result = topology->core_cpus_lists; + return true; + + case TOOL_PMU__EVENT_NUM_CPUS: + *result = cpu__max_present_cpu().cpu; + return true; + + case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: { + struct perf_cpu_map *online = cpu_map__online(); + + if (online) { + *result = perf_cpu_map__nr(online); + return true; + } + return false; + } + case TOOL_PMU__EVENT_NUM_DIES: + topology = online_topology(); + *result = topology->die_cpus_lists; + return true; + + case TOOL_PMU__EVENT_NUM_PACKAGES: + topology = online_topology(); + *result = topology->package_cpus_lists; + return true; + + case TOOL_PMU__EVENT_SLOTS: + *result = tool_pmu__cpu_slots_per_cycle(); + return *result ? true : false; + + case TOOL_PMU__EVENT_SMT_ON: + *result = smt_on() ? 1 : 0; + return true; + + case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: + *result = arch_get_tsc_freq(); + return true; + + case TOOL_PMU__EVENT_NONE: + case TOOL_PMU__EVENT_DURATION_TIME: + case TOOL_PMU__EVENT_USER_TIME: + case TOOL_PMU__EVENT_SYSTEM_TIME: + case TOOL_PMU__EVENT_MAX: + default: + return false; + } +} + +int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread) +{ + __u64 *start_time, cur_time, delta_start; + u64 val; + int fd, err = 0; + struct perf_counts_values *count, *old_count = NULL; + bool adjust = false; + enum tool_pmu_event ev = evsel__tool_event(evsel); + + count = perf_counts(evsel->counts, cpu_map_idx, thread); + + switch (ev) { + case TOOL_PMU__EVENT_HAS_PMEM: + case TOOL_PMU__EVENT_NUM_CORES: + case TOOL_PMU__EVENT_NUM_CPUS: + case TOOL_PMU__EVENT_NUM_CPUS_ONLINE: + case TOOL_PMU__EVENT_NUM_DIES: + case TOOL_PMU__EVENT_NUM_PACKAGES: + case TOOL_PMU__EVENT_SLOTS: + case TOOL_PMU__EVENT_SMT_ON: + case TOOL_PMU__EVENT_SYSTEM_TSC_FREQ: + if (evsel->prev_raw_counts) + old_count = perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + val = 0; + if (cpu_map_idx == 0 && thread == 0) { + if (!tool_pmu__read_event(ev, &val)) { + count->lost++; + val = 0; + } + } + if (old_count) { + count->val = old_count->val + val; + count->run = old_count->run + 1; + count->ena = old_count->ena + 1; + } else { + count->val = val; + count->run++; + count->ena++; + } + return 0; + case TOOL_PMU__EVENT_DURATION_TIME: + /* + * Pretend duration_time is only on the first CPU and thread, or + * else aggregation will scale duration_time by the number of + * CPUs/threads. + */ + start_time = &evsel->start_time; + if (cpu_map_idx == 0 && thread == 0) + cur_time = rdclock(); + else + cur_time = *start_time; + break; + case TOOL_PMU__EVENT_USER_TIME: + case TOOL_PMU__EVENT_SYSTEM_TIME: { + bool system = evsel__tool_event(evsel) == TOOL_PMU__EVENT_SYSTEM_TIME; + + start_time = xyarray__entry(evsel->start_times, cpu_map_idx, thread); + fd = FD(evsel, cpu_map_idx, thread); + lseek(fd, SEEK_SET, 0); + if (evsel->pid_stat) { + /* The event exists solely on 1 CPU. */ + if (cpu_map_idx == 0) + err = read_pid_stat_field(fd, system ? 15 : 14, &cur_time); + else + cur_time = 0; + } else { + /* The event is for all threads. */ + if (thread == 0) { + struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, + cpu_map_idx); + + err = read_stat_field(fd, cpu, system ? 3 : 1, &cur_time); + } else { + cur_time = 0; + } + } + adjust = true; + break; + } + case TOOL_PMU__EVENT_NONE: + case TOOL_PMU__EVENT_MAX: + default: + err = -EINVAL; + } + if (err) + return err; + + delta_start = cur_time - *start_time; + if (adjust) { + __u64 ticks_per_sec = sysconf(_SC_CLK_TCK); + + delta_start *= 1000000000 / ticks_per_sec; + } + count->val = delta_start; + count->ena = count->run = delta_start; + count->lost = 0; + return 0; +} + +struct perf_pmu *perf_pmus__tool_pmu(void) +{ + static struct perf_pmu tool = { + .name = "tool", + .type = PERF_PMU_TYPE_TOOL, + .aliases = LIST_HEAD_INIT(tool.aliases), + .caps = LIST_HEAD_INIT(tool.caps), + .format = LIST_HEAD_INIT(tool.format), + }; + if (!tool.events_table) + tool.events_table = find_core_events_table("common", "common"); + + return &tool; +} diff --git a/tools/perf/util/tool_pmu.h b/tools/perf/util/tool_pmu.h new file mode 100644 index 000000000000..a60184859080 --- /dev/null +++ b/tools/perf/util/tool_pmu.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TOOL_PMU_H +#define __TOOL_PMU_H + +#include "pmu.h" + +struct evsel; +struct perf_thread_map; +struct print_callbacks; + +enum tool_pmu_event { + TOOL_PMU__EVENT_NONE = 0, + TOOL_PMU__EVENT_DURATION_TIME, + TOOL_PMU__EVENT_USER_TIME, + TOOL_PMU__EVENT_SYSTEM_TIME, + TOOL_PMU__EVENT_HAS_PMEM, + TOOL_PMU__EVENT_NUM_CORES, + TOOL_PMU__EVENT_NUM_CPUS, + TOOL_PMU__EVENT_NUM_CPUS_ONLINE, + TOOL_PMU__EVENT_NUM_DIES, + TOOL_PMU__EVENT_NUM_PACKAGES, + TOOL_PMU__EVENT_SLOTS, + TOOL_PMU__EVENT_SMT_ON, + TOOL_PMU__EVENT_SYSTEM_TSC_FREQ, + + TOOL_PMU__EVENT_MAX, +}; + +#define tool_pmu__for_each_event(ev) \ + for ((ev) = TOOL_PMU__EVENT_DURATION_TIME; (ev) < TOOL_PMU__EVENT_MAX; ev++) + +const char *tool_pmu__event_to_str(enum tool_pmu_event ev); +enum tool_pmu_event tool_pmu__str_to_event(const char *str); +bool tool_pmu__skip_event(const char *name); +int tool_pmu__num_skip_events(void); + +bool tool_pmu__read_event(enum tool_pmu_event ev, u64 *result); + +u64 tool_pmu__cpu_slots_per_cycle(void); + +bool perf_pmu__is_tool(const struct perf_pmu *pmu); + +bool evsel__is_tool(const struct evsel *evsel); +enum tool_pmu_event evsel__tool_event(const struct evsel *evsel); +const char *evsel__tool_pmu_event_name(const struct evsel *evsel); +int evsel__tool_pmu_prepare_open(struct evsel *evsel, + struct perf_cpu_map *cpus, + int nthreads); +int evsel__tool_pmu_open(struct evsel *evsel, + struct perf_thread_map *threads, + int start_cpu_map_idx, int end_cpu_map_idx); +int evsel__tool_pmu_read(struct evsel *evsel, int cpu_map_idx, int thread); + +struct perf_pmu *perf_pmus__tool_pmu(void); + +#endif /* __TOOL_PMU_H */ diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index f0332bd3a501..41d53e1b43e7 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -12,7 +12,7 @@ #include <linux/ctype.h> #include <linux/kernel.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> static int get_common_field(struct scripting_context *context, int *offset, int *size, const char *type) @@ -116,12 +116,6 @@ void event_format__fprintf(struct tep_event *event, trace_seq_destroy(&s); } -void event_format__print(struct tep_event *event, - int cpu, void *data, int size) -{ - return event_format__fprintf(event, cpu, data, size, stdout); -} - /* * prev_state is of size long, which is 32 bits on 32 bit architectures. * As it needs to have the same bits for both 32 bit and 64 bit architectures diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c index 1162c49b8082..ecbbb93f0185 100644 --- a/tools/perf/util/trace-event-read.c +++ b/tools/perf/util/trace-event-read.c @@ -11,7 +11,7 @@ #include <sys/stat.h> #include <sys/wait.h> #include <sys/mman.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <fcntl.h> #include <unistd.h> #include <errno.h> diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index bd0000300c77..5596fcda2c10 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -10,7 +10,7 @@ #include <string.h> #include <errno.h> #ifdef HAVE_LIBTRACEEVENT -#include <traceevent/event-parse.h> +#include <event-parse.h> #endif #include "debug.h" diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c index 8ad75b31e09b..6a8c66c64b70 100644 --- a/tools/perf/util/trace-event.c +++ b/tools/perf/util/trace-event.c @@ -8,7 +8,7 @@ #include <fcntl.h> #include <linux/kernel.h> #include <linux/err.h> -#include <traceevent/event-parse.h> +#include <event-parse.h> #include <api/fs/tracing_path.h> #include <api/fs/fs.h> #include "trace-event.h" diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index bbf8b26bc8da..79b939f947dd 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -42,9 +42,6 @@ struct tep_event *trace_event__tp_format_id(int id); void event_format__fprintf(struct tep_event *event, int cpu, void *data, int size, FILE *fp); -void event_format__print(struct tep_event *event, - int cpu, void *data, int size); - int parse_ftrace_file(struct tep_handle *pevent, char *buf, unsigned long size); int parse_event_file(struct tep_handle *pevent, char *buf, unsigned long size, char *sys); @@ -150,7 +147,7 @@ int common_lock_depth(struct scripting_context *context); int perf_sample__sprintf_flags(u32 flags, char *str, size_t sz); #if defined(LIBTRACEEVENT_VERSION) && LIBTRACEEVENT_VERSION >= MAKE_LIBTRACEEVENT_VERSION(1, 5, 0) -#include <traceevent/event-parse.h> +#include <event-parse.h> static inline bool tep_field_is_relative(unsigned long flags) { diff --git a/tools/perf/util/tsc.c b/tools/perf/util/tsc.c index 2e33a20e1e1b..511a517ce613 100644 --- a/tools/perf/util/tsc.c +++ b/tools/perf/util/tsc.c @@ -119,7 +119,7 @@ size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp) size_t ret; ret = fprintf(fp, "\n... Time Shift %" PRI_lu64 "\n", tc->time_shift); - ret += fprintf(fp, "... Time Muliplier %" PRI_lu64 "\n", tc->time_mult); + ret += fprintf(fp, "... Time Multiplier %" PRI_lu64 "\n", tc->time_mult); ret += fprintf(fp, "... Time Zero %" PRI_lu64 "\n", tc->time_zero); /* diff --git a/tools/perf/util/tsc.h b/tools/perf/util/tsc.h index 88fd1c4c1cb8..57ce8449647f 100644 --- a/tools/perf/util/tsc.h +++ b/tools/perf/util/tsc.h @@ -25,7 +25,7 @@ int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc, u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc); u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc); u64 rdtsc(void); -double arch_get_tsc_freq(void); +u64 arch_get_tsc_freq(void); size_t perf_event__fprintf_time_conv(union perf_event *event, FILE *fp); diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 9d55a13787ce..0f031eb80b4c 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -78,17 +78,23 @@ bool sysctl__nmi_watchdog_enabled(void) bool test_attr__enabled; +bool exclude_GH_default; + bool perf_host = true; bool perf_guest = false; void event_attr_init(struct perf_event_attr *attr) { + /* to capture ABI version */ + attr->size = sizeof(*attr); + + if (!exclude_GH_default) + return; + if (!perf_host) attr->exclude_host = 1; if (!perf_guest) attr->exclude_guest = 1; - /* to capture ABI version */ - attr->size = sizeof(*attr); } int mkdir_p(char *path, mode_t mode) @@ -336,91 +342,6 @@ bool perf_event_paranoid_check(int max_level) return perf_event_paranoid() <= max_level; } -static int -fetch_ubuntu_kernel_version(unsigned int *puint) -{ - ssize_t len; - size_t line_len = 0; - char *ptr, *line = NULL; - int version, patchlevel, sublevel, err; - FILE *vsig; - - if (!puint) - return 0; - - vsig = fopen("/proc/version_signature", "r"); - if (!vsig) { - pr_debug("Open /proc/version_signature failed: %s\n", - strerror(errno)); - return -1; - } - - len = getline(&line, &line_len, vsig); - fclose(vsig); - err = -1; - if (len <= 0) { - pr_debug("Reading from /proc/version_signature failed: %s\n", - strerror(errno)); - goto errout; - } - - ptr = strrchr(line, ' '); - if (!ptr) { - pr_debug("Parsing /proc/version_signature failed: %s\n", line); - goto errout; - } - - err = sscanf(ptr + 1, "%d.%d.%d", - &version, &patchlevel, &sublevel); - if (err != 3) { - pr_debug("Unable to get kernel version from /proc/version_signature '%s'\n", - line); - goto errout; - } - - *puint = (version << 16) + (patchlevel << 8) + sublevel; - err = 0; -errout: - free(line); - return err; -} - -int -fetch_kernel_version(unsigned int *puint, char *str, - size_t str_size) -{ - struct utsname utsname; - int version, patchlevel, sublevel, err; - bool int_ver_ready = false; - - if (access("/proc/version_signature", R_OK) == 0) - if (!fetch_ubuntu_kernel_version(puint)) - int_ver_ready = true; - - if (uname(&utsname)) - return -1; - - if (str && str_size) { - strncpy(str, utsname.release, str_size); - str[str_size - 1] = '\0'; - } - - if (!puint || int_ver_ready) - return 0; - - err = sscanf(utsname.release, "%d.%d.%d", - &version, &patchlevel, &sublevel); - - if (err != 3) { - pr_debug("Unable to get kernel version from uname '%s'\n", - utsname.release); - return -1; - } - - *puint = (version << 16) + (patchlevel << 8) + sublevel; - return 0; -} - int perf_tip(char **strp, const char *dirpath) { struct strlist *tips; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 9966c21aaf04..3423778e39a5 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -21,6 +21,9 @@ extern const char perf_more_info_string[]; extern const char *input_name; +/* This will control if perf_{host,guest} will set attr.exclude_{host,guest}. */ +extern bool exclude_GH_default; + extern bool perf_host; extern bool perf_guest; @@ -43,14 +46,6 @@ int sysctl__max_stack(void); bool sysctl__nmi_watchdog_enabled(void); -int fetch_kernel_version(unsigned int *puint, - char *str, size_t str_sz); -#define KVER_VERSION(x) (((x) >> 16) & 0xff) -#define KVER_PATCHLEVEL(x) (((x) >> 8) & 0xff) -#define KVER_SUBLEVEL(x) ((x) & 0xff) -#define KVER_FMT "%d.%d.%d" -#define KVER_PARAM(x) KVER_VERSION(x), KVER_PATCHLEVEL(x), KVER_SUBLEVEL(x) - int perf_tip(char **strp, const char *dirpath); #ifndef HAVE_SCHED_GETCPU_SUPPORT @@ -81,13 +76,6 @@ char *perf_exe(char *buf, int len); #endif #endif -extern bool test_attr__enabled; -void test_attr__ready(void); -void test_attr__init(void); -struct perf_event_attr; -void test_attr__open(struct perf_event_attr *attr, pid_t pid, struct perf_cpu cpu, - int fd, int group_fd, unsigned long flags); - struct perf_debuginfod { const char *urls; bool set; diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 1b6f8f6db7aa..c12f5d8c4bf6 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -308,8 +308,10 @@ static struct dso *machine__find_vdso(struct machine *machine, if (!dso) { dso = dsos__find(&machine->dsos, DSO__NAME_VDSO, true); - if (dso && dso_type != dso__type(dso, machine)) + if (dso && dso_type != dso__type(dso, machine)) { + dso__put(dso); dso = NULL; + } } break; case DSO__TYPE_X32BIT: diff --git a/tools/power/cpupower/.gitignore b/tools/power/cpupower/.gitignore index 7677329c42a6..5113d5a7aee0 100644 --- a/tools/power/cpupower/.gitignore +++ b/tools/power/cpupower/.gitignore @@ -27,3 +27,6 @@ debug/i386/intel_gsic debug/i386/powernow-k8-decode debug/x86_64/centrino-decode debug/x86_64/powernow-k8-decode + +# Clang's compilation database file +compile_commands.json diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index 6c02f401069e..175004ce44b2 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -57,7 +57,7 @@ LIB_MIN= 1 PACKAGE = cpupower PACKAGE_BUGREPORT = linux-pm@vger.kernel.org -LANGUAGES = de fr it cs pt ka +LANGUAGES = de fr it cs pt ka zh_CN # Directory definitions. These are default and most probably @@ -86,12 +86,12 @@ INSTALL_SCRIPT = ${INSTALL} -m 644 # If you are running a cross compiler, you may want to set this # to something more interesting, like "arm-linux-". If you want # to compile vs uClibc, that can be done here as well. -CROSS = #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- -CC = $(CROSS)gcc -LD = $(CROSS)gcc -AR = $(CROSS)ar -STRIP = $(CROSS)strip -RANLIB = $(CROSS)ranlib +CROSS ?= #/usr/i386-linux-uclibc/usr/bin/i386-uclibc- +CC ?= $(CROSS)gcc +LD ?= $(CROSS)gcc +AR ?= $(CROSS)ar +STRIP ?= $(CROSS)strip +RANLIB ?= $(CROSS)ranlib HOSTCC = gcc MKDIR = mkdir @@ -218,17 +218,28 @@ else endif $(QUIET) $(STRIPCMD) $@ +ifeq (, $(shell which xgettext)) +$(warning "Install xgettext to extract translatable strings.") +else $(OUTPUT)po/$(PACKAGE).pot: $(UTIL_SRC) $(ECHO) " GETTEXT " $@ $(QUIET) xgettext --default-domain=$(PACKAGE) --add-comments \ --keyword=_ --keyword=N_ $(UTIL_SRC) -p $(@D) -o $(@F) +endif +ifeq (, $(shell which msgfmt)) +$(warning "Install msgfmt to generate binary message catalogs.") +else $(OUTPUT)po/%.gmo: po/%.po $(ECHO) " MSGFMT " $@ $(QUIET) msgfmt -o $@ po/$*.po +endif create-gmo: ${GMO_FILES} +ifeq (, $(shell which msgmerge)) +$(warning "Install msgmerge to merge translations.") +else update-po: $(OUTPUT)po/$(PACKAGE).pot $(ECHO) " MSGMRG " $@ $(QUIET) @for HLANG in $(LANGUAGES); do \ @@ -241,6 +252,7 @@ update-po: $(OUTPUT)po/$(PACKAGE).pot rm -f $(OUTPUT)po/$$HLANG.new.po; \ fi; \ done; +endif compile-bench: $(OUTPUT)libcpupower.so.$(LIB_MAJ) @V=$(V) confdir=$(confdir) $(MAKE) -C bench O=$(OUTPUT) diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c index e63dc11fa3a5..080678d9d74e 100644 --- a/tools/power/cpupower/bench/parse.c +++ b/tools/power/cpupower/bench/parse.c @@ -4,6 +4,7 @@ * Copyright (C) 2008 Christian Kornacker <ckornacker@suse.de> */ +#include <errno.h> #include <stdio.h> #include <stdlib.h> #include <stdarg.h> @@ -165,8 +166,8 @@ int prepare_config(const char *path, struct config *config) configfile = fopen(path, "r"); if (configfile == NULL) { - perror("fopen"); - fprintf(stderr, "error: unable to read configfile\n"); + fprintf(stderr, "error: unable to read configfile: %s, %s\n", + path, strerror(errno)); free(config); return 1; } diff --git a/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py index 3d6f62b9556a..ca5aa46c9b20 100755 --- a/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py +++ b/tools/power/cpupower/bindings/python/test_raw_pylibcpupower.py @@ -15,22 +15,38 @@ else: print(f"cstate count error: return code: {cpu_cstates_count}") """ -Disable cstate (will fail if the above is 0, ex: a virtual machine) +Disable cstate (will fail if the above returns is under 1, ex: a virtual machine) """ cstate_disabled = p.cpuidle_state_disable(0, 0, 1) -if cpu_cstates_count == 0: - print(f"CPU 0 has {cpu_cstates_count} c-states") -else: - print(f"cstate count error: return code: {cpu_cstates_count}") match cstate_disabled: case 0: print(f"CPU state disabled") case -1: print(f"Idlestate not available") + case -2: + print(f"Disabling is not supported by the kernel") + case -3: + print(f"No write access to disable/enable C-states: try using sudo") case _: - print(f"Not documented") + print(f"Not documented: {cstate_disabled}") + +""" +Test cstate is disabled +""" +is_cstate_disabled = p.cpuidle_is_state_disabled(0, 0) +match is_cstate_disabled: + case 1: + print(f"CPU is disabled") + case 0: + print(f"CPU is enabled") + case -1: + print(f"Idlestate not available") + case -2: + print(f"Disabling is not supported by kernel") + case _: + print(f"Not documented: {is_cstate_disabled}") # Pointer example diff --git a/tools/power/cpupower/man/cpupower-set.1 b/tools/power/cpupower/man/cpupower-set.1 index 2bcc696f4496..500653ef98c7 100644 --- a/tools/power/cpupower/man/cpupower-set.1 +++ b/tools/power/cpupower/man/cpupower-set.1 @@ -3,7 +3,7 @@ cpupower\-set \- Set processor power related kernel or hardware configurations .SH SYNOPSIS .ft B -.B cpupower set [ \-b VAL ] +.B cpupower set [ \-b VAL | \-e POLICY | \-m MODE | \-t BOOL ] .SH DESCRIPTION @@ -19,7 +19,7 @@ described in the Options sections. Use \fBcpupower info \fP to read out current settings and whether they are supported on the system at all. -.SH Options +.SH OPTIONS .PP \-\-perf-bias, \-b .RS 4 @@ -56,6 +56,40 @@ Use \fBcpupower -c all info -b\fP to verify. This options needs the msr kernel driver (CONFIG_X86_MSR) loaded. .RE +.PP +\-\-epp, \-e +.RS 4 +Sets the energy performance policy preference on supported Intel or AMD +processors which use the Intel or AMD P-State cpufreq driver respectively. + +Available policies can be found with +\fBcat /sys/devices/system/cpu/cpufreq/policy0/energy_performance_available_preferences\fP : +.RS 4 +default performance balance_performance balance_power power +.RE + +.RE + +.PP +\-\-amd\-pstate\-mode, \-m +.RS 4 +Sets the AMD P-State mode for supported AMD processors. +Available modes are "active", "guided" or "passive". + +Refer to the AMD P-State kernel documentation for further information. + +.RE + +.PP +\-\-turbo\-boost, \-t +.RS 4 +This option is used to enable or disable the turbo boost feature on +supported Intel and AMD processors. + +This option takes as parameter either \fB1\fP to enable, or \fB0\fP to disable the feature. + +.RE + .SH "SEE ALSO" cpupower-info(1), cpupower-monitor(1), powertop(1) .PP diff --git a/tools/power/cpupower/po/zh_CN.po b/tools/power/cpupower/po/zh_CN.po new file mode 100644 index 000000000000..0489abffb702 --- /dev/null +++ b/tools/power/cpupower/po/zh_CN.po @@ -0,0 +1,942 @@ +# Chinese Simplified translations for cpufrequtils package +# Copyright (C) 2004 THE PACKAGE'S COPYRIGHT HOLDER +# This file is distributed under the same license as the cpufrequtils package. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: cpufrequtils 006\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2011-03-08 17:03+0100\n" +"PO-Revision-Date: 2024-05-22 15:36+0000\n" +"Last-Translator: Kieran Moy <kfatyuip@gmail.com>\n" +"Language-Team: NONE\n" +"Language: zh_CN\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"X-Generator: Poedit 3.4.2\n" + +#: utils/idle_monitor/nhm_idle.c:36 +msgid "Processor Core C3" +msgstr "处理器 Core C3" + +#: utils/idle_monitor/nhm_idle.c:43 +msgid "Processor Core C6" +msgstr "处理器 Core C6" + +#: utils/idle_monitor/nhm_idle.c:51 +msgid "Processor Package C3" +msgstr "处理器套件 C3" + +#: utils/idle_monitor/nhm_idle.c:58 utils/idle_monitor/amd_fam14h_idle.c:70 +msgid "Processor Package C6" +msgstr "处理器套件 C6" + +#: utils/idle_monitor/snb_idle.c:33 +msgid "Processor Core C7" +msgstr "处理器 Core C7" + +#: utils/idle_monitor/snb_idle.c:40 +msgid "Processor Package C2" +msgstr "处理器套件 C2" + +#: utils/idle_monitor/snb_idle.c:47 +msgid "Processor Package C7" +msgstr "处理器套件 C7" + +#: utils/idle_monitor/amd_fam14h_idle.c:56 +msgid "Package in sleep state (PC1 or deeper)" +msgstr "Package in sleep state (PC1 或更深)" + +#: utils/idle_monitor/amd_fam14h_idle.c:63 +msgid "Processor Package C1" +msgstr "处理器套件 C1" + +#: utils/idle_monitor/amd_fam14h_idle.c:77 +msgid "North Bridge P1 boolean counter (returns 0 or 1)" +msgstr "北桥 P1 布尔计数器(返回 0 或 1)" + +#: utils/idle_monitor/mperf_monitor.c:35 +msgid "Processor Core not idle" +msgstr "处理器 Core不空闲" + +#: utils/idle_monitor/mperf_monitor.c:42 +msgid "Processor Core in an idle state" +msgstr "处理器 Core处于空闲状态" + +#: utils/idle_monitor/mperf_monitor.c:50 +msgid "Average Frequency (including boost) in MHz" +msgstr "平均频率(包括增加频率),单位 MHz" + +#: utils/idle_monitor/cpupower-monitor.c:66 +#, c-format +msgid "" +"cpupower monitor: [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i " +"interval_sec | -c command ...]\n" +msgstr "" +"cpupower monitor:[-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i " +"interval_sec | -c command...]\n" + +#: utils/idle_monitor/cpupower-monitor.c:69 +#, c-format +msgid "" +"cpupower monitor: [-v] [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i " +"interval_sec | -c command ...]\n" +msgstr "" +"cpupower monitor:[-v] [-h] [ [-t] | [-l] | [-m <mon1>,[<mon2>] ] ] [-i " +"interval_sec | -c command...]\n" + +#: utils/idle_monitor/cpupower-monitor.c:71 +#, c-format +msgid "\t -v: be more verbose\n" +msgstr "-v:更详细\n" + +#: utils/idle_monitor/cpupower-monitor.c:73 +#, c-format +msgid "\t -h: print this help\n" +msgstr "-h:打印此帮助\n" + +#: utils/idle_monitor/cpupower-monitor.c:74 +#, c-format +msgid "\t -i: time interval to measure for in seconds (default 1)\n" +msgstr "-i:测量的时间间隔(以秒为单位)(默认 1)\n" + +#: utils/idle_monitor/cpupower-monitor.c:75 +#, c-format +msgid "\t -t: show CPU topology/hierarchy\n" +msgstr "-t:显示CPU拓扑/层次结构\n" + +#: utils/idle_monitor/cpupower-monitor.c:76 +#, c-format +msgid "\t -l: list available CPU sleep monitors (for use with -m)\n" +msgstr "-l:列出可用的 CPU 睡眠监视器(与 -m 一起使用)\n" + +#: utils/idle_monitor/cpupower-monitor.c:77 +#, c-format +msgid "\t -m: show specific CPU sleep monitors only (in same order)\n" +msgstr "-m:仅显示特定的CPU睡眠监视器(按相同顺序)\n" + +#: utils/idle_monitor/cpupower-monitor.c:79 +#, c-format +msgid "" +"only one of: -t, -l, -m are allowed\n" +"If none of them is passed," +msgstr "" +"仅允许以下之一:-t、-l、-m\n" +"如果都没有通过的话" + +#: utils/idle_monitor/cpupower-monitor.c:80 +#, c-format +msgid " all supported monitors are shown\n" +msgstr " 显示所有支持的显示器\n" + +#: utils/idle_monitor/cpupower-monitor.c:197 +#, c-format +msgid "Monitor %s, Counter %s has no count function. Implementation error\n" +msgstr "监视器 %s、计数器 %s 无计数功能。 执行错误\n" + +#: utils/idle_monitor/cpupower-monitor.c:207 +#, c-format +msgid " *is offline\n" +msgstr " *离线\n" + +#: utils/idle_monitor/cpupower-monitor.c:236 +#, c-format +msgid "%s: max monitor name length (%d) exceeded\n" +msgstr "%s:超出最大监视器名称长度 (%d)\n" + +#: utils/idle_monitor/cpupower-monitor.c:250 +#, c-format +msgid "No matching monitor found in %s, try -l option\n" +msgstr "在 %s 中找不到匹配的监视器,请尝试 -l 选项\n" + +#: utils/idle_monitor/cpupower-monitor.c:266 +#, c-format +msgid "Monitor \"%s\" (%d states) - Might overflow after %u s\n" +msgstr "监视器“%s”(%d 状态)- 可能会在 %u 秒后溢出\n" + +#: utils/idle_monitor/cpupower-monitor.c:319 +#, c-format +msgid "%s took %.5f seconds and exited with status %d\n" +msgstr "%s 用了 %.5f 秒并退出,状态为 %d\n" + +#: utils/idle_monitor/cpupower-monitor.c:406 +#, c-format +msgid "Cannot read number of available processors\n" +msgstr "无法读取可用处理器的数量\n" + +#: utils/idle_monitor/cpupower-monitor.c:417 +#, c-format +msgid "Available monitor %s needs root access\n" +msgstr "可用监视器 %s 需要 root 访问权限\n" + +#: utils/idle_monitor/cpupower-monitor.c:428 +#, c-format +msgid "No HW Cstate monitors found\n" +msgstr "未找到 HW Cstate 监视器\n" + +#: utils/cpupower.c:78 +#, c-format +msgid "cpupower [ -c cpulist ] subcommand [ARGS]\n" +msgstr "cpupower [ -c cpulist ] subcommand [ARGS]\n" + +#: utils/cpupower.c:79 +#, c-format +msgid "cpupower --version\n" +msgstr "cpupower --version\n" + +#: utils/cpupower.c:80 +#, c-format +msgid "Supported subcommands are:\n" +msgstr "支持的子命令有:\n" + +#: utils/cpupower.c:83 +#, c-format +msgid "" +"\n" +"Some subcommands can make use of the -c cpulist option.\n" +msgstr "" +"\n" +"某些子命令可以使用 -c cpulist 选项。\n" + +#: utils/cpupower.c:84 +#, c-format +msgid "Look at the general cpupower manpage how to use it\n" +msgstr "看看一般的cpupower manpage如何使用它\n" + +#: utils/cpupower.c:85 +#, c-format +msgid "and read up the subcommand's manpage whether it is supported.\n" +msgstr "并阅读子命令的manpage是否受支持。\n" + +#: utils/cpupower.c:86 +#, c-format +msgid "" +"\n" +"Use cpupower help subcommand for getting help for above subcommands.\n" +msgstr "" +"\n" +"使用 cpupower help subcommand获取上述子命令的帮助。\n" + +#: utils/cpupower.c:91 +#, c-format +msgid "Report errors and bugs to %s, please.\n" +msgstr "请向 %s 报告错误和错误。\n" + +#: utils/cpupower.c:114 +#, c-format +msgid "Error parsing cpu list\n" +msgstr "解析cpu列表时出错\n" + +#: utils/cpupower.c:172 +#, c-format +msgid "Subcommand %s needs root privileges\n" +msgstr "子命令 %s 需要 root 权限\n" + +#: utils/cpufreq-info.c:31 +#, c-format +msgid "Couldn't count the number of CPUs (%s: %s), assuming 1\n" +msgstr "无法计算 CPU 数量(%s:%s),假设为 1\n" + +#: utils/cpufreq-info.c:63 +#, c-format +msgid "" +" minimum CPU frequency - maximum CPU frequency - governor\n" +msgstr "最低 CPU 频率 - 最高 CPU 频率 - 调速器\n" + +#: utils/cpufreq-info.c:151 +#, c-format +msgid "Error while evaluating Boost Capabilities on CPU %d -- are you root?\n" +msgstr "评估 CPU %d 上的 Boost 功能时出错 - 您是 root 吗?\n" + +#. P state changes via MSR are identified via cpuid 80000007 +#. on Intel and AMD, but we assume boost capable machines can do that +#. if (cpuid_eax(0x80000000) >= 0x80000007 +#. && (cpuid_edx(0x80000007) & (1 << 7))) +#. +#: utils/cpufreq-info.c:161 +#, c-format +msgid " boost state support: \n" +msgstr " 升压状态支持:\n" + +#: utils/cpufreq-info.c:163 +#, c-format +msgid " Supported: %s\n" +msgstr " 支持:%s\n" + +#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164 +msgid "yes" +msgstr "是" + +#: utils/cpufreq-info.c:163 utils/cpufreq-info.c:164 +msgid "no" +msgstr "不是" + +#: utils/cpufreq-info.c:164 +#, c-format +msgid " Active: %s\n" +msgstr " 活跃:%s\n" + +#: utils/cpufreq-info.c:177 +#, c-format +msgid " Boost States: %d\n" +msgstr " 提升状态:%d\n" + +#: utils/cpufreq-info.c:178 +#, c-format +msgid " Total States: %d\n" +msgstr " 状态总数:%d\n" + +#: utils/cpufreq-info.c:181 +#, c-format +msgid " Pstate-Pb%d: %luMHz (boost state)\n" +msgstr " Pstate-Pb%d:%luMHz(升压状态)\n" + +#: utils/cpufreq-info.c:184 +#, c-format +msgid " Pstate-P%d: %luMHz\n" +msgstr " Pstate-P%d:%luMHz\n" + +#: utils/cpufreq-info.c:211 +#, c-format +msgid " no or unknown cpufreq driver is active on this CPU\n" +msgstr " 该 CPU 上没有或未知的 cpufreq 驱动程序处于活动状态\n" + +#: utils/cpufreq-info.c:213 +#, c-format +msgid " driver: %s\n" +msgstr " 驱动程序:%s\n" + +#: utils/cpufreq-info.c:219 +#, c-format +msgid " CPUs which run at the same hardware frequency: " +msgstr " 以相同硬件频率运行的 CPU:" + +#: utils/cpufreq-info.c:230 +#, c-format +msgid " CPUs which need to have their frequency coordinated by software: " +msgstr " 需要通过软件协调频率的 CPU:" + +#: utils/cpufreq-info.c:241 +#, c-format +msgid " maximum transition latency: " +msgstr " 最大转换延迟:" + +#: utils/cpufreq-info.c:247 +#, c-format +msgid " hardware limits: " +msgstr " 硬件限制:" + +#: utils/cpufreq-info.c:256 +#, c-format +msgid " available frequency steps: " +msgstr " 可用频率范围:" + +#: utils/cpufreq-info.c:269 +#, c-format +msgid " available cpufreq governors: " +msgstr " 可用的cpufreq调节器:" + +#: utils/cpufreq-info.c:280 +#, c-format +msgid " current policy: frequency should be within " +msgstr " 当前政策:频率应在" + +#: utils/cpufreq-info.c:282 +#, c-format +msgid " and " +msgstr "和" + +#: utils/cpufreq-info.c:286 +#, c-format +msgid "" +"The governor \"%s\" may decide which speed to use\n" +" within this range.\n" +msgstr "" +"调速器“%s”可以决定使用哪种速度\n" +" 在这个范围内。\n" + +#: utils/cpufreq-info.c:293 +#, c-format +msgid " current CPU frequency is " +msgstr " 当前CPU频率是" + +#: utils/cpufreq-info.c:296 +#, c-format +msgid " (asserted by call to hardware)" +msgstr " (通过调用硬件来断言)" + +#: utils/cpufreq-info.c:304 +#, c-format +msgid " cpufreq stats: " +msgstr " cpu频率统计:" + +#: utils/cpufreq-info.c:472 +#, c-format +msgid "Usage: cpupower freqinfo [options]\n" +msgstr "用法:cpupower freqinfo [选项]\n" + +#: utils/cpufreq-info.c:473 utils/cpufreq-set.c:26 utils/cpupower-set.c:23 +#: utils/cpupower-info.c:22 utils/cpuidle-info.c:148 +#, c-format +msgid "Options:\n" +msgstr "选项:\n" + +#: utils/cpufreq-info.c:474 +#, c-format +msgid " -e, --debug Prints out debug information [default]\n" +msgstr " -e, --debug 打印出调试信息[默认]\n" + +#: utils/cpufreq-info.c:475 +#, c-format +msgid "" +" -f, --freq Get frequency the CPU currently runs at, according\n" +" to the cpufreq core *\n" +msgstr "" +" -f, --freq 获取CPU当前运行的频率,根据\n" +" 到 cpufreq 核心 *\n" + +#: utils/cpufreq-info.c:477 +#, c-format +msgid "" +" -w, --hwfreq Get frequency the CPU currently runs at, by reading\n" +" it from hardware (only available to root) *\n" +msgstr "" +" -w, --hwfreq 通过读取获取CPU当前运行的频率\n" +" 它来自硬件(仅适用于root)*\n" + +#: utils/cpufreq-info.c:479 +#, c-format +msgid "" +" -l, --hwlimits Determine the minimum and maximum CPU frequency " +"allowed *\n" +msgstr " -l, --hwlimits 确定允许的最小和最大 CPU 频率 *\n" + +#: utils/cpufreq-info.c:480 +#, c-format +msgid " -d, --driver Determines the used cpufreq kernel driver *\n" +msgstr " -d, --driver 确定使用的 cpufreq 内核驱动程序 *\n" + +#: utils/cpufreq-info.c:481 +#, c-format +msgid " -p, --policy Gets the currently used cpufreq policy *\n" +msgstr " -p, --policy 获取当前使用的cpufreq策略 *\n" + +#: utils/cpufreq-info.c:482 +#, c-format +msgid " -g, --governors Determines available cpufreq governors *\n" +msgstr " -g, --governors 确定可用的 cpufreq 调节器 *\n" + +#: utils/cpufreq-info.c:483 +#, c-format +msgid "" +" -r, --related-cpus Determines which CPUs run at the same hardware " +"frequency *\n" +msgstr " -r, --lated-cpus 确定哪些 CPU 以相同的硬件频率运行 *\n" + +#: utils/cpufreq-info.c:484 +#, c-format +msgid "" +" -a, --affected-cpus Determines which CPUs need to have their frequency\n" +" coordinated by software *\n" +msgstr "" +" -a, --affected-cpus 确定哪些 CPU 需要其频率\n" +" 由软件协调*\n" + +#: utils/cpufreq-info.c:486 +#, c-format +msgid " -s, --stats Shows cpufreq statistics if available\n" +msgstr " -s, --stats 显示 cpufreq 统计信息(如果有)\n" + +#: utils/cpufreq-info.c:487 +#, c-format +msgid "" +" -y, --latency Determines the maximum latency on CPU frequency " +"changes *\n" +msgstr " -y, --latency 确定 CPU 频率变化的最大延迟*\n" + +#: utils/cpufreq-info.c:488 +#, c-format +msgid " -b, --boost Checks for turbo or boost modes *\n" +msgstr " -b, --boost 检查 Turbo 或 boost 模式 *\n" + +#: utils/cpufreq-info.c:489 +#, c-format +msgid "" +" -o, --proc Prints out information like provided by the /proc/" +"cpufreq\n" +" interface in 2.4. and early 2.6. kernels\n" +msgstr "" +" -o, --proc 打印 /proc/cpufreq 提供的信息\n" +" 2.4 中的接口。 以及 2.6 之前的内核。\n" + +#: utils/cpufreq-info.c:491 +#, c-format +msgid "" +" -m, --human human-readable output for the -f, -w, -s and -y " +"parameters\n" +msgstr " -m, -- human -f, -w, -s 和 -y 参数的人类可读输出\n" + +#: utils/cpufreq-info.c:492 utils/cpuidle-info.c:152 +#, c-format +msgid " -h, --help Prints out this screen\n" +msgstr " -h, --help 打印此屏幕\n" + +#: utils/cpufreq-info.c:495 +#, c-format +msgid "" +"If no argument or only the -c, --cpu parameter is given, debug output " +"about\n" +"cpufreq is printed which is useful e.g. for reporting bugs.\n" +msgstr "" +"screen如果没有参数或仅给出了 -c, --cpu 参数,则调试输出有关\n" +"cpufreq 被打印出来,这很有用,例如 用于报告错误。\n" + +#: utils/cpufreq-info.c:497 +#, c-format +msgid "" +"For the arguments marked with *, omitting the -c or --cpu argument is\n" +"equivalent to setting it to zero\n" +msgstr "" +"对于标有 * 的参数,省略 -c 或 --cpu 参数是\n" +"相当于将其设置为零\n" + +#: utils/cpufreq-info.c:580 +#, c-format +msgid "" +"The argument passed to this tool can't be combined with passing a --cpu " +"argument\n" +msgstr "传递给此工具的参数不能与传递 --cpu 参数结合使用\n" + +#: utils/cpufreq-info.c:596 +#, c-format +msgid "" +"You can't specify more than one --cpu parameter and/or\n" +"more than one output-specific argument\n" +msgstr "" +"您不能指定多个 --cpu 参数和/或\n" +"多个特定于输出的参数\n" + +#: utils/cpufreq-info.c:600 utils/cpufreq-set.c:82 utils/cpupower-set.c:42 +#: utils/cpupower-info.c:42 utils/cpuidle-info.c:213 +#, c-format +msgid "invalid or unknown argument\n" +msgstr "无效或未知的参数\n" + +#: utils/cpufreq-info.c:617 +#, c-format +msgid "couldn't analyze CPU %d as it doesn't seem to be present\n" +msgstr "无法分析 CPU %d,因为它似乎不存在\n" + +#: utils/cpufreq-info.c:620 utils/cpupower-info.c:142 +#, c-format +msgid "analyzing CPU %d:\n" +msgstr "分析 CPU %d:\n" + +#: utils/cpufreq-set.c:25 +#, c-format +msgid "Usage: cpupower frequency-set [options]\n" +msgstr "用法:cpupower frequency-set [选项]\n" + +#: utils/cpufreq-set.c:27 +#, c-format +msgid "" +" -d FREQ, --min FREQ new minimum CPU frequency the governor may " +"select\n" +msgstr " -d FREQ, --min FREQ 调控器可以选择的新的最小 CPU 频率\n" + +#: utils/cpufreq-set.c:28 +#, c-format +msgid "" +" -u FREQ, --max FREQ new maximum CPU frequency the governor may " +"select\n" +msgstr " -u FREQ, --max FREQ 调控器可以选择的新的最大 CPU 频率\n" + +#: utils/cpufreq-set.c:29 +#, c-format +msgid " -g GOV, --governor GOV new cpufreq governor\n" +msgstr " -g GOV, --governor GOV 新的 cpufreq 调节器\n" + +#: utils/cpufreq-set.c:30 +#, c-format +msgid "" +" -f FREQ, --freq FREQ specific frequency to be set. Requires " +"userspace\n" +" governor to be available and loaded\n" +msgstr "" +" -f FREQ, --freq FREQ 要设置的特定频率。 需要用户空间\n" +" 调速器可用并已加载\n" + +#: utils/cpufreq-set.c:32 +#, c-format +msgid " -r, --related Switches all hardware-related CPUs\n" +msgstr " -r, --related 切换所有与硬件相关的CPU\n" + +#: utils/cpufreq-set.c:33 utils/cpupower-set.c:28 utils/cpupower-info.c:27 +#, c-format +msgid " -h, --help Prints out this screen\n" +msgstr " -h, --help 打印此屏幕\n" + +#: utils/cpufreq-set.c:35 +#, c-format +msgid "" +"Notes:\n" +"1. Omitting the -c or --cpu argument is equivalent to setting it to " +"\"all\"\n" +msgstr "" +"注意:\n" +"1.省略-c或--cpu参数相当于将其设置为“all”\n" + +#: utils/cpufreq-set.c:37 +#, c-format +msgid "" +"2. The -f FREQ, --freq FREQ parameter cannot be combined with any other " +"parameter\n" +" except the -c CPU, --cpu CPU parameter\n" +"3. FREQuencies can be passed in Hz, kHz (default), MHz, GHz, or THz\n" +" by postfixing the value with the wanted unit name, without any space\n" +" (FREQuency in kHz =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000).\n" +msgstr "" +"2. -f FREQ、--freq FREQ参数不能与任何其他参数组合使用\n" +" 除了 -c CPU、--cpu CPU 参数\n" +"3. 频率可以以 Hz、kHz(默认)、MHz、GHz 或 THz 为单位传递\n" +" 通过在值后面添加所需的单位名称,不带任何空格\n" +" (以 kHz 为单位的频率 =^ Hz * 0.001 =^ MHz * 1000 =^ GHz * 1000000)。\n" + +#: utils/cpufreq-set.c:57 +#, c-format +msgid "" +"Error setting new values. Common errors:\n" +"- Do you have proper administration rights? (super-user?)\n" +"- Is the governor you requested available and modprobed?\n" +"- Trying to set an invalid policy?\n" +"- Trying to set a specific frequency, but userspace governor is not " +"available,\n" +" for example because of hardware which cannot be set to a specific " +"frequency\n" +" or because the userspace governor isn't loaded?\n" +msgstr "" +"设置新值时出错。 常见错误:\n" +"- 您有适当的管理权吗? (超级用户?)\n" +"- 您请求的调控器是否可用并已进行 modprobed?\n" +"- 尝试设置无效的策略?\n" +"- 尝试设置特定频率,但用户空间调控器不可用,\n" +" 例如由于硬件无法设置为特定频率\n" +" 或者因为用户空间调控器未加载?\n" + +#: utils/cpufreq-set.c:170 +#, c-format +msgid "wrong, unknown or unhandled CPU?\n" +msgstr "错误、未知或未处理的CPU?\n" + +#: utils/cpufreq-set.c:302 +#, c-format +msgid "" +"the -f/--freq parameter cannot be combined with -d/--min, -u/--max or\n" +"-g/--governor parameters\n" +msgstr "" +"-f/--freq 参数不能与 -d/--min、-u/--max 或\n" +"-g/--调速器参数\n" + +#: utils/cpufreq-set.c:308 +#, c-format +msgid "" +"At least one parameter out of -f/--freq, -d/--min, -u/--max, and\n" +"-g/--governor must be passed\n" +msgstr "" +"-f/--freq、-d/--min、-u/--max 和 -f/--freq 中的至少一个参数\n" +"-g/--governor 必须通过\n" + +#: utils/cpufreq-set.c:347 +#, c-format +msgid "Setting cpu: %d\n" +msgstr "设置CPU:%d\n" + +#: utils/cpupower-set.c:22 +#, c-format +msgid "Usage: cpupower set [ -b val ] [ -m val ] [ -s val ]\n" +msgstr "用法: cpupower set [ -b val ] [ -m val ] [ -s val ]\n" + +#: utils/cpupower-set.c:24 +#, c-format +msgid "" +" -b, --perf-bias [VAL] Sets CPU's power vs performance policy on some\n" +" Intel models [0-15], see manpage for details\n" +msgstr "" +" -b, --perf-bias [VAL] 设置 CPU 的功耗与性能策略\n" +" Intel 型号 [0-15],请参阅manpage了解详细信息\n" + +#: utils/cpupower-set.c:26 +#, c-format +msgid "" +" -m, --sched-mc [VAL] Sets the kernel's multi core scheduler policy.\n" +msgstr " -m, --sched-mc [VAL] 设置内核的多核调度程序策略。\n" + +#: utils/cpupower-set.c:27 +#, c-format +msgid "" +" -s, --sched-smt [VAL] Sets the kernel's thread sibling scheduler " +"policy.\n" +msgstr " -s, --sched-smt [VAL] 设置内核的线程同级调度程序策略。\n" + +#: utils/cpupower-set.c:80 +#, c-format +msgid "--perf-bias param out of range [0-%d]\n" +msgstr "--perf-bias 参数超出范围 [0-%d]\n" + +#: utils/cpupower-set.c:91 +#, c-format +msgid "--sched-mc param out of range [0-%d]\n" +msgstr "--sched-mc 参数超出范围 [0-%d]\n" + +#: utils/cpupower-set.c:102 +#, c-format +msgid "--sched-smt param out of range [0-%d]\n" +msgstr "--sched-smt 参数超出范围 [0-%d]\n" + +#: utils/cpupower-set.c:121 +#, c-format +msgid "Error setting sched-mc %s\n" +msgstr "设置 sched-mc %s 时出错\n" + +#: utils/cpupower-set.c:127 +#, c-format +msgid "Error setting sched-smt %s\n" +msgstr "设置 sched-smt %s 时出错\n" + +#: utils/cpupower-set.c:146 +#, c-format +msgid "Error setting perf-bias value on CPU %d\n" +msgstr "在 CPU %d 上设置性能偏差值时出错\n" + +#: utils/cpupower-info.c:21 +#, c-format +msgid "Usage: cpupower info [ -b ] [ -m ] [ -s ]\n" +msgstr "用法:cpupower info [-b][-m][-s]\n" + +#: utils/cpupower-info.c:23 +#, c-format +msgid "" +" -b, --perf-bias Gets CPU's power vs performance policy on some\n" +" Intel models [0-15], see manpage for details\n" +msgstr "" +" -b, --perf-bias 获取 CPU 在某些方面的功耗与性能策略\n" +" Intel 型号 [0-15],请参阅联机帮助页了解详细信" +"息\n" + +#: utils/cpupower-info.c:25 +#, c-format +msgid " -m, --sched-mc Gets the kernel's multi core scheduler policy.\n" +msgstr " -m, --sched-mc 获取内核的多核调度程序策略。\n" + +#: utils/cpupower-info.c:26 +#, c-format +msgid "" +" -s, --sched-smt Gets the kernel's thread sibling scheduler policy.\n" +msgstr " -s, --sched-smt 获取内核的线程同级调度程序策略。\n" + +#: utils/cpupower-info.c:28 +#, c-format +msgid "" +"\n" +"Passing no option will show all info, by default only on core 0\n" +msgstr "" +"\n" +"不传递任何选项将显示所有信息,默认情况下仅在核心 0 上\n" + +#: utils/cpupower-info.c:102 +#, c-format +msgid "System's multi core scheduler setting: " +msgstr "系统的多核调度器设置:" + +#. if sysfs file is missing it's: errno == ENOENT +#: utils/cpupower-info.c:105 utils/cpupower-info.c:114 +#, c-format +msgid "not supported\n" +msgstr "不支持\n" + +#: utils/cpupower-info.c:111 +#, c-format +msgid "System's thread sibling scheduler setting: " +msgstr "系统的线程兄调度程序设置:" + +#: utils/cpupower-info.c:126 +#, c-format +msgid "Intel's performance bias setting needs root privileges\n" +msgstr "Intel的性能偏差设置需要root权限\n" + +#: utils/cpupower-info.c:128 +#, c-format +msgid "System does not support Intel's performance bias setting\n" +msgstr "系统不支持Intel的性能偏差设置\n" + +#: utils/cpupower-info.c:147 +#, c-format +msgid "Could not read perf-bias value\n" +msgstr "无法读取性能偏差值\n" + +#: utils/cpupower-info.c:150 +#, c-format +msgid "perf-bias: %d\n" +msgstr "性能偏差:%d\n" + +#: utils/cpuidle-info.c:28 +#, c-format +msgid "Analyzing CPU %d:\n" +msgstr "正在分析 CPU %d:\n" + +#: utils/cpuidle-info.c:32 +#, c-format +msgid "CPU %u: No idle states\n" +msgstr "CPU %u:无空闲状态\n" + +#: utils/cpuidle-info.c:36 +#, c-format +msgid "CPU %u: Can't read idle state info\n" +msgstr "CPU %u:无法读取空闲状态信息\n" + +#: utils/cpuidle-info.c:41 +#, c-format +msgid "Could not determine max idle state %u\n" +msgstr "无法确定最大空闲状态 %u\n" + +#: utils/cpuidle-info.c:46 +#, c-format +msgid "Number of idle states: %d\n" +msgstr "空闲状态数:%d\n" + +#: utils/cpuidle-info.c:48 +#, c-format +msgid "Available idle states:" +msgstr "可用的空闲状态:" + +#: utils/cpuidle-info.c:71 +#, c-format +msgid "Flags/Description: %s\n" +msgstr "标志/描述:%s\n" + +#: utils/cpuidle-info.c:74 +#, c-format +msgid "Latency: %lu\n" +msgstr "延迟:%lu\n" + +#: utils/cpuidle-info.c:76 +#, c-format +msgid "Usage: %lu\n" +msgstr "用法:%lu\n" + +#: utils/cpuidle-info.c:78 +#, c-format +msgid "Duration: %llu\n" +msgstr "持续时间:%llu\n" + +#: utils/cpuidle-info.c:90 +#, c-format +msgid "Could not determine cpuidle driver\n" +msgstr "无法确定 cpuidle 驱动程序\n" + +#: utils/cpuidle-info.c:94 +#, c-format +msgid "CPUidle driver: %s\n" +msgstr "CPU 空闲驱动程序:%s\n" + +#: utils/cpuidle-info.c:99 +#, c-format +msgid "Could not determine cpuidle governor\n" +msgstr "无法确定 cpuidle 调控器\n" + +#: utils/cpuidle-info.c:103 +#, c-format +msgid "CPUidle governor: %s\n" +msgstr "CPU 空闲调节器:%s\n" + +#: utils/cpuidle-info.c:122 +#, c-format +msgid "CPU %u: Can't read C-state info\n" +msgstr "CPU %u:无法读取 C 状态信息\n" + +#. printf("Cstates: %d\n", cstates); +#: utils/cpuidle-info.c:127 +#, c-format +msgid "active state: C0\n" +msgstr "活动状态: C0\n" + +#: utils/cpuidle-info.c:128 +#, c-format +msgid "max_cstate: C%u\n" +msgstr "最大c状态: C%u\n" + +#: utils/cpuidle-info.c:129 +#, c-format +msgid "maximum allowed latency: %lu usec\n" +msgstr "允许的最大延迟:%lu usec\n" + +#: utils/cpuidle-info.c:130 +#, c-format +msgid "states:\t\n" +msgstr "状态:\t\n" + +#: utils/cpuidle-info.c:132 +#, c-format +msgid " C%d: type[C%d] " +msgstr " C%d: 类型[C%d]" + +#: utils/cpuidle-info.c:134 +#, c-format +msgid "promotion[--] demotion[--] " +msgstr "晋升[--] 降级[--]" + +#: utils/cpuidle-info.c:135 +#, c-format +msgid "latency[%03lu] " +msgstr "延迟[%03lu]" + +#: utils/cpuidle-info.c:137 +#, c-format +msgid "usage[%08lu] " +msgstr "使用情况[%08lu]" + +#: utils/cpuidle-info.c:139 +#, c-format +msgid "duration[%020Lu] \n" +msgstr "持续时间[%020Lu]\n" + +#: utils/cpuidle-info.c:147 +#, c-format +msgid "Usage: cpupower idleinfo [options]\n" +msgstr "用法:cpupower idleinfo [选项]\n" + +#: utils/cpuidle-info.c:149 +#, c-format +msgid " -s, --silent Only show general C-state information\n" +msgstr " -s, --silent 只显示一般C状态信息\n" + +#: utils/cpuidle-info.c:150 +#, c-format +msgid "" +" -o, --proc Prints out information like provided by the /proc/" +"acpi/processor/*/power\n" +" interface in older kernels\n" +msgstr "" +" -o, --proc 打印 /proc/acpi/processor/*/power 提供的信息\n" +" 旧内核中的接口\n" + +#: utils/cpuidle-info.c:209 +#, c-format +msgid "You can't specify more than one output-specific argument\n" +msgstr "您不能指定多个特定于输出的参数\n" + +#~ msgid "" +#~ " -c CPU, --cpu CPU CPU number which information shall be determined " +#~ "about\n" +#~ msgstr "" +#~ " -c CPU, --cpu CPU Numéro du CPU pour lequel l'information sera " +#~ "affichée\n" + +#~ msgid "" +#~ " -c CPU, --cpu CPU number of CPU where cpufreq settings shall be " +#~ "modified\n" +#~ msgstr "" +#~ " -c CPU, --cpu CPU numéro du CPU à prendre en compte pour les\n" +#~ " changements\n" diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8 index 643271b6fc6f..491ca21dccdb 100644 --- a/tools/power/pm-graph/sleepgraph.8 +++ b/tools/power/pm-graph/sleepgraph.8 @@ -81,6 +81,9 @@ as resume failures. .TP \fB-wifitrace\fR Trace through the wifi reconnect time and include it in the timeline. +.TP +\fB-debugtiming\fR +Add timestamp to each printed output line, accurate to the millisecond. .SS "advanced" .TP diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py index ef87e63c05c7..918eae58b0b4 100755 --- a/tools/power/pm-graph/sleepgraph.py +++ b/tools/power/pm-graph/sleepgraph.py @@ -18,7 +18,7 @@ # # Links: # Home Page -# https://01.org/pm-graph +# https://www.intel.com/content/www/us/en/developer/topic-technology/open/pm-graph/overview.html # Source repo # git@github.com:intel/pm-graph # @@ -65,6 +65,7 @@ import gzip from threading import Thread from subprocess import call, Popen, PIPE import base64 +import traceback debugtiming = False mystarttime = time.time() @@ -86,7 +87,7 @@ def ascii(text): # store system values and test parameters class SystemValues: title = 'SleepGraph' - version = '5.12' + version = '5.13' ansi = False rs = 0 display = '' @@ -236,7 +237,11 @@ class SystemValues: 'msleep': { 'args_x86_64': {'time':'%di:s32'}, 'ub': 1 }, 'schedule_timeout': { 'args_x86_64': {'timeout':'%di:s32'}, 'ub': 1 }, 'udelay': { 'func':'__const_udelay', 'args_x86_64': {'loops':'%di:s32'}, 'ub': 1 }, - 'usleep_range': { 'args_x86_64': {'min':'%di:s32', 'max':'%si:s32'}, 'ub': 1 }, + 'usleep_range': { + 'func':'usleep_range_state', + 'args_x86_64': {'min':'%di:s32', 'max':'%si:s32'}, + 'ub': 1 + }, 'mutex_lock_slowpath': { 'func':'__mutex_lock_slowpath', 'ub': 1 }, 'acpi_os_stall': {'ub': 1}, 'rt_mutex_slowlock': {'ub': 1}, @@ -342,15 +347,21 @@ class SystemValues: if self.verbose or msg.startswith('WARNING:'): pprint(msg) def signalHandler(self, signum, frame): - if not self.result: - return signame = self.signames[signum] if signum in self.signames else 'UNKNOWN' - msg = 'Signal %s caused a tool exit, line %d' % (signame, frame.f_lineno) + if signame in ['SIGUSR1', 'SIGUSR2', 'SIGSEGV']: + traceback.print_stack() + stack = traceback.format_list(traceback.extract_stack()) + self.outputResult({'stack':stack}) + if signame == 'SIGUSR1': + return + msg = '%s caused a tool exit, line %d' % (signame, frame.f_lineno) + pprint(msg) self.outputResult({'error':msg}) + os.kill(os.getpid(), signal.SIGKILL) sys.exit(3) def signalHandlerInit(self): capture = ['BUS', 'SYS', 'XCPU', 'XFSZ', 'PWR', 'HUP', 'INT', 'QUIT', - 'ILL', 'ABRT', 'FPE', 'SEGV', 'TERM'] + 'ILL', 'ABRT', 'FPE', 'SEGV', 'TERM', 'USR1', 'USR2'] self.signames = dict() for i in capture: s = 'SIG'+i @@ -859,6 +870,11 @@ class SystemValues: # files needed for any trace data files = ['buffer_size_kb', 'current_tracer', 'trace', 'trace_clock', 'trace_marker', 'trace_options', 'tracing_on'] + # legacy check for old systems + if not os.path.exists(self.tpath+'trace'): + self.tpath = '/sys/kernel/debug/tracing/' + if not os.path.exists(self.epath): + self.epath = '/sys/kernel/debug/tracing/events/power/' # files needed for callgraph trace data tp = self.tpath if(self.usecallgraph): @@ -911,6 +927,13 @@ class SystemValues: if num > 0: n = '%d' % num fp = open(self.result, 'a') + if 'stack' in testdata: + fp.write('Printing stack trace:\n') + for line in testdata['stack']: + fp.write(line) + fp.close() + self.sudoUserchown(self.result) + return if 'error' in testdata: fp.write('result%s: fail\n' % n) fp.write('error%s: %s\n' % (n, testdata['error'])) @@ -1980,7 +2003,7 @@ class Data: length = -1.0 if(start >= 0 and end >= 0): length = end - start - if pid == -2 or name not in sysvals.tracefuncs.keys(): + if pid >= -2: i = 2 origname = name while(name in list): @@ -2753,7 +2776,8 @@ class Timeline: def createHeader(self, sv, stamp): if(not stamp['time']): return - self.html += '<div class="version"><a href="https://01.org/pm-graph">%s v%s</a></div>' \ + self.html += '<div class="version"><a href="https://www.intel.com/content/www/'+\ + 'us/en/developer/topic-technology/open/pm-graph/overview.html">%s v%s</a></div>' \ % (sv.title, sv.version) if sv.logmsg and sv.testlog: self.html += '<button id="showtest" class="logbtn btnfmt">log</button>' @@ -5238,12 +5262,16 @@ def addScriptCode(hf, testruns): } var info = dev[i].title.split(" "); var pname = info[info.length-1]; - pd[pname] = parseFloat(info[info.length-3].slice(1)); - total[0] += pd[pname]; + var length = parseFloat(info[info.length-3].slice(1)); + if (pname in pd) + pd[pname] += length; + else + pd[pname] = length; + total[0] += length; if(pname.indexOf("suspend") >= 0) - total[tidx] += pd[pname]; + total[tidx] += length; else - total[tidx+1] += pd[pname]; + total[tidx+1] += length; } } var devname = deviceTitle(this.title, total, cpu); @@ -5262,7 +5290,7 @@ def addScriptCode(hf, testruns): phases[i].style.left = left+"%"; phases[i].title = phases[i].id+" "+pd[phases[i].id]+" ms"; left += w; - var time = "<t4 style=\"font-size:"+fs+"px\">"+pd[phases[i].id]+" ms<br></t4>"; + var time = "<t4 style=\"font-size:"+fs+"px\">"+pd[phases[i].id].toFixed(3)+" ms<br></t4>"; var pname = "<t3 style=\"font-size:"+fs2+"px\">"+phases[i].id.replace(new RegExp("_", "g"), " ")+"</t3>"; phases[i].innerHTML = time+pname; } else { @@ -6742,6 +6770,7 @@ def printHelp(): ' -wifi If a wifi connection is available, check that it reconnects after resume.\n'\ ' -wifitrace Trace kernel execution through wifi reconnect.\n'\ ' -netfix Use netfix to reset the network in the event it fails to resume.\n'\ + ' -debugtiming Add timestamp to each printed line\n'\ ' [testprep]\n'\ ' -sync Sync the filesystems before starting the test\n'\ ' -rs on/off Enable/disable runtime suspend for all devices, restore all after test\n'\ @@ -7047,7 +7076,6 @@ if __name__ == '__main__': except: doError('No result file supplied', True) sysvals.result = val - sysvals.signalHandlerInit() else: doError('Invalid argument: '+arg, True) @@ -7057,6 +7085,7 @@ if __name__ == '__main__': if(sysvals.usecallgraph and sysvals.useprocmon): doError('-proc is not compatible with -f') + sysvals.signalHandlerInit() if sysvals.usecallgraph and sysvals.cgskip: sysvals.vprint('Using cgskip file: %s' % sysvals.cgskip) sysvals.setCallgraphBlacklist(sysvals.cgskip) diff --git a/tools/sched_ext/include/scx/common.bpf.h b/tools/sched_ext/include/scx/common.bpf.h index f538c75db183..2f36b7b6418d 100644 --- a/tools/sched_ext/include/scx/common.bpf.h +++ b/tools/sched_ext/include/scx/common.bpf.h @@ -7,7 +7,13 @@ #ifndef __SCX_COMMON_BPF_H #define __SCX_COMMON_BPF_H +#ifdef LSP +#define __bpf__ +#include "../vmlinux/vmlinux.h" +#else #include "vmlinux.h" +#endif + #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <asm-generic/errno.h> @@ -30,15 +36,15 @@ static inline void ___vmlinux_h_sanity_check___(void) s32 scx_bpf_create_dsq(u64 dsq_id, s32 node) __ksym; s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *is_idle) __ksym; -void scx_bpf_dispatch(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym; -void scx_bpf_dispatch_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym; +void scx_bpf_dsq_insert(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak; +void scx_bpf_dsq_insert_vtime(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak; u32 scx_bpf_dispatch_nr_slots(void) __ksym; void scx_bpf_dispatch_cancel(void) __ksym; -bool scx_bpf_consume(u64 dsq_id) __ksym; -void scx_bpf_dispatch_from_dsq_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym; -void scx_bpf_dispatch_from_dsq_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym; -bool scx_bpf_dispatch_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; -bool scx_bpf_dispatch_vtime_from_dsq(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; +bool scx_bpf_dsq_move_to_local(u64 dsq_id) __ksym; +void scx_bpf_dsq_move_set_slice(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym; +void scx_bpf_dsq_move_set_vtime(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym; +bool scx_bpf_dsq_move(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; +bool scx_bpf_dsq_move_vtime(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; u32 scx_bpf_reenqueue_local(void) __ksym; void scx_bpf_kick_cpu(s32 cpu, u64 flags) __ksym; s32 scx_bpf_dsq_nr_queued(u64 dsq_id) __ksym; @@ -65,11 +71,11 @@ s32 scx_bpf_pick_any_cpu(const cpumask_t *cpus_allowed, u64 flags) __ksym; bool scx_bpf_task_running(const struct task_struct *p) __ksym; s32 scx_bpf_task_cpu(const struct task_struct *p) __ksym; struct rq *scx_bpf_cpu_rq(s32 cpu) __ksym; -struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym; +struct cgroup *scx_bpf_task_cgroup(struct task_struct *p) __ksym __weak; /* - * Use the following as @it__iter when calling - * scx_bpf_dispatch[_vtime]_from_dsq() from within bpf_for_each() loops. + * Use the following as @it__iter when calling scx_bpf_dsq_move[_vtime]() from + * within bpf_for_each() loops. */ #define BPF_FOR_EACH_ITER (&___it) @@ -309,6 +315,15 @@ void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym u32 bpf_cpumask_any_distribute(const struct cpumask *cpumask) __ksym; u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym; +u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym; + +/* + * Access a cpumask in read-only mode (typically to check bits). + */ +static __always_inline const struct cpumask *cast_mask(struct bpf_cpumask *mask) +{ + return (const struct cpumask *)mask; +} /* rcu */ void bpf_rcu_read_lock(void) __ksym; diff --git a/tools/sched_ext/include/scx/compat.bpf.h b/tools/sched_ext/include/scx/compat.bpf.h index 3d2fe1208900..d56520100a26 100644 --- a/tools/sched_ext/include/scx/compat.bpf.h +++ b/tools/sched_ext/include/scx/compat.bpf.h @@ -15,6 +15,116 @@ __ret; \ }) +/* v6.12: 819513666966 ("sched_ext: Add cgroup support") */ +#define __COMPAT_scx_bpf_task_cgroup(p) \ + (bpf_ksym_exists(scx_bpf_task_cgroup) ? \ + scx_bpf_task_cgroup((p)) : NULL) + +/* + * v6.13: The verb `dispatch` was too overloaded and confusing. kfuncs are + * renamed to unload the verb. + * + * Build error is triggered if old names are used. New binaries work with both + * new and old names. The compat macros will be removed on v6.15 release. + * + * scx_bpf_dispatch_from_dsq() and friends were added during v6.12 by + * 4c30f5ce4f7a ("sched_ext: Implement scx_bpf_dispatch[_vtime]_from_dsq()"). + * Preserve __COMPAT macros until v6.15. + */ +void scx_bpf_dispatch___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 enq_flags) __ksym __weak; +void scx_bpf_dispatch_vtime___compat(struct task_struct *p, u64 dsq_id, u64 slice, u64 vtime, u64 enq_flags) __ksym __weak; +bool scx_bpf_consume___compat(u64 dsq_id) __ksym __weak; +void scx_bpf_dispatch_from_dsq_set_slice___compat(struct bpf_iter_scx_dsq *it__iter, u64 slice) __ksym __weak; +void scx_bpf_dispatch_from_dsq_set_vtime___compat(struct bpf_iter_scx_dsq *it__iter, u64 vtime) __ksym __weak; +bool scx_bpf_dispatch_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; +bool scx_bpf_dispatch_vtime_from_dsq___compat(struct bpf_iter_scx_dsq *it__iter, struct task_struct *p, u64 dsq_id, u64 enq_flags) __ksym __weak; + +#define scx_bpf_dsq_insert(p, dsq_id, slice, enq_flags) \ + (bpf_ksym_exists(scx_bpf_dsq_insert) ? \ + scx_bpf_dsq_insert((p), (dsq_id), (slice), (enq_flags)) : \ + scx_bpf_dispatch___compat((p), (dsq_id), (slice), (enq_flags))) + +#define scx_bpf_dsq_insert_vtime(p, dsq_id, slice, vtime, enq_flags) \ + (bpf_ksym_exists(scx_bpf_dsq_insert_vtime) ? \ + scx_bpf_dsq_insert_vtime((p), (dsq_id), (slice), (vtime), (enq_flags)) : \ + scx_bpf_dispatch_vtime___compat((p), (dsq_id), (slice), (vtime), (enq_flags))) + +#define scx_bpf_dsq_move_to_local(dsq_id) \ + (bpf_ksym_exists(scx_bpf_dsq_move_to_local) ? \ + scx_bpf_dsq_move_to_local((dsq_id)) : \ + scx_bpf_consume___compat((dsq_id))) + +#define __COMPAT_scx_bpf_dsq_move_set_slice(it__iter, slice) \ + (bpf_ksym_exists(scx_bpf_dsq_move_set_slice) ? \ + scx_bpf_dsq_move_set_slice((it__iter), (slice)) : \ + (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_slice___compat) ? \ + scx_bpf_dispatch_from_dsq_set_slice___compat((it__iter), (slice)) : \ + (void)0)) + +#define __COMPAT_scx_bpf_dsq_move_set_vtime(it__iter, vtime) \ + (bpf_ksym_exists(scx_bpf_dsq_move_set_vtime) ? \ + scx_bpf_dsq_move_set_vtime((it__iter), (vtime)) : \ + (bpf_ksym_exists(scx_bpf_dispatch_from_dsq_set_vtime___compat) ? \ + scx_bpf_dispatch_from_dsq_set_vtime___compat((it__iter), (vtime)) : \ + (void) 0)) + +#define __COMPAT_scx_bpf_dsq_move(it__iter, p, dsq_id, enq_flags) \ + (bpf_ksym_exists(scx_bpf_dsq_move) ? \ + scx_bpf_dsq_move((it__iter), (p), (dsq_id), (enq_flags)) : \ + (bpf_ksym_exists(scx_bpf_dispatch_from_dsq___compat) ? \ + scx_bpf_dispatch_from_dsq___compat((it__iter), (p), (dsq_id), (enq_flags)) : \ + false)) + +#define __COMPAT_scx_bpf_dsq_move_vtime(it__iter, p, dsq_id, enq_flags) \ + (bpf_ksym_exists(scx_bpf_dsq_move_vtime) ? \ + scx_bpf_dsq_move_vtime((it__iter), (p), (dsq_id), (enq_flags)) : \ + (bpf_ksym_exists(scx_bpf_dispatch_vtime_from_dsq___compat) ? \ + scx_bpf_dispatch_vtime_from_dsq___compat((it__iter), (p), (dsq_id), (enq_flags)) : \ + false)) + +#define scx_bpf_dispatch(p, dsq_id, slice, enq_flags) \ + _Static_assert(false, "scx_bpf_dispatch() renamed to scx_bpf_dsq_insert()") + +#define scx_bpf_dispatch_vtime(p, dsq_id, slice, vtime, enq_flags) \ + _Static_assert(false, "scx_bpf_dispatch_vtime() renamed to scx_bpf_dsq_insert_vtime()") + +#define scx_bpf_consume(dsq_id) ({ \ + _Static_assert(false, "scx_bpf_consume() renamed to scx_bpf_dsq_move_to_local()"); \ + false; \ +}) + +#define scx_bpf_dispatch_from_dsq_set_slice(it__iter, slice) \ + _Static_assert(false, "scx_bpf_dispatch_from_dsq_set_slice() renamed to scx_bpf_dsq_move_set_slice()") + +#define scx_bpf_dispatch_from_dsq_set_vtime(it__iter, vtime) \ + _Static_assert(false, "scx_bpf_dispatch_from_dsq_set_vtime() renamed to scx_bpf_dsq_move_set_vtime()") + +#define scx_bpf_dispatch_from_dsq(it__iter, p, dsq_id, enq_flags) ({ \ + _Static_assert(false, "scx_bpf_dispatch_from_dsq() renamed to scx_bpf_dsq_move()"); \ + false; \ +}) + +#define scx_bpf_dispatch_vtime_from_dsq(it__iter, p, dsq_id, enq_flags) ({ \ + _Static_assert(false, "scx_bpf_dispatch_vtime_from_dsq() renamed to scx_bpf_dsq_move_vtime()"); \ + false; \ +}) + +#define __COMPAT_scx_bpf_dispatch_from_dsq_set_slice(it__iter, slice) \ + _Static_assert(false, "__COMPAT_scx_bpf_dispatch_from_dsq_set_slice() renamed to __COMPAT_scx_bpf_dsq_move_set_slice()") + +#define __COMPAT_scx_bpf_dispatch_from_dsq_set_vtime(it__iter, vtime) \ + _Static_assert(false, "__COMPAT_scx_bpf_dispatch_from_dsq_set_vtime() renamed to __COMPAT_scx_bpf_dsq_move_set_vtime()") + +#define __COMPAT_scx_bpf_dispatch_from_dsq(it__iter, p, dsq_id, enq_flags) ({ \ + _Static_assert(false, "__COMPAT_scx_bpf_dispatch_from_dsq() renamed to __COMPAT_scx_bpf_dsq_move()"); \ + false; \ +}) + +#define __COMPAT_scx_bpf_dispatch_vtime_from_dsq(it__iter, p, dsq_id, enq_flags) ({ \ + _Static_assert(false, "__COMPAT_scx_bpf_dispatch_vtime_from_dsq() renamed to __COMPAT_scx_bpf_dsq_move_vtime()"); \ + false; \ +}) + /* * Define sched_ext_ops. This may be expanded to define multiple variants for * backward compatibility. See compat.h::SCX_OPS_LOAD/ATTACH(). diff --git a/tools/sched_ext/include/scx/user_exit_info.h b/tools/sched_ext/include/scx/user_exit_info.h index 891693ee604e..8ce2734402e1 100644 --- a/tools/sched_ext/include/scx/user_exit_info.h +++ b/tools/sched_ext/include/scx/user_exit_info.h @@ -25,7 +25,11 @@ struct user_exit_info { #ifdef __bpf__ +#ifdef LSP +#include "../vmlinux/vmlinux.h" +#else #include "vmlinux.h" +#endif #include <bpf/bpf_core_read.h> #define UEI_DEFINE(__name) \ diff --git a/tools/sched_ext/scx_central.bpf.c b/tools/sched_ext/scx_central.bpf.c index 8dd8eb73b6b8..e6fad6211f6c 100644 --- a/tools/sched_ext/scx_central.bpf.c +++ b/tools/sched_ext/scx_central.bpf.c @@ -118,14 +118,14 @@ void BPF_STRUCT_OPS(central_enqueue, struct task_struct *p, u64 enq_flags) */ if ((p->flags & PF_KTHREAD) && p->nr_cpus_allowed == 1) { __sync_fetch_and_add(&nr_locals, 1); - scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_INF, - enq_flags | SCX_ENQ_PREEMPT); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_INF, + enq_flags | SCX_ENQ_PREEMPT); return; } if (bpf_map_push_elem(¢ral_q, &pid, 0)) { __sync_fetch_and_add(&nr_overflows, 1); - scx_bpf_dispatch(p, FALLBACK_DSQ_ID, SCX_SLICE_INF, enq_flags); + scx_bpf_dsq_insert(p, FALLBACK_DSQ_ID, SCX_SLICE_INF, enq_flags); return; } @@ -158,7 +158,7 @@ static bool dispatch_to_cpu(s32 cpu) */ if (!bpf_cpumask_test_cpu(cpu, p->cpus_ptr)) { __sync_fetch_and_add(&nr_mismatches, 1); - scx_bpf_dispatch(p, FALLBACK_DSQ_ID, SCX_SLICE_INF, 0); + scx_bpf_dsq_insert(p, FALLBACK_DSQ_ID, SCX_SLICE_INF, 0); bpf_task_release(p); /* * We might run out of dispatch buffer slots if we continue dispatching @@ -172,7 +172,7 @@ static bool dispatch_to_cpu(s32 cpu) } /* dispatch to local and mark that @cpu doesn't need more */ - scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, SCX_SLICE_INF, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, SCX_SLICE_INF, 0); if (cpu != central_cpu) scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE); @@ -219,13 +219,13 @@ void BPF_STRUCT_OPS(central_dispatch, s32 cpu, struct task_struct *prev) } /* look for a task to run on the central CPU */ - if (scx_bpf_consume(FALLBACK_DSQ_ID)) + if (scx_bpf_dsq_move_to_local(FALLBACK_DSQ_ID)) return; dispatch_to_cpu(central_cpu); } else { bool *gimme; - if (scx_bpf_consume(FALLBACK_DSQ_ID)) + if (scx_bpf_dsq_move_to_local(FALLBACK_DSQ_ID)) return; gimme = ARRAY_ELEM_PTR(cpu_gimme_task, cpu, nr_cpu_ids); diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c index 3ab2b60781a0..4e3afcd260bf 100644 --- a/tools/sched_ext/scx_flatcg.bpf.c +++ b/tools/sched_ext/scx_flatcg.bpf.c @@ -49,7 +49,10 @@ /* * Maximum amount of retries to find a valid cgroup. */ -#define CGROUP_MAX_RETRIES 1024 +enum { + FALLBACK_DSQ = 0, + CGROUP_MAX_RETRIES = 1024, +}; char _license[] SEC("license") = "GPL"; @@ -225,7 +228,7 @@ static void cgrp_refresh_hweight(struct cgroup *cgrp, struct fcg_cgrp_ctx *cgc) break; /* - * We can be oppotunistic here and not grab the + * We can be opportunistic here and not grab the * cgv_tree_lock and deal with the occasional races. * However, hweight updates are already cached and * relatively low-frequency. Let's just do the @@ -258,8 +261,7 @@ static void cgrp_cap_budget(struct cgv_node *cgv_node, struct fcg_cgrp_ctx *cgc) * and thus can't be updated and repositioned. Instead, we collect the * vtime deltas separately and apply it asynchronously here. */ - delta = cgc->cvtime_delta; - __sync_fetch_and_sub(&cgc->cvtime_delta, delta); + delta = __sync_fetch_and_sub(&cgc->cvtime_delta, cgc->cvtime_delta); cvtime = cgv_node->cvtime + delta; /* @@ -339,7 +341,7 @@ s32 BPF_STRUCT_OPS(fcg_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake if (is_idle) { set_bypassed_at(p, taskc); stat_inc(FCG_STAT_LOCAL); - scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); } return cpu; @@ -375,21 +377,23 @@ void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags) */ if (p->nr_cpus_allowed == 1 && (p->flags & PF_KTHREAD)) { stat_inc(FCG_STAT_LOCAL); - scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, + enq_flags); } else { stat_inc(FCG_STAT_GLOBAL); - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, FALLBACK_DSQ, SCX_SLICE_DFL, + enq_flags); } return; } - cgrp = scx_bpf_task_cgroup(p); + cgrp = __COMPAT_scx_bpf_task_cgroup(p); cgc = find_cgrp_ctx(cgrp); if (!cgc) goto out_release; if (fifo_sched) { - scx_bpf_dispatch(p, cgrp->kn->id, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, cgrp->kn->id, SCX_SLICE_DFL, enq_flags); } else { u64 tvtime = p->scx.dsq_vtime; @@ -400,8 +404,8 @@ void BPF_STRUCT_OPS(fcg_enqueue, struct task_struct *p, u64 enq_flags) if (vtime_before(tvtime, cgc->tvtime_now - SCX_SLICE_DFL)) tvtime = cgc->tvtime_now - SCX_SLICE_DFL; - scx_bpf_dispatch_vtime(p, cgrp->kn->id, SCX_SLICE_DFL, - tvtime, enq_flags); + scx_bpf_dsq_insert_vtime(p, cgrp->kn->id, SCX_SLICE_DFL, + tvtime, enq_flags); } cgrp_enqueued(cgrp, cgc); @@ -509,7 +513,7 @@ void BPF_STRUCT_OPS(fcg_runnable, struct task_struct *p, u64 enq_flags) { struct cgroup *cgrp; - cgrp = scx_bpf_task_cgroup(p); + cgrp = __COMPAT_scx_bpf_task_cgroup(p); update_active_weight_sums(cgrp, true); bpf_cgroup_release(cgrp); } @@ -522,7 +526,7 @@ void BPF_STRUCT_OPS(fcg_running, struct task_struct *p) if (fifo_sched) return; - cgrp = scx_bpf_task_cgroup(p); + cgrp = __COMPAT_scx_bpf_task_cgroup(p); cgc = find_cgrp_ctx(cgrp); if (cgc) { /* @@ -565,7 +569,7 @@ void BPF_STRUCT_OPS(fcg_stopping, struct task_struct *p, bool runnable) if (!taskc->bypassed_at) return; - cgrp = scx_bpf_task_cgroup(p); + cgrp = __COMPAT_scx_bpf_task_cgroup(p); cgc = find_cgrp_ctx(cgrp); if (cgc) { __sync_fetch_and_add(&cgc->cvtime_delta, @@ -579,7 +583,7 @@ void BPF_STRUCT_OPS(fcg_quiescent, struct task_struct *p, u64 deq_flags) { struct cgroup *cgrp; - cgrp = scx_bpf_task_cgroup(p); + cgrp = __COMPAT_scx_bpf_task_cgroup(p); update_active_weight_sums(cgrp, false); bpf_cgroup_release(cgrp); } @@ -661,7 +665,7 @@ static bool try_pick_next_cgroup(u64 *cgidp) goto out_free; } - if (!scx_bpf_consume(cgid)) { + if (!scx_bpf_dsq_move_to_local(cgid)) { bpf_cgroup_release(cgrp); stat_inc(FCG_STAT_PNC_EMPTY); goto out_stash; @@ -741,7 +745,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev) goto pick_next_cgroup; if (vtime_before(now, cpuc->cur_at + cgrp_slice_ns)) { - if (scx_bpf_consume(cpuc->cur_cgid)) { + if (scx_bpf_dsq_move_to_local(cpuc->cur_cgid)) { stat_inc(FCG_STAT_CNS_KEEP); return; } @@ -781,7 +785,7 @@ void BPF_STRUCT_OPS(fcg_dispatch, s32 cpu, struct task_struct *prev) pick_next_cgroup: cpuc->cur_at = now; - if (scx_bpf_consume(SCX_DSQ_GLOBAL)) { + if (scx_bpf_dsq_move_to_local(FALLBACK_DSQ)) { cpuc->cur_cgid = 0; return; } @@ -838,7 +842,7 @@ int BPF_STRUCT_OPS_SLEEPABLE(fcg_cgroup_init, struct cgroup *cgrp, int ret; /* - * Technically incorrect as cgroup ID is full 64bit while dq ID is + * Technically incorrect as cgroup ID is full 64bit while dsq ID is * 63bit. Should not be a problem in practice and easy to spot in the * unlikely case that it breaks. */ @@ -926,6 +930,11 @@ void BPF_STRUCT_OPS(fcg_cgroup_move, struct task_struct *p, p->scx.dsq_vtime = to_cgc->tvtime_now + vtime_delta; } +s32 BPF_STRUCT_OPS_SLEEPABLE(fcg_init) +{ + return scx_bpf_create_dsq(FALLBACK_DSQ, -1); +} + void BPF_STRUCT_OPS(fcg_exit, struct scx_exit_info *ei) { UEI_RECORD(uei, ei); @@ -944,6 +953,7 @@ SCX_OPS_DEFINE(flatcg_ops, .cgroup_init = (void *)fcg_cgroup_init, .cgroup_exit = (void *)fcg_cgroup_exit, .cgroup_move = (void *)fcg_cgroup_move, + .init = (void *)fcg_init, .exit = (void *)fcg_exit, .flags = SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING, .name = "flatcg"); diff --git a/tools/sched_ext/scx_qmap.bpf.c b/tools/sched_ext/scx_qmap.bpf.c index 83c8f54c1e31..ee264947e0c3 100644 --- a/tools/sched_ext/scx_qmap.bpf.c +++ b/tools/sched_ext/scx_qmap.bpf.c @@ -226,15 +226,15 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags) */ if (tctx->force_local) { tctx->force_local = false; - scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, enq_flags); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, slice_ns, enq_flags); return; } - /* if !WAKEUP, select_cpu() wasn't called, try direct dispatch */ - if (!(enq_flags & SCX_ENQ_WAKEUP) && + /* if select_cpu() wasn't called, try direct dispatch */ + if (!(enq_flags & SCX_ENQ_CPU_SELECTED) && (cpu = pick_direct_dispatch_cpu(p, scx_bpf_task_cpu(p))) >= 0) { __sync_fetch_and_add(&nr_ddsp_from_enq, 1); - scx_bpf_dispatch(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL_ON | cpu, slice_ns, enq_flags); return; } @@ -247,7 +247,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags) if (enq_flags & SCX_ENQ_REENQ) { s32 cpu; - scx_bpf_dispatch(p, SHARED_DSQ, 0, enq_flags); + scx_bpf_dsq_insert(p, SHARED_DSQ, 0, enq_flags); cpu = scx_bpf_pick_idle_cpu(p->cpus_ptr, 0); if (cpu >= 0) scx_bpf_kick_cpu(cpu, SCX_KICK_IDLE); @@ -262,7 +262,7 @@ void BPF_STRUCT_OPS(qmap_enqueue, struct task_struct *p, u64 enq_flags) /* Queue on the selected FIFO. If the FIFO overflows, punt to global. */ if (bpf_map_push_elem(ring, &pid, 0)) { - scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, enq_flags); + scx_bpf_dsq_insert(p, SHARED_DSQ, slice_ns, enq_flags); return; } @@ -294,10 +294,10 @@ static void update_core_sched_head_seq(struct task_struct *p) } /* - * To demonstrate the use of scx_bpf_dispatch_from_dsq(), implement silly - * selective priority boosting mechanism by scanning SHARED_DSQ looking for - * highpri tasks, moving them to HIGHPRI_DSQ and then consuming them first. This - * makes minor difference only when dsp_batch is larger than 1. + * To demonstrate the use of scx_bpf_dsq_move(), implement silly selective + * priority boosting mechanism by scanning SHARED_DSQ looking for highpri tasks, + * moving them to HIGHPRI_DSQ and then consuming them first. This makes minor + * difference only when dsp_batch is larger than 1. * * scx_bpf_dispatch[_vtime]_from_dsq() are allowed both from ops.dispatch() and * non-rq-lock holding BPF programs. As demonstration, this function is called @@ -318,11 +318,11 @@ static bool dispatch_highpri(bool from_timer) if (tctx->highpri) { /* exercise the set_*() and vtime interface too */ - scx_bpf_dispatch_from_dsq_set_slice( + __COMPAT_scx_bpf_dsq_move_set_slice( BPF_FOR_EACH_ITER, slice_ns * 2); - scx_bpf_dispatch_from_dsq_set_vtime( + __COMPAT_scx_bpf_dsq_move_set_vtime( BPF_FOR_EACH_ITER, highpri_seq++); - scx_bpf_dispatch_vtime_from_dsq( + __COMPAT_scx_bpf_dsq_move_vtime( BPF_FOR_EACH_ITER, p, HIGHPRI_DSQ, 0); } } @@ -340,7 +340,7 @@ static bool dispatch_highpri(bool from_timer) else cpu = scx_bpf_pick_any_cpu(p->cpus_ptr, 0); - if (scx_bpf_dispatch_from_dsq(BPF_FOR_EACH_ITER, p, + if (__COMPAT_scx_bpf_dsq_move(BPF_FOR_EACH_ITER, p, SCX_DSQ_LOCAL_ON | cpu, SCX_ENQ_PREEMPT)) { if (cpu == this_cpu) { @@ -374,7 +374,7 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) if (dispatch_highpri(false)) return; - if (!nr_highpri_queued && scx_bpf_consume(SHARED_DSQ)) + if (!nr_highpri_queued && scx_bpf_dsq_move_to_local(SHARED_DSQ)) return; if (dsp_inf_loop_after && nr_dispatched > dsp_inf_loop_after) { @@ -385,7 +385,7 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) */ p = bpf_task_from_pid(2); if (p) { - scx_bpf_dispatch(p, SCX_DSQ_LOCAL, slice_ns, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, slice_ns, 0); bpf_task_release(p); return; } @@ -431,7 +431,7 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) update_core_sched_head_seq(p); __sync_fetch_and_add(&nr_dispatched, 1); - scx_bpf_dispatch(p, SHARED_DSQ, slice_ns, 0); + scx_bpf_dsq_insert(p, SHARED_DSQ, slice_ns, 0); bpf_task_release(p); batch--; @@ -439,7 +439,7 @@ void BPF_STRUCT_OPS(qmap_dispatch, s32 cpu, struct task_struct *prev) if (!batch || !scx_bpf_dispatch_nr_slots()) { if (dispatch_highpri(false)) return; - scx_bpf_consume(SHARED_DSQ); + scx_bpf_dsq_move_to_local(SHARED_DSQ); return; } if (!cpuc->dsp_cnt) diff --git a/tools/sched_ext/scx_show_state.py b/tools/sched_ext/scx_show_state.py index 8bc626ede1c4..b800d4f5f2e9 100644 --- a/tools/sched_ext/scx_show_state.py +++ b/tools/sched_ext/scx_show_state.py @@ -35,6 +35,8 @@ print(f'enabled : {read_static_key("__scx_ops_enabled")}') print(f'switching_all : {read_int("scx_switching_all")}') print(f'switched_all : {read_static_key("__scx_switched_all")}') print(f'enable_state : {ops_state_str(enable_state)} ({enable_state})') -print(f'bypass_depth : {read_atomic("scx_ops_bypass_depth")}') +print(f'in_softlockup : {prog["scx_in_softlockup"].value_()}') +print(f'breather_depth: {read_atomic("scx_ops_breather_depth")}') +print(f'bypass_depth : {prog["scx_ops_bypass_depth"].value_()}') print(f'nr_rejected : {read_atomic("scx_nr_rejected")}') print(f'enable_seq : {read_atomic("scx_enable_seq")}') diff --git a/tools/sched_ext/scx_simple.bpf.c b/tools/sched_ext/scx_simple.bpf.c index ed7e8d535fc5..31f915b286c6 100644 --- a/tools/sched_ext/scx_simple.bpf.c +++ b/tools/sched_ext/scx_simple.bpf.c @@ -31,10 +31,10 @@ UEI_DEFINE(uei); /* * Built-in DSQs such as SCX_DSQ_GLOBAL cannot be used as priority queues - * (meaning, cannot be dispatched to with scx_bpf_dispatch_vtime()). We + * (meaning, cannot be dispatched to with scx_bpf_dsq_insert_vtime()). We * therefore create a separate DSQ with ID 0 that we dispatch to and consume - * from. If scx_simple only supported global FIFO scheduling, then we could - * just use SCX_DSQ_GLOBAL. + * from. If scx_simple only supported global FIFO scheduling, then we could just + * use SCX_DSQ_GLOBAL. */ #define SHARED_DSQ 0 @@ -65,7 +65,7 @@ s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 w cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle); if (is_idle) { stat_inc(0); /* count local queueing */ - scx_bpf_dispatch(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); + scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0); } return cpu; @@ -76,7 +76,7 @@ void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags) stat_inc(1); /* count global queueing */ if (fifo_sched) { - scx_bpf_dispatch(p, SHARED_DSQ, SCX_SLICE_DFL, enq_flags); + scx_bpf_dsq_insert(p, SHARED_DSQ, SCX_SLICE_DFL, enq_flags); } else { u64 vtime = p->scx.dsq_vtime; @@ -87,14 +87,14 @@ void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags) if (vtime_before(vtime, vtime_now - SCX_SLICE_DFL)) vtime = vtime_now - SCX_SLICE_DFL; - scx_bpf_dispatch_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime, - enq_flags); + scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime, + enq_flags); } } void BPF_STRUCT_OPS(simple_dispatch, s32 cpu, struct task_struct *prev) { - scx_bpf_consume(SHARED_DSQ); + scx_bpf_dsq_move_to_local(SHARED_DSQ); } void BPF_STRUCT_OPS(simple_running, struct task_struct *p) diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c index 9179942d7f15..f2135d619a0b 100644 --- a/tools/spi/spidev_test.c +++ b/tools/spi/spidev_test.c @@ -42,6 +42,7 @@ static char *input_file; static char *output_file; static uint32_t speed = 500000; static uint16_t delay; +static uint16_t word_delay; static int verbose; static int transfer_size; static int iterations; @@ -124,6 +125,7 @@ static void transfer(int fd, uint8_t const *tx, uint8_t const *rx, size_t len) .rx_buf = (unsigned long)rx, .len = len, .delay_usecs = delay, + .word_delay_usecs = word_delay, .speed_hz = speed, .bits_per_word = bits, }; @@ -172,11 +174,12 @@ static void transfer(int fd, uint8_t const *tx, uint8_t const *rx, size_t len) static void print_usage(const char *prog) { - printf("Usage: %s [-2348CDFHILMNORSZbdilopsv]\n", prog); + printf("Usage: %s [-2348CDFHILMNORSZbdilopsvw]\n", prog); puts("general device settings:\n" " -D --device device to use (default /dev/spidev1.1)\n" " -s --speed max speed (Hz)\n" " -d --delay delay (usec)\n" + " -w --word-delay word delay (usec)\n" " -l --loop loopback\n" "spi mode:\n" " -H --cpha clock phase\n" @@ -213,6 +216,7 @@ static void parse_opts(int argc, char *argv[]) { "device", 1, 0, 'D' }, { "speed", 1, 0, 's' }, { "delay", 1, 0, 'd' }, + { "word-delay", 1, 0, 'w' }, { "loop", 0, 0, 'l' }, { "cpha", 0, 0, 'H' }, { "cpol", 0, 0, 'O' }, @@ -237,7 +241,7 @@ static void parse_opts(int argc, char *argv[]) }; int c; - c = getopt_long(argc, argv, "D:s:d:b:i:o:lHOLC3ZFMNR248p:vS:I:", + c = getopt_long(argc, argv, "D:s:d:w:b:i:o:lHOLC3ZFMNR248p:vS:I:", lopts, NULL); if (c == -1) @@ -253,6 +257,9 @@ static void parse_opts(int argc, char *argv[]) case 'd': delay = atoi(optarg); break; + case 'w': + word_delay = atoi(optarg); + break; case 'b': bits = atoi(optarg); break; diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c index 90d5afd52dd0..050725afa45d 100644 --- a/tools/testing/cxl/test/cxl.c +++ b/tools/testing/cxl/test/cxl.c @@ -693,26 +693,22 @@ static int mock_decoder_commit(struct cxl_decoder *cxld) return 0; } -static int mock_decoder_reset(struct cxl_decoder *cxld) +static void mock_decoder_reset(struct cxl_decoder *cxld) { struct cxl_port *port = to_cxl_port(cxld->dev.parent); int id = cxld->id; if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0) - return 0; + return; dev_dbg(&port->dev, "%s reset\n", dev_name(&cxld->dev)); - if (port->commit_end != id) { + if (port->commit_end == id) + cxl_port_commit_reap(cxld); + else dev_dbg(&port->dev, "%s: out of order reset, expected decoder%d.%d\n", dev_name(&cxld->dev), port->id, port->commit_end); - return -EBUSY; - } - - port->commit_end--; cxld->flags &= ~CXL_DECODER_F_ENABLE; - - return 0; } static void default_mock_decoder(struct cxl_decoder *cxld) @@ -1062,7 +1058,7 @@ static void mock_companion(struct acpi_device *adev, struct device *dev) #define SZ_64G (SZ_32G * 2) #endif -static __init int cxl_rch_init(void) +static __init int cxl_rch_topo_init(void) { int rc, i; @@ -1090,30 +1086,8 @@ static __init int cxl_rch_init(void) goto err_bridge; } - for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++) { - int idx = NR_MEM_MULTI + NR_MEM_SINGLE + i; - struct platform_device *rch = cxl_rch[i]; - struct platform_device *pdev; - - pdev = platform_device_alloc("cxl_rcd", idx); - if (!pdev) - goto err_mem; - pdev->dev.parent = &rch->dev; - set_dev_node(&pdev->dev, i % 2); - - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); - goto err_mem; - } - cxl_rcd[i] = pdev; - } - return 0; -err_mem: - for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--) - platform_device_unregister(cxl_rcd[i]); err_bridge: for (i = ARRAY_SIZE(cxl_rch) - 1; i >= 0; i--) { struct platform_device *pdev = cxl_rch[i]; @@ -1127,12 +1101,10 @@ err_bridge: return rc; } -static void cxl_rch_exit(void) +static void cxl_rch_topo_exit(void) { int i; - for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--) - platform_device_unregister(cxl_rcd[i]); for (i = ARRAY_SIZE(cxl_rch) - 1; i >= 0; i--) { struct platform_device *pdev = cxl_rch[i]; @@ -1143,7 +1115,7 @@ static void cxl_rch_exit(void) } } -static __init int cxl_single_init(void) +static __init int cxl_single_topo_init(void) { int i, rc; @@ -1228,29 +1200,8 @@ static __init int cxl_single_init(void) cxl_swd_single[i] = pdev; } - for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) { - struct platform_device *dport = cxl_swd_single[i]; - struct platform_device *pdev; - - pdev = platform_device_alloc("cxl_mem", NR_MEM_MULTI + i); - if (!pdev) - goto err_mem; - pdev->dev.parent = &dport->dev; - set_dev_node(&pdev->dev, i % 2); - - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); - goto err_mem; - } - cxl_mem_single[i] = pdev; - } - return 0; -err_mem: - for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--) - platform_device_unregister(cxl_mem_single[i]); err_dport: for (i = ARRAY_SIZE(cxl_swd_single) - 1; i >= 0; i--) platform_device_unregister(cxl_swd_single[i]); @@ -1273,12 +1224,10 @@ err_bridge: return rc; } -static void cxl_single_exit(void) +static void cxl_single_topo_exit(void) { int i; - for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--) - platform_device_unregister(cxl_mem_single[i]); for (i = ARRAY_SIZE(cxl_swd_single) - 1; i >= 0; i--) platform_device_unregister(cxl_swd_single[i]); for (i = ARRAY_SIZE(cxl_swu_single) - 1; i >= 0; i--) @@ -1295,6 +1244,91 @@ static void cxl_single_exit(void) } } +static void cxl_mem_exit(void) +{ + int i; + + for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--) + platform_device_unregister(cxl_rcd[i]); + for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem_single[i]); + for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem[i]); +} + +static int cxl_mem_init(void) +{ + int i, rc; + + for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) { + struct platform_device *dport = cxl_switch_dport[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_mem", i); + if (!pdev) + goto err_mem; + pdev->dev.parent = &dport->dev; + set_dev_node(&pdev->dev, i % 2); + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_mem; + } + cxl_mem[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_mem_single); i++) { + struct platform_device *dport = cxl_swd_single[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_mem", NR_MEM_MULTI + i); + if (!pdev) + goto err_single; + pdev->dev.parent = &dport->dev; + set_dev_node(&pdev->dev, i % 2); + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_single; + } + cxl_mem_single[i] = pdev; + } + + for (i = 0; i < ARRAY_SIZE(cxl_rcd); i++) { + int idx = NR_MEM_MULTI + NR_MEM_SINGLE + i; + struct platform_device *rch = cxl_rch[i]; + struct platform_device *pdev; + + pdev = platform_device_alloc("cxl_rcd", idx); + if (!pdev) + goto err_rcd; + pdev->dev.parent = &rch->dev; + set_dev_node(&pdev->dev, i % 2); + + rc = platform_device_add(pdev); + if (rc) { + platform_device_put(pdev); + goto err_rcd; + } + cxl_rcd[i] = pdev; + } + + return 0; + +err_rcd: + for (i = ARRAY_SIZE(cxl_rcd) - 1; i >= 0; i--) + platform_device_unregister(cxl_rcd[i]); +err_single: + for (i = ARRAY_SIZE(cxl_mem_single) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem_single[i]); +err_mem: + for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) + platform_device_unregister(cxl_mem[i]); + return rc; +} + static __init int cxl_test_init(void) { int rc, i; @@ -1407,29 +1441,11 @@ static __init int cxl_test_init(void) cxl_switch_dport[i] = pdev; } - for (i = 0; i < ARRAY_SIZE(cxl_mem); i++) { - struct platform_device *dport = cxl_switch_dport[i]; - struct platform_device *pdev; - - pdev = platform_device_alloc("cxl_mem", i); - if (!pdev) - goto err_mem; - pdev->dev.parent = &dport->dev; - set_dev_node(&pdev->dev, i % 2); - - rc = platform_device_add(pdev); - if (rc) { - platform_device_put(pdev); - goto err_mem; - } - cxl_mem[i] = pdev; - } - - rc = cxl_single_init(); + rc = cxl_single_topo_init(); if (rc) - goto err_mem; + goto err_dport; - rc = cxl_rch_init(); + rc = cxl_rch_topo_init(); if (rc) goto err_single; @@ -1442,19 +1458,20 @@ static __init int cxl_test_init(void) rc = platform_device_add(cxl_acpi); if (rc) - goto err_add; + goto err_root; + + rc = cxl_mem_init(); + if (rc) + goto err_root; return 0; -err_add: +err_root: platform_device_put(cxl_acpi); err_rch: - cxl_rch_exit(); + cxl_rch_topo_exit(); err_single: - cxl_single_exit(); -err_mem: - for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) - platform_device_unregister(cxl_mem[i]); + cxl_single_topo_exit(); err_dport: for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--) platform_device_unregister(cxl_switch_dport[i]); @@ -1486,11 +1503,10 @@ static __exit void cxl_test_exit(void) { int i; + cxl_mem_exit(); platform_device_unregister(cxl_acpi); - cxl_rch_exit(); - cxl_single_exit(); - for (i = ARRAY_SIZE(cxl_mem) - 1; i >= 0; i--) - platform_device_unregister(cxl_mem[i]); + cxl_rch_topo_exit(); + cxl_single_topo_exit(); for (i = ARRAY_SIZE(cxl_switch_dport) - 1; i >= 0; i--) platform_device_unregister(cxl_switch_dport[i]); for (i = ARRAY_SIZE(cxl_switch_uport) - 1; i >= 0; i--) diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index ccdd6a504222..71916e0e1546 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -9,7 +9,7 @@ #include <linux/sizes.h> #include <linux/bits.h> #include <cxl/mailbox.h> -#include <asm/unaligned.h> +#include <linux/unaligned.h> #include <crypto/sha2.h> #include <cxlmem.h> @@ -1673,6 +1673,7 @@ static struct platform_driver cxl_mock_mem_driver = { .name = KBUILD_MODNAME, .dev_groups = cxl_mock_mem_groups, .groups = cxl_mock_mem_core_groups, + .probe_type = PROBE_PREFER_ASYNCHRONOUS, }, }; diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index bc74088c458a..676fa99a8b19 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -23,7 +23,7 @@ from typing import Iterable, List, Optional, Sequence, Tuple import kunit_json import kunit_kernel import kunit_parser -from kunit_printer import stdout +from kunit_printer import stdout, null_printer class KunitStatus(Enum): SUCCESS = auto() @@ -49,6 +49,8 @@ class KunitBuildRequest(KunitConfigRequest): class KunitParseRequest: raw_output: Optional[str] json: Optional[str] + summary: bool + failed: bool @dataclass class KunitExecRequest(KunitParseRequest): @@ -235,11 +237,18 @@ def parse_tests(request: KunitParseRequest, metadata: kunit_json.Metadata, input parse_time = time.time() - parse_start return KunitResult(KunitStatus.SUCCESS, parse_time), fake_test + default_printer = stdout + if request.summary or request.failed: + default_printer = null_printer # Actually parse the test results. - test = kunit_parser.parse_run_tests(input_data) + test = kunit_parser.parse_run_tests(input_data, default_printer) parse_time = time.time() - parse_start + if request.failed: + kunit_parser.print_test(test, request.failed, stdout) + kunit_parser.print_summary_line(test, stdout) + if request.json: json_str = kunit_json.get_json_result( test=test, @@ -413,6 +422,14 @@ def add_parse_opts(parser: argparse.ArgumentParser) -> None: help='Prints parsed test results as JSON to stdout or a file if ' 'a filename is specified. Does nothing if --raw_output is set.', type=str, const='stdout', default=None, metavar='FILE') + parser.add_argument('--summary', + help='Prints only the summary line for parsed test results.' + 'Does nothing if --raw_output is set.', + action='store_true') + parser.add_argument('--failed', + help='Prints only the failed parsed test results and summary line.' + 'Does nothing if --raw_output is set.', + action='store_true') def tree_from_args(cli_args: argparse.Namespace) -> kunit_kernel.LinuxSourceTree: @@ -448,6 +465,8 @@ def run_handler(cli_args: argparse.Namespace) -> None: jobs=cli_args.jobs, raw_output=cli_args.raw_output, json=cli_args.json, + summary=cli_args.summary, + failed=cli_args.failed, timeout=cli_args.timeout, filter_glob=cli_args.filter_glob, filter=cli_args.filter, @@ -495,6 +514,8 @@ def exec_handler(cli_args: argparse.Namespace) -> None: exec_request = KunitExecRequest(raw_output=cli_args.raw_output, build_dir=cli_args.build_dir, json=cli_args.json, + summary=cli_args.summary, + failed=cli_args.failed, timeout=cli_args.timeout, filter_glob=cli_args.filter_glob, filter=cli_args.filter, @@ -520,7 +541,8 @@ def parse_handler(cli_args: argparse.Namespace) -> None: # We know nothing about how the result was created! metadata = kunit_json.Metadata() request = KunitParseRequest(raw_output=cli_args.raw_output, - json=cli_args.json) + json=cli_args.json, summary=cli_args.summary, + failed=cli_args.failed) result, _ = parse_tests(request, metadata, kunit_output) if result.status != KunitStatus.SUCCESS: sys.exit(1) diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index 61931c4926fd..e76d7894b6c5 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -105,7 +105,9 @@ class LinuxSourceTreeOperationsQemu(LinuxSourceTreeOperations): self._kconfig = qemu_arch_params.kconfig self._qemu_arch = qemu_arch_params.qemu_arch self._kernel_path = qemu_arch_params.kernel_path - self._kernel_command_line = qemu_arch_params.kernel_command_line + ' kunit_shutdown=reboot' + self._kernel_command_line = qemu_arch_params.kernel_command_line + if 'kunit_shutdown=' not in self._kernel_command_line: + self._kernel_command_line += ' kunit_shutdown=reboot' self._extra_qemu_params = qemu_arch_params.extra_qemu_params self._serial = qemu_arch_params.serial diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index ce34be15c929..29fc27e8949b 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -17,7 +17,7 @@ import textwrap from enum import Enum, auto from typing import Iterable, Iterator, List, Optional, Tuple -from kunit_printer import stdout +from kunit_printer import Printer, stdout class Test: """ @@ -54,10 +54,10 @@ class Test: """Returns string representation of a Test class object.""" return str(self) - def add_error(self, error_message: str) -> None: + def add_error(self, printer: Printer, error_message: str) -> None: """Records an error that occurred while parsing this test.""" self.counts.errors += 1 - stdout.print_with_timestamp(stdout.red('[ERROR]') + f' Test: {self.name}: {error_message}') + printer.print_with_timestamp(stdout.red('[ERROR]') + f' Test: {self.name}: {error_message}') def ok_status(self) -> bool: """Returns true if the status was ok, i.e. passed or skipped.""" @@ -251,7 +251,7 @@ KTAP_VERSIONS = [1] TAP_VERSIONS = [13, 14] def check_version(version_num: int, accepted_versions: List[int], - version_type: str, test: Test) -> None: + version_type: str, test: Test, printer: Printer) -> None: """ Adds error to test object if version number is too high or too low. @@ -263,13 +263,14 @@ def check_version(version_num: int, accepted_versions: List[int], version_type - 'KTAP' or 'TAP' depending on the type of version line. test - Test object for current test being parsed + printer - Printer object to output error """ if version_num < min(accepted_versions): - test.add_error(f'{version_type} version lower than expected!') + test.add_error(printer, f'{version_type} version lower than expected!') elif version_num > max(accepted_versions): - test.add_error(f'{version_type} version higer than expected!') + test.add_error(printer, f'{version_type} version higer than expected!') -def parse_ktap_header(lines: LineStream, test: Test) -> bool: +def parse_ktap_header(lines: LineStream, test: Test, printer: Printer) -> bool: """ Parses KTAP/TAP header line and checks version number. Returns False if fails to parse KTAP/TAP header line. @@ -281,6 +282,7 @@ def parse_ktap_header(lines: LineStream, test: Test) -> bool: Parameters: lines - LineStream of KTAP output to parse test - Test object for current test being parsed + printer - Printer object to output results Return: True if successfully parsed KTAP/TAP header line @@ -289,10 +291,10 @@ def parse_ktap_header(lines: LineStream, test: Test) -> bool: tap_match = TAP_START.match(lines.peek()) if ktap_match: version_num = int(ktap_match.group(1)) - check_version(version_num, KTAP_VERSIONS, 'KTAP', test) + check_version(version_num, KTAP_VERSIONS, 'KTAP', test, printer) elif tap_match: version_num = int(tap_match.group(1)) - check_version(version_num, TAP_VERSIONS, 'TAP', test) + check_version(version_num, TAP_VERSIONS, 'TAP', test, printer) else: return False lines.pop() @@ -380,7 +382,7 @@ def peek_test_name_match(lines: LineStream, test: Test) -> bool: return name == test.name def parse_test_result(lines: LineStream, test: Test, - expected_num: int) -> bool: + expected_num: int, printer: Printer) -> bool: """ Parses test result line and stores the status and name in the test object. Reports an error if the test number does not match expected @@ -398,6 +400,7 @@ def parse_test_result(lines: LineStream, test: Test, lines - LineStream of KTAP output to parse test - Test object for current test being parsed expected_num - expected test number for current test + printer - Printer object to output results Return: True if successfully parsed a test result line. @@ -420,7 +423,7 @@ def parse_test_result(lines: LineStream, test: Test, # Check test num num = int(match.group(2)) if num != expected_num: - test.add_error(f'Expected test number {expected_num} but found {num}') + test.add_error(printer, f'Expected test number {expected_num} but found {num}') # Set status of test object status = match.group(1) @@ -486,7 +489,7 @@ def format_test_divider(message: str, len_message: int) -> str: len_2 = difference - len_1 return ('=' * len_1) + f' {message} ' + ('=' * len_2) -def print_test_header(test: Test) -> None: +def print_test_header(test: Test, printer: Printer) -> None: """ Prints test header with test name and optionally the expected number of subtests. @@ -496,6 +499,7 @@ def print_test_header(test: Test) -> None: Parameters: test - Test object representing current test being printed + printer - Printer object to output results """ message = test.name if message != "": @@ -507,15 +511,15 @@ def print_test_header(test: Test) -> None: message += '(1 subtest)' else: message += f'({test.expected_count} subtests)' - stdout.print_with_timestamp(format_test_divider(message, len(message))) + printer.print_with_timestamp(format_test_divider(message, len(message))) -def print_log(log: Iterable[str]) -> None: +def print_log(log: Iterable[str], printer: Printer) -> None: """Prints all strings in saved log for test in yellow.""" formatted = textwrap.dedent('\n'.join(log)) for line in formatted.splitlines(): - stdout.print_with_timestamp(stdout.yellow(line)) + printer.print_with_timestamp(printer.yellow(line)) -def format_test_result(test: Test) -> str: +def format_test_result(test: Test, printer: Printer) -> str: """ Returns string with formatted test result with colored status and test name. @@ -525,23 +529,24 @@ def format_test_result(test: Test) -> str: Parameters: test - Test object representing current test being printed + printer - Printer object to output results Return: String containing formatted test result """ if test.status == TestStatus.SUCCESS: - return stdout.green('[PASSED] ') + test.name + return printer.green('[PASSED] ') + test.name if test.status == TestStatus.SKIPPED: - return stdout.yellow('[SKIPPED] ') + test.name + return printer.yellow('[SKIPPED] ') + test.name if test.status == TestStatus.NO_TESTS: - return stdout.yellow('[NO TESTS RUN] ') + test.name + return printer.yellow('[NO TESTS RUN] ') + test.name if test.status == TestStatus.TEST_CRASHED: - print_log(test.log) + print_log(test.log, printer) return stdout.red('[CRASHED] ') + test.name - print_log(test.log) - return stdout.red('[FAILED] ') + test.name + print_log(test.log, printer) + return printer.red('[FAILED] ') + test.name -def print_test_result(test: Test) -> None: +def print_test_result(test: Test, printer: Printer) -> None: """ Prints result line with status of test. @@ -550,10 +555,11 @@ def print_test_result(test: Test) -> None: Parameters: test - Test object representing current test being printed + printer - Printer object """ - stdout.print_with_timestamp(format_test_result(test)) + printer.print_with_timestamp(format_test_result(test, printer)) -def print_test_footer(test: Test) -> None: +def print_test_footer(test: Test, printer: Printer) -> None: """ Prints test footer with status of test. @@ -562,12 +568,38 @@ def print_test_footer(test: Test) -> None: Parameters: test - Test object representing current test being printed + printer - Printer object to output results """ - message = format_test_result(test) - stdout.print_with_timestamp(format_test_divider(message, - len(message) - stdout.color_len())) + message = format_test_result(test, printer) + printer.print_with_timestamp(format_test_divider(message, + len(message) - printer.color_len())) +def print_test(test: Test, failed_only: bool, printer: Printer) -> None: + """ + Prints Test object to given printer. For a child test, the result line is + printed. For a parent test, the test header, all child test results, and + the test footer are all printed. If failed_only is true, only failed/crashed + tests will be printed. + Parameters: + test - Test object to print + failed_only - True if only failed/crashed tests should be printed. + printer - Printer object to output results + """ + if test.name == "main": + printer.print_with_timestamp(DIVIDER) + for subtest in test.subtests: + print_test(subtest, failed_only, printer) + printer.print_with_timestamp(DIVIDER) + elif test.subtests != []: + if not failed_only or not test.ok_status(): + print_test_header(test, printer) + for subtest in test.subtests: + print_test(subtest, failed_only, printer) + print_test_footer(test, printer) + else: + if not failed_only or not test.ok_status(): + print_test_result(test, printer) def _summarize_failed_tests(test: Test) -> str: """Tries to summarize all the failing subtests in `test`.""" @@ -601,7 +633,7 @@ def _summarize_failed_tests(test: Test) -> str: return 'Failures: ' + ', '.join(failures) -def print_summary_line(test: Test) -> None: +def print_summary_line(test: Test, printer: Printer) -> None: """ Prints summary line of test object. Color of line is dependent on status of test. Color is green if test passes, yellow if test is @@ -614,6 +646,7 @@ def print_summary_line(test: Test) -> None: Errors: 0" test - Test object representing current test being printed + printer - Printer object to output results """ if test.status == TestStatus.SUCCESS: color = stdout.green @@ -621,7 +654,7 @@ def print_summary_line(test: Test) -> None: color = stdout.yellow else: color = stdout.red - stdout.print_with_timestamp(color(f'Testing complete. {test.counts}')) + printer.print_with_timestamp(color(f'Testing complete. {test.counts}')) # Summarize failures that might have gone off-screen since we had a lot # of tests (arbitrarily defined as >=100 for now). @@ -630,7 +663,7 @@ def print_summary_line(test: Test) -> None: summarized = _summarize_failed_tests(test) if not summarized: return - stdout.print_with_timestamp(color(summarized)) + printer.print_with_timestamp(color(summarized)) # Other methods: @@ -654,7 +687,7 @@ def bubble_up_test_results(test: Test) -> None: elif test.counts.get_status() == TestStatus.TEST_CRASHED: test.status = TestStatus.TEST_CRASHED -def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: bool) -> Test: +def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: bool, printer: Printer) -> Test: """ Finds next test to parse in LineStream, creates new Test object, parses any subtests of the test, populates Test object with all @@ -710,6 +743,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: log - list of strings containing any preceding diagnostic lines corresponding to the current test is_subtest - boolean indicating whether test is a subtest + printer - Printer object to output results Return: Test object populated with characteristics and any subtests @@ -725,14 +759,14 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # If parsing the main/top-level test, parse KTAP version line and # test plan test.name = "main" - ktap_line = parse_ktap_header(lines, test) + ktap_line = parse_ktap_header(lines, test, printer) test.log.extend(parse_diagnostic(lines)) parse_test_plan(lines, test) parent_test = True else: # If not the main test, attempt to parse a test header containing # the KTAP version line and/or subtest header line - ktap_line = parse_ktap_header(lines, test) + ktap_line = parse_ktap_header(lines, test, printer) subtest_line = parse_test_header(lines, test) parent_test = (ktap_line or subtest_line) if parent_test: @@ -740,7 +774,7 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # to parse test plan and print test header test.log.extend(parse_diagnostic(lines)) parse_test_plan(lines, test) - print_test_header(test) + print_test_header(test, printer) expected_count = test.expected_count subtests = [] test_num = 1 @@ -758,16 +792,16 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # If parser reaches end of test before # parsing expected number of subtests, print # crashed subtest and record error - test.add_error('missing expected subtest!') + test.add_error(printer, 'missing expected subtest!') sub_test.log.extend(sub_log) test.counts.add_status( TestStatus.TEST_CRASHED) - print_test_result(sub_test) + print_test_result(sub_test, printer) else: test.log.extend(sub_log) break else: - sub_test = parse_test(lines, test_num, sub_log, True) + sub_test = parse_test(lines, test_num, sub_log, True, printer) subtests.append(sub_test) test_num += 1 test.subtests = subtests @@ -775,51 +809,51 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: # If not main test, look for test result line test.log.extend(parse_diagnostic(lines)) if test.name != "" and not peek_test_name_match(lines, test): - test.add_error('missing subtest result line!') + test.add_error(printer, 'missing subtest result line!') else: - parse_test_result(lines, test, expected_num) + parse_test_result(lines, test, expected_num, printer) # Check for there being no subtests within parent test if parent_test and len(subtests) == 0: # Don't override a bad status if this test had one reported. # Assumption: no subtests means CRASHED is from Test.__init__() if test.status in (TestStatus.TEST_CRASHED, TestStatus.SUCCESS): - print_log(test.log) + print_log(test.log, printer) test.status = TestStatus.NO_TESTS - test.add_error('0 tests run!') + test.add_error(printer, '0 tests run!') # Add statuses to TestCounts attribute in Test object bubble_up_test_results(test) if parent_test and is_subtest: # If test has subtests and is not the main test object, print # footer. - print_test_footer(test) + print_test_footer(test, printer) elif is_subtest: - print_test_result(test) + print_test_result(test, printer) return test -def parse_run_tests(kernel_output: Iterable[str]) -> Test: +def parse_run_tests(kernel_output: Iterable[str], printer: Printer) -> Test: """ Using kernel output, extract KTAP lines, parse the lines for test results and print condensed test results and summary line. Parameters: kernel_output - Iterable object contains lines of kernel output + printer - Printer object to output results Return: Test - the main test object with all subtests. """ - stdout.print_with_timestamp(DIVIDER) + printer.print_with_timestamp(DIVIDER) lines = extract_tap_lines(kernel_output) test = Test() if not lines: test.name = '<missing>' - test.add_error('Could not find any KTAP output. Did any KUnit tests run?') + test.add_error(printer, 'Could not find any KTAP output. Did any KUnit tests run?') test.status = TestStatus.FAILURE_TO_PARSE_TESTS else: - test = parse_test(lines, 0, [], False) + test = parse_test(lines, 0, [], False, printer) if test.status != TestStatus.NO_TESTS: test.status = test.counts.get_status() - stdout.print_with_timestamp(DIVIDER) - print_summary_line(test) + printer.print_with_timestamp(DIVIDER) return test diff --git a/tools/testing/kunit/kunit_printer.py b/tools/testing/kunit/kunit_printer.py index 015adf87dc2c..ca119f61fe79 100644 --- a/tools/testing/kunit/kunit_printer.py +++ b/tools/testing/kunit/kunit_printer.py @@ -15,12 +15,17 @@ _RESET = '\033[0;0m' class Printer: """Wraps a file object, providing utilities for coloring output, etc.""" - def __init__(self, output: typing.IO[str]): + def __init__(self, print: bool=True, output: typing.IO[str]=sys.stdout): self._output = output - self._use_color = output.isatty() + self._print = print + if print: + self._use_color = output.isatty() + else: + self._use_color = False def print(self, message: str) -> None: - print(message, file=self._output) + if self._print: + print(message, file=self._output) def print_with_timestamp(self, message: str) -> None: ts = datetime.datetime.now().strftime('%H:%M:%S') @@ -45,4 +50,5 @@ class Printer: return len(self.red('')) # Provides a default instance that prints to stdout -stdout = Printer(sys.stdout) +stdout = Printer() +null_printer = Printer(print=False) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 2beb7327e53f..0bcb0cc002f8 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -23,6 +23,7 @@ import kunit_parser import kunit_kernel import kunit_json import kunit +from kunit_printer import stdout test_tmpdir = '' abs_test_data_dir = '' @@ -139,28 +140,28 @@ class KUnitParserTest(unittest.TestCase): def test_parse_successful_test_log(self): all_passed_log = test_data_path('test_is_test_passed-all_passed.log') with open(all_passed_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual(result.counts.errors, 0) def test_parse_successful_nested_tests_log(self): all_passed_log = test_data_path('test_is_test_passed-all_passed_nested.log') with open(all_passed_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual(result.counts.errors, 0) def test_kselftest_nested(self): kselftest_log = test_data_path('test_is_test_passed-kselftest.log') with open(kselftest_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual(result.counts.errors, 0) def test_parse_failed_test_log(self): failed_log = test_data_path('test_is_test_passed-failure.log') with open(failed_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status) self.assertEqual(result.counts.errors, 0) @@ -168,7 +169,7 @@ class KUnitParserTest(unittest.TestCase): empty_log = test_data_path('test_is_test_passed-no_tests_run_no_header.log') with open(empty_log) as file: result = kunit_parser.parse_run_tests( - kunit_parser.extract_tap_lines(file.readlines())) + kunit_parser.extract_tap_lines(file.readlines()), stdout) self.assertEqual(0, len(result.subtests)) self.assertEqual(kunit_parser.TestStatus.FAILURE_TO_PARSE_TESTS, result.status) self.assertEqual(result.counts.errors, 1) @@ -179,7 +180,7 @@ class KUnitParserTest(unittest.TestCase): with open(missing_plan_log) as file: result = kunit_parser.parse_run_tests( kunit_parser.extract_tap_lines( - file.readlines())) + file.readlines()), stdout) # A missing test plan is not an error. self.assertEqual(result.counts, kunit_parser.TestCounts(passed=10, errors=0)) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) @@ -188,7 +189,7 @@ class KUnitParserTest(unittest.TestCase): header_log = test_data_path('test_is_test_passed-no_tests_run_with_header.log') with open(header_log) as file: result = kunit_parser.parse_run_tests( - kunit_parser.extract_tap_lines(file.readlines())) + kunit_parser.extract_tap_lines(file.readlines()), stdout) self.assertEqual(0, len(result.subtests)) self.assertEqual(kunit_parser.TestStatus.NO_TESTS, result.status) self.assertEqual(result.counts.errors, 1) @@ -197,7 +198,7 @@ class KUnitParserTest(unittest.TestCase): no_plan_log = test_data_path('test_is_test_passed-no_tests_no_plan.log') with open(no_plan_log) as file: result = kunit_parser.parse_run_tests( - kunit_parser.extract_tap_lines(file.readlines())) + kunit_parser.extract_tap_lines(file.readlines()), stdout) self.assertEqual(0, len(result.subtests[0].subtests[0].subtests)) self.assertEqual( kunit_parser.TestStatus.NO_TESTS, @@ -210,7 +211,7 @@ class KUnitParserTest(unittest.TestCase): print_mock = mock.patch('kunit_printer.Printer.print').start() with open(crash_log) as file: result = kunit_parser.parse_run_tests( - kunit_parser.extract_tap_lines(file.readlines())) + kunit_parser.extract_tap_lines(file.readlines()), stdout) print_mock.assert_any_call(StrContains('Could not find any KTAP output.')) print_mock.stop() self.assertEqual(0, len(result.subtests)) @@ -219,7 +220,7 @@ class KUnitParserTest(unittest.TestCase): def test_skipped_test(self): skipped_log = test_data_path('test_skip_tests.log') with open(skipped_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) # A skipped test does not fail the whole suite. self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) @@ -228,7 +229,7 @@ class KUnitParserTest(unittest.TestCase): def test_skipped_all_tests(self): skipped_log = test_data_path('test_skip_all_tests.log') with open(skipped_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SKIPPED, result.status) self.assertEqual(result.counts, kunit_parser.TestCounts(skipped=5)) @@ -236,7 +237,7 @@ class KUnitParserTest(unittest.TestCase): def test_ignores_hyphen(self): hyphen_log = test_data_path('test_strip_hyphen.log') with open(hyphen_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) # A skipped test does not fail the whole suite. self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) @@ -250,7 +251,7 @@ class KUnitParserTest(unittest.TestCase): def test_ignores_prefix_printk_time(self): prefix_log = test_data_path('test_config_printk_time.log') with open(prefix_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertEqual(result.counts.errors, 0) @@ -258,7 +259,7 @@ class KUnitParserTest(unittest.TestCase): def test_ignores_multiple_prefixes(self): prefix_log = test_data_path('test_multiple_prefixes.log') with open(prefix_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertEqual(result.counts.errors, 0) @@ -266,7 +267,7 @@ class KUnitParserTest(unittest.TestCase): def test_prefix_mixed_kernel_output(self): mixed_prefix_log = test_data_path('test_interrupted_tap_output.log') with open(mixed_prefix_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertEqual(result.counts.errors, 0) @@ -274,7 +275,7 @@ class KUnitParserTest(unittest.TestCase): def test_prefix_poundsign(self): pound_log = test_data_path('test_pound_sign.log') with open(pound_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertEqual(result.counts.errors, 0) @@ -282,7 +283,7 @@ class KUnitParserTest(unittest.TestCase): def test_kernel_panic_end(self): panic_log = test_data_path('test_kernel_panic_interrupt.log') with open(panic_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertGreaterEqual(result.counts.errors, 1) @@ -290,7 +291,7 @@ class KUnitParserTest(unittest.TestCase): def test_pound_no_prefix(self): pound_log = test_data_path('test_pound_no_prefix.log') with open(pound_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) self.assertEqual('kunit-resource-test', result.subtests[0].name) self.assertEqual(result.counts.errors, 0) @@ -310,7 +311,7 @@ class KUnitParserTest(unittest.TestCase): not ok 2 - test2 not ok 1 - some_failed_suite """ - result = kunit_parser.parse_run_tests(output.splitlines()) + result = kunit_parser.parse_run_tests(output.splitlines(), stdout) self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status) self.assertEqual(kunit_parser._summarize_failed_tests(result), @@ -319,7 +320,7 @@ class KUnitParserTest(unittest.TestCase): def test_ktap_format(self): ktap_log = test_data_path('test_parse_ktap_output.log') with open(ktap_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) self.assertEqual(result.counts, kunit_parser.TestCounts(passed=3)) self.assertEqual('suite', result.subtests[0].name) self.assertEqual('case_1', result.subtests[0].subtests[0].name) @@ -328,13 +329,13 @@ class KUnitParserTest(unittest.TestCase): def test_parse_subtest_header(self): ktap_log = test_data_path('test_parse_subtest_header.log') with open(ktap_log) as file: - kunit_parser.parse_run_tests(file.readlines()) + kunit_parser.parse_run_tests(file.readlines(), stdout) self.print_mock.assert_any_call(StrContains('suite (1 subtest)')) def test_parse_attributes(self): ktap_log = test_data_path('test_parse_attributes.log') with open(ktap_log) as file: - result = kunit_parser.parse_run_tests(file.readlines()) + result = kunit_parser.parse_run_tests(file.readlines(), stdout) # Test should pass with no errors self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=0)) @@ -355,7 +356,7 @@ class KUnitParserTest(unittest.TestCase): Indented more. not ok 1 test1 """ - result = kunit_parser.parse_run_tests(output.splitlines()) + result = kunit_parser.parse_run_tests(output.splitlines(), stdout) self.assertEqual(kunit_parser.TestStatus.FAILURE, result.status) self.print_mock.assert_any_call(StrContains('Test output.')) @@ -544,7 +545,7 @@ class KUnitJsonTest(unittest.TestCase): def _json_for(self, log_file): with open(test_data_path(log_file)) as file: - test_result = kunit_parser.parse_run_tests(file) + test_result = kunit_parser.parse_run_tests(file, stdout) json_obj = kunit_json.get_json_result( test=test_result, metadata=kunit_json.Metadata()) @@ -810,7 +811,7 @@ class KUnitMainTest(unittest.TestCase): self.linux_source_mock.run_kernel.return_value = ['TAP version 14', 'init: random output'] + want got = kunit._list_tests(self.linux_source_mock, - kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*', '', None, None, 'suite', False, False)) + kunit.KunitExecRequest(None, None, False, False, '.kunit', 300, 'suite*', '', None, None, 'suite', False, False)) self.assertEqual(got, want) # Should respect the user's filter glob when listing tests. self.linux_source_mock.run_kernel.assert_called_once_with( @@ -823,7 +824,7 @@ class KUnitMainTest(unittest.TestCase): # Should respect the user's filter glob when listing tests. mock_tests.assert_called_once_with(mock.ANY, - kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*.test*', '', None, None, 'suite', False, False)) + kunit.KunitExecRequest(None, None, False, False, '.kunit', 300, 'suite*.test*', '', None, None, 'suite', False, False)) self.linux_source_mock.run_kernel.assert_has_calls([ mock.call(args=None, build_dir='.kunit', filter_glob='suite.test*', filter='', filter_action=None, timeout=300), mock.call(args=None, build_dir='.kunit', filter_glob='suite2.test*', filter='', filter_action=None, timeout=300), @@ -836,7 +837,7 @@ class KUnitMainTest(unittest.TestCase): # Should respect the user's filter glob when listing tests. mock_tests.assert_called_once_with(mock.ANY, - kunit.KunitExecRequest(None, None, '.kunit', 300, 'suite*', '', None, None, 'test', False, False)) + kunit.KunitExecRequest(None, None, False, False, '.kunit', 300, 'suite*', '', None, None, 'test', False, False)) self.linux_source_mock.run_kernel.assert_has_calls([ mock.call(args=None, build_dir='.kunit', filter_glob='suite.test1', filter='', filter_action=None, timeout=300), mock.call(args=None, build_dir='.kunit', filter_glob='suite.test2', filter='', filter_action=None, timeout=300), diff --git a/tools/testing/kunit/qemu_configs/loongarch.py b/tools/testing/kunit/qemu_configs/loongarch.py new file mode 100644 index 000000000000..a92422967d1d --- /dev/null +++ b/tools/testing/kunit/qemu_configs/loongarch.py @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='loongarch', + kconfig=''' +CONFIG_EFI_STUB=n +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PVPANIC=y +CONFIG_PVPANIC_PCI=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +''', + qemu_arch='loongarch64', + kernel_path='arch/loongarch/boot/vmlinux.elf', + kernel_command_line='console=ttyS0 kunit_shutdown=poweroff', + extra_qemu_params=[ + '-machine', 'virt', + '-device', 'pvpanic-pci', + '-cpu', 'max',]) diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index 1873ddbe16cc..bc30050227fd 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -462,6 +462,28 @@ static noinline void __init check_new_node(struct maple_tree *mt) MT_BUG_ON(mt, mas_allocated(&mas) != 10 + MAPLE_ALLOC_SLOTS - 1); mas_destroy(&mas); + mas.node = MA_ERROR(-ENOMEM); + mas_node_count(&mas, MAPLE_ALLOC_SLOTS + 1); /* Request */ + mas_nomem(&mas, GFP_KERNEL); /* Fill request */ + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS + 1); + mas.node = MA_ERROR(-ENOMEM); + mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 2); /* Request */ + mas_nomem(&mas, GFP_KERNEL); /* Fill request */ + mas.status = ma_start; + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 2); + mas_destroy(&mas); + + mas.node = MA_ERROR(-ENOMEM); + mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 2 + 1); /* Request */ + mas_nomem(&mas, GFP_KERNEL); /* Fill request */ + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 2 + 1); + mas.node = MA_ERROR(-ENOMEM); + mas_node_count(&mas, MAPLE_ALLOC_SLOTS * 3 + 2); /* Request */ + mas_nomem(&mas, GFP_KERNEL); /* Fill request */ + mas.status = ma_start; + MT_BUG_ON(mt, mas_allocated(&mas) != MAPLE_ALLOC_SLOTS * 3 + 2); + mas_destroy(&mas); + mtree_unlock(mt); } @@ -36317,6 +36339,28 @@ static inline int check_vma_modification(struct maple_tree *mt) return 0; } +/* + * test to check that bulk stores do not use wr_rebalance as the store + * type. + */ +static inline void check_bulk_rebalance(struct maple_tree *mt) +{ + MA_STATE(mas, mt, ULONG_MAX, ULONG_MAX); + int max = 10; + + build_full_tree(mt, 0, 2); + + /* erase every entry in the tree */ + do { + /* set up bulk store mode */ + mas_expected_entries(&mas, max); + mas_erase(&mas); + MT_BUG_ON(mt, mas.store_type == wr_rebalance); + } while (mas_prev(&mas, 0) != NULL); + + mas_destroy(&mas); +} + void farmer_tests(void) { struct maple_node *node; @@ -36328,6 +36372,10 @@ void farmer_tests(void) check_vma_modification(&tree); mtree_destroy(&tree); + mt_init(&tree); + check_bulk_rebalance(&tree); + mtree_destroy(&tree); + tree.ma_root = xa_mk_value(0); mt_dump(&tree, mt_dump_dec); @@ -36406,9 +36454,93 @@ void farmer_tests(void) check_nomem(&tree); } +static unsigned long get_last_index(struct ma_state *mas) +{ + struct maple_node *node = mas_mn(mas); + enum maple_type mt = mte_node_type(mas->node); + unsigned long *pivots = ma_pivots(node, mt); + unsigned long last_index = mas_data_end(mas); + + BUG_ON(last_index == 0); + + return pivots[last_index - 1] + 1; +} + +/* + * Assert that we handle spanning stores that consume the entirety of the right + * leaf node correctly. + */ +static void test_spanning_store_regression(void) +{ + unsigned long from = 0, to = 0; + DEFINE_MTREE(tree); + MA_STATE(mas, &tree, 0, 0); + + /* + * Build a 3-level tree. We require a parent node below the root node + * and 2 leaf nodes under it, so we can span the entirety of the right + * hand node. + */ + build_full_tree(&tree, 0, 3); + + /* Descend into position at depth 2. */ + mas_reset(&mas); + mas_start(&mas); + mas_descend(&mas); + mas_descend(&mas); + + /* + * We need to establish a tree like the below. + * + * Then we can try a store in [from, to] which results in a spanned + * store across nodes B and C, with the maple state at the time of the + * write being such that only the subtree at A and below is considered. + * + * Height + * 0 Root Node + * / \ + * pivot = to / \ pivot = ULONG_MAX + * / \ + * 1 A [-----] ... + * / \ + * pivot = from / \ pivot = to + * / \ + * 2 (LEAVES) B [-----] [-----] C + * ^--- Last pivot to. + */ + while (true) { + unsigned long tmp = get_last_index(&mas); + + if (mas_next_sibling(&mas)) { + from = tmp; + to = mas.max; + } else { + break; + } + } + + BUG_ON(from == 0 && to == 0); + + /* Perform the store. */ + mas_set_range(&mas, from, to); + mas_store_gfp(&mas, xa_mk_value(0xdead), GFP_KERNEL); + + /* If the regression occurs, the validation will fail. */ + mt_validate(&tree); + + /* Cleanup. */ + __mt_destroy(&tree); +} + +static void regression_tests(void) +{ + test_spanning_store_regression(); +} + void maple_tree_tests(void) { #if !defined(BENCH) + regression_tests(); farmer_tests(); #endif maple_tree_seed(); diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index b38199965f99..2401e973c359 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -72,6 +72,7 @@ TARGETS += net/packetdrill TARGETS += net/rds TARGETS += net/tcp_ao TARGETS += nsfs +TARGETS += pcie_bwctrl TARGETS += perf_events TARGETS += pidfd TARGETS += pid_namespace @@ -88,9 +89,10 @@ TARGETS += rlimits TARGETS += rseq TARGETS += rtc TARGETS += rust +TARGETS += sched_ext TARGETS += seccomp TARGETS += sgx -TARGETS += sigaltstack +TARGETS += signal TARGETS += size TARGETS += sparc64 TARGETS += splice @@ -129,10 +131,10 @@ ifeq ($(filter net/lib,$(TARGETS)),) endif endif -# User can optionally provide a TARGETS skiplist. By default we skip -# BPF since it has cutting edge build time dependencies which require -# more effort to install. -SKIP_TARGETS ?= bpf +# User can optionally provide a TARGETS skiplist. By default we skip +# targets using BPF since it has cutting edge build time dependencies +# which require more effort to install. +SKIP_TARGETS ?= bpf sched_ext ifneq ($(SKIP_TARGETS),) TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS)) override TARGETS := $(TMP) diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile index 25be68025290..944279160fed 100644 --- a/tools/testing/selftests/alsa/Makefile +++ b/tools/testing/selftests/alsa/Makefile @@ -1,5 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # +ifneq ($(shell pkg-config --exists alsa && echo 0 || echo 1),0) +$(error Package alsa not found, please install alsa development package or \ + add directory containing `alsa.pc` in PKG_CONFIG_PATH) +endif CFLAGS += $(shell pkg-config --cflags alsa) $(KHDR_INCLUDES) LDLIBS += $(shell pkg-config --libs alsa) diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 28b93cab8c0d..22029e60eff3 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi +ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi gcs else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index f2d6007a2b98..0029ed9c5c9a 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -98,6 +98,17 @@ static void fpmr_sigill(void) asm volatile("mrs x0, S3_3_C4_C4_2" : : : "x0"); } +static void gcs_sigill(void) +{ + unsigned long *gcspr; + + asm volatile( + "mrs %0, S3_3_C2_C5_1" + : "=r" (gcspr) + : + : "cc"); +} + static void ilrcpc_sigill(void) { /* LDAPUR W0, [SP, #8] */ @@ -361,8 +372,8 @@ static void sveaes_sigill(void) static void sveb16b16_sigill(void) { - /* BFADD ZA.H[W0, 0], {Z0.H-Z1.H} */ - asm volatile(".inst 0xC1E41C00" : : : ); + /* BFADD Z0.H, Z0.H, Z0.H */ + asm volatile(".inst 0x65000000" : : : ); } static void svepmull_sigill(void) @@ -490,7 +501,7 @@ static const struct hwcap_data { .name = "F8DP2", .at_hwcap = AT_HWCAP2, .hwcap_bit = HWCAP2_F8DP2, - .cpuinfo = "f8dp4", + .cpuinfo = "f8dp2", .sigill_fn = f8dp2_sigill, }, { @@ -535,6 +546,14 @@ static const struct hwcap_data { .sigill_reliable = true, }, { + .name = "GCS", + .at_hwcap = AT_HWCAP, + .hwcap_bit = HWCAP_GCS, + .cpuinfo = "gcs", + .sigill_fn = gcs_sigill, + .sigill_reliable = true, + }, + { .name = "JSCVT", .at_hwcap = AT_HWCAP, .hwcap_bit = HWCAP_JSCVT, diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c index d704511a0955..5ec9a18ec802 100644 --- a/tools/testing/selftests/arm64/abi/syscall-abi.c +++ b/tools/testing/selftests/arm64/abi/syscall-abi.c @@ -81,7 +81,7 @@ static int check_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t s */ for (i = 9; i < ARRAY_SIZE(gpr_in); i++) { if (gpr_in[i] != gpr_out[i]) { - ksft_print_msg("%s SVE VL %d mismatch in GPR %d: %llx != %llx\n", + ksft_print_msg("%s SVE VL %d mismatch in GPR %d: %lx != %lx\n", cfg->name, sve_vl, i, gpr_in[i], gpr_out[i]); errors++; @@ -112,7 +112,7 @@ static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, if (!sve_vl && !(svcr & SVCR_SM_MASK)) { for (i = 0; i < ARRAY_SIZE(fpr_in); i++) { if (fpr_in[i] != fpr_out[i]) { - ksft_print_msg("%s Q%d/%d mismatch %llx != %llx\n", + ksft_print_msg("%s Q%d/%d mismatch %lx != %lx\n", cfg->name, i / 2, i % 2, fpr_in[i], fpr_out[i]); @@ -294,13 +294,13 @@ static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, int errors = 0; if (svcr_out & SVCR_SM_MASK) { - ksft_print_msg("%s Still in SM, SVCR %llx\n", + ksft_print_msg("%s Still in SM, SVCR %lx\n", cfg->name, svcr_out); errors++; } if ((svcr_in & SVCR_ZA_MASK) != (svcr_out & SVCR_ZA_MASK)) { - ksft_print_msg("%s PSTATE.ZA changed, SVCR %llx != %llx\n", + ksft_print_msg("%s PSTATE.ZA changed, SVCR %lx != %lx\n", cfg->name, svcr_in, svcr_out); errors++; } diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h index 9b38a0da407d..1fc46a5642c2 100644 --- a/tools/testing/selftests/arm64/fp/assembler.h +++ b/tools/testing/selftests/arm64/fp/assembler.h @@ -65,4 +65,19 @@ endfunction bl puts .endm +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) + +.macro enable_gcs + // Run with GCS + mov x0, PR_SET_SHADOW_STACK_STATUS + mov x1, PR_SHADOW_STACK_ENABLE + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x8, #__NR_prctl + svc #0 +.endm + #endif /* ! ASSEMBLER_H */ diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S b/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S index 7ad59d92d02b..82c3ab70e1cf 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S +++ b/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S @@ -15,10 +15,7 @@ // Load and save register values with pauses for ptrace // -// x0 - SVE in use -// x1 - SME in use -// x2 - SME2 in use -// x3 - FA64 supported +// x0 - HAVE_ flags indicating which features are in use .globl load_and_save load_and_save: @@ -44,7 +41,7 @@ load_and_save: ldp q30, q31, [x7, #16 * 30] // SME? - cbz x1, check_sve_in + tbz x0, #HAVE_SME_SHIFT, check_sve_in adrp x7, svcr_in ldr x7, [x7, :lo12:svcr_in] @@ -64,7 +61,7 @@ load_and_save: bne 1b // ZT? - cbz x2, check_sm_in + tbz x0, #HAVE_SME2_SHIFT, check_sm_in adrp x6, zt_in add x6, x6, :lo12:zt_in _ldr_zt 6 @@ -72,12 +69,14 @@ load_and_save: // In streaming mode? check_sm_in: tbz x7, #SVCR_SM_SHIFT, check_sve_in - mov x4, x3 // Load FFR if we have FA64 + + // Load FFR if we have FA64 + ubfx x4, x0, #HAVE_FA64_SHIFT, #1 b load_sve // SVE? check_sve_in: - cbz x0, wait_for_writes + tbz x0, #HAVE_SVE_SHIFT, check_fpmr_in mov x4, #1 load_sve: @@ -142,6 +141,13 @@ load_sve: ldr p14, [x7, #14, MUL VL] ldr p15, [x7, #15, MUL VL] + // This has to come after we set PSTATE.SM +check_fpmr_in: + tbz x0, #HAVE_FPMR_SHIFT, wait_for_writes + adrp x7, fpmr_in + ldr x7, [x7, :lo12:fpmr_in] + msr REG_FPMR, x7 + wait_for_writes: // Wait for the parent brk #0 @@ -165,8 +171,13 @@ wait_for_writes: stp q28, q29, [x7, #16 * 28] stp q30, q31, [x7, #16 * 30] - // SME? - cbz x1, check_sve_out + tbz x0, #HAVE_FPMR_SHIFT, check_sme_out + mrs x7, REG_FPMR + adrp x6, fpmr_out + str x7, [x6, :lo12:fpmr_out] + +check_sme_out: + tbz x0, #HAVE_SME_SHIFT, check_sve_out rdsvl 11, 1 adrp x6, sme_vl_out @@ -187,7 +198,7 @@ wait_for_writes: bne 1b // ZT? - cbz x2, check_sm_out + tbz x0, #HAVE_SME2_SHIFT, check_sm_out adrp x6, zt_out add x6, x6, :lo12:zt_out _str_zt 6 @@ -195,12 +206,14 @@ wait_for_writes: // In streaming mode? check_sm_out: tbz x7, #SVCR_SM_SHIFT, check_sve_out - mov x4, x3 // FFR? + + // Do we have FA64 and FFR? + ubfx x4, x0, #HAVE_FA64_SHIFT, #1 b read_sve // SVE? check_sve_out: - cbz x0, wait_for_reads + tbz x0, #HAVE_SVE_SHIFT, wait_for_reads mov x4, #1 rdvl x7, #1 @@ -271,7 +284,7 @@ wait_for_reads: brk #0 // Ensure we don't leave ourselves in streaming mode - cbz x1, out + tbz x0, #HAVE_SME_SHIFT, out msr S3_3_C4_C2_2, xzr out: diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index c7ceafe5f471..4930e03a7b99 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -31,6 +31,14 @@ #include "fp-ptrace.h" +#include <linux/bits.h> + +#define FPMR_LSCALE2_MASK GENMASK(37, 32) +#define FPMR_NSCALE_MASK GENMASK(31, 24) +#define FPMR_LSCALE_MASK GENMASK(22, 16) +#define FPMR_OSC_MASK GENMASK(15, 15) +#define FPMR_OSM_MASK GENMASK(14, 14) + /* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */ #ifndef NT_ARM_SVE #define NT_ARM_SVE 0x405 @@ -48,11 +56,22 @@ #define NT_ARM_ZT 0x40d #endif +#ifndef NT_ARM_FPMR +#define NT_ARM_FPMR 0x40e +#endif + #define ARCH_VQ_MAX 256 /* VL 128..2048 in powers of 2 */ #define MAX_NUM_VLS 5 +/* + * FPMR bits we can set without doing feature checks to see if values + * are valid. + */ +#define FPMR_SAFE_BITS (FPMR_LSCALE2_MASK | FPMR_NSCALE_MASK | \ + FPMR_LSCALE_MASK | FPMR_OSC_MASK | FPMR_OSM_MASK) + #define NUM_FPR 32 __uint128_t v_in[NUM_FPR]; __uint128_t v_expected[NUM_FPR]; @@ -78,11 +97,13 @@ char zt_in[ZT_SIG_REG_BYTES]; char zt_expected[ZT_SIG_REG_BYTES]; char zt_out[ZT_SIG_REG_BYTES]; +uint64_t fpmr_in, fpmr_expected, fpmr_out; + uint64_t sve_vl_out; uint64_t sme_vl_out; uint64_t svcr_in, svcr_expected, svcr_out; -void load_and_save(int sve, int sme, int sme2, int fa64); +void load_and_save(int flags); static bool got_alarm; @@ -128,6 +149,11 @@ static bool fa64_supported(void) return getauxval(AT_HWCAP2) & HWCAP2_SME_FA64; } +static bool fpmr_supported(void) +{ + return getauxval(AT_HWCAP2) & HWCAP2_FPMR; +} + static bool compare_buffer(const char *name, void *out, void *expected, size_t size) { @@ -198,7 +224,7 @@ static int vl_expected(struct test_config *config) static void run_child(struct test_config *config) { - int ret; + int ret, flags; /* Let the parent attach to us */ ret = ptrace(PTRACE_TRACEME, 0, 0, 0); @@ -224,8 +250,19 @@ static void run_child(struct test_config *config) } /* Load values and wait for the parent */ - load_and_save(sve_supported(), sme_supported(), - sme2_supported(), fa64_supported()); + flags = 0; + if (sve_supported()) + flags |= HAVE_SVE; + if (sme_supported()) + flags |= HAVE_SME; + if (sme2_supported()) + flags |= HAVE_SME2; + if (fa64_supported()) + flags |= HAVE_FA64; + if (fpmr_supported()) + flags |= HAVE_FPMR; + + load_and_save(flags); exit(0); } @@ -312,6 +349,14 @@ static void read_child_regs(pid_t child) iov_child.iov_len = sizeof(zt_out); read_one_child_regs(child, "ZT", &iov_parent, &iov_child); } + + if (fpmr_supported()) { + iov_parent.iov_base = &fpmr_out; + iov_parent.iov_len = sizeof(fpmr_out); + iov_child.iov_base = &fpmr_out; + iov_child.iov_len = sizeof(fpmr_out); + read_one_child_regs(child, "FPMR", &iov_parent, &iov_child); + } } static bool continue_breakpoint(pid_t child, @@ -586,6 +631,26 @@ static bool check_ptrace_values_zt(pid_t child, struct test_config *config) return compare_buffer("initial ZT", buf, zt_in, ZT_SIG_REG_BYTES); } +static bool check_ptrace_values_fpmr(pid_t child, struct test_config *config) +{ + uint64_t val; + struct iovec iov; + int ret; + + if (!fpmr_supported()) + return true; + + iov.iov_base = &val; + iov.iov_len = sizeof(val); + ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_FPMR, &iov); + if (ret != 0) { + ksft_print_msg("Failed to read initial FPMR: %s (%d)\n", + strerror(errno), errno); + return false; + } + + return compare_buffer("initial FPMR", &val, &fpmr_in, sizeof(val)); +} static bool check_ptrace_values(pid_t child, struct test_config *config) { @@ -620,6 +685,9 @@ static bool check_ptrace_values(pid_t child, struct test_config *config) if (!check_ptrace_values_zt(child, config)) pass = false; + if (!check_ptrace_values_fpmr(child, config)) + pass = false; + return pass; } @@ -823,11 +891,18 @@ static void set_initial_values(struct test_config *config) { int vq = __sve_vq_from_vl(vl_in(config)); int sme_vq = __sve_vq_from_vl(config->sme_vl_in); + bool sm_change; svcr_in = config->svcr_in; svcr_expected = config->svcr_expected; svcr_out = 0; + if (sme_supported() && + (svcr_in & SVCR_SM) != (svcr_expected & SVCR_SM)) + sm_change = true; + else + sm_change = false; + fill_random(&v_in, sizeof(v_in)); memcpy(v_expected, v_in, sizeof(v_in)); memset(v_out, 0, sizeof(v_out)); @@ -874,6 +949,21 @@ static void set_initial_values(struct test_config *config) memset(zt_expected, 0, ZT_SIG_REG_BYTES); memset(zt_out, 0, sizeof(zt_out)); } + + if (fpmr_supported()) { + fill_random(&fpmr_in, sizeof(fpmr_in)); + fpmr_in &= FPMR_SAFE_BITS; + + /* Entering or exiting streaming mode clears FPMR */ + if (sm_change) + fpmr_expected = 0; + else + fpmr_expected = fpmr_in; + } else { + fpmr_in = 0; + fpmr_expected = 0; + fpmr_out = 0; + } } static bool check_memory_values(struct test_config *config) @@ -924,6 +1014,12 @@ static bool check_memory_values(struct test_config *config) if (!compare_buffer("saved ZT", zt_out, zt_expected, ZT_SIG_REG_BYTES)) pass = false; + if (fpmr_out != fpmr_expected) { + ksft_print_msg("Mismatch in saved FPMR: %lx != %lx\n", + fpmr_out, fpmr_expected); + pass = false; + } + return pass; } @@ -1001,6 +1097,36 @@ static void fpsimd_write(pid_t child, struct test_config *test_config) strerror(errno), errno); } +static bool fpmr_write_supported(struct test_config *config) +{ + if (!fpmr_supported()) + return false; + + if (!sve_sme_same(config)) + return false; + + return true; +} + +static void fpmr_write_expected(struct test_config *config) +{ + fill_random(&fpmr_expected, sizeof(fpmr_expected)); + fpmr_expected &= FPMR_SAFE_BITS; +} + +static void fpmr_write(pid_t child, struct test_config *config) +{ + struct iovec iov; + int ret; + + iov.iov_len = sizeof(fpmr_expected); + iov.iov_base = &fpmr_expected; + ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_FPMR, &iov); + if (ret != 0) + ksft_print_msg("Failed to write FPMR: %s (%d)\n", + strerror(errno), errno); +} + static void sve_write_expected(struct test_config *config) { int vl = vl_expected(config); @@ -1069,21 +1195,19 @@ static void sve_write(pid_t child, struct test_config *config) static bool za_write_supported(struct test_config *config) { - if (config->svcr_expected & SVCR_SM) { - if (!(config->svcr_in & SVCR_SM)) + if (config->sme_vl_in != config->sme_vl_expected) { + /* Changing the SME VL exits streaming mode. */ + if (config->svcr_expected & SVCR_SM) { return false; - - /* Changing the SME VL exits streaming mode */ - if (config->sme_vl_in != config->sme_vl_expected) { + } + } else { + /* Otherwise we can't change streaming mode */ + if ((config->svcr_in & SVCR_SM) != + (config->svcr_expected & SVCR_SM)) { return false; } } - /* Can't disable SM outside a VL change */ - if ((config->svcr_in & SVCR_SM) && - !(config->svcr_expected & SVCR_SM)) - return false; - return true; } @@ -1259,6 +1383,12 @@ static struct test_definition base_test_defs[] = { .set_expected_values = fpsimd_write_expected, .modify_values = fpsimd_write, }, + { + .name = "FPMR write", + .supported = fpmr_write_supported, + .set_expected_values = fpmr_write_expected, + .modify_values = fpmr_write, + }, }; static struct test_definition sve_test_defs[] = { @@ -1468,6 +1598,9 @@ int main(void) if (fa64_supported()) ksft_print_msg("FA64 supported\n"); + if (fpmr_supported()) + ksft_print_msg("FPMR supported\n"); + ksft_set_plan(tests); /* Get signal handers ready before we start any children */ diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.h b/tools/testing/selftests/arm64/fp/fp-ptrace.h index db4f2c4d750c..c06919aaf1f7 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.h +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.h @@ -10,4 +10,16 @@ #define SVCR_SM (1 << SVCR_SM_SHIFT) #define SVCR_ZA (1 << SVCR_ZA_SHIFT) +#define HAVE_SVE_SHIFT 0 +#define HAVE_SME_SHIFT 1 +#define HAVE_SME2_SHIFT 2 +#define HAVE_FA64_SHIFT 3 +#define HAVE_FPMR_SHIFT 4 + +#define HAVE_SVE (1 << HAVE_SVE_SHIFT) +#define HAVE_SME (1 << HAVE_SME_SHIFT) +#define HAVE_SME2 (1 << HAVE_SME2_SHIFT) +#define HAVE_FA64 (1 << HAVE_FA64_SHIFT) +#define HAVE_FPMR (1 << HAVE_FPMR_SHIFT) + #endif diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c index faac24bdefeb..74e23208b94c 100644 --- a/tools/testing/selftests/arm64/fp/fp-stress.c +++ b/tools/testing/selftests/arm64/fp/fp-stress.c @@ -28,6 +28,9 @@ #define MAX_VLS 16 +#define SIGNAL_INTERVAL_MS 25 +#define LOG_INTERVALS (1000 / SIGNAL_INTERVAL_MS) + struct child_data { char *name, *output; pid_t pid; @@ -79,7 +82,7 @@ static void child_start(struct child_data *child, const char *program) */ ret = dup2(pipefd[1], 1); if (ret == -1) { - fprintf(stderr, "dup2() %d\n", errno); + printf("dup2() %d\n", errno); exit(EXIT_FAILURE); } @@ -89,7 +92,7 @@ static void child_start(struct child_data *child, const char *program) */ ret = dup2(startup_pipe[0], 3); if (ret == -1) { - fprintf(stderr, "dup2() %d\n", errno); + printf("dup2() %d\n", errno); exit(EXIT_FAILURE); } @@ -107,16 +110,15 @@ static void child_start(struct child_data *child, const char *program) */ ret = read(3, &i, sizeof(i)); if (ret < 0) - fprintf(stderr, "read(startp pipe) failed: %s (%d)\n", - strerror(errno), errno); + printf("read(startp pipe) failed: %s (%d)\n", + strerror(errno), errno); if (ret > 0) - fprintf(stderr, "%d bytes of data on startup pipe\n", - ret); + printf("%d bytes of data on startup pipe\n", ret); close(3); ret = execl(program, program, NULL); - fprintf(stderr, "execl(%s) failed: %d (%s)\n", - program, errno, strerror(errno)); + printf("execl(%s) failed: %d (%s)\n", + program, errno, strerror(errno)); exit(EXIT_FAILURE); } else { @@ -221,7 +223,7 @@ static void child_output(struct child_data *child, uint32_t events, static void child_tickle(struct child_data *child) { if (child->output_seen && !child->exited) - kill(child->pid, SIGUSR2); + kill(child->pid, SIGUSR1); } static void child_stop(struct child_data *child) @@ -449,7 +451,8 @@ static const struct option options[] = { int main(int argc, char **argv) { int ret; - int timeout = 10; + int timeout = 10 * (1000 / SIGNAL_INTERVAL_MS); + int poll_interval = 5000; int cpus, i, j, c; int sve_vl_count, sme_vl_count; bool all_children_started = false; @@ -505,7 +508,7 @@ int main(int argc, char **argv) have_sme2 ? "present" : "absent"); if (timeout > 0) - ksft_print_msg("Will run for %ds\n", timeout); + ksft_print_msg("Will run for %d\n", timeout); else ksft_print_msg("Will run until terminated\n"); @@ -578,14 +581,14 @@ int main(int argc, char **argv) break; /* - * Timeout is counted in seconds with no output, the - * tests print during startup then are silent when - * running so this should ensure they all ran enough - * to install the signal handler, this is especially - * useful in emulation where we will both be slow and - * likely to have a large set of VLs. + * Timeout is counted in poll intervals with no + * output, the tests print during startup then are + * silent when running so this should ensure they all + * ran enough to install the signal handler, this is + * especially useful in emulation where we will both + * be slow and likely to have a large set of VLs. */ - ret = epoll_wait(epoll_fd, evs, tests, 1000); + ret = epoll_wait(epoll_fd, evs, tests, poll_interval); if (ret < 0) { if (errno == EINTR) continue; @@ -623,10 +626,12 @@ int main(int argc, char **argv) } all_children_started = true; + poll_interval = SIGNAL_INTERVAL_MS; } - ksft_print_msg("Sending signals, timeout remaining: %d\n", - timeout); + if ((timeout % LOG_INTERVALS) == 0) + ksft_print_msg("Sending signals, timeout remaining: %d\n", + timeout); for (i = 0; i < num_children; i++) child_tickle(&children[i]); @@ -651,7 +656,5 @@ int main(int argc, char **argv) drain_output(true); - ksft_print_cnts(); - - return 0; + ksft_finished(); } diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S index 8b960d01ed2e..f89d67894c2e 100644 --- a/tools/testing/selftests/arm64/fp/fpsimd-test.S +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -134,8 +134,7 @@ function check_vreg b memcmp endfunction -// Any SVE register modified here can cause corruption in the main -// thread -- but *only* the registers modified here. +// Modify live register state, the signal return will undo our changes function irritator_handler // Increment the irritation signal count (x23): ldr x0, [x2, #ucontext_regs + 8 * 23] @@ -143,7 +142,6 @@ function irritator_handler str x0, [x2, #ucontext_regs + 8 * 23] // Corrupt some random V-regs - adr x0, .text + (irritator_handler - .text) / 16 * 16 movi v0.8b, #7 movi v9.16b, #9 movi v31.8b, #31 @@ -215,6 +213,8 @@ endfunction // Main program entry point .globl _start function _start + enable_gcs + mov x23, #0 // signal count mov w0, #SIGINT diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c index e8da3b4cbd23..859345379044 100644 --- a/tools/testing/selftests/arm64/fp/kernel-test.c +++ b/tools/testing/selftests/arm64/fp/kernel-test.c @@ -267,6 +267,10 @@ int main(void) strerror(errno), errno); sa.sa_sigaction = handle_kick_signal; + ret = sigaction(SIGUSR1, &sa, NULL); + if (ret < 0) + printf("Failed to install SIGUSR1 handler: %s (%d)\n", + strerror(errno), errno); ret = sigaction(SIGUSR2, &sa, NULL); if (ret < 0) printf("Failed to install SIGUSR2 handler: %s (%d)\n", diff --git a/tools/testing/selftests/arm64/fp/sme-inst.h b/tools/testing/selftests/arm64/fp/sme-inst.h index 9292bba5400b..85b9184e0835 100644 --- a/tools/testing/selftests/arm64/fp/sme-inst.h +++ b/tools/testing/selftests/arm64/fp/sme-inst.h @@ -5,6 +5,8 @@ #ifndef SME_INST_H #define SME_INST_H +#define REG_FPMR S3_3_C4_C4_2 + /* * RDSVL X\nx, #\imm */ diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 6d61992fe8a0..577b6e05e860 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -82,10 +82,12 @@ static void fill_buf(char *buf, size_t size) static int do_child(void) { if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) - ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + ksft_exit_fail_msg("ptrace(PTRACE_TRACEME) failed: %s (%d)\n", + strerror(errno), errno); if (raise(SIGSTOP)) - ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + ksft_exit_fail_msg("raise(SIGSTOP) failed: %s (%d)\n", + strerror(errno), errno); return EXIT_SUCCESS; } @@ -340,7 +342,7 @@ static void ptrace_set_sve_get_sve_data(pid_t child, data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); write_buf = malloc(data_size); if (!write_buf) { - ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n", + ksft_test_result_fail("Error allocating %ld byte buffer for %s VL %u\n", data_size, type->name, vl); return; } @@ -441,7 +443,7 @@ static void ptrace_set_sve_get_fpsimd_data(pid_t child, data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); write_buf = malloc(data_size); if (!write_buf) { - ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n", + ksft_test_result_fail("Error allocating %ld byte buffer for %s VL %u\n", data_size, type->name, vl); return; } @@ -545,7 +547,7 @@ static void ptrace_set_fpsimd_get_sve_data(pid_t child, read_sve = read_buf; if (read_sve->vl != vl) { - ksft_test_result_fail("Child VL != expected VL %d\n", + ksft_test_result_fail("Child VL != expected VL: %u != %u\n", read_sve->vl, vl); goto out; } @@ -555,7 +557,7 @@ static void ptrace_set_fpsimd_get_sve_data(pid_t child, case SVE_PT_REGS_FPSIMD: expected_size = SVE_PT_FPSIMD_SIZE(vq, SVE_PT_REGS_FPSIMD); if (read_sve_size < expected_size) { - ksft_test_result_fail("Read %d bytes, expected %d\n", + ksft_test_result_fail("Read %ld bytes, expected %ld\n", read_sve_size, expected_size); goto out; } @@ -571,7 +573,7 @@ static void ptrace_set_fpsimd_get_sve_data(pid_t child, case SVE_PT_REGS_SVE: expected_size = SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); if (read_sve_size < expected_size) { - ksft_test_result_fail("Read %d bytes, expected %d\n", + ksft_test_result_fail("Read %ld bytes, expected %ld\n", read_sve_size, expected_size); goto out; } diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S index fff60e2a25ad..80e072f221cd 100644 --- a/tools/testing/selftests/arm64/fp/sve-test.S +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -291,8 +291,7 @@ function check_ffr #endif endfunction -// Any SVE register modified here can cause corruption in the main -// thread -- but *only* the registers modified here. +// Modify live register state, the signal return will undo our changes function irritator_handler // Increment the irritation signal count (x23): ldr x0, [x2, #ucontext_regs + 8 * 23] @@ -300,13 +299,12 @@ function irritator_handler str x0, [x2, #ucontext_regs + 8 * 23] // Corrupt some random Z-regs - adr x0, .text + (irritator_handler - .text) / 16 * 16 movi v0.8b, #1 movi v9.16b, #2 movi v31.8b, #3 -#ifndef SSVE // And P0 - rdffr p0.b + ptrue p0.d +#ifndef SSVE // And FFR wrffr p15.b #endif @@ -378,6 +376,8 @@ endfunction // Main program entry point .globl _start function _start + enable_gcs + mov x23, #0 // Irritation signal count mov w0, #SIGINT diff --git a/tools/testing/selftests/arm64/fp/za-ptrace.c b/tools/testing/selftests/arm64/fp/za-ptrace.c index ac27d87396fc..08c777f87ea2 100644 --- a/tools/testing/selftests/arm64/fp/za-ptrace.c +++ b/tools/testing/selftests/arm64/fp/za-ptrace.c @@ -48,10 +48,12 @@ static void fill_buf(char *buf, size_t size) static int do_child(void) { if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) - ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + ksft_exit_fail_msg("ptrace(PTRACE_TRACEME) failed: %s (%d)", + strerror(errno), errno); if (raise(SIGSTOP)) - ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + ksft_exit_fail_msg("raise(SIGSTOP) failed: %s (%d)\n", + strerror(errno), errno); return EXIT_SUCCESS; } @@ -201,7 +203,7 @@ static void ptrace_set_get_data(pid_t child, unsigned int vl) data_size = ZA_PT_SIZE(vq); write_buf = malloc(data_size); if (!write_buf) { - ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n", + ksft_test_result_fail("Error allocating %ld byte buffer for VL %u\n", data_size, vl); return; } diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S index 095b45531640..9c33e13e9dc4 100644 --- a/tools/testing/selftests/arm64/fp/za-test.S +++ b/tools/testing/selftests/arm64/fp/za-test.S @@ -148,21 +148,16 @@ function check_za b memcmp endfunction -// Any SME register modified here can cause corruption in the main -// thread -- but *only* the locations modified here. +// Modify the live SME register state, signal return will undo our changes function irritator_handler // Increment the irritation signal count (x23): ldr x0, [x2, #ucontext_regs + 8 * 23] add x0, x0, #1 str x0, [x2, #ucontext_regs + 8 * 23] - // Corrupt some random ZA data -#if 0 - adr x0, .text + (irritator_handler - .text) / 16 * 16 - movi v0.8b, #1 - movi v9.16b, #2 - movi v31.8b, #3 -#endif + // This will reset ZA to all bits 0 + smstop + smstart_za ret endfunction @@ -231,6 +226,8 @@ endfunction // Main program entry point .globl _start function _start + enable_gcs + mov x23, #0 // signal count mov w0, #SIGINT diff --git a/tools/testing/selftests/arm64/fp/zt-ptrace.c b/tools/testing/selftests/arm64/fp/zt-ptrace.c index 996d9614a131..584b8d59b7ea 100644 --- a/tools/testing/selftests/arm64/fp/zt-ptrace.c +++ b/tools/testing/selftests/arm64/fp/zt-ptrace.c @@ -43,10 +43,12 @@ static void fill_buf(char *buf, size_t size) static int do_child(void) { if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) - ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + ksft_exit_fail_msg("ptrace(PTRACE_TRACEME) failed: %s (%d)\n", + strerror(errno), errno); if (raise(SIGSTOP)) - ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + ksft_exit_fail_msg("raise(SIGSTOP) failed: %s (%d)\n", + strerror(errno), errno); return EXIT_SUCCESS; } @@ -231,7 +233,7 @@ static void ptrace_enable_za_via_zt(pid_t child) /* Should have register data */ if (za_out->size < ZA_PT_SIZE(vq)) { ksft_print_msg("ZA data less than expected: %u < %u\n", - za_out->size, ZA_PT_SIZE(vq)); + za_out->size, (unsigned int)ZA_PT_SIZE(vq)); fail = true; vq = 0; } diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S index b5c81e81a379..38080f3c3280 100644 --- a/tools/testing/selftests/arm64/fp/zt-test.S +++ b/tools/testing/selftests/arm64/fp/zt-test.S @@ -117,21 +117,16 @@ function check_zt b memcmp endfunction -// Any SME register modified here can cause corruption in the main -// thread -- but *only* the locations modified here. +// Modify the live SME register state, signal return will undo our changes function irritator_handler // Increment the irritation signal count (x23): ldr x0, [x2, #ucontext_regs + 8 * 23] add x0, x0, #1 str x0, [x2, #ucontext_regs + 8 * 23] - // Corrupt some random ZT data -#if 0 - adr x0, .text + (irritator_handler - .text) / 16 * 16 - movi v0.8b, #1 - movi v9.16b, #2 - movi v31.8b, #3 -#endif + // This will reset ZT to all bits 0 + smstop + smstart_za ret endfunction @@ -200,6 +195,8 @@ endfunction // Main program entry point .globl _start function _start + enable_gcs + mov x23, #0 // signal count mov w0, #SIGINT diff --git a/tools/testing/selftests/arm64/gcs/.gitignore b/tools/testing/selftests/arm64/gcs/.gitignore new file mode 100644 index 000000000000..bbb8e40a7e52 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/.gitignore @@ -0,0 +1,7 @@ +basic-gcs +libc-gcs +gcs-locking +gcs-stress +gcs-stress-thread +gcspushm +gcsstr diff --git a/tools/testing/selftests/arm64/gcs/Makefile b/tools/testing/selftests/arm64/gcs/Makefile new file mode 100644 index 000000000000..d2f3497a9103 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/Makefile @@ -0,0 +1,30 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2023 ARM Limited +# +# In order to avoid interaction with the toolchain and dynamic linker the +# portions of these tests that interact with the GCS are implemented using +# nolibc. +# + +TEST_GEN_PROGS := basic-gcs libc-gcs gcs-locking gcs-stress gcspushm gcsstr +TEST_GEN_PROGS_EXTENDED := gcs-stress-thread + +LDLIBS+=-lpthread + +include ../../lib.mk + +$(OUTPUT)/basic-gcs: basic-gcs.c + $(CC) -g -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ + -static -include ../../../../include/nolibc/nolibc.h \ + -I../../../../../usr/include \ + -std=gnu99 -I../.. -g \ + -ffreestanding -Wall $^ -o $@ -lgcc + +$(OUTPUT)/gcs-stress-thread: gcs-stress-thread.S + $(CC) -nostdlib $^ -o $@ + +$(OUTPUT)/gcspushm: gcspushm.S + $(CC) -nostdlib $^ -o $@ + +$(OUTPUT)/gcsstr: gcsstr.S + $(CC) -nostdlib $^ -o $@ diff --git a/tools/testing/selftests/arm64/gcs/asm-offsets.h b/tools/testing/selftests/arm64/gcs/asm-offsets.h new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/asm-offsets.h diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c new file mode 100644 index 000000000000..3fb9742342a3 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c @@ -0,0 +1,357 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 ARM Limited. + */ + +#include <limits.h> +#include <stdbool.h> + +#include <linux/prctl.h> + +#include <sys/mman.h> +#include <asm/mman.h> +#include <linux/sched.h> + +#include "kselftest.h" +#include "gcs-util.h" + +/* nolibc doesn't have sysconf(), just hard code the maximum */ +static size_t page_size = 65536; + +static __attribute__((noinline)) void valid_gcs_function(void) +{ + /* Do something the compiler can't optimise out */ + my_syscall1(__NR_prctl, PR_SVE_GET_VL); +} + +static inline int gcs_set_status(unsigned long mode) +{ + bool enabling = mode & PR_SHADOW_STACK_ENABLE; + int ret; + unsigned long new_mode; + + /* + * The prctl takes 1 argument but we need to ensure that the + * other 3 values passed in registers to the syscall are zero + * since the kernel validates them. + */ + ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, mode, + 0, 0, 0); + + if (ret == 0) { + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, + &new_mode, 0, 0, 0); + if (ret == 0) { + if (new_mode != mode) { + ksft_print_msg("Mode set to %lx not %lx\n", + new_mode, mode); + ret = -EINVAL; + } + } else { + ksft_print_msg("Failed to validate mode: %d\n", ret); + } + + if (enabling != chkfeat_gcs()) { + ksft_print_msg("%senabled by prctl but %senabled in CHKFEAT\n", + enabling ? "" : "not ", + chkfeat_gcs() ? "" : "not "); + ret = -EINVAL; + } + } + + return ret; +} + +/* Try to read the status */ +static bool read_status(void) +{ + unsigned long state; + int ret; + + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, + &state, 0, 0, 0); + if (ret != 0) { + ksft_print_msg("Failed to read state: %d\n", ret); + return false; + } + + return state & PR_SHADOW_STACK_ENABLE; +} + +/* Just a straight enable */ +static bool base_enable(void) +{ + int ret; + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); + if (ret) { + ksft_print_msg("PR_SHADOW_STACK_ENABLE failed %d\n", ret); + return false; + } + + return true; +} + +/* Check we can read GCSPR_EL0 when GCS is enabled */ +static bool read_gcspr_el0(void) +{ + unsigned long *gcspr_el0; + + ksft_print_msg("GET GCSPR\n"); + gcspr_el0 = get_gcspr(); + ksft_print_msg("GCSPR_EL0 is %p\n", gcspr_el0); + + return true; +} + +/* Also allow writes to stack */ +static bool enable_writeable(void) +{ + int ret; + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE); + if (ret) { + ksft_print_msg("PR_SHADOW_STACK_ENABLE writeable failed: %d\n", ret); + return false; + } + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); + if (ret) { + ksft_print_msg("failed to restore plain enable %d\n", ret); + return false; + } + + return true; +} + +/* Also allow writes to stack */ +static bool enable_push_pop(void) +{ + int ret; + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH); + if (ret) { + ksft_print_msg("PR_SHADOW_STACK_ENABLE with push failed: %d\n", + ret); + return false; + } + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); + if (ret) { + ksft_print_msg("failed to restore plain enable %d\n", ret); + return false; + } + + return true; +} + +/* Enable GCS and allow everything */ +static bool enable_all(void) +{ + int ret; + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH | + PR_SHADOW_STACK_WRITE); + if (ret) { + ksft_print_msg("PR_SHADOW_STACK_ENABLE with everything failed: %d\n", + ret); + return false; + } + + ret = gcs_set_status(PR_SHADOW_STACK_ENABLE); + if (ret) { + ksft_print_msg("failed to restore plain enable %d\n", ret); + return false; + } + + return true; +} + +static bool enable_invalid(void) +{ + int ret = gcs_set_status(ULONG_MAX); + if (ret == 0) { + ksft_print_msg("GCS_SET_STATUS %lx succeeded\n", ULONG_MAX); + return false; + } + + return true; +} + +/* Map a GCS */ +static bool map_guarded_stack(void) +{ + int ret; + uint64_t *buf; + uint64_t expected_cap; + int elem; + bool pass = true; + + buf = (void *)my_syscall3(__NR_map_shadow_stack, 0, page_size, + SHADOW_STACK_SET_MARKER | + SHADOW_STACK_SET_TOKEN); + if (buf == MAP_FAILED) { + ksft_print_msg("Failed to map %lu byte GCS: %d\n", + page_size, errno); + return false; + } + ksft_print_msg("Mapped GCS at %p-%p\n", buf, + (void *)((uint64_t)buf + page_size)); + + /* The top of the newly allocated region should be 0 */ + elem = (page_size / sizeof(uint64_t)) - 1; + if (buf[elem]) { + ksft_print_msg("Last entry is 0x%llx not 0x0\n", buf[elem]); + pass = false; + } + + /* Then a valid cap token */ + elem--; + expected_cap = ((uint64_t)buf + page_size - 16); + expected_cap &= GCS_CAP_ADDR_MASK; + expected_cap |= GCS_CAP_VALID_TOKEN; + if (buf[elem] != expected_cap) { + ksft_print_msg("Cap entry is 0x%llx not 0x%llx\n", + buf[elem], expected_cap); + pass = false; + } + ksft_print_msg("cap token is 0x%llx\n", buf[elem]); + + /* The rest should be zeros */ + for (elem = 0; elem < page_size / sizeof(uint64_t) - 2; elem++) { + if (!buf[elem]) + continue; + ksft_print_msg("GCS slot %d is 0x%llx not 0x0\n", + elem, buf[elem]); + pass = false; + } + + ret = munmap(buf, page_size); + if (ret != 0) { + ksft_print_msg("Failed to unmap %ld byte GCS: %d\n", + page_size, errno); + pass = false; + } + + return pass; +} + +/* A fork()ed process can run */ +static bool test_fork(void) +{ + unsigned long child_mode; + int ret, status; + pid_t pid; + bool pass = true; + + pid = fork(); + if (pid == -1) { + ksft_print_msg("fork() failed: %d\n", errno); + pass = false; + goto out; + } + if (pid == 0) { + /* In child, make sure we can call a function, read + * the GCS pointer and status and then exit */ + valid_gcs_function(); + get_gcspr(); + + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, + &child_mode, 0, 0, 0); + if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) { + ksft_print_msg("GCS not enabled in child\n"); + ret = -EINVAL; + } + + exit(ret); + } + + /* + * In parent, check we can still do function calls then block + * for the child. + */ + valid_gcs_function(); + + ksft_print_msg("Waiting for child %d\n", pid); + + ret = waitpid(pid, &status, 0); + if (ret == -1) { + ksft_print_msg("Failed to wait for child: %d\n", + errno); + return false; + } + + if (!WIFEXITED(status)) { + ksft_print_msg("Child exited due to signal %d\n", + WTERMSIG(status)); + pass = false; + } else { + if (WEXITSTATUS(status)) { + ksft_print_msg("Child exited with status %d\n", + WEXITSTATUS(status)); + pass = false; + } + } + +out: + + return pass; +} + +typedef bool (*gcs_test)(void); + +static struct { + char *name; + gcs_test test; + bool needs_enable; +} tests[] = { + { "read_status", read_status }, + { "base_enable", base_enable, true }, + { "read_gcspr_el0", read_gcspr_el0 }, + { "enable_writeable", enable_writeable, true }, + { "enable_push_pop", enable_push_pop, true }, + { "enable_all", enable_all, true }, + { "enable_invalid", enable_invalid, true }, + { "map_guarded_stack", map_guarded_stack }, + { "fork", test_fork }, +}; + +int main(void) +{ + int i, ret; + unsigned long gcs_mode; + + ksft_print_header(); + + /* + * We don't have getauxval() with nolibc so treat a failure to + * read GCS state as a lack of support and skip. + */ + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, + &gcs_mode, 0, 0, 0); + if (ret != 0) + ksft_exit_skip("Failed to read GCS state: %d\n", ret); + + if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) { + gcs_mode = PR_SHADOW_STACK_ENABLE; + ret = my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + gcs_mode, 0, 0, 0); + if (ret != 0) + ksft_exit_fail_msg("Failed to enable GCS: %d\n", ret); + } + + ksft_set_plan(ARRAY_SIZE(tests)); + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + ksft_test_result((*tests[i].test)(), "%s\n", tests[i].name); + } + + /* One last test: disable GCS, we can do this one time */ + my_syscall5(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0, 0, 0, 0); + if (ret != 0) + ksft_print_msg("Failed to disable GCS: %d\n", ret); + + ksft_finished(); + + return 0; +} diff --git a/tools/testing/selftests/arm64/gcs/gcs-locking.c b/tools/testing/selftests/arm64/gcs/gcs-locking.c new file mode 100644 index 000000000000..989f75a491b7 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcs-locking.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 ARM Limited. + * + * Tests for GCS mode locking. These tests rely on both having GCS + * unconfigured on entry and on the kselftest harness running each + * test in a fork()ed process which will have it's own mode. + */ + +#include <limits.h> + +#include <sys/auxv.h> +#include <sys/prctl.h> + +#include <asm/hwcap.h> + +#include "kselftest_harness.h" + +#include "gcs-util.h" + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + register long _arg3 __asm__ ("x2") = 0; \ + register long _arg4 __asm__ ("x3") = 0; \ + register long _arg5 __asm__ ("x4") = 0; \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_arg3), "r"(_arg4), \ + "r"(_arg5), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* No mode bits are rejected for locking */ +TEST(lock_all_modes) +{ + int ret; + + ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, ULONG_MAX, 0, 0, 0); + ASSERT_EQ(ret, 0); +} + +FIXTURE(valid_modes) +{ +}; + +FIXTURE_VARIANT(valid_modes) +{ + unsigned long mode; +}; + +FIXTURE_VARIANT_ADD(valid_modes, enable) +{ + .mode = PR_SHADOW_STACK_ENABLE, +}; + +FIXTURE_VARIANT_ADD(valid_modes, enable_write) +{ + .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE, +}; + +FIXTURE_VARIANT_ADD(valid_modes, enable_push) +{ + .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH, +}; + +FIXTURE_VARIANT_ADD(valid_modes, enable_write_push) +{ + .mode = PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | + PR_SHADOW_STACK_PUSH, +}; + +FIXTURE_SETUP(valid_modes) +{ +} + +FIXTURE_TEARDOWN(valid_modes) +{ +} + +/* We can set the mode at all */ +TEST_F(valid_modes, set) +{ + int ret; + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + variant->mode); + ASSERT_EQ(ret, 0); + + _exit(0); +} + +/* Enabling, locking then disabling is rejected */ +TEST_F(valid_modes, enable_lock_disable) +{ + unsigned long mode; + int ret; + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + variant->mode); + ASSERT_EQ(ret, 0); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mode, variant->mode); + + ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, 0); + ASSERT_EQ(ret, -EBUSY); + + _exit(0); +} + +/* Locking then enabling is rejected */ +TEST_F(valid_modes, lock_enable) +{ + unsigned long mode; + int ret; + + ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + variant->mode); + ASSERT_EQ(ret, -EBUSY); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mode, 0); + + _exit(0); +} + +/* Locking then changing other modes is fine */ +TEST_F(valid_modes, lock_enable_disable_others) +{ + unsigned long mode; + int ret; + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + variant->mode); + ASSERT_EQ(ret, 0); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mode, variant->mode); + + ret = prctl(PR_LOCK_SHADOW_STACK_STATUS, variant->mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + PR_SHADOW_STACK_ALL_MODES); + ASSERT_EQ(ret, 0); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mode, PR_SHADOW_STACK_ALL_MODES); + + + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + variant->mode); + ASSERT_EQ(ret, 0); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + ASSERT_EQ(ret, 0); + ASSERT_EQ(mode, variant->mode); + + _exit(0); +} + +int main(int argc, char **argv) +{ + unsigned long mode; + int ret; + + if (!(getauxval(AT_HWCAP) & HWCAP_GCS)) + ksft_exit_skip("SKIP GCS not supported\n"); + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + if (ret) { + ksft_print_msg("Failed to read GCS state: %d\n", ret); + return EXIT_FAILURE; + } + + if (mode & PR_SHADOW_STACK_ENABLE) { + ksft_print_msg("GCS was enabled, test unsupported\n"); + return KSFT_SKIP; + } + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S b/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S new file mode 100644 index 000000000000..b88b25217da5 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcs-stress-thread.S @@ -0,0 +1,311 @@ +// Program that loops for ever doing lots of recursions and system calls, +// intended to be used as part of a stress test for GCS context switching. +// +// Copyright 2015-2023 Arm Ltd + +#include <asm/unistd.h> + +#define sa_sz 32 +#define sa_flags 8 +#define sa_handler 0 +#define sa_mask_sz 8 + +#define si_code 8 + +#define SIGINT 2 +#define SIGABRT 6 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGTERM 15 +#define SEGV_CPERR 10 + +#define SA_NODEFER 1073741824 +#define SA_SIGINFO 4 +#define ucontext_regs 184 + +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) + +#define GCSPR_EL0 S3_3_C2_C5_1 + +.macro function name + .macro endfunction + .type \name, @function + .purgem endfunction + .endm +\name: +.endm + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction +.globl putc + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction +.globl puts + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", @progbits, 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction +.globl putdec + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction +.globl putdecn + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction +.globl memclr + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction +.globl memfill + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b abort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + + +function tickle_handler + // Perhaps collect GCSPR_EL0 here in future? + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error\n" + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +function segv_handler + // stash the siginfo_t * + mov x20, x1 + + // Disable GCS, we don't want additional faults logging things + mov x0, PR_SET_SHADOW_STACK_STATUS + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x8, #__NR_prctl + svc #0 + + puts "Got SIGSEGV code " + + ldr x21, [x20, #si_code] + mov x0, x21 + bl putdec + + // GCS faults should have si_code SEGV_CPERR + cmp x21, #SEGV_CPERR + bne 1f + + puts " (GCS violation)" +1: + mov x0, '\n' + bl putc + b abort +endfunction + +// Recurse x20 times +.macro recurse id +function recurse\id + stp x29, x30, [sp, #-16]! + mov x29, sp + + cmp x20, 0 + beq 1f + sub x20, x20, 1 + bl recurse\id + +1: + ldp x29, x30, [sp], #16 + + // Do a syscall immediately prior to returning to try to provoke + // scheduling and migration at a point where coherency issues + // might trigger. + mov x8, #__NR_getpid + svc #0 + + ret +endfunction +.endm + +// Generate and use two copies so we're changing the GCS contents +recurse 1 +recurse 2 + +.globl _start +function _start + // Run with GCS + mov x0, PR_SET_SHADOW_STACK_STATUS + mov x1, PR_SHADOW_STACK_ENABLE + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x8, #__NR_prctl + svc #0 + cbz x0, 1f + puts "Failed to enable GCS\n" + b abort +1: + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, tickle_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov w0, #SIGSEGV + adr x1, segv_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + puts "Running\n" + +loop: + // Small recursion depth so we're frequently flipping between + // the two recursors and changing what's on the stack + mov x20, #5 + bl recurse1 + mov x20, #5 + bl recurse2 + b loop +endfunction + +abort: + mov x0, #255 + mov x8, #__NR_exit + svc #0 diff --git a/tools/testing/selftests/arm64/gcs/gcs-stress.c b/tools/testing/selftests/arm64/gcs/gcs-stress.c new file mode 100644 index 000000000000..bbc7f4950c13 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcs-stress.c @@ -0,0 +1,530 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2022-3 ARM Limited. + */ + +#define _GNU_SOURCE +#define _POSIX_C_SOURCE 199309L + +#include <errno.h> +#include <getopt.h> +#include <poll.h> +#include <signal.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/epoll.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/wait.h> +#include <asm/hwcap.h> + +#include "../../kselftest.h" + +struct child_data { + char *name, *output; + pid_t pid; + int stdout; + bool output_seen; + bool exited; + int exit_status; + int exit_signal; +}; + +static int epoll_fd; +static struct child_data *children; +static struct epoll_event *evs; +static int tests; +static int num_children; +static bool terminate; + +static int startup_pipe[2]; + +static int num_processors(void) +{ + long nproc = sysconf(_SC_NPROCESSORS_CONF); + if (nproc < 0) { + perror("Unable to read number of processors\n"); + exit(EXIT_FAILURE); + } + + return nproc; +} + +static void start_thread(struct child_data *child, int id) +{ + int ret, pipefd[2], i; + struct epoll_event ev; + + ret = pipe(pipefd); + if (ret != 0) + ksft_exit_fail_msg("Failed to create stdout pipe: %s (%d)\n", + strerror(errno), errno); + + child->pid = fork(); + if (child->pid == -1) + ksft_exit_fail_msg("fork() failed: %s (%d)\n", + strerror(errno), errno); + + if (!child->pid) { + /* + * In child, replace stdout with the pipe, errors to + * stderr from here as kselftest prints to stdout. + */ + ret = dup2(pipefd[1], 1); + if (ret == -1) { + fprintf(stderr, "dup2() %d\n", errno); + exit(EXIT_FAILURE); + } + + /* + * Duplicate the read side of the startup pipe to + * FD 3 so we can close everything else. + */ + ret = dup2(startup_pipe[0], 3); + if (ret == -1) { + fprintf(stderr, "dup2() %d\n", errno); + exit(EXIT_FAILURE); + } + + /* + * Very dumb mechanism to clean open FDs other than + * stdio. We don't want O_CLOEXEC for the pipes... + */ + for (i = 4; i < 8192; i++) + close(i); + + /* + * Read from the startup pipe, there should be no data + * and we should block until it is closed. We just + * carry on on error since this isn't super critical. + */ + ret = read(3, &i, sizeof(i)); + if (ret < 0) + fprintf(stderr, "read(startp pipe) failed: %s (%d)\n", + strerror(errno), errno); + if (ret > 0) + fprintf(stderr, "%d bytes of data on startup pipe\n", + ret); + close(3); + + ret = execl("gcs-stress-thread", "gcs-stress-thread", NULL); + fprintf(stderr, "execl(gcs-stress-thread) failed: %d (%s)\n", + errno, strerror(errno)); + + exit(EXIT_FAILURE); + } else { + /* + * In parent, remember the child and close our copy of the + * write side of stdout. + */ + close(pipefd[1]); + child->stdout = pipefd[0]; + child->output = NULL; + child->exited = false; + child->output_seen = false; + + ev.events = EPOLLIN | EPOLLHUP; + ev.data.ptr = child; + + ret = asprintf(&child->name, "Thread-%d", id); + if (ret == -1) + ksft_exit_fail_msg("asprintf() failed\n"); + + ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, child->stdout, &ev); + if (ret < 0) { + ksft_exit_fail_msg("%s EPOLL_CTL_ADD failed: %s (%d)\n", + child->name, strerror(errno), errno); + } + } + + ksft_print_msg("Started %s\n", child->name); + num_children++; +} + +static bool child_output_read(struct child_data *child) +{ + char read_data[1024]; + char work[1024]; + int ret, len, cur_work, cur_read; + + ret = read(child->stdout, read_data, sizeof(read_data)); + if (ret < 0) { + if (errno == EINTR) + return true; + + ksft_print_msg("%s: read() failed: %s (%d)\n", + child->name, strerror(errno), + errno); + return false; + } + len = ret; + + child->output_seen = true; + + /* Pick up any partial read */ + if (child->output) { + strncpy(work, child->output, sizeof(work) - 1); + cur_work = strnlen(work, sizeof(work)); + free(child->output); + child->output = NULL; + } else { + cur_work = 0; + } + + cur_read = 0; + while (cur_read < len) { + work[cur_work] = read_data[cur_read++]; + + if (work[cur_work] == '\n') { + work[cur_work] = '\0'; + ksft_print_msg("%s: %s\n", child->name, work); + cur_work = 0; + } else { + cur_work++; + } + } + + if (cur_work) { + work[cur_work] = '\0'; + ret = asprintf(&child->output, "%s", work); + if (ret == -1) + ksft_exit_fail_msg("Out of memory\n"); + } + + return false; +} + +static void child_output(struct child_data *child, uint32_t events, + bool flush) +{ + bool read_more; + + if (events & EPOLLIN) { + do { + read_more = child_output_read(child); + } while (read_more); + } + + if (events & EPOLLHUP) { + close(child->stdout); + child->stdout = -1; + flush = true; + } + + if (flush && child->output) { + ksft_print_msg("%s: %s<EOF>\n", child->name, child->output); + free(child->output); + child->output = NULL; + } +} + +static void child_tickle(struct child_data *child) +{ + if (child->output_seen && !child->exited) + kill(child->pid, SIGUSR1); +} + +static void child_stop(struct child_data *child) +{ + if (!child->exited) + kill(child->pid, SIGTERM); +} + +static void child_cleanup(struct child_data *child) +{ + pid_t ret; + int status; + bool fail = false; + + if (!child->exited) { + do { + ret = waitpid(child->pid, &status, 0); + if (ret == -1 && errno == EINTR) + continue; + + if (ret == -1) { + ksft_print_msg("waitpid(%d) failed: %s (%d)\n", + child->pid, strerror(errno), + errno); + fail = true; + break; + } + + if (WIFEXITED(status)) { + child->exit_status = WEXITSTATUS(status); + child->exited = true; + } + + if (WIFSIGNALED(status)) { + child->exit_signal = WTERMSIG(status); + ksft_print_msg("%s: Exited due to signal %d\n", + child->name, child->exit_signal); + fail = true; + child->exited = true; + } + } while (!child->exited); + } + + if (!child->output_seen) { + ksft_print_msg("%s no output seen\n", child->name); + fail = true; + } + + if (child->exit_status != 0) { + ksft_print_msg("%s exited with error code %d\n", + child->name, child->exit_status); + fail = true; + } + + ksft_test_result(!fail, "%s\n", child->name); +} + +static void handle_child_signal(int sig, siginfo_t *info, void *context) +{ + int i; + bool found = false; + + for (i = 0; i < num_children; i++) { + if (children[i].pid == info->si_pid) { + children[i].exited = true; + children[i].exit_status = info->si_status; + found = true; + break; + } + } + + if (!found) + ksft_print_msg("SIGCHLD for unknown PID %d with status %d\n", + info->si_pid, info->si_status); +} + +static void handle_exit_signal(int sig, siginfo_t *info, void *context) +{ + int i; + + /* If we're already exiting then don't signal again */ + if (terminate) + return; + + ksft_print_msg("Got signal, exiting...\n"); + + terminate = true; + + /* + * This should be redundant, the main loop should clean up + * after us, but for safety stop everything we can here. + */ + for (i = 0; i < num_children; i++) + child_stop(&children[i]); +} + +/* Handle any pending output without blocking */ +static void drain_output(bool flush) +{ + int ret = 1; + int i; + + while (ret > 0) { + ret = epoll_wait(epoll_fd, evs, tests, 0); + if (ret < 0) { + if (errno == EINTR) + continue; + ksft_print_msg("epoll_wait() failed: %s (%d)\n", + strerror(errno), errno); + } + + for (i = 0; i < ret; i++) + child_output(evs[i].data.ptr, evs[i].events, flush); + } +} + +static const struct option options[] = { + { "timeout", required_argument, NULL, 't' }, + { } +}; + +int main(int argc, char **argv) +{ + int seen_children; + bool all_children_started = false; + int gcs_threads; + int timeout = 10; + int ret, cpus, i, c; + struct sigaction sa; + + while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) { + switch (c) { + case 't': + ret = sscanf(optarg, "%d", &timeout); + if (ret != 1) + ksft_exit_fail_msg("Failed to parse timeout %s\n", + optarg); + break; + default: + ksft_exit_fail_msg("Unknown argument\n"); + } + } + + cpus = num_processors(); + tests = 0; + + if (getauxval(AT_HWCAP) & HWCAP_GCS) { + /* One extra thread, trying to trigger migrations */ + gcs_threads = cpus + 1; + tests += gcs_threads; + } else { + gcs_threads = 0; + } + + ksft_print_header(); + ksft_set_plan(tests); + + ksft_print_msg("%d CPUs, %d GCS threads\n", + cpus, gcs_threads); + + if (!tests) + ksft_exit_skip("No tests scheduled\n"); + + if (timeout > 0) + ksft_print_msg("Will run for %ds\n", timeout); + else + ksft_print_msg("Will run until terminated\n"); + + children = calloc(sizeof(*children), tests); + if (!children) + ksft_exit_fail_msg("Unable to allocate child data\n"); + + ret = epoll_create1(EPOLL_CLOEXEC); + if (ret < 0) + ksft_exit_fail_msg("epoll_create1() failed: %s (%d)\n", + strerror(errno), ret); + epoll_fd = ret; + + /* Create a pipe which children will block on before execing */ + ret = pipe(startup_pipe); + if (ret != 0) + ksft_exit_fail_msg("Failed to create startup pipe: %s (%d)\n", + strerror(errno), errno); + + /* Get signal handers ready before we start any children */ + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handle_exit_signal; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + sigemptyset(&sa.sa_mask); + ret = sigaction(SIGINT, &sa, NULL); + if (ret < 0) + ksft_print_msg("Failed to install SIGINT handler: %s (%d)\n", + strerror(errno), errno); + ret = sigaction(SIGTERM, &sa, NULL); + if (ret < 0) + ksft_print_msg("Failed to install SIGTERM handler: %s (%d)\n", + strerror(errno), errno); + sa.sa_sigaction = handle_child_signal; + ret = sigaction(SIGCHLD, &sa, NULL); + if (ret < 0) + ksft_print_msg("Failed to install SIGCHLD handler: %s (%d)\n", + strerror(errno), errno); + + evs = calloc(tests, sizeof(*evs)); + if (!evs) + ksft_exit_fail_msg("Failed to allocated %d epoll events\n", + tests); + + for (i = 0; i < gcs_threads; i++) + start_thread(&children[i], i); + + /* + * All children started, close the startup pipe and let them + * run. + */ + close(startup_pipe[0]); + close(startup_pipe[1]); + + timeout *= 10; + for (;;) { + /* Did we get a signal asking us to exit? */ + if (terminate) + break; + + /* + * Timeout is counted in 100ms with no output, the + * tests print during startup then are silent when + * running so this should ensure they all ran enough + * to install the signal handler, this is especially + * useful in emulation where we will both be slow and + * likely to have a large set of VLs. + */ + ret = epoll_wait(epoll_fd, evs, tests, 100); + if (ret < 0) { + if (errno == EINTR) + continue; + ksft_exit_fail_msg("epoll_wait() failed: %s (%d)\n", + strerror(errno), errno); + } + + /* Output? */ + if (ret > 0) { + for (i = 0; i < ret; i++) { + child_output(evs[i].data.ptr, evs[i].events, + false); + } + continue; + } + + /* Otherwise epoll_wait() timed out */ + + /* + * If the child processes have not produced output they + * aren't actually running the tests yet. + */ + if (!all_children_started) { + seen_children = 0; + + for (i = 0; i < num_children; i++) + if (children[i].output_seen || + children[i].exited) + seen_children++; + + if (seen_children != num_children) { + ksft_print_msg("Waiting for %d children\n", + num_children - seen_children); + continue; + } + + all_children_started = true; + } + + ksft_print_msg("Sending signals, timeout remaining: %d00ms\n", + timeout); + + for (i = 0; i < num_children; i++) + child_tickle(&children[i]); + + /* Negative timeout means run indefinitely */ + if (timeout < 0) + continue; + if (--timeout == 0) + break; + } + + ksft_print_msg("Finishing up...\n"); + terminate = true; + + for (i = 0; i < tests; i++) + child_stop(&children[i]); + + drain_output(false); + + for (i = 0; i < tests; i++) + child_cleanup(&children[i]); + + drain_output(true); + + ksft_finished(); +} diff --git a/tools/testing/selftests/arm64/gcs/gcs-util.h b/tools/testing/selftests/arm64/gcs/gcs-util.h new file mode 100644 index 000000000000..c99a6b39ac14 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcs-util.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 ARM Limited. + */ + +#ifndef GCS_UTIL_H +#define GCS_UTIL_H + +#include <stdbool.h> + +#ifndef __NR_map_shadow_stack +#define __NR_map_shadow_stack 453 +#endif + +#ifndef __NR_prctl +#define __NR_prctl 167 +#endif + +#ifndef NT_ARM_GCS +#define NT_ARM_GCS 0x410 + +struct user_gcs { + __u64 features_enabled; + __u64 features_locked; + __u64 gcspr_el0; +}; +#endif + +/* Shadow Stack/Guarded Control Stack interface */ +#define PR_GET_SHADOW_STACK_STATUS 74 +#define PR_SET_SHADOW_STACK_STATUS 75 +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +#define PR_SHADOW_STACK_ALL_MODES \ + PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE | PR_SHADOW_STACK_PUSH + +#define SHADOW_STACK_SET_TOKEN (1ULL << 0) /* Set up a restore token in the shadow stack */ +#define SHADOW_STACK_SET_MARKER (1ULL << 1) /* Set up a top of stack merker in the shadow stack */ + +#define GCS_CAP_ADDR_MASK (0xfffffffffffff000UL) +#define GCS_CAP_TOKEN_MASK (0x0000000000000fffUL) +#define GCS_CAP_VALID_TOKEN 1 +#define GCS_CAP_IN_PROGRESS_TOKEN 5 + +#define GCS_CAP(x) (((unsigned long)(x) & GCS_CAP_ADDR_MASK) | \ + GCS_CAP_VALID_TOKEN) + +static inline unsigned long *get_gcspr(void) +{ + unsigned long *gcspr; + + asm volatile( + "mrs %0, S3_3_C2_C5_1" + : "=r" (gcspr) + : + : "cc"); + + return gcspr; +} + +static inline void __attribute__((always_inline)) gcsss1(unsigned long *Xt) +{ + asm volatile ( + "sys #3, C7, C7, #2, %0\n" + : + : "rZ" (Xt) + : "memory"); +} + +static inline unsigned long __attribute__((always_inline)) *gcsss2(void) +{ + unsigned long *Xt; + + asm volatile( + "SYSL %0, #3, C7, C7, #3\n" + : "=r" (Xt) + : + : "memory"); + + return Xt; +} + +static inline bool chkfeat_gcs(void) +{ + register long val __asm__ ("x16") = 1; + + /* CHKFEAT x16 */ + asm volatile( + "hint #0x28\n" + : "=r" (val) + : "r" (val)); + + return val != 1; +} + +#endif diff --git a/tools/testing/selftests/arm64/gcs/gcspushm.S b/tools/testing/selftests/arm64/gcs/gcspushm.S new file mode 100644 index 000000000000..bbe17c1325ac --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcspushm.S @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// Copyright 2024 Arm Limited +// +// Give ourselves GCS push permissions then use them + +#include <asm/unistd.h> + +/* Shadow Stack/Guarded Control Stack interface */ +#define PR_GET_SHADOW_STACK_STATUS 74 +#define PR_SET_SHADOW_STACK_STATUS 75 +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +#define KSFT_SKIP 4 + +.macro function name + .macro endfunction + .type \name, @function + .purgem endfunction + .endm +\name: +.endm + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction +.globl putc + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction +.globl puts + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", @progbits, 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +.globl _start +function _start + // Run with GCS + mov x0, PR_SET_SHADOW_STACK_STATUS + mov x1, PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_PUSH + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x8, #__NR_prctl + svc #0 + cbz x0, 1f + puts "Failed to enable GCS with push permission\n" + mov x0, #KSFT_SKIP + b 2f +1: + sys #3, c7, c7, #0, x0 // GCSPUSHM + sysl x0, #3, c7, c7, #1 // GCSPOPM + + mov x0, #0 +2: + mov x8, #__NR_exit + svc #0 diff --git a/tools/testing/selftests/arm64/gcs/gcsstr.S b/tools/testing/selftests/arm64/gcs/gcsstr.S new file mode 100644 index 000000000000..a42bba6e30b1 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/gcsstr.S @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-only +// +// Copyright 2024 Arm Limited +// +// Give ourselves GCS write permissions then use them + +#include <asm/unistd.h> + +/* Shadow Stack/Guarded Control Stack interface */ +#define PR_GET_SHADOW_STACK_STATUS 74 +#define PR_SET_SHADOW_STACK_STATUS 75 +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +#define GCSPR_EL0 S3_3_C2_C5_1 + +#define KSFT_SKIP 4 + +.macro function name + .macro endfunction + .type \name, @function + .purgem endfunction + .endm +\name: +.endm + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction +.globl putc + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction +.globl puts + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", @progbits, 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +.globl _start +function _start + // Run with GCS + mov x0, PR_SET_SHADOW_STACK_STATUS + mov x1, PR_SHADOW_STACK_ENABLE | PR_SHADOW_STACK_WRITE + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x8, #__NR_prctl + svc #0 + cbz x0, 1f + puts "Failed to enable GCS with write permission\n" + mov x0, #KSFT_SKIP + b 2f +1: + mrs x0, GCSPR_EL0 + sub x0, x0, #8 + .inst 0xd91f1c01 // GCSSTR x1, x0 + + mov x0, #0 +2: + mov x8, #__NR_exit + svc #0 diff --git a/tools/testing/selftests/arm64/gcs/libc-gcs.c b/tools/testing/selftests/arm64/gcs/libc-gcs.c new file mode 100644 index 000000000000..17b2fabfec38 --- /dev/null +++ b/tools/testing/selftests/arm64/gcs/libc-gcs.c @@ -0,0 +1,728 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2023 ARM Limited. + */ + +#define _GNU_SOURCE + +#include <pthread.h> +#include <stdbool.h> + +#include <sys/auxv.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include <sys/ptrace.h> +#include <sys/uio.h> + +#include <asm/hwcap.h> +#include <asm/mman.h> + +#include <linux/compiler.h> + +#include "kselftest_harness.h" + +#include "gcs-util.h" + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("x8") = (num); \ + register long _arg1 __asm__ ("x0") = (long)(arg1); \ + register long _arg2 __asm__ ("x1") = (long)(arg2); \ + register long _arg3 __asm__ ("x2") = 0; \ + register long _arg4 __asm__ ("x3") = 0; \ + register long _arg5 __asm__ ("x4") = 0; \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_arg3), "r"(_arg4), \ + "r"(_arg5), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +static noinline void gcs_recurse(int depth) +{ + if (depth) + gcs_recurse(depth - 1); + + /* Prevent tail call optimization so we actually recurse */ + asm volatile("dsb sy" : : : "memory"); +} + +/* Smoke test that a function call and return works*/ +TEST(can_call_function) +{ + gcs_recurse(0); +} + +static void *gcs_test_thread(void *arg) +{ + int ret; + unsigned long mode; + + /* + * Some libcs don't seem to fill unused arguments with 0 but + * the kernel validates this so we supply all 5 arguments. + */ + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &mode, 0, 0, 0); + if (ret != 0) { + ksft_print_msg("PR_GET_SHADOW_STACK_STATUS failed: %d\n", ret); + return NULL; + } + + if (!(mode & PR_SHADOW_STACK_ENABLE)) { + ksft_print_msg("GCS not enabled in thread, mode is %lu\n", + mode); + return NULL; + } + + /* Just in case... */ + gcs_recurse(0); + + /* Use a non-NULL value to indicate a pass */ + return &gcs_test_thread; +} + +/* Verify that if we start a new thread it has GCS enabled */ +TEST(gcs_enabled_thread) +{ + pthread_t thread; + void *thread_ret; + int ret; + + ret = pthread_create(&thread, NULL, gcs_test_thread, NULL); + ASSERT_TRUE(ret == 0); + if (ret != 0) + return; + + ret = pthread_join(thread, &thread_ret); + ASSERT_TRUE(ret == 0); + if (ret != 0) + return; + + ASSERT_TRUE(thread_ret != NULL); +} + +/* Read the GCS until we find the terminator */ +TEST(gcs_find_terminator) +{ + unsigned long *gcs, *cur; + + gcs = get_gcspr(); + cur = gcs; + while (*cur) + cur++; + + ksft_print_msg("GCS in use from %p-%p\n", gcs, cur); + + /* + * We should have at least whatever called into this test so + * the two pointer should differ. + */ + ASSERT_TRUE(gcs != cur); +} + +/* + * We can access a GCS via ptrace + * + * This could usefully have a fixture but note that each test is + * fork()ed into a new child whcih causes issues. Might be better to + * lift at least some of this out into a separate, non-harness, test + * program. + */ +TEST(ptrace_read_write) +{ + pid_t child, pid; + int ret, status; + siginfo_t si; + uint64_t val, rval, gcspr; + struct user_gcs child_gcs; + struct iovec iov, local_iov, remote_iov; + + child = fork(); + if (child == -1) { + ksft_print_msg("fork() failed: %d (%s)\n", + errno, strerror(errno)); + ASSERT_NE(child, -1); + } + + if (child == 0) { + /* + * In child, make sure there's something on the stack and + * ask to be traced. + */ + gcs_recurse(0); + if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) + ksft_exit_fail_msg("PTRACE_TRACEME %s", + strerror(errno)); + + if (raise(SIGSTOP)) + ksft_exit_fail_msg("raise(SIGSTOP) %s", + strerror(errno)); + + return; + } + + ksft_print_msg("Child: %d\n", child); + + /* Attach to the child */ + while (1) { + int sig; + + pid = wait(&status); + if (pid == -1) { + ksft_print_msg("wait() failed: %s", + strerror(errno)); + goto error; + } + + /* + * This should never happen but it's hard to flag in + * the framework. + */ + if (pid != child) + continue; + + if (WIFEXITED(status) || WIFSIGNALED(status)) + ksft_exit_fail_msg("Child died unexpectedly\n"); + + if (!WIFSTOPPED(status)) + goto error; + + sig = WSTOPSIG(status); + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) { + if (errno == ESRCH) { + ASSERT_NE(errno, ESRCH); + return; + } + + if (errno == EINVAL) { + sig = 0; /* bust group-stop */ + goto cont; + } + + ksft_print_msg("PTRACE_GETSIGINFO: %s\n", + strerror(errno)); + goto error; + } + + if (sig == SIGSTOP && si.si_code == SI_TKILL && + si.si_pid == pid) + break; + + cont: + if (ptrace(PTRACE_CONT, pid, NULL, sig)) { + if (errno == ESRCH) { + ASSERT_NE(errno, ESRCH); + return; + } + + ksft_print_msg("PTRACE_CONT: %s\n", strerror(errno)); + goto error; + } + } + + /* Where is the child GCS? */ + iov.iov_base = &child_gcs; + iov.iov_len = sizeof(child_gcs); + ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_GCS, &iov); + if (ret != 0) { + ksft_print_msg("Failed to read child GCS state: %s (%d)\n", + strerror(errno), errno); + goto error; + } + + /* We should have inherited GCS over fork(), confirm */ + if (!(child_gcs.features_enabled & PR_SHADOW_STACK_ENABLE)) { + ASSERT_TRUE(child_gcs.features_enabled & + PR_SHADOW_STACK_ENABLE); + goto error; + } + + gcspr = child_gcs.gcspr_el0; + ksft_print_msg("Child GCSPR 0x%lx, flags %llx, locked %llx\n", + gcspr, child_gcs.features_enabled, + child_gcs.features_locked); + + /* Ideally we'd cross check with the child memory map */ + + errno = 0; + val = ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL); + ret = errno; + if (ret != 0) + ksft_print_msg("PTRACE_PEEKDATA failed: %s (%d)\n", + strerror(ret), ret); + EXPECT_EQ(ret, 0); + + /* The child should be in a function, the GCSPR shouldn't be 0 */ + EXPECT_NE(val, 0); + + /* Same thing via process_vm_readv() */ + local_iov.iov_base = &rval; + local_iov.iov_len = sizeof(rval); + remote_iov.iov_base = (void *)gcspr; + remote_iov.iov_len = sizeof(rval); + ret = process_vm_readv(child, &local_iov, 1, &remote_iov, 1, 0); + if (ret == -1) + ksft_print_msg("process_vm_readv() failed: %s (%d)\n", + strerror(errno), errno); + EXPECT_EQ(ret, sizeof(rval)); + EXPECT_EQ(val, rval); + + /* Write data via a peek */ + ret = ptrace(PTRACE_POKEDATA, child, (void *)gcspr, NULL); + if (ret == -1) + ksft_print_msg("PTRACE_POKEDATA failed: %s (%d)\n", + strerror(errno), errno); + EXPECT_EQ(ret, 0); + EXPECT_EQ(0, ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL)); + + /* Restore what we had before */ + ret = ptrace(PTRACE_POKEDATA, child, (void *)gcspr, val); + if (ret == -1) + ksft_print_msg("PTRACE_POKEDATA failed: %s (%d)\n", + strerror(errno), errno); + EXPECT_EQ(ret, 0); + EXPECT_EQ(val, ptrace(PTRACE_PEEKDATA, child, (void *)gcspr, NULL)); + + /* That's all, folks */ + kill(child, SIGKILL); + return; + +error: + kill(child, SIGKILL); + ASSERT_FALSE(true); +} + +FIXTURE(map_gcs) +{ + unsigned long *stack; +}; + +FIXTURE_VARIANT(map_gcs) +{ + size_t stack_size; + unsigned long flags; +}; + +FIXTURE_VARIANT_ADD(map_gcs, s2k_cap_marker) +{ + .stack_size = 2 * 1024, + .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s2k_cap) +{ + .stack_size = 2 * 1024, + .flags = SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s2k_marker) +{ + .stack_size = 2 * 1024, + .flags = SHADOW_STACK_SET_MARKER, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s2k) +{ + .stack_size = 2 * 1024, + .flags = 0, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s4k_cap_marker) +{ + .stack_size = 4 * 1024, + .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s4k_cap) +{ + .stack_size = 4 * 1024, + .flags = SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s3k_marker) +{ + .stack_size = 4 * 1024, + .flags = SHADOW_STACK_SET_MARKER, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s4k) +{ + .stack_size = 4 * 1024, + .flags = 0, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s16k_cap_marker) +{ + .stack_size = 16 * 1024, + .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s16k_cap) +{ + .stack_size = 16 * 1024, + .flags = SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s16k_marker) +{ + .stack_size = 16 * 1024, + .flags = SHADOW_STACK_SET_MARKER, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s16k) +{ + .stack_size = 16 * 1024, + .flags = 0, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s64k_cap_marker) +{ + .stack_size = 64 * 1024, + .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s64k_cap) +{ + .stack_size = 64 * 1024, + .flags = SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s64k_marker) +{ + .stack_size = 64 * 1024, + .flags = SHADOW_STACK_SET_MARKER, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s64k) +{ + .stack_size = 64 * 1024, + .flags = 0, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s128k_cap_marker) +{ + .stack_size = 128 * 1024, + .flags = SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s128k_cap) +{ + .stack_size = 128 * 1024, + .flags = SHADOW_STACK_SET_TOKEN, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s128k_marker) +{ + .stack_size = 128 * 1024, + .flags = SHADOW_STACK_SET_MARKER, +}; + +FIXTURE_VARIANT_ADD(map_gcs, s128k) +{ + .stack_size = 128 * 1024, + .flags = 0, +}; + +FIXTURE_SETUP(map_gcs) +{ + self->stack = (void *)syscall(__NR_map_shadow_stack, 0, + variant->stack_size, + variant->flags); + ASSERT_FALSE(self->stack == MAP_FAILED); + ksft_print_msg("Allocated stack from %p-%p\n", self->stack, + self->stack + variant->stack_size); +} + +FIXTURE_TEARDOWN(map_gcs) +{ + int ret; + + if (self->stack != MAP_FAILED) { + ret = munmap(self->stack, variant->stack_size); + ASSERT_EQ(ret, 0); + } +} + +/* The stack has a cap token */ +TEST_F(map_gcs, stack_capped) +{ + unsigned long *stack = self->stack; + size_t cap_index; + + cap_index = (variant->stack_size / sizeof(unsigned long)); + + switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) { + case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN: + cap_index -= 2; + break; + case SHADOW_STACK_SET_TOKEN: + cap_index -= 1; + break; + case SHADOW_STACK_SET_MARKER: + case 0: + /* No cap, no test */ + return; + } + + ASSERT_EQ(stack[cap_index], GCS_CAP(&stack[cap_index])); +} + +/* The top of the stack is 0 */ +TEST_F(map_gcs, stack_terminated) +{ + unsigned long *stack = self->stack; + size_t term_index; + + if (!(variant->flags & SHADOW_STACK_SET_MARKER)) + return; + + term_index = (variant->stack_size / sizeof(unsigned long)) - 1; + + ASSERT_EQ(stack[term_index], 0); +} + +/* Writes should fault */ +TEST_F_SIGNAL(map_gcs, not_writeable, SIGSEGV) +{ + self->stack[0] = 0; +} + +/* Put it all together, we can safely switch to and from the stack */ +TEST_F(map_gcs, stack_switch) +{ + size_t cap_index; + cap_index = (variant->stack_size / sizeof(unsigned long)); + unsigned long *orig_gcspr_el0, *pivot_gcspr_el0; + + /* Skip over the stack terminator and point at the cap */ + switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) { + case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN: + cap_index -= 2; + break; + case SHADOW_STACK_SET_TOKEN: + cap_index -= 1; + break; + case SHADOW_STACK_SET_MARKER: + case 0: + /* No cap, no test */ + return; + } + pivot_gcspr_el0 = &self->stack[cap_index]; + + /* Pivot to the new GCS */ + ksft_print_msg("Pivoting to %p from %p, target has value 0x%lx\n", + pivot_gcspr_el0, get_gcspr(), + *pivot_gcspr_el0); + gcsss1(pivot_gcspr_el0); + orig_gcspr_el0 = gcsss2(); + ksft_print_msg("Pivoted to %p from %p, target has value 0x%lx\n", + get_gcspr(), orig_gcspr_el0, + *pivot_gcspr_el0); + + ksft_print_msg("Pivoted, GCSPR_EL0 now %p\n", get_gcspr()); + + /* New GCS must be in the new buffer */ + ASSERT_TRUE((unsigned long)get_gcspr() > (unsigned long)self->stack); + ASSERT_TRUE((unsigned long)get_gcspr() <= + (unsigned long)self->stack + variant->stack_size); + + /* We should be able to use all but 2 slots of the new stack */ + ksft_print_msg("Recursing %zu levels\n", cap_index - 1); + gcs_recurse(cap_index - 1); + + /* Pivot back to the original GCS */ + gcsss1(orig_gcspr_el0); + pivot_gcspr_el0 = gcsss2(); + + gcs_recurse(0); + ksft_print_msg("Pivoted back to GCSPR_EL0 0x%p\n", get_gcspr()); +} + +/* We fault if we try to go beyond the end of the stack */ +TEST_F_SIGNAL(map_gcs, stack_overflow, SIGSEGV) +{ + size_t cap_index; + cap_index = (variant->stack_size / sizeof(unsigned long)); + unsigned long *orig_gcspr_el0, *pivot_gcspr_el0; + + /* Skip over the stack terminator and point at the cap */ + switch (variant->flags & (SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN)) { + case SHADOW_STACK_SET_MARKER | SHADOW_STACK_SET_TOKEN: + cap_index -= 2; + break; + case SHADOW_STACK_SET_TOKEN: + cap_index -= 1; + break; + case SHADOW_STACK_SET_MARKER: + case 0: + /* No cap, no test but we need to SEGV to avoid a false fail */ + orig_gcspr_el0 = get_gcspr(); + *orig_gcspr_el0 = 0; + return; + } + pivot_gcspr_el0 = &self->stack[cap_index]; + + /* Pivot to the new GCS */ + ksft_print_msg("Pivoting to %p from %p, target has value 0x%lx\n", + pivot_gcspr_el0, get_gcspr(), + *pivot_gcspr_el0); + gcsss1(pivot_gcspr_el0); + orig_gcspr_el0 = gcsss2(); + ksft_print_msg("Pivoted to %p from %p, target has value 0x%lx\n", + pivot_gcspr_el0, orig_gcspr_el0, + *pivot_gcspr_el0); + + ksft_print_msg("Pivoted, GCSPR_EL0 now %p\n", get_gcspr()); + + /* New GCS must be in the new buffer */ + ASSERT_TRUE((unsigned long)get_gcspr() > (unsigned long)self->stack); + ASSERT_TRUE((unsigned long)get_gcspr() <= + (unsigned long)self->stack + variant->stack_size); + + /* Now try to recurse, we should fault doing this. */ + ksft_print_msg("Recursing %zu levels...\n", cap_index + 1); + gcs_recurse(cap_index + 1); + ksft_print_msg("...done\n"); + + /* Clean up properly to try to guard against spurious passes. */ + gcsss1(orig_gcspr_el0); + pivot_gcspr_el0 = gcsss2(); + ksft_print_msg("Pivoted back to GCSPR_EL0 0x%p\n", get_gcspr()); +} + +FIXTURE(map_invalid_gcs) +{ +}; + +FIXTURE_VARIANT(map_invalid_gcs) +{ + size_t stack_size; +}; + +FIXTURE_SETUP(map_invalid_gcs) +{ +} + +FIXTURE_TEARDOWN(map_invalid_gcs) +{ +} + +/* GCS must be larger than 16 bytes */ +FIXTURE_VARIANT_ADD(map_invalid_gcs, too_small) +{ + .stack_size = 8, +}; + +/* GCS size must be 16 byte aligned */ +FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_1) { .stack_size = 1024 + 1 }; +FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_2) { .stack_size = 1024 + 2 }; +FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_3) { .stack_size = 1024 + 3 }; +FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_4) { .stack_size = 1024 + 4 }; +FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_5) { .stack_size = 1024 + 5 }; +FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_6) { .stack_size = 1024 + 6 }; +FIXTURE_VARIANT_ADD(map_invalid_gcs, unligned_7) { .stack_size = 1024 + 7 }; + +TEST_F(map_invalid_gcs, do_map) +{ + void *stack; + + stack = (void *)syscall(__NR_map_shadow_stack, 0, + variant->stack_size, 0); + ASSERT_TRUE(stack == MAP_FAILED); + if (stack != MAP_FAILED) + munmap(stack, variant->stack_size); +} + +FIXTURE(invalid_mprotect) +{ + unsigned long *stack; + size_t stack_size; +}; + +FIXTURE_VARIANT(invalid_mprotect) +{ + unsigned long flags; +}; + +FIXTURE_SETUP(invalid_mprotect) +{ + self->stack_size = sysconf(_SC_PAGE_SIZE); + self->stack = (void *)syscall(__NR_map_shadow_stack, 0, + self->stack_size, 0); + ASSERT_FALSE(self->stack == MAP_FAILED); + ksft_print_msg("Allocated stack from %p-%p\n", self->stack, + self->stack + self->stack_size); +} + +FIXTURE_TEARDOWN(invalid_mprotect) +{ + int ret; + + if (self->stack != MAP_FAILED) { + ret = munmap(self->stack, self->stack_size); + ASSERT_EQ(ret, 0); + } +} + +FIXTURE_VARIANT_ADD(invalid_mprotect, exec) +{ + .flags = PROT_EXEC, +}; + +TEST_F(invalid_mprotect, do_map) +{ + int ret; + + ret = mprotect(self->stack, self->stack_size, variant->flags); + ASSERT_EQ(ret, -1); +} + +TEST_F(invalid_mprotect, do_map_read) +{ + int ret; + + ret = mprotect(self->stack, self->stack_size, + variant->flags | PROT_READ); + ASSERT_EQ(ret, -1); +} + +int main(int argc, char **argv) +{ + unsigned long gcs_mode; + int ret; + + if (!(getauxval(AT_HWCAP) & HWCAP_GCS)) + ksft_exit_skip("SKIP GCS not supported\n"); + + /* + * Force shadow stacks on, our tests *should* be fine with or + * without libc support and with or without this having ended + * up tagged for GCS and enabled by the dynamic linker. We + * can't use the libc prctl() function since we can't return + * from enabling the stack. + */ + ret = my_syscall2(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, &gcs_mode); + if (ret) { + ksft_print_msg("Failed to read GCS state: %d\n", ret); + return EXIT_FAILURE; + } + + if (!(gcs_mode & PR_SHADOW_STACK_ENABLE)) { + gcs_mode = PR_SHADOW_STACK_ENABLE; + ret = my_syscall2(__NR_prctl, PR_SET_SHADOW_STACK_STATUS, + gcs_mode); + if (ret) { + ksft_print_msg("Failed to configure GCS: %d\n", ret); + return EXIT_FAILURE; + } + } + + /* Avoid returning in case libc doesn't understand GCS */ + exit(test_harness_run(argc, argv)); +} diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index 1dbbbd47dd50..2ee7f114d7fa 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -91,7 +91,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode, for (j = 0; j < sizes[i]; j++) { if (ptr[j] != '1') { err = true; - ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n", + ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%p\n", j, ptr); break; } @@ -189,7 +189,7 @@ static int check_buffer_overflow_by_byte(int mem_type, int mode, for (j = 0; j < sizes[i]; j++) { if (ptr[j] != '1') { err = true; - ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n", + ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%p\n", j, ptr); break; } diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c new file mode 100644 index 000000000000..303260a6dc65 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2024 Ampere Computing LLC + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define TAG_CHECK_ON 0 +#define TAG_CHECK_OFF 1 + +static unsigned long default_huge_page_size(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + + if (!f) + return 0; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + + free(line); + fclose(f); + return hps; +} + +static bool is_hugetlb_allocated(void) +{ + unsigned long hps = 0; + char *line = NULL; + size_t linelen = 0; + FILE *f = fopen("/proc/meminfo", "r"); + + if (!f) + return false; + while (getline(&line, &linelen, f) > 0) { + if (sscanf(line, "Hugetlb: %lu kB", &hps) == 1) { + hps <<= 10; + break; + } + } + + free(line); + fclose(f); + + if (hps > 0) + return true; + + return false; +} + +static void write_sysfs(char *str, unsigned long val) +{ + FILE *f; + + f = fopen(str, "w"); + if (!f) { + ksft_print_msg("ERR: missing %s\n", str); + return; + } + fprintf(f, "%lu", val); + fclose(f); +} + +static void allocate_hugetlb() +{ + write_sysfs("/proc/sys/vm/nr_hugepages", 2); +} + +static void free_hugetlb() +{ + write_sysfs("/proc/sys/vm/nr_hugepages", 0); +} + +static int check_child_tag_inheritance(char *ptr, int size, int mode) +{ + int i, parent_tag, child_tag, fault, child_status; + pid_t child; + + parent_tag = MT_FETCH_TAG((uintptr_t)ptr); + fault = 0; + + child = fork(); + if (child == -1) { + ksft_print_msg("FAIL: child process creation\n"); + return KSFT_FAIL; + } else if (child == 0) { + mte_initialize_current_context(mode, (uintptr_t)ptr, size); + /* Do copy on write */ + memset(ptr, '1', size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) { + fault = 1; + goto check_child_tag_inheritance_err; + } + for (i = 0; i < size; i += MT_GRANULE_SIZE) { + child_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i))); + if (parent_tag != child_tag) { + ksft_print_msg("FAIL: child mte tag (%d) mismatch\n", i); + fault = 1; + goto check_child_tag_inheritance_err; + } + } +check_child_tag_inheritance_err: + _exit(fault); + } + /* Wait for child process to terminate */ + wait(&child_status); + if (WIFEXITED(child_status)) + fault = WEXITSTATUS(child_status); + else + fault = 1; + return (fault) ? KSFT_FAIL : KSFT_PASS; +} + +static int check_mte_memory(char *ptr, int size, int mode, int tag_check) +{ + mte_initialize_current_context(mode, (uintptr_t)ptr, size); + memset(ptr, '1', size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) + return KSFT_FAIL; + + return KSFT_PASS; +} + +static int check_hugetlb_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +{ + char *ptr, *map_ptr; + int result; + unsigned long map_size; + + map_size = default_huge_page_size(); + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false); + if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)map_ptr, map_size); + /* Only mte enabled memory will allow tag insertion */ + ptr = mte_insert_tags((void *)map_ptr, map_size); + if (!ptr || cur_mte_cxt.fault_valid == true) { + ksft_print_msg("FAIL: Insert tags on anonymous mmap memory\n"); + munmap((void *)map_ptr, map_size); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, map_size, mode, tag_check); + mte_clear_tags((void *)ptr, map_size); + mte_free_memory((void *)map_ptr, map_size, mem_type, false); + if (result == KSFT_FAIL) + return KSFT_FAIL; + + return KSFT_PASS; +} + +static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) +{ + char *map_ptr; + int prot_flag, result; + unsigned long map_size; + + prot_flag = PROT_READ | PROT_WRITE; + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + map_size = default_huge_page_size(); + map_ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping, + 0, 0); + if (check_allocated_memory_range(map_ptr, map_size, mem_type, + 0, 0) != KSFT_PASS) + return KSFT_FAIL; + /* Try to clear PROT_MTE property and verify it by tag checking */ + if (mprotect(map_ptr, map_size, prot_flag)) { + mte_free_memory_tag_range((void *)map_ptr, map_size, mem_type, + 0, 0); + ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); + return KSFT_FAIL; + } + result = check_mte_memory(map_ptr, map_size, mode, TAG_CHECK_ON); + mte_free_memory_tag_range((void *)map_ptr, map_size, mem_type, 0, 0); + if (result != KSFT_PASS) + return KSFT_FAIL; + + return KSFT_PASS; +} + +static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mapping) +{ + char *ptr; + int result; + unsigned long map_size; + + map_size = default_huge_page_size(); + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping, + 0, 0); + if (check_allocated_memory_range(ptr, map_size, mem_type, + 0, 0) != KSFT_PASS) + return KSFT_FAIL; + result = check_child_tag_inheritance(ptr, map_size, mode); + mte_free_memory_tag_range((void *)ptr, map_size, mem_type, 0, 0); + if (result == KSFT_FAIL) + return result; + + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + + err = mte_default_setup(); + if (err) + return err; + + /* Register signal handlers */ + mte_register_signal(SIGBUS, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler); + + allocate_hugetlb(); + + if (!is_hugetlb_allocated()) { + ksft_print_msg("ERR: Unable allocate hugetlb pages\n"); + return KSFT_FAIL; + } + + /* Set test plan */ + ksft_set_plan(12); + + mte_enable_pstate_tco(); + + evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_OFF), + "Check hugetlb memory with private mapping, sync error mode, mmap memory and tag check off\n"); + + mte_disable_pstate_tco(); + evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_OFF), + "Check hugetlb memory with private mapping, no error mode, mmap memory and tag check off\n"); + + evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON), + "Check hugetlb memory with private mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_hugetlb_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON), + "Check hugetlb memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_hugetlb_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON), + "Check hugetlb memory with private mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_hugetlb_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB, TAG_CHECK_ON), + "Check hugetlb memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); + + evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n"); + evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n"); + + evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check child hugetlb memory with private mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_hugetlb_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check child hugetlb memory with private mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n"); + evaluate_test(check_child_hugetlb_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE | MAP_HUGETLB), + "Check child hugetlb memory with private mapping, precise mode and mmap/mprotect memory\n"); + + mte_restore_setup(); + free_hugetlb(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c index f139a33a43ef..4c89e9538ca0 100644 --- a/tools/testing/selftests/arm64/mte/check_prctl.c +++ b/tools/testing/selftests/arm64/mte/check_prctl.c @@ -85,7 +85,7 @@ void set_mode_test(const char *name, int hwcap2, int mask) ksft_test_result_pass("%s\n", name); } else { ksft_print_msg("Got %x, expected %x\n", - (ret & PR_MTE_TCF_MASK), mask); + (ret & (int)PR_MTE_TCF_MASK), mask); ksft_test_result_fail("%s\n", name); } } diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c index 2b1425b92b69..a3d1e23fe02a 100644 --- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -65,7 +65,7 @@ static int check_single_included_tags(int mem_type, int mode) ptr = mte_insert_tags(ptr, BUFFER_SIZE); /* Check tag value */ if (MT_FETCH_TAG((uintptr_t)ptr) == tag) { - ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", + ksft_print_msg("FAIL: wrong tag = 0x%lx with include mask=0x%x\n", MT_FETCH_TAG((uintptr_t)ptr), MT_INCLUDE_VALID_TAG(tag)); result = KSFT_FAIL; @@ -97,7 +97,7 @@ static int check_multiple_included_tags(int mem_type, int mode) ptr = mte_insert_tags(ptr, BUFFER_SIZE); /* Check tag value */ if (MT_FETCH_TAG((uintptr_t)ptr) < tag) { - ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", + ksft_print_msg("FAIL: wrong tag = 0x%lx with include mask=0x%lx\n", MT_FETCH_TAG((uintptr_t)ptr), MT_INCLUDE_VALID_TAGS(excl_mask)); result = KSFT_FAIL; diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index 00ffd34c66d3..a1dc2fe5285b 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -38,7 +38,7 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) if (cur_mte_cxt.trig_si_code == si->si_code) cur_mte_cxt.fault_valid = true; else - ksft_print_msg("Got unexpected SEGV_MTEAERR at pc=$lx, fault addr=%lx\n", + ksft_print_msg("Got unexpected SEGV_MTEAERR at pc=%llx, fault addr=%lx\n", ((ucontext_t *)uc)->uc_mcontext.pc, addr); return; @@ -64,7 +64,7 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) exit(1); } } else if (signum == SIGBUS) { - ksft_print_msg("INFO: SIGBUS signal at pc=%lx, fault addr=%lx, si_code=%lx\n", + ksft_print_msg("INFO: SIGBUS signal at pc=%llx, fault addr=%lx, si_code=%x\n", ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); if ((cur_mte_cxt.trig_range >= 0 && addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && @@ -100,7 +100,7 @@ void *mte_insert_tags(void *ptr, size_t size) int align_size; if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) { - ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr); + ksft_print_msg("FAIL: Addr=%p: invalid\n", ptr); return NULL; } align_size = MT_ALIGN_UP(size); @@ -112,7 +112,7 @@ void *mte_insert_tags(void *ptr, size_t size) void mte_clear_tags(void *ptr, size_t size) { if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) { - ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr); + ksft_print_msg("FAIL: Addr=%p: invalid\n", ptr); return; } size = MT_ALIGN_UP(size); @@ -150,13 +150,13 @@ static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping, map_flag |= MAP_PRIVATE; ptr = mmap(NULL, entire_size, prot_flag, map_flag, fd, 0); if (ptr == MAP_FAILED) { - ksft_print_msg("FAIL: mmap allocation\n"); + ksft_perror("mmap()"); return NULL; } if (mem_type == USE_MPROTECT) { if (mprotect(ptr, entire_size, prot_flag | PROT_MTE)) { + ksft_perror("mprotect(PROT_MTE)"); munmap(ptr, size); - ksft_print_msg("FAIL: mprotect PROT_MTE property\n"); return NULL; } } @@ -190,13 +190,13 @@ void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags lseek(fd, 0, SEEK_SET); for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) { if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) { - perror("initialising buffer"); + ksft_perror("initialising buffer"); return NULL; } } index -= INIT_BUFFER_SIZE; if (write(fd, buffer, size - index) != size - index) { - perror("initialising buffer"); + ksft_perror("initialising buffer"); return NULL; } return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd); @@ -217,12 +217,12 @@ void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping, lseek(fd, 0, SEEK_SET); for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE) if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) { - perror("initialising buffer"); + ksft_perror("initialising buffer"); return NULL; } index -= INIT_BUFFER_SIZE; if (write(fd, buffer, map_size - index) != map_size - index) { - perror("initialising buffer"); + ksft_perror("initialising buffer"); return NULL; } return __mte_allocate_memory_range(size, mem_type, mapping, range_before, @@ -319,10 +319,9 @@ int mte_default_setup(void) unsigned long en = 0; int ret; - if (!(hwcaps2 & HWCAP2_MTE)) { - ksft_print_msg("SKIP: MTE features unavailable\n"); - return KSFT_SKIP; - } + if (!(hwcaps2 & HWCAP2_MTE)) + ksft_exit_skip("MTE features unavailable\n"); + /* Get current mte mode */ ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0); if (ret < 0) { @@ -359,7 +358,7 @@ int create_temp_file(void) /* Create a file in the tmpfs filesystem */ fd = mkstemp(&filename[0]); if (fd == -1) { - perror(filename); + ksft_perror(filename); ksft_print_msg("FAIL: Unable to open temporary file\n"); return 0; } diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h index 2d3e71724e55..a0017a303beb 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.h +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -77,13 +77,13 @@ static inline void evaluate_test(int err, const char *msg) { switch (err) { case KSFT_PASS: - ksft_test_result_pass(msg); + ksft_test_result_pass("%s", msg); break; case KSFT_FAIL: - ksft_test_result_fail(msg); + ksft_test_result_fail("%s", msg); break; case KSFT_SKIP: - ksft_test_result_skip(msg); + ksft_test_result_skip("%s", msg); break; default: ksft_test_result_error("Unknown return code %d from %s", diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile index 72e290b0b10c..b5a1c80e0ead 100644 --- a/tools/testing/selftests/arm64/pauth/Makefile +++ b/tools/testing/selftests/arm64/pauth/Makefile @@ -7,8 +7,14 @@ CC := $(CROSS_COMPILE)gcc endif CFLAGS += -mbranch-protection=pac-ret + +# All supported LLVMs have PAC, test for GCC +ifeq ($(LLVM),1) +pauth_cc_support := 1 +else # check if the compiler supports ARMv8.3 and branch protection with PAuth pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) +endif ifeq ($(pauth_cc_support),1) TEST_GEN_PROGS := pac diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c index b743daa772f5..6d21b2fc758d 100644 --- a/tools/testing/selftests/arm64/pauth/pac.c +++ b/tools/testing/selftests/arm64/pauth/pac.c @@ -13,7 +13,7 @@ #include "../../kselftest_harness.h" #include "helper.h" -#define PAC_COLLISION_ATTEMPTS 10 +#define PAC_COLLISION_ATTEMPTS 1000 /* * The kernel sets TBID by default. So bits 55 and above should remain * untouched no matter what. @@ -182,6 +182,9 @@ int exec_sign_all(struct signatures *signed_vals, size_t val) return -1; } + close(new_stdin[1]); + close(new_stdout[0]); + return 0; } diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore index b2f2bfd5c6aa..b257db665a35 100644 --- a/tools/testing/selftests/arm64/signal/.gitignore +++ b/tools/testing/selftests/arm64/signal/.gitignore @@ -3,6 +3,7 @@ mangle_* fake_sigreturn_* fpmr_* poe_* +gcs_* sme_* ssve_* sve_* diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile index edb3613513b8..1381039fb36f 100644 --- a/tools/testing/selftests/arm64/signal/Makefile +++ b/tools/testing/selftests/arm64/signal/Makefile @@ -2,7 +2,7 @@ # Copyright (C) 2019 ARM Limited # Additional include paths needed by kselftest.h and local headers -CFLAGS += -D_GNU_SOURCE -std=gnu99 -I. +CFLAGS += -std=gnu99 -I. SRCS := $(filter-out testcases/testcases.c,$(wildcard testcases/*.c)) PROGS := $(patsubst %.c,%,$(SRCS)) diff --git a/tools/testing/selftests/arm64/signal/sve_helpers.h b/tools/testing/selftests/arm64/signal/sve_helpers.h index 50948ce471cc..ca133b93375f 100644 --- a/tools/testing/selftests/arm64/signal/sve_helpers.h +++ b/tools/testing/selftests/arm64/signal/sve_helpers.h @@ -18,4 +18,17 @@ extern unsigned int nvls; int sve_fill_vls(bool use_sme, int min_vls); +static inline uint64_t get_svcr(void) +{ + uint64_t val; + + asm volatile ( + "mrs %0, S3_3_C4_C2_2\n" + : "=r"(val) + : + : "cc"); + + return val; +} + #endif diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c index 00051b40d71e..1304c8ec0f2f 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.c +++ b/tools/testing/selftests/arm64/signal/test_signals.c @@ -7,6 +7,10 @@ * Each test provides its own tde struct tdescr descriptor to link with * this wrapper. Framework provides common helpers. */ + +#include <sys/auxv.h> +#include <sys/prctl.h> + #include <kselftest.h> #include "test_signals.h" @@ -16,6 +20,16 @@ struct tdescr *current = &tde; int main(int argc, char *argv[]) { + /* + * Ensure GCS is at least enabled throughout the tests if + * supported, otherwise the inability to return from the + * function that enabled GCS makes it very inconvenient to set + * up test cases. The prctl() may fail if GCS was locked by + * libc setup code. + */ + if (getauxval(AT_HWCAP) & HWCAP_GCS) + gcs_set_state(PR_SHADOW_STACK_ENABLE); + ksft_print_msg("%s :: %s\n", current->name, current->descr); if (test_setup(current) && test_init(current)) { test_run(current); @@ -23,5 +37,6 @@ int main(int argc, char *argv[]) } test_result(current); - return current->result; + /* Do not return in case GCS was enabled */ + exit(current->result); } diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h index 1e6273d81575..ee75a2c25ce7 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.h +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -35,6 +35,7 @@ enum { FSME_BIT, FSME_FA64_BIT, FSME2_BIT, + FGCS_BIT, FMAX_END }; @@ -43,6 +44,7 @@ enum { #define FEAT_SME (1UL << FSME_BIT) #define FEAT_SME_FA64 (1UL << FSME_FA64_BIT) #define FEAT_SME2 (1UL << FSME2_BIT) +#define FEAT_GCS (1UL << FGCS_BIT) /* * A descriptor used to describe and configure a test case. @@ -69,6 +71,10 @@ struct tdescr { * Zero when no signal is expected on success */ int sig_ok; + /* + * expected si_code for sig_ok, or 0 to not check + */ + int sig_ok_code; /* signum expected on unsupported CPU features. */ int sig_unsupp; /* a timeout in second for test completion */ diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 0dc948db3a4a..5d3621921cfe 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -30,6 +30,7 @@ static char const *const feats_names[FMAX_END] = { " SME ", " FA64 ", " SME2 ", + " GCS ", }; #define MAX_FEATS_SZ 128 @@ -142,16 +143,25 @@ static bool handle_signal_ok(struct tdescr *td, "current->token ZEROED...test is probably broken!\n"); abort(); } - /* - * Trying to narrow down the SEGV to the ones generated by Kernel itself - * via arm64_notify_segfault(). This is a best-effort check anyway, and - * the si_code check may need to change if this aspect of the kernel - * ABI changes. - */ - if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) { - fprintf(stdout, - "si_code != SEGV_ACCERR...test is probably broken!\n"); - abort(); + if (td->sig_ok_code) { + if (si->si_code != td->sig_ok_code) { + fprintf(stdout, "si_code is %d not %d\n", + si->si_code, td->sig_ok_code); + abort(); + } + } else { + /* + * Trying to narrow down the SEGV to the ones + * generated by Kernel itself via + * arm64_notify_segfault(). This is a best-effort + * check anyway, and the si_code check may need to + * change if this aspect of the kernel ABI changes. + */ + if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) { + fprintf(stdout, + "si_code != SEGV_ACCERR...test is probably broken!\n"); + abort(); + } } td->pass = 1; /* @@ -329,6 +339,8 @@ int test_init(struct tdescr *td) td->feats_supported |= FEAT_SME_FA64; if (getauxval(AT_HWCAP2) & HWCAP2_SME2) td->feats_supported |= FEAT_SME2; + if (getauxval(AT_HWCAP) & HWCAP_GCS) + td->feats_supported |= FEAT_GCS; if (feats_ok(td)) { if (td->feats_required & td->feats_supported) fprintf(stderr, diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index 762c8fe9c54a..36fc12b3cd60 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -6,6 +6,7 @@ #include <assert.h> #include <stdio.h> +#include <stdint.h> #include <string.h> #include <linux/compiler.h> @@ -18,6 +19,44 @@ void test_cleanup(struct tdescr *td); int test_run(struct tdescr *td); void test_result(struct tdescr *td); +#ifndef __NR_prctl +#define __NR_prctl 167 +#endif + +/* + * The prctl takes 1 argument but we need to ensure that the other + * values passed in registers to the syscall are zero since the kernel + * validates them. + */ +#define gcs_set_state(state) \ + ({ \ + register long _num __asm__ ("x8") = __NR_prctl; \ + register long _arg1 __asm__ ("x0") = PR_SET_SHADOW_STACK_STATUS; \ + register long _arg2 __asm__ ("x1") = (long)(state); \ + register long _arg3 __asm__ ("x2") = 0; \ + register long _arg4 __asm__ ("x3") = 0; \ + register long _arg5 __asm__ ("x4") = 0; \ + \ + __asm__ volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_arg3), "r"(_arg4), \ + "r"(_arg5), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ + }) + +static inline __attribute__((always_inline)) uint64_t get_gcspr_el0(void) +{ + uint64_t val; + + asm volatile("mrs %0, S3_3_C2_C5_1" : "=r" (val)); + + return val; +} + static inline bool feats_ok(struct tdescr *td) { if (td->feats_incompatible & td->feats_supported) diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c b/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c new file mode 100644 index 000000000000..6228448b2ae7 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/gcs_exception_fault.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 ARM Limited + */ + +#include <errno.h> +#include <signal.h> +#include <unistd.h> + +#include <sys/mman.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +/* + * We should get this from asm/siginfo.h but the testsuite is being + * clever with redefining siginfo_t. + */ +#ifndef SEGV_CPERR +#define SEGV_CPERR 10 +#endif + +static inline void gcsss1(uint64_t Xt) +{ + asm volatile ( + "sys #3, C7, C7, #2, %0\n" + : + : "rZ" (Xt) + : "memory"); +} + +static int gcs_op_fault_trigger(struct tdescr *td) +{ + /* + * The slot below our current GCS should be in a valid GCS but + * must not have a valid cap in it. + */ + gcsss1(get_gcspr_el0() - 8); + + return 0; +} + +static int gcs_op_fault_signal(struct tdescr *td, siginfo_t *si, + ucontext_t *uc) +{ + ASSERT_GOOD_CONTEXT(uc); + + return 1; +} + +struct tdescr tde = { + .name = "Invalid GCS operation", + .descr = "An invalid GCS operation generates the expected signal", + .feats_required = FEAT_GCS, + .timeout = 3, + .sig_ok = SIGSEGV, + .sig_ok_code = SEGV_CPERR, + .sanity_disabled = true, + .trigger = gcs_op_fault_trigger, + .run = gcs_op_fault_signal, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c b/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c new file mode 100644 index 000000000000..b405d82321da --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/gcs_frame.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 ARM Limited + */ + +#include <signal.h> +#include <ucontext.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +static union { + ucontext_t uc; + char buf[1024 * 64]; +} context; + +static int gcs_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc) +{ + size_t offset; + struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context); + struct gcs_context *gcs; + unsigned long expected, gcspr; + uint64_t *u64_val; + int ret; + + ret = prctl(PR_GET_SHADOW_STACK_STATUS, &expected, 0, 0, 0); + if (ret != 0) { + fprintf(stderr, "Unable to query GCS status\n"); + return 1; + } + + /* We expect a cap to be added to the GCS in the signal frame */ + gcspr = get_gcspr_el0(); + gcspr -= 8; + fprintf(stderr, "Expecting GCSPR_EL0 %lx\n", gcspr); + + if (!get_current_context(td, &context.uc, sizeof(context))) { + fprintf(stderr, "Failed getting context\n"); + return 1; + } + + /* Ensure that the signal restore token was consumed */ + u64_val = (uint64_t *)get_gcspr_el0() + 1; + if (*u64_val) { + fprintf(stderr, "GCS value at %p is %lx not 0\n", + u64_val, *u64_val); + return 1; + } + + fprintf(stderr, "Got context\n"); + + head = get_header(head, GCS_MAGIC, GET_BUF_RESV_SIZE(context), + &offset); + if (!head) { + fprintf(stderr, "No GCS context\n"); + return 1; + } + + gcs = (struct gcs_context *)head; + + /* Basic size validation is done in get_current_context() */ + + if (gcs->features_enabled != expected) { + fprintf(stderr, "Features enabled %llx but expected %lx\n", + gcs->features_enabled, expected); + return 1; + } + + if (gcs->gcspr != gcspr) { + fprintf(stderr, "Got GCSPR %llx but expected %lx\n", + gcs->gcspr, gcspr); + return 1; + } + + fprintf(stderr, "GCS context validated\n"); + td->pass = 1; + + return 0; +} + +struct tdescr tde = { + .name = "GCS basics", + .descr = "Validate a GCS signal context", + .feats_required = FEAT_GCS, + .timeout = 3, + .run = gcs_regs, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c b/tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c new file mode 100644 index 000000000000..faeabb18c4b2 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/gcs_write_fault.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2023 ARM Limited + */ + +#include <errno.h> +#include <signal.h> +#include <unistd.h> + +#include <sys/mman.h> +#include <sys/prctl.h> + +#include "test_signals_utils.h" +#include "testcases.h" + +static uint64_t *gcs_page; + +#ifndef __NR_map_shadow_stack +#define __NR_map_shadow_stack 453 +#endif + +static bool alloc_gcs(struct tdescr *td) +{ + long page_size = sysconf(_SC_PAGE_SIZE); + + gcs_page = (void *)syscall(__NR_map_shadow_stack, 0, + page_size, 0); + if (gcs_page == MAP_FAILED) { + fprintf(stderr, "Failed to map %ld byte GCS: %d\n", + page_size, errno); + return false; + } + + return true; +} + +static int gcs_write_fault_trigger(struct tdescr *td) +{ + /* Verify that the page is readable (ie, not completely unmapped) */ + fprintf(stderr, "Read value 0x%lx\n", gcs_page[0]); + + /* A regular write should trigger a fault */ + gcs_page[0] = EINVAL; + + return 0; +} + +static int gcs_write_fault_signal(struct tdescr *td, siginfo_t *si, + ucontext_t *uc) +{ + ASSERT_GOOD_CONTEXT(uc); + + return 1; +} + + +struct tdescr tde = { + .name = "GCS write fault", + .descr = "Normal writes to a GCS segfault", + .feats_required = FEAT_GCS, + .timeout = 3, + .sig_ok = SIGSEGV, + .sanity_disabled = true, + .init = alloc_gcs, + .trigger = gcs_write_fault_trigger, + .run = gcs_write_fault_signal, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c index 6dbe48cf8b09..1dbca9afb13c 100644 --- a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c @@ -85,6 +85,11 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, fprintf(stderr, "Got expected size %u and VL %d\n", head->size, ssve->vl); + if (get_svcr() != 0) { + fprintf(stderr, "Unexpected SVCR %lx\n", get_svcr()); + return 1; + } + return 0; } diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index e6daa94fcd2e..0c1a6b26afac 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -198,6 +198,13 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) *err = "Bad size for fpmr_context"; new_flags |= FPMR_CTX; break; + case GCS_MAGIC: + if (flags & GCS_CTX) + *err = "Multiple GCS_MAGIC"; + if (head->size != sizeof(struct gcs_context)) + *err = "Bad size for gcs_context"; + new_flags |= GCS_CTX; + break; case EXTRA_MAGIC: if (flags & EXTRA_CTX) *err = "Multiple EXTRA_MAGIC"; diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h index 9872b8912714..98b97efdda23 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.h +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h @@ -20,6 +20,7 @@ #define EXTRA_CTX (1 << 3) #define ZT_CTX (1 << 4) #define FPMR_CTX (1 << 5) +#define GCS_CTX (1 << 6) #define KSFT_BAD_MAGIC 0xdeadbeef diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c index b9e13f27f1f9..badaead5326a 100644 --- a/tools/testing/selftests/arm64/signal/testcases/za_regs.c +++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c @@ -91,6 +91,11 @@ static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc, return 1; } + if (get_svcr() != 0) { + fprintf(stderr, "Unexpected SVCR %lx\n", get_svcr()); + return 1; + } + return 0; } diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index e6533b3400de..c2a1842c3d8b 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -16,7 +16,6 @@ fixdep /test_progs-cpuv4 test_verifier_log feature -test_sock urandom_read test_sockmap test_lirc_mode2_user @@ -24,7 +23,6 @@ test_flow_dissector flow_dissector_load test_tcpnotify_user test_libbpf -test_tcp_check_syncookie_user test_sysctl xdping test_cpp diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index f04af11df8eb..6ad3b1ba1920 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -10,6 +10,7 @@ TOOLSDIR := $(abspath ../../..) LIBDIR := $(TOOLSDIR)/lib BPFDIR := $(LIBDIR)/bpf TOOLSINCDIR := $(TOOLSDIR)/include +TOOLSARCHINCDIR := $(TOOLSDIR)/arch/$(SRCARCH)/include BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool APIDIR := $(TOOLSINCDIR)/uapi ifneq ($(O),) @@ -44,7 +45,7 @@ CFLAGS += -g $(OPT_FLAGS) -rdynamic \ -Wall -Werror -fno-omit-frame-pointer \ $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \ - -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) + -I$(TOOLSINCDIR) -I$(TOOLSARCHINCDIR) -I$(APIDIR) -I$(OUTPUT) LDFLAGS += $(SAN_LDFLAGS) LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread @@ -83,7 +84,7 @@ endif # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ - test_sock test_sockmap \ + test_sockmap \ test_tcpnotify_user test_sysctl \ test_progs-no_alu32 TEST_INST_SUBDIRS := no_alu32 @@ -132,12 +133,10 @@ TEST_PROGS := test_kmod.sh \ test_tunnel.sh \ test_lwt_seg6local.sh \ test_lirc_mode2.sh \ - test_skb_cgroup_id.sh \ test_flow_dissector.sh \ test_xdp_vlan_mode_generic.sh \ test_xdp_vlan_mode_native.sh \ test_lwt_ip_encap.sh \ - test_tcp_check_syncookie.sh \ test_tc_tunnel.sh \ test_tc_edt.sh \ test_xdping.sh \ @@ -154,10 +153,23 @@ TEST_PROGS_EXTENDED := with_addr.sh \ # Compile but not part of 'make run_tests' TEST_GEN_PROGS_EXTENDED = \ - flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ - test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \ - xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \ - xdp_features bpf_test_no_cfi.ko + bench \ + bpf_testmod.ko \ + bpf_test_modorder_x.ko \ + bpf_test_modorder_y.ko \ + bpf_test_no_cfi.ko \ + flow_dissector_load \ + runqslower \ + test_cpp \ + test_flow_dissector \ + test_lirc_mode2_user \ + veristat \ + xdp_features \ + xdp_hw_metadata \ + xdp_redirect_multi \ + xdp_synproxy \ + xdping \ + xskxceiver TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi @@ -263,7 +275,7 @@ $(OUTPUT)/%:%.c ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 riscv)) LLD := lld else -LLD := ld +LLD := $(shell command -v $(LD)) endif # Filter out -static for liburandom_read.so and its dependent targets so that static builds @@ -273,6 +285,7 @@ $(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c liburandom $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \ $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \ $(filter %.c,$^) $(filter-out -static,$(LDLIBS)) \ + -Wno-unused-command-line-argument \ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ -Wl,--version-script=liburandom_read.map \ -fPIC -shared -o $@ @@ -281,6 +294,7 @@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_r $(call msg,BINARY,,$@) $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \ $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \ + -Wno-unused-command-line-argument \ -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT) \ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \ -Wl,-rpath=. -o $@ @@ -294,15 +308,36 @@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch]) $(call msg,MOD,,$@) $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_testmod + $(Q)$(MAKE) $(submake_extras) -C bpf_testmod \ + RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ + EXTRA_CFLAGS='' EXTRA_LDFLAGS='' $(Q)cp bpf_testmod/bpf_testmod.ko $@ $(OUTPUT)/bpf_test_no_cfi.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_no_cfi/Makefile bpf_test_no_cfi/*.[ch]) $(call msg,MOD,,$@) $(Q)$(RM) bpf_test_no_cfi/bpf_test_no_cfi.ko # force re-compilation - $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_test_no_cfi + $(Q)$(MAKE) $(submake_extras) -C bpf_test_no_cfi \ + RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ + EXTRA_CFLAGS='' EXTRA_LDFLAGS='' $(Q)cp bpf_test_no_cfi/bpf_test_no_cfi.ko $@ +$(OUTPUT)/bpf_test_modorder_x.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_modorder_x/Makefile bpf_test_modorder_x/*.[ch]) + $(call msg,MOD,,$@) + $(Q)$(RM) bpf_test_modorder_x/bpf_test_modorder_x.ko # force re-compilation + $(Q)$(MAKE) $(submake_extras) -C bpf_test_modorder_x \ + RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ + EXTRA_CFLAGS='' EXTRA_LDFLAGS='' + $(Q)cp bpf_test_modorder_x/bpf_test_modorder_x.ko $@ + +$(OUTPUT)/bpf_test_modorder_y.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_modorder_y/Makefile bpf_test_modorder_y/*.[ch]) + $(call msg,MOD,,$@) + $(Q)$(RM) bpf_test_modorder_y/bpf_test_modorder_y.ko # force re-compilation + $(Q)$(MAKE) $(submake_extras) -C bpf_test_modorder_y \ + RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \ + EXTRA_CFLAGS='' EXTRA_LDFLAGS='' + $(Q)cp bpf_test_modorder_y/bpf_test_modorder_y.ko $@ + + DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool ifneq ($(CROSS_COMPILE),) CROSS_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool @@ -319,8 +354,8 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT) BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/ \ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \ - EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \ - EXTRA_LDFLAGS='$(SAN_LDFLAGS)' && \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \ + EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' && \ cp $(RUNQSLOWER_OUTPUT)runqslower $@ TEST_GEN_PROGS_EXTENDED += $(TRUNNER_BPFTOOL) @@ -335,7 +370,6 @@ JSON_WRITER := $(OUTPUT)/json_writer.o CAP_HELPERS := $(OUTPUT)/cap_helpers.o NETWORK_HELPERS := $(OUTPUT)/network_helpers.o -$(OUTPUT)/test_sock: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS) $(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS) @@ -347,14 +381,14 @@ $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS) $(OUTPUT)/test_maps: $(TESTING_HELPERS) $(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) $(UNPRIV_HELPERS) $(OUTPUT)/xsk.o: $(BPFOBJ) -$(OUTPUT)/test_tcp_check_syncookie_user: $(NETWORK_HELPERS) BPFTOOL ?= $(DEFAULT_BPFTOOL) $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" \ - EXTRA_CFLAGS='-g $(OPT_FLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(EXTRA_CFLAGS)' \ + EXTRA_LDFLAGS='$(EXTRA_LDFLAGS)' \ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \ @@ -365,7 +399,8 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(BPFOBJ) | $(BUILD_DIR)/bpftool $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) \ - EXTRA_CFLAGS='-g $(OPT_FLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(EXTRA_CFLAGS)' \ + EXTRA_LDFLAGS='$(EXTRA_LDFLAGS)' \ OUTPUT=$(BUILD_DIR)/bpftool/ \ LIBBPF_OUTPUT=$(BUILD_DIR)/libbpf/ \ LIBBPF_DESTDIR=$(SCRATCH_DIR)/ \ @@ -388,8 +423,8 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(APIDIR)/linux/bpf.h \ | $(BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \ - EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \ - EXTRA_LDFLAGS='$(SAN_LDFLAGS)' \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \ + EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' \ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers ifneq ($(BPFOBJ),$(HOST_BPFOBJ)) @@ -397,7 +432,9 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(APIDIR)/linux/bpf.h \ | $(HOST_BUILD_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \ - EXTRA_CFLAGS='-g $(OPT_FLAGS)' ARCH= CROSS_COMPILE= \ + ARCH= CROSS_COMPILE= \ + EXTRA_CFLAGS='-g $(OPT_FLAGS) $(EXTRA_CFLAGS)' \ + EXTRA_LDFLAGS='$(EXTRA_LDFLAGS)' \ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \ CC="$(HOSTCC)" LD="$(HOSTLD)" \ DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers @@ -446,6 +483,7 @@ endef IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \ grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__') MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) +BPF_TARGET_ENDIAN=$(if $(IS_LITTLE_ENDIAN),--target=bpfel,--target=bpfeb) ifneq ($(CROSS_COMPILE),) CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) @@ -473,17 +511,17 @@ $(OUTPUT)/cgroup_getset_retval_hooks.o: cgroup_getset_retval_hooks.h # $4 - binary name define CLANG_BPF_BUILD_RULE $(call msg,CLNG-BPF,$4,$2) - $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v3 -o $2 + $(Q)$(CLANG) $3 -O2 $(BPF_TARGET_ENDIAN) -c $1 -mcpu=v3 -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE $(call msg,CLNG-BPF,$4,$2) - $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v2 -o $2 + $(Q)$(CLANG) $3 -O2 $(BPF_TARGET_ENDIAN) -c $1 -mcpu=v2 -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with cpu-v4 define CLANG_CPUV4_BPF_BUILD_RULE $(call msg,CLNG-BPF,$4,$2) - $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v4 -o $2 + $(Q)$(CLANG) $3 -O2 $(BPF_TARGET_ENDIAN) -c $1 -mcpu=v4 -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE @@ -623,10 +661,11 @@ $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_OUTPUT)/%: $$$$(%-deps) $(BPFTOOL) | $(TR # When the compiler generates a %.d file, only skel basenames (not # full paths) are specified as prerequisites for corresponding %.o -# file. This target makes %.skel.h basename dependent on full paths, -# linking generated %.d dependency with actual %.skel.h files. -$(notdir %.skel.h): $(TRUNNER_OUTPUT)/%.skel.h - @true +# file. vpath directives below instruct make to search for skel files +# in TRUNNER_OUTPUT, if they are not present in the working directory. +vpath %.skel.h $(TRUNNER_OUTPUT) +vpath %.lskel.h $(TRUNNER_OUTPUT) +vpath %.subskel.h $(TRUNNER_OUTPUT) endif @@ -713,6 +752,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \ unpriv_helpers.c \ netlink_helpers.c \ jit_disasm_helpers.c \ + io_helpers.c \ test_loader.c \ xsk.c \ disasm.c \ @@ -722,6 +762,8 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \ ip_check_defrag_frags.h TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \ $(OUTPUT)/bpf_test_no_cfi.ko \ + $(OUTPUT)/bpf_test_modorder_x.ko \ + $(OUTPUT)/bpf_test_modorder_y.ko \ $(OUTPUT)/liburandom_read.so \ $(OUTPUT)/xdp_synproxy \ $(OUTPUT)/sign-file \ @@ -856,6 +898,8 @@ EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \ $(addprefix $(OUTPUT)/,*.o *.d *.skel.h *.lskel.h *.subskel.h \ no_alu32 cpuv4 bpf_gcc bpf_testmod.ko \ bpf_test_no_cfi.ko \ + bpf_test_modorder_x.ko \ + bpf_test_modorder_y.ko \ liburandom_read.so) \ $(OUTPUT)/FEATURE-DUMP.selftests diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 2ed0ef6f21ee..32e9f194d449 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -4,6 +4,7 @@ #include <argp.h> #include <unistd.h> #include <stdint.h> +#include "bpf_util.h" #include "bench.h" #include "trigger_bench.skel.h" #include "trace_helpers.h" @@ -72,7 +73,7 @@ static __always_inline void inc_counter(struct counter *counters) unsigned slot; if (unlikely(tid == 0)) - tid = syscall(SYS_gettid); + tid = sys_gettid(); /* multiplicative hashing, it's fast */ slot = 2654435769U * tid; diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index b0668f29f7b3..cd8ecd39c3f3 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -582,4 +582,10 @@ extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, unsigned int flags__k, void *aux__ign) __ksym; #define bpf_wq_set_callback(timer, cb, flags) \ bpf_wq_set_callback_impl(timer, cb, flags, NULL) + +struct bpf_iter_kmem_cache; +extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym; +extern struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) __weak __ksym; +extern void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) __weak __ksym; + #endif diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile b/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile new file mode 100644 index 000000000000..40b25b98ad1b --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile @@ -0,0 +1,19 @@ +BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..) + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +MODULES = bpf_test_modorder_x.ko + +obj-m += bpf_test_modorder_x.o + +all: + +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean + diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c b/tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c new file mode 100644 index 000000000000..0cc747fa912f --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/module.h> +#include <linux/init.h> + +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_test_modorder_retx(void) +{ + return 'x'; +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(bpf_test_modorder_kfunc_x_ids) +BTF_ID_FLAGS(func, bpf_test_modorder_retx); +BTF_KFUNCS_END(bpf_test_modorder_kfunc_x_ids) + +static const struct btf_kfunc_id_set bpf_test_modorder_x_set = { + .owner = THIS_MODULE, + .set = &bpf_test_modorder_kfunc_x_ids, +}; + +static int __init bpf_test_modorder_x_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, + &bpf_test_modorder_x_set); +} + +static void __exit bpf_test_modorder_x_exit(void) +{ +} + +module_init(bpf_test_modorder_x_init); +module_exit(bpf_test_modorder_x_exit); + +MODULE_DESCRIPTION("BPF selftest ordertest module X"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile b/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile new file mode 100644 index 000000000000..52c3ab9d84e2 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile @@ -0,0 +1,19 @@ +BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..) + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +MODULES = bpf_test_modorder_y.ko + +obj-m += bpf_test_modorder_y.o + +all: + +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean + diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c b/tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c new file mode 100644 index 000000000000..c627ee085d13 --- /dev/null +++ b/tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/module.h> +#include <linux/init.h> + +__bpf_kfunc_start_defs(); + +__bpf_kfunc int bpf_test_modorder_rety(void) +{ + return 'y'; +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(bpf_test_modorder_kfunc_y_ids) +BTF_ID_FLAGS(func, bpf_test_modorder_rety); +BTF_KFUNCS_END(bpf_test_modorder_kfunc_y_ids) + +static const struct btf_kfunc_id_set bpf_test_modorder_y_set = { + .owner = THIS_MODULE, + .set = &bpf_test_modorder_kfunc_y_ids, +}; + +static int __init bpf_test_modorder_y_init(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, + &bpf_test_modorder_y_set); +} + +static void __exit bpf_test_modorder_y_exit(void) +{ +} + +module_init(bpf_test_modorder_y_init); +module_exit(bpf_test_modorder_y_exit); + +MODULE_DESCRIPTION("BPF selftest ordertest module Y"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h index 6c3b4d4f173a..aeef86b3da74 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h @@ -40,6 +40,14 @@ DECLARE_TRACE(bpf_testmod_test_nullable_bare, TP_ARGS(ctx__nullable) ); +struct sk_buff; + +DECLARE_TRACE(bpf_testmod_test_raw_tp_null, + TP_PROTO(struct sk_buff *skb), + TP_ARGS(skb) +); + + #undef BPF_TESTMOD_DECLARE_TRACE #ifdef DECLARE_TRACE_WRITABLE #define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \ diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 8835761d9a12..cc9dde507aba 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -245,6 +245,39 @@ __bpf_kfunc void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx) call_rcu(&ctx->rcu, testmod_free_cb); } +static struct bpf_testmod_ops3 *st_ops3; + +static int bpf_testmod_test_3(void) +{ + return 0; +} + +static int bpf_testmod_test_4(void) +{ + return 0; +} + +static struct bpf_testmod_ops3 __bpf_testmod_ops3 = { + .test_1 = bpf_testmod_test_3, + .test_2 = bpf_testmod_test_4, +}; + +static void bpf_testmod_test_struct_ops3(void) +{ + if (st_ops3) + st_ops3->test_1(); +} + +__bpf_kfunc void bpf_testmod_ops3_call_test_1(void) +{ + st_ops3->test_1(); +} + +__bpf_kfunc void bpf_testmod_ops3_call_test_2(void) +{ + st_ops3->test_2(); +} + struct bpf_testmod_btf_type_tag_1 { int a; }; @@ -380,6 +413,10 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, (void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2); + (void)trace_bpf_testmod_test_raw_tp_null(NULL); + + bpf_testmod_test_struct_ops3(); + struct_arg3 = kmalloc((sizeof(struct bpf_testmod_struct_arg_3) + sizeof(int)), GFP_KERNEL); if (struct_arg3 != NULL) { @@ -461,7 +498,7 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = { static int uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func, - struct pt_regs *regs) + struct pt_regs *regs, __u64 *data) { regs->ax = 0x12345678deadbeef; @@ -584,6 +621,8 @@ BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU) BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL) BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE) +BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_1) +BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_2) BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids) BTF_ID_LIST(bpf_testmod_dtor_ids) @@ -1094,6 +1133,10 @@ static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { .is_valid_access = bpf_testmod_ops_is_valid_access, }; +static const struct bpf_verifier_ops bpf_testmod_verifier_ops3 = { + .is_valid_access = bpf_testmod_ops_is_valid_access, +}; + static int bpf_dummy_reg(void *kdata, struct bpf_link *link) { struct bpf_testmod_ops *ops = kdata; @@ -1173,6 +1216,68 @@ struct bpf_struct_ops bpf_testmod_ops2 = { .owner = THIS_MODULE, }; +static int st_ops3_reg(void *kdata, struct bpf_link *link) +{ + int err = 0; + + mutex_lock(&st_ops_mutex); + if (st_ops3) { + pr_err("st_ops has already been registered\n"); + err = -EEXIST; + goto unlock; + } + st_ops3 = kdata; + +unlock: + mutex_unlock(&st_ops_mutex); + return err; +} + +static void st_ops3_unreg(void *kdata, struct bpf_link *link) +{ + mutex_lock(&st_ops_mutex); + st_ops3 = NULL; + mutex_unlock(&st_ops_mutex); +} + +static void test_1_recursion_detected(struct bpf_prog *prog) +{ + struct bpf_prog_stats *stats; + + stats = this_cpu_ptr(prog->stats); + printk("bpf_testmod: oh no, recursing into test_1, recursion_misses %llu", + u64_stats_read(&stats->misses)); +} + +static int st_ops3_check_member(const struct btf_type *t, + const struct btf_member *member, + const struct bpf_prog *prog) +{ + u32 moff = __btf_member_bit_offset(t, member) / 8; + + switch (moff) { + case offsetof(struct bpf_testmod_ops3, test_1): + prog->aux->priv_stack_requested = true; + prog->aux->recursion_detected = test_1_recursion_detected; + fallthrough; + default: + break; + } + return 0; +} + +struct bpf_struct_ops bpf_testmod_ops3 = { + .verifier_ops = &bpf_testmod_verifier_ops3, + .init = bpf_testmod_ops_init, + .init_member = bpf_testmod_ops_init_member, + .reg = st_ops3_reg, + .unreg = st_ops3_unreg, + .check_member = st_ops3_check_member, + .cfi_stubs = &__bpf_testmod_ops3, + .name = "bpf_testmod_ops3", + .owner = THIS_MODULE, +}; + static int bpf_test_mod_st_ops__test_prologue(struct st_ops_args *args) { return 0; @@ -1331,6 +1436,7 @@ static int bpf_testmod_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_testmod_kfunc_set); ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops); ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2); + ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops3, bpf_testmod_ops3); ret = ret ?: register_bpf_struct_ops(&testmod_st_ops, bpf_testmod_st_ops); ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors, ARRAY_SIZE(bpf_testmod_dtors), diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index fb7dff47597a..356803d1c10e 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -94,6 +94,11 @@ struct bpf_testmod_ops2 { int (*test_1)(void); }; +struct bpf_testmod_ops3 { + int (*test_1)(void); + int (*test_2)(void); +}; + struct st_ops_args { u64 a; }; diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h index 10587a29b967..5f6963a320d7 100644 --- a/tools/testing/selftests/bpf/bpf_util.h +++ b/tools/testing/selftests/bpf/bpf_util.h @@ -6,6 +6,7 @@ #include <stdlib.h> #include <string.h> #include <errno.h> +#include <syscall.h> #include <bpf/libbpf.h> /* libbpf_num_possible_cpus */ static inline unsigned int bpf_num_possible_cpus(void) @@ -59,4 +60,15 @@ static inline void bpf_strlcpy(char *dst, const char *src, size_t sz) (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) #endif +/* Availability of gettid across glibc versions is hit-and-miss, therefore + * fallback to syscall in this macro and use it everywhere. + */ +#ifndef sys_gettid +#define sys_gettid() syscall(SYS_gettid) +#endif + +#ifndef ENOTSUPP +#define ENOTSUPP 524 +#endif + #endif /* __BPF_UTIL__ */ diff --git a/tools/testing/selftests/bpf/config.vm b/tools/testing/selftests/bpf/config.vm index a9746ca78777..da543b24c144 100644 --- a/tools/testing/selftests/bpf/config.vm +++ b/tools/testing/selftests/bpf/config.vm @@ -1,12 +1,15 @@ -CONFIG_9P_FS=y CONFIG_9P_FS_POSIX_ACL=y CONFIG_9P_FS_SECURITY=y +CONFIG_9P_FS=y CONFIG_CRYPTO_DEV_VIRTIO=y -CONFIG_NET_9P=y +CONFIG_FUSE_FS=y +CONFIG_FUSE_PASSTHROUGH=y CONFIG_NET_9P_VIRTIO=y +CONFIG_NET_9P=y CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_BLK=y CONFIG_VIRTIO_CONSOLE=y +CONFIG_VIRTIO_FS=y CONFIG_VIRTIO_NET=y CONFIG_VIRTIO_PCI=y CONFIG_VIRTIO_VSOCKETS_COMMON=y diff --git a/tools/testing/selftests/bpf/io_helpers.c b/tools/testing/selftests/bpf/io_helpers.c new file mode 100644 index 000000000000..4ada0a74aa1f --- /dev/null +++ b/tools/testing/selftests/bpf/io_helpers.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <sys/select.h> +#include <unistd.h> +#include <errno.h> + +int read_with_timeout(int fd, char *buf, size_t count, long usec) +{ + const long M = 1000 * 1000; + struct timeval tv = { usec / M, usec % M }; + fd_set fds; + int err; + + FD_ZERO(&fds); + FD_SET(fd, &fds); + err = select(fd + 1, &fds, NULL, NULL, &tv); + if (err < 0) + return err; + if (FD_ISSET(fd, &fds)) + return read(fd, buf, count); + return -EAGAIN; +} diff --git a/tools/testing/selftests/bpf/io_helpers.h b/tools/testing/selftests/bpf/io_helpers.h new file mode 100644 index 000000000000..21e1134cd3ce --- /dev/null +++ b/tools/testing/selftests/bpf/io_helpers.h @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <unistd.h> + +/* As a regular read(2), but allows to specify a timeout in micro-seconds. + * Returns -EAGAIN on timeout. + */ +int read_with_timeout(int fd, char *buf, size_t count, long usec); diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_get_next_key.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_get_next_key.c new file mode 100644 index 000000000000..0ba015686492 --- /dev/null +++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_get_next_key.c @@ -0,0 +1,109 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <linux/bpf.h> +#include <stdio.h> +#include <stdbool.h> +#include <unistd.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <pthread.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> + +#include <test_maps.h> + +struct test_lpm_key { + __u32 prefix; + __u32 data; +}; + +struct get_next_key_ctx { + struct test_lpm_key key; + bool start; + bool stop; + int map_fd; + int loop; +}; + +static void *get_next_key_fn(void *arg) +{ + struct get_next_key_ctx *ctx = arg; + struct test_lpm_key next_key; + int i = 0; + + while (!ctx->start) + usleep(1); + + while (!ctx->stop && i++ < ctx->loop) + bpf_map_get_next_key(ctx->map_fd, &ctx->key, &next_key); + + return NULL; +} + +static void abort_get_next_key(struct get_next_key_ctx *ctx, pthread_t *tids, + unsigned int nr) +{ + unsigned int i; + + ctx->stop = true; + ctx->start = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); +} + +/* This test aims to prevent regression of future. As long as the kernel does + * not panic, it is considered as success. + */ +void test_lpm_trie_map_get_next_key(void) +{ +#define MAX_NR_THREADS 8 + LIBBPF_OPTS(bpf_map_create_opts, create_opts, + .map_flags = BPF_F_NO_PREALLOC); + struct test_lpm_key key = {}; + __u32 val = 0; + int map_fd; + const __u32 max_prefixlen = 8 * (sizeof(key) - sizeof(key.prefix)); + const __u32 max_entries = max_prefixlen + 1; + unsigned int i, nr = MAX_NR_THREADS, loop = 65536; + pthread_t tids[MAX_NR_THREADS]; + struct get_next_key_ctx ctx; + int err; + + map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie_map", + sizeof(struct test_lpm_key), sizeof(__u32), + max_entries, &create_opts); + CHECK(map_fd == -1, "bpf_map_create()", "error:%s\n", + strerror(errno)); + + for (i = 0; i <= max_prefixlen; i++) { + key.prefix = i; + err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY); + CHECK(err, "bpf_map_update_elem()", "error:%s\n", + strerror(errno)); + } + + ctx.start = false; + ctx.stop = false; + ctx.map_fd = map_fd; + ctx.loop = loop; + memcpy(&ctx.key, &key, sizeof(key)); + + for (i = 0; i < nr; i++) { + err = pthread_create(&tids[i], NULL, get_next_key_fn, &ctx); + if (err) { + abort_get_next_key(&ctx, tids, i); + CHECK(err, "pthread_create", "error %d\n", err); + } + } + + ctx.start = true; + for (i = 0; i < nr; i++) + pthread_join(tids[i], NULL); + + printf("%s:PASS\n", __func__); + + close(map_fd); +} diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c index 7d050364efca..62971dbf2996 100644 --- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c +++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c @@ -12,6 +12,7 @@ #include <bpf/bpf.h> #include <bpf/libbpf.h> +#include "bpf_util.h" #include "test_maps.h" #include "task_local_storage_helpers.h" #include "read_bpf_task_storage_busy.skel.h" @@ -115,7 +116,7 @@ void test_task_storage_map_stress_lookup(void) CHECK(err, "attach", "error %d\n", err); /* Trigger program */ - syscall(SYS_gettid); + sys_gettid(); skel->bss->pid = 0; CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy); diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index c72c16e1aff8..5764155b6d25 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NETWORK_HELPERS_H #define __NETWORK_HELPERS_H +#include <arpa/inet.h> #include <sys/socket.h> #include <sys/types.h> #include <linux/types.h> diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 070c52c312e5..6befa870434b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -690,7 +690,7 @@ void test_bpf_cookie(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->bss->my_tid = syscall(SYS_gettid); + skel->bss->my_tid = sys_gettid(); if (test__start_subtest("kprobe")) kprobe_subtest(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 52e6f7570475..6f1bfacd7375 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -226,7 +226,7 @@ static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts, ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL), "pthread_create"); - skel->bss->tid = getpid(); + skel->bss->tid = sys_gettid(); do_dummy_read_opts(skel->progs.dump_task, opts); @@ -249,25 +249,42 @@ static void test_task_common(struct bpf_iter_attach_opts *opts, int num_unknown, ASSERT_EQ(num_known_tid, num_known, "check_num_known_tid"); } -static void test_task_tid(void) +static void *run_test_task_tid(void *arg) { LIBBPF_OPTS(bpf_iter_attach_opts, opts); union bpf_iter_link_info linfo; int num_unknown_tid, num_known_tid; + ASSERT_NEQ(getpid(), sys_gettid(), "check_new_thread_id"); + memset(&linfo, 0, sizeof(linfo)); - linfo.task.tid = getpid(); + linfo.task.tid = sys_gettid(); opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); test_task_common(&opts, 0, 1); linfo.task.tid = 0; linfo.task.pid = getpid(); - test_task_common(&opts, 1, 1); + /* This includes the parent thread, this thread, watchdog timer thread + * and the do_nothing_wait thread + */ + test_task_common(&opts, 3, 1); test_task_common_nocheck(NULL, &num_unknown_tid, &num_known_tid); - ASSERT_GT(num_unknown_tid, 1, "check_num_unknown_tid"); + ASSERT_GT(num_unknown_tid, 2, "check_num_unknown_tid"); ASSERT_EQ(num_known_tid, 1, "check_num_known_tid"); + + return NULL; +} + +static void test_task_tid(void) +{ + pthread_t thread_id; + + /* Create a new thread so pid and tid aren't the same */ + ASSERT_OK(pthread_create(&thread_id, NULL, &run_test_task_tid, NULL), + "pthread_create"); + ASSERT_FALSE(pthread_join(thread_id, NULL), "pthread_join"); } static void test_task_pid(void) @@ -280,7 +297,7 @@ static void test_task_pid(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); - test_task_common(&opts, 1, 1); + test_task_common(&opts, 2, 1); } static void test_task_pidfd(void) @@ -298,7 +315,7 @@ static void test_task_pidfd(void) opts.link_info = &linfo; opts.link_info_len = sizeof(linfo); - test_task_common(&opts, 1, 1); + test_task_common(&opts, 2, 1); close(pidfd); } diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c index 409a06975823..b7d1b52309d0 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c @@ -16,10 +16,6 @@ #include "tcp_ca_kfunc.skel.h" #include "bpf_cc_cubic.skel.h" -#ifndef ENOTSUPP -#define ENOTSUPP 524 -#endif - static const unsigned int total_bytes = 10 * 1024 * 1024; static int expected_stg = 0xeB9F; diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c index ef4d6a3ae423..cf15cc3be491 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -17,32 +17,37 @@ #include "test_progs.h" #include "test_btf_skc_cls_ingress.skel.h" -static struct test_btf_skc_cls_ingress *skel; -static struct sockaddr_in6 srv_sa6; -static __u32 duration; +#define TEST_NS "skc_cls_ingress" -static int prepare_netns(void) +#define BIT(n) (1 << (n)) +#define TEST_MODE_IPV4 BIT(0) +#define TEST_MODE_IPV6 BIT(1) +#define TEST_MODE_DUAL (TEST_MODE_IPV4 | TEST_MODE_IPV6) + +#define SERVER_ADDR_IPV4 "127.0.0.1" +#define SERVER_ADDR_IPV6 "::1" +#define SERVER_ADDR_DUAL "::0" +/* RFC791, 576 for minimal IPv4 datagram, minus 40 bytes of TCP header */ +#define MIN_IPV4_MSS 536 + +static struct netns_obj *prepare_netns(struct test_btf_skc_cls_ingress *skel) { LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_tc_opts, tc_attach, .prog_fd = bpf_program__fd(skel->progs.cls_ingress)); + struct netns_obj *ns = NULL; - if (CHECK(unshare(CLONE_NEWNET), "create netns", - "unshare(CLONE_NEWNET): %s (%d)", - strerror(errno), errno)) - return -1; - - if (CHECK(system("ip link set dev lo up"), - "ip link set dev lo up", "failed\n")) - return -1; + ns = netns_new(TEST_NS, true); + if (!ASSERT_OK_PTR(ns, "create and join netns")) + return ns; qdisc_lo.ifindex = if_nametoindex("lo"); if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact")) - return -1; + goto free_ns; if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach), "filter add dev lo ingress")) - return -1; + goto free_ns; /* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the * bpf_tcp_gen_syncookie() helper. @@ -50,71 +55,142 @@ static int prepare_netns(void) if (write_sysctl("/proc/sys/net/ipv4/tcp_window_scaling", "1") || write_sysctl("/proc/sys/net/ipv4/tcp_timestamps", "1") || write_sysctl("/proc/sys/net/ipv4/tcp_sack", "1")) - return -1; + goto free_ns; + + return ns; - return 0; +free_ns: + netns_free(ns); + return NULL; } -static void reset_test(void) +static void reset_test(struct test_btf_skc_cls_ingress *skel) { + memset(&skel->bss->srv_sa4, 0, sizeof(skel->bss->srv_sa4)); memset(&skel->bss->srv_sa6, 0, sizeof(skel->bss->srv_sa6)); skel->bss->listen_tp_sport = 0; skel->bss->req_sk_sport = 0; skel->bss->recv_cookie = 0; skel->bss->gen_cookie = 0; skel->bss->linum = 0; + skel->bss->mss = 0; } -static void print_err_line(void) +static void print_err_line(struct test_btf_skc_cls_ingress *skel) { if (skel->bss->linum) printf("bpf prog error at line %u\n", skel->bss->linum); } -static void test_conn(void) +static int v6only_true(int fd, void *opts) +{ + int mode = true; + + return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode)); +} + +static int v6only_false(int fd, void *opts) { + int mode = false; + + return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode)); +} + +static void run_test(struct test_btf_skc_cls_ingress *skel, bool gen_cookies, + int ip_mode) +{ + const char *tcp_syncookies = gen_cookies ? "2" : "1"; int listen_fd = -1, cli_fd = -1, srv_fd = -1, err; - socklen_t addrlen = sizeof(srv_sa6); + struct network_helper_opts opts = { 0 }; + struct sockaddr_storage *addr; + struct sockaddr_in6 srv_sa6; + struct sockaddr_in srv_sa4; + socklen_t addr_len; + int sock_family; + char *srv_addr; int srv_port; - if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + switch (ip_mode) { + case TEST_MODE_IPV4: + sock_family = AF_INET; + srv_addr = SERVER_ADDR_IPV4; + addr = (struct sockaddr_storage *)&srv_sa4; + addr_len = sizeof(srv_sa4); + break; + case TEST_MODE_IPV6: + opts.post_socket_cb = v6only_true; + sock_family = AF_INET6; + srv_addr = SERVER_ADDR_IPV6; + addr = (struct sockaddr_storage *)&srv_sa6; + addr_len = sizeof(srv_sa6); + break; + case TEST_MODE_DUAL: + opts.post_socket_cb = v6only_false; + sock_family = AF_INET6; + srv_addr = SERVER_ADDR_DUAL; + addr = (struct sockaddr_storage *)&srv_sa6; + addr_len = sizeof(srv_sa6); + break; + default: + PRINT_FAIL("Unknown IP mode %d", ip_mode); return; + } - listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); - if (CHECK_FAIL(listen_fd == -1)) + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", tcp_syncookies)) return; - err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); - if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err, - errno)) - goto done; - memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); - srv_port = ntohs(srv_sa6.sin6_port); + listen_fd = start_server_str(sock_family, SOCK_STREAM, srv_addr, 0, + &opts); + if (!ASSERT_OK_FD(listen_fd, "start server")) + return; - cli_fd = connect_to_fd(listen_fd, 0); - if (CHECK_FAIL(cli_fd == -1)) + err = getsockname(listen_fd, (struct sockaddr *)addr, &addr_len); + if (!ASSERT_OK(err, "getsockname(listen_fd)")) goto done; - srv_fd = accept(listen_fd, NULL, NULL); - if (CHECK_FAIL(srv_fd == -1)) + switch (ip_mode) { + case TEST_MODE_IPV4: + memcpy(&skel->bss->srv_sa4, &srv_sa4, sizeof(srv_sa4)); + srv_port = ntohs(srv_sa4.sin_port); + break; + case TEST_MODE_IPV6: + case TEST_MODE_DUAL: + memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); + srv_port = ntohs(srv_sa6.sin6_port); + break; + default: goto done; + } - if (CHECK(skel->bss->listen_tp_sport != srv_port || - skel->bss->req_sk_sport != srv_port, - "Unexpected sk src port", - "listen_tp_sport:%u req_sk_sport:%u expected:%u\n", - skel->bss->listen_tp_sport, skel->bss->req_sk_sport, - srv_port)) + cli_fd = connect_to_fd(listen_fd, 0); + if (!ASSERT_OK_FD(cli_fd, "connect client")) goto done; - if (CHECK(skel->bss->gen_cookie || skel->bss->recv_cookie, - "Unexpected syncookie states", - "gen_cookie:%u recv_cookie:%u\n", - skel->bss->gen_cookie, skel->bss->recv_cookie)) + srv_fd = accept(listen_fd, NULL, NULL); + if (!ASSERT_OK_FD(srv_fd, "accept connection")) goto done; - CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n", - skel->bss->linum); + ASSERT_EQ(skel->bss->listen_tp_sport, srv_port, "listen tp src port"); + + if (!gen_cookies) { + ASSERT_EQ(skel->bss->req_sk_sport, srv_port, + "request socket source port with syncookies disabled"); + ASSERT_EQ(skel->bss->gen_cookie, 0, + "generated syncookie with syncookies disabled"); + ASSERT_EQ(skel->bss->recv_cookie, 0, + "received syncookie with syncookies disabled"); + } else { + ASSERT_EQ(skel->bss->req_sk_sport, 0, + "request socket source port with syncookies enabled"); + ASSERT_NEQ(skel->bss->gen_cookie, 0, + "syncookie properly generated"); + ASSERT_EQ(skel->bss->gen_cookie, skel->bss->recv_cookie, + "matching syncookies on client and server"); + ASSERT_GT(skel->bss->mss, MIN_IPV4_MSS, + "MSS in cookie min value"); + ASSERT_LT(skel->bss->mss, USHRT_MAX, + "MSS in cookie max value"); + } done: if (listen_fd != -1) @@ -125,96 +201,74 @@ done: close(srv_fd); } -static void test_syncookie(void) +static void test_conn_ipv4(struct test_btf_skc_cls_ingress *skel) { - int listen_fd = -1, cli_fd = -1, srv_fd = -1, err; - socklen_t addrlen = sizeof(srv_sa6); - int srv_port; - - /* Enforce syncookie mode */ - if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2")) - return; - - listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); - if (CHECK_FAIL(listen_fd == -1)) - return; - - err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); - if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err, - errno)) - goto done; - memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); - srv_port = ntohs(srv_sa6.sin6_port); - - cli_fd = connect_to_fd(listen_fd, 0); - if (CHECK_FAIL(cli_fd == -1)) - goto done; - - srv_fd = accept(listen_fd, NULL, NULL); - if (CHECK_FAIL(srv_fd == -1)) - goto done; + run_test(skel, false, TEST_MODE_IPV4); +} - if (CHECK(skel->bss->listen_tp_sport != srv_port, - "Unexpected tp src port", - "listen_tp_sport:%u expected:%u\n", - skel->bss->listen_tp_sport, srv_port)) - goto done; +static void test_conn_ipv6(struct test_btf_skc_cls_ingress *skel) +{ + run_test(skel, false, TEST_MODE_IPV6); +} - if (CHECK(skel->bss->req_sk_sport, - "Unexpected req_sk src port", - "req_sk_sport:%u expected:0\n", - skel->bss->req_sk_sport)) - goto done; +static void test_conn_dual(struct test_btf_skc_cls_ingress *skel) +{ + run_test(skel, false, TEST_MODE_DUAL); +} - if (CHECK(!skel->bss->gen_cookie || - skel->bss->gen_cookie != skel->bss->recv_cookie, - "Unexpected syncookie states", - "gen_cookie:%u recv_cookie:%u\n", - skel->bss->gen_cookie, skel->bss->recv_cookie)) - goto done; +static void test_syncookie_ipv4(struct test_btf_skc_cls_ingress *skel) +{ + run_test(skel, true, TEST_MODE_IPV4); +} - CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n", - skel->bss->linum); +static void test_syncookie_ipv6(struct test_btf_skc_cls_ingress *skel) +{ + run_test(skel, true, TEST_MODE_IPV6); +} -done: - if (listen_fd != -1) - close(listen_fd); - if (cli_fd != -1) - close(cli_fd); - if (srv_fd != -1) - close(srv_fd); +static void test_syncookie_dual(struct test_btf_skc_cls_ingress *skel) +{ + run_test(skel, true, TEST_MODE_DUAL); } struct test { const char *desc; - void (*run)(void); + void (*run)(struct test_btf_skc_cls_ingress *skel); }; #define DEF_TEST(name) { #name, test_##name } static struct test tests[] = { - DEF_TEST(conn), - DEF_TEST(syncookie), + DEF_TEST(conn_ipv4), + DEF_TEST(conn_ipv6), + DEF_TEST(conn_dual), + DEF_TEST(syncookie_ipv4), + DEF_TEST(syncookie_ipv6), + DEF_TEST(syncookie_dual), }; void test_btf_skc_cls_ingress(void) { + struct test_btf_skc_cls_ingress *skel; + struct netns_obj *ns; int i; skel = test_btf_skc_cls_ingress__open_and_load(); - if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n")) + if (!ASSERT_OK_PTR(skel, "test_btf_skc_cls_ingress__open_and_load")) return; for (i = 0; i < ARRAY_SIZE(tests); i++) { if (!test__start_subtest(tests[i].desc)) continue; - if (prepare_netns()) + ns = prepare_netns(skel); + if (!ns) break; - tests[i].run(); + tests[i].run(skel); - print_err_line(); - reset_test(); + print_err_line(skel); + reset_test(skel); + netns_free(ns); } test_btf_skc_cls_ingress__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/cb_refs.c b/tools/testing/selftests/bpf/prog_tests/cb_refs.c index 3bff680de16c..c40df623a8f7 100644 --- a/tools/testing/selftests/bpf/prog_tests/cb_refs.c +++ b/tools/testing/selftests/bpf/prog_tests/cb_refs.c @@ -11,8 +11,8 @@ struct { const char *prog_name; const char *err_msg; } cb_refs_tests[] = { - { "underflow_prog", "reference has not been acquired before" }, - { "leak_prog", "Unreleased reference" }, + { "underflow_prog", "must point to scalar, or struct with scalar" }, + { "leak_prog", "Possibly NULL pointer passed to helper arg2" }, { "nested_cb", "Unreleased reference id=4 alloc_insn=2" }, /* alloc_insn=2{4,5} */ { "non_cb_transfer_ref", "Unreleased reference id=4 alloc_insn=1" }, /* alloc_insn=1{1,2} */ }; diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_ancestor.c b/tools/testing/selftests/bpf/prog_tests/cgroup_ancestor.c index 9250a1e9f9af..3f9ffdf71343 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_ancestor.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_ancestor.c @@ -35,7 +35,7 @@ static int send_datagram(void) if (!ASSERT_OK_FD(sock, "create socket")) return sock; - if (!ASSERT_OK(connect(sock, &addr, sizeof(addr)), "connect")) { + if (!ASSERT_OK(connect(sock, (struct sockaddr *)&addr, sizeof(addr)), "connect")) { close(sock); return -1; } diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 747761572098..9015e2c2ab12 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -63,14 +63,14 @@ static void test_tp_btf(int cgroup_fd) if (!ASSERT_OK(err, "map_delete_elem")) goto out; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); err = cgrp_ls_tp_btf__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; - syscall(SYS_gettid); - syscall(SYS_gettid); + sys_gettid(); + sys_gettid(); skel->bss->target_pid = 0; @@ -154,7 +154,7 @@ static void test_recursion(int cgroup_fd) goto out; /* trigger sys_enter, make sure it does not cause deadlock */ - syscall(SYS_gettid); + sys_gettid(); out: cgrp_ls_recursion__destroy(skel); @@ -224,7 +224,7 @@ static void test_yes_rcu_lock(__u64 cgroup_id) return; CGROUP_MODE_SET(skel); - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); err = cgrp_ls_sleepable__load(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 26019313e1fc..1c682550e0e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -1010,7 +1010,7 @@ static void run_core_reloc_tests(bool use_btfgen) struct data *data; void *mmap_data = NULL; - my_pid_tgid = getpid() | ((uint64_t)syscall(SYS_gettid) << 32); + my_pid_tgid = getpid() | ((uint64_t)sys_gettid() << 32); for (i = 0; i < ARRAY_SIZE(test_cases); i++) { char btf_file[] = "/tmp/core_reloc.btf.XXXXXX"; diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index 2570bd4b0cb2..e58a04654238 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -23,6 +23,7 @@ static const char * const cpumask_success_testcases[] = { "test_global_mask_array_l2_rcu", "test_global_mask_nested_rcu", "test_global_mask_nested_deep_rcu", + "test_global_mask_nested_deep_array_rcu", "test_cpumask_weight", }; diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index f3932941bbaa..d50cbd8040d4 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -67,8 +67,9 @@ again: ASSERT_EQ(info.perf_event.kprobe.cookie, PERF_EVENT_COOKIE, "kprobe_cookie"); + ASSERT_EQ(info.perf_event.kprobe.name_len, strlen(KPROBE_FUNC) + 1, + "name_len"); if (!info.perf_event.kprobe.func_name) { - ASSERT_EQ(info.perf_event.kprobe.name_len, 0, "name_len"); info.perf_event.kprobe.func_name = ptr_to_u64(&buf); info.perf_event.kprobe.name_len = sizeof(buf); goto again; @@ -79,8 +80,9 @@ again: ASSERT_EQ(err, 0, "cmp_kprobe_func_name"); break; case BPF_PERF_EVENT_TRACEPOINT: + ASSERT_EQ(info.perf_event.tracepoint.name_len, strlen(TP_NAME) + 1, + "name_len"); if (!info.perf_event.tracepoint.tp_name) { - ASSERT_EQ(info.perf_event.tracepoint.name_len, 0, "name_len"); info.perf_event.tracepoint.tp_name = ptr_to_u64(&buf); info.perf_event.tracepoint.name_len = sizeof(buf); goto again; @@ -96,8 +98,9 @@ again: case BPF_PERF_EVENT_URETPROBE: ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset"); + ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) + 1, + "name_len"); if (!info.perf_event.uprobe.file_name) { - ASSERT_EQ(info.perf_event.uprobe.name_len, 0, "name_len"); info.perf_event.uprobe.file_name = ptr_to_u64(&buf); info.perf_event.uprobe.name_len = sizeof(buf); goto again; @@ -417,6 +420,15 @@ verify_umulti_link_info(int fd, bool retprobe, __u64 *offsets, if (!ASSERT_NEQ(err, -1, "readlink")) return -1; + memset(&info, 0, sizeof(info)); + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "bpf_link_get_info_by_fd")) + return -1; + + ASSERT_EQ(info.uprobe_multi.count, 3, "info.uprobe_multi.count"); + ASSERT_EQ(info.uprobe_multi.path_size, strlen(path) + 1, + "info.uprobe_multi.path_size"); + for (bit = 0; bit < 8; bit++) { memset(&info, 0, sizeof(info)); info.uprobe_multi.path = ptr_to_u64(path_buf); diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c index 89ff23c4a8bc..3cea71f9c500 100644 --- a/tools/testing/selftests/bpf/prog_tests/iters.c +++ b/tools/testing/selftests/bpf/prog_tests/iters.c @@ -192,8 +192,8 @@ static void subtest_task_iters(void) syscall(SYS_getpgid); iters_task__detach(skel); ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt"); - ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt"); - ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt"); + ASSERT_EQ(skel->bss->threads_cnt, thread_num + 2, "threads_cnt"); + ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 2, "proc_threads_cnt"); ASSERT_EQ(skel->bss->invalid_cnt, 0, "invalid_cnt"); pthread_mutex_unlock(&do_nothing_mutex); for (int i = 0; i < thread_num; i++) diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_module_order.c b/tools/testing/selftests/bpf/prog_tests/kfunc_module_order.c new file mode 100644 index 000000000000..48c0560d398e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kfunc_module_order.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <testing_helpers.h> + +#include "kfunc_module_order.skel.h" + +static int test_run_prog(const struct bpf_program *prog, + struct bpf_test_run_opts *opts) +{ + int err; + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), opts); + if (!ASSERT_OK(err, "bpf_prog_test_run_opts")) + return err; + + if (!ASSERT_EQ((int)opts->retval, 0, bpf_program__name(prog))) + return -EINVAL; + + return 0; +} + +void test_kfunc_module_order(void) +{ + struct kfunc_module_order *skel; + char pkt_data[64] = {}; + int err = 0; + + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, test_opts, .data_in = pkt_data, + .data_size_in = sizeof(pkt_data)); + + err = load_module("bpf_test_modorder_x.ko", + env_verbosity > VERBOSE_NONE); + if (!ASSERT_OK(err, "load bpf_test_modorder_x.ko")) + return; + + err = load_module("bpf_test_modorder_y.ko", + env_verbosity > VERBOSE_NONE); + if (!ASSERT_OK(err, "load bpf_test_modorder_y.ko")) + goto exit_modx; + + skel = kfunc_module_order__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kfunc_module_order__open_and_load()")) { + err = -EINVAL; + goto exit_mods; + } + + test_run_prog(skel->progs.call_kfunc_xy, &test_opts); + test_run_prog(skel->progs.call_kfunc_yx, &test_opts); + + kfunc_module_order__destroy(skel); +exit_mods: + unload_module("bpf_test_modorder_y", env_verbosity > VERBOSE_NONE); +exit_modx: + unload_module("bpf_test_modorder_x", env_verbosity > VERBOSE_NONE); +} diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c new file mode 100644 index 000000000000..8e13a3416a21 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c @@ -0,0 +1,126 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Google */ + +#include <test_progs.h> +#include <bpf/libbpf.h> +#include <bpf/btf.h> +#include "kmem_cache_iter.skel.h" + +#define SLAB_NAME_MAX 32 + +struct kmem_cache_result { + char name[SLAB_NAME_MAX]; + long obj_size; +}; + +static void subtest_kmem_cache_iter_check_task_struct(struct kmem_cache_iter *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .flags = 0, /* Run it with the current task */ + ); + int prog_fd = bpf_program__fd(skel->progs.check_task_struct); + + /* Get task_struct and check it if's from a slab cache */ + ASSERT_OK(bpf_prog_test_run_opts(prog_fd, &opts), "prog_test_run"); + + /* The BPF program should set 'found' variable */ + ASSERT_EQ(skel->bss->task_struct_found, 1, "task_struct_found"); +} + +static void subtest_kmem_cache_iter_check_slabinfo(struct kmem_cache_iter *skel) +{ + FILE *fp; + int map_fd; + char name[SLAB_NAME_MAX]; + unsigned long objsize; + char rest_of_line[1000]; + struct kmem_cache_result r; + int seen = 0; + + fp = fopen("/proc/slabinfo", "r"); + if (fp == NULL) { + /* CONFIG_SLUB_DEBUG is not enabled */ + return; + } + + map_fd = bpf_map__fd(skel->maps.slab_result); + + /* Ignore first two lines for header */ + fscanf(fp, "slabinfo - version: %*d.%*d\n"); + fscanf(fp, "# %*s %*s %*s %*s %*s %*s : %[^\n]\n", rest_of_line); + + /* Compare name and objsize only - others can be changes frequently */ + while (fscanf(fp, "%s %*u %*u %lu %*u %*u : %[^\n]\n", + name, &objsize, rest_of_line) == 3) { + int ret = bpf_map_lookup_elem(map_fd, &seen, &r); + + if (!ASSERT_OK(ret, "kmem_cache_lookup")) + break; + + ASSERT_STREQ(r.name, name, "kmem_cache_name"); + ASSERT_EQ(r.obj_size, objsize, "kmem_cache_objsize"); + + seen++; + } + + ASSERT_EQ(skel->bss->kmem_cache_seen, seen, "kmem_cache_seen_eq"); + + fclose(fp); +} + +static void subtest_kmem_cache_iter_open_coded(struct kmem_cache_iter *skel) +{ + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err, fd; + + /* No need to attach it, just run it directly */ + fd = bpf_program__fd(skel->progs.open_coded_iter); + + err = bpf_prog_test_run_opts(fd, &topts); + if (!ASSERT_OK(err, "test_run_opts err")) + return; + if (!ASSERT_OK(topts.retval, "test_run_opts retval")) + return; + + /* It should be same as we've seen from the explicit iterator */ + ASSERT_EQ(skel->bss->open_coded_seen, skel->bss->kmem_cache_seen, "open_code_seen_eq"); +} + +void test_kmem_cache_iter(void) +{ + struct kmem_cache_iter *skel = NULL; + char buf[256]; + int iter_fd; + + skel = kmem_cache_iter__open_and_load(); + if (!ASSERT_OK_PTR(skel, "kmem_cache_iter__open_and_load")) + return; + + if (!ASSERT_OK(kmem_cache_iter__attach(skel), "skel_attach")) + goto destroy; + + iter_fd = bpf_iter_create(bpf_link__fd(skel->links.slab_info_collector)); + if (!ASSERT_GE(iter_fd, 0, "iter_create")) + goto destroy; + + memset(buf, 0, sizeof(buf)); + while (read(iter_fd, buf, sizeof(buf) > 0)) { + /* Read out all contents */ + printf("%s", buf); + } + + /* Next reads should return 0 */ + ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read"); + + if (test__start_subtest("check_task_struct")) + subtest_kmem_cache_iter_check_task_struct(skel); + if (test__start_subtest("check_slabinfo")) + subtest_kmem_cache_iter_check_slabinfo(skel); + if (test__start_subtest("open_coded_iter")) + subtest_kmem_cache_iter_open_coded(skel); + + close(iter_fd); + +destroy: + kmem_cache_iter__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 960c9323d1e0..66ab1cae923e 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -6,6 +6,7 @@ #include "kprobe_multi_override.skel.h" #include "kprobe_multi_session.skel.h" #include "kprobe_multi_session_cookie.skel.h" +#include "kprobe_multi_verifier.skel.h" #include "bpf/libbpf_internal.h" #include "bpf/hashmap.h" @@ -764,4 +765,5 @@ void test_kprobe_multi_test(void) test_session_skel_api(); if (test__start_subtest("session_cookie")) test_session_cookie_skel_api(); + RUN_TESTS(kprobe_multi_verifier); } diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c index cad664546912..fa639b021f7e 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c @@ -20,7 +20,7 @@ void test_linked_funcs(void) bpf_program__set_autoload(skel->progs.handler1, true); bpf_program__set_autoload(skel->progs.handler2, true); - skel->rodata->my_tid = syscall(SYS_gettid); + skel->rodata->my_tid = sys_gettid(); skel->bss->syscall_id = SYS_getpgid; err = linked_funcs__load(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c index 27676a04d0b6..169ce689b97c 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_buf.c +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -169,7 +169,6 @@ static void bpf_prog_load_log_buf(void) ASSERT_GE(fd, 0, "good_fd1"); if (fd >= 0) close(fd); - fd = -1; /* log_level == 2 should always fill log_buf, even for good prog */ log_buf[0] = '\0'; @@ -180,7 +179,6 @@ static void bpf_prog_load_log_buf(void) ASSERT_GE(fd, 0, "good_fd2"); if (fd >= 0) close(fd); - fd = -1; /* log_level == 0 should fill log_buf for bad prog */ log_buf[0] = '\0'; @@ -191,7 +189,6 @@ static void bpf_prog_load_log_buf(void) ASSERT_LT(fd, 0, "bad_fd"); if (fd >= 0) close(fd); - fd = -1; free(log_buf); } diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c index 130a3b21e467..6df25de8f080 100644 --- a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c +++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c @@ -10,10 +10,6 @@ #include "cgroup_helpers.h" #include "network_helpers.h" -#ifndef ENOTSUPP -#define ENOTSUPP 524 -#endif - static struct btf *btf; static __u32 query_prog_cnt(int cgroup_fd, const char *attach_func) diff --git a/tools/testing/selftests/bpf/prog_tests/map_in_map.c b/tools/testing/selftests/bpf/prog_tests/map_in_map.c index d2a10eb4e5b5..286a9fb469e2 100644 --- a/tools/testing/selftests/bpf/prog_tests/map_in_map.c +++ b/tools/testing/selftests/bpf/prog_tests/map_in_map.c @@ -5,7 +5,9 @@ #include <sys/syscall.h> #include <test_progs.h> #include <bpf/btf.h> + #include "access_map_in_map.skel.h" +#include "update_map_in_htab.skel.h" struct thread_ctx { pthread_barrier_t barrier; @@ -127,6 +129,131 @@ out: access_map_in_map__destroy(skel); } +static void add_del_fd_htab(int outer_fd) +{ + int inner_fd, err; + int key = 1; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL); + if (!ASSERT_OK_FD(inner_fd, "inner1")) + return; + err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_NOEXIST); + close(inner_fd); + if (!ASSERT_OK(err, "add")) + return; + + /* Delete */ + err = bpf_map_delete_elem(outer_fd, &key); + ASSERT_OK(err, "del"); +} + +static void overwrite_fd_htab(int outer_fd) +{ + int inner_fd, err; + int key = 1; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL); + if (!ASSERT_OK_FD(inner_fd, "inner1")) + return; + err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_NOEXIST); + close(inner_fd); + if (!ASSERT_OK(err, "add")) + return; + + /* Overwrite */ + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr2", 4, 4, 1, NULL); + if (!ASSERT_OK_FD(inner_fd, "inner2")) + goto out; + err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_EXIST); + close(inner_fd); + if (!ASSERT_OK(err, "overwrite")) + goto out; + + err = bpf_map_delete_elem(outer_fd, &key); + ASSERT_OK(err, "del"); + return; +out: + bpf_map_delete_elem(outer_fd, &key); +} + +static void lookup_delete_fd_htab(int outer_fd) +{ + int key = 1, value; + int inner_fd, err; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL); + if (!ASSERT_OK_FD(inner_fd, "inner1")) + return; + err = bpf_map_update_elem(outer_fd, &key, &inner_fd, BPF_NOEXIST); + close(inner_fd); + if (!ASSERT_OK(err, "add")) + return; + + /* lookup_and_delete is not supported for htab of maps */ + err = bpf_map_lookup_and_delete_elem(outer_fd, &key, &value); + ASSERT_EQ(err, -ENOTSUPP, "lookup_del"); + + err = bpf_map_delete_elem(outer_fd, &key); + ASSERT_OK(err, "del"); +} + +static void batched_lookup_delete_fd_htab(int outer_fd) +{ + int keys[2] = {1, 2}, values[2]; + unsigned int cnt, batch; + int inner_fd, err; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr1", 4, 4, 1, NULL); + if (!ASSERT_OK_FD(inner_fd, "inner1")) + return; + + err = bpf_map_update_elem(outer_fd, &keys[0], &inner_fd, BPF_NOEXIST); + close(inner_fd); + if (!ASSERT_OK(err, "add1")) + return; + + inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "arr2", 4, 4, 1, NULL); + if (!ASSERT_OK_FD(inner_fd, "inner2")) + goto out; + err = bpf_map_update_elem(outer_fd, &keys[1], &inner_fd, BPF_NOEXIST); + close(inner_fd); + if (!ASSERT_OK(err, "add2")) + goto out; + + /* batched lookup_and_delete */ + cnt = ARRAY_SIZE(keys); + err = bpf_map_lookup_and_delete_batch(outer_fd, NULL, &batch, keys, values, &cnt, NULL); + ASSERT_TRUE((!err || err == -ENOENT), "delete_batch ret"); + ASSERT_EQ(cnt, ARRAY_SIZE(keys), "delete_batch cnt"); + +out: + bpf_map_delete_elem(outer_fd, &keys[0]); +} + +static void test_update_map_in_htab(bool preallocate) +{ + struct update_map_in_htab *skel; + int err, fd; + + skel = update_map_in_htab__open(); + if (!ASSERT_OK_PTR(skel, "open")) + return; + + err = update_map_in_htab__load(skel); + if (!ASSERT_OK(err, "load")) + goto out; + + fd = preallocate ? bpf_map__fd(skel->maps.outer_htab_map) : + bpf_map__fd(skel->maps.outer_alloc_htab_map); + + add_del_fd_htab(fd); + overwrite_fd_htab(fd); + lookup_delete_fd_htab(fd); + batched_lookup_delete_fd_htab(fd); +out: + update_map_in_htab__destroy(skel); +} + void test_map_in_map(void) { if (test__start_subtest("acc_map_in_array")) @@ -137,5 +264,8 @@ void test_map_in_map(void) test_map_in_map_access("access_map_in_htab", "outer_htab_map"); if (test__start_subtest("sleepable_acc_map_in_htab")) test_map_in_map_access("sleepable_access_map_in_htab", "outer_htab_map"); + if (test__start_subtest("update_map_in_htab")) + test_update_map_in_htab(true); + if (test__start_subtest("update_map_in_alloc_htab")) + test_update_map_in_htab(false); } - diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index d2ca32fa3b21..f8eb7f9d4fd2 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -5,12 +5,17 @@ #include <linux/const.h> #include <netinet/in.h> #include <test_progs.h> +#include <unistd.h> #include "cgroup_helpers.h" #include "network_helpers.h" #include "mptcp_sock.skel.h" #include "mptcpify.skel.h" +#include "mptcp_subflow.skel.h" #define NS_TEST "mptcp_ns" +#define ADDR_1 "10.0.1.1" +#define ADDR_2 "10.0.1.2" +#define PORT_1 10001 #ifndef IPPROTO_MPTCP #define IPPROTO_MPTCP 262 @@ -64,24 +69,6 @@ struct mptcp_storage { char ca_name[TCP_CA_NAME_MAX]; }; -static struct nstoken *create_netns(void) -{ - SYS(fail, "ip netns add %s", NS_TEST); - SYS(fail, "ip -net %s link set dev lo up", NS_TEST); - - return open_netns(NS_TEST); -fail: - return NULL; -} - -static void cleanup_netns(struct nstoken *nstoken) -{ - if (nstoken) - close_netns(nstoken); - - SYS_NOFAIL("ip netns del %s", NS_TEST); -} - static int start_mptcp_server(int family, const char *addr_str, __u16 port, int timeout_ms) { @@ -201,15 +188,15 @@ out: static void test_base(void) { - struct nstoken *nstoken = NULL; + struct netns_obj *netns = NULL; int server_fd, cgroup_fd; cgroup_fd = test__join_cgroup("/mptcp"); if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return; - nstoken = create_netns(); - if (!ASSERT_OK_PTR(nstoken, "create_netns")) + netns = netns_new(NS_TEST, true); + if (!ASSERT_OK_PTR(netns, "netns_new")) goto fail; /* without MPTCP */ @@ -232,7 +219,7 @@ with_mptcp: close(server_fd); fail: - cleanup_netns(nstoken); + netns_free(netns); close(cgroup_fd); } @@ -317,21 +304,135 @@ out: static void test_mptcpify(void) { - struct nstoken *nstoken = NULL; + struct netns_obj *netns = NULL; int cgroup_fd; cgroup_fd = test__join_cgroup("/mptcpify"); if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup")) return; - nstoken = create_netns(); - if (!ASSERT_OK_PTR(nstoken, "create_netns")) + netns = netns_new(NS_TEST, true); + if (!ASSERT_OK_PTR(netns, "netns_new")) goto fail; ASSERT_OK(run_mptcpify(cgroup_fd), "run_mptcpify"); fail: - cleanup_netns(nstoken); + netns_free(netns); + close(cgroup_fd); +} + +static int endpoint_init(char *flags) +{ + SYS(fail, "ip -net %s link add veth1 type veth peer name veth2", NS_TEST); + SYS(fail, "ip -net %s addr add %s/24 dev veth1", NS_TEST, ADDR_1); + SYS(fail, "ip -net %s link set dev veth1 up", NS_TEST); + SYS(fail, "ip -net %s addr add %s/24 dev veth2", NS_TEST, ADDR_2); + SYS(fail, "ip -net %s link set dev veth2 up", NS_TEST); + if (SYS_NOFAIL("ip -net %s mptcp endpoint add %s %s", NS_TEST, ADDR_2, flags)) { + printf("'ip mptcp' not supported, skip this test.\n"); + test__skip(); + goto fail; + } + + return 0; +fail: + return -1; +} + +static void wait_for_new_subflows(int fd) +{ + socklen_t len; + u8 subflows; + int err, i; + + len = sizeof(subflows); + /* Wait max 5 sec for new subflows to be created */ + for (i = 0; i < 50; i++) { + err = getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &subflows, &len); + if (!err && subflows > 0) + break; + + usleep(100000); /* 0.1s */ + } +} + +static void run_subflow(void) +{ + int server_fd, client_fd, err; + char new[TCP_CA_NAME_MAX]; + char cc[TCP_CA_NAME_MAX]; + unsigned int mark; + socklen_t len; + + server_fd = start_mptcp_server(AF_INET, ADDR_1, PORT_1, 0); + if (!ASSERT_OK_FD(server_fd, "start_mptcp_server")) + return; + + client_fd = connect_to_fd(server_fd, 0); + if (!ASSERT_OK_FD(client_fd, "connect_to_fd")) + goto close_server; + + send_byte(client_fd); + wait_for_new_subflows(client_fd); + + len = sizeof(mark); + err = getsockopt(client_fd, SOL_SOCKET, SO_MARK, &mark, &len); + if (ASSERT_OK(err, "getsockopt(client_fd, SO_MARK)")) + ASSERT_EQ(mark, 0, "mark"); + + len = sizeof(new); + err = getsockopt(client_fd, SOL_TCP, TCP_CONGESTION, new, &len); + if (ASSERT_OK(err, "getsockopt(client_fd, TCP_CONGESTION)")) { + get_msk_ca_name(cc); + ASSERT_STREQ(new, cc, "cc"); + } + + close(client_fd); +close_server: + close(server_fd); +} + +static void test_subflow(void) +{ + struct mptcp_subflow *skel; + struct netns_obj *netns; + int cgroup_fd; + + cgroup_fd = test__join_cgroup("/mptcp_subflow"); + if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_subflow")) + return; + + skel = mptcp_subflow__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_subflow")) + goto close_cgroup; + + skel->bss->pid = getpid(); + + skel->links.mptcp_subflow = + bpf_program__attach_cgroup(skel->progs.mptcp_subflow, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.mptcp_subflow, "attach mptcp_subflow")) + goto skel_destroy; + + skel->links._getsockopt_subflow = + bpf_program__attach_cgroup(skel->progs._getsockopt_subflow, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links._getsockopt_subflow, "attach _getsockopt_subflow")) + goto skel_destroy; + + netns = netns_new(NS_TEST, true); + if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_subflow")) + goto skel_destroy; + + if (endpoint_init("subflow") < 0) + goto close_netns; + + run_subflow(); + +close_netns: + netns_free(netns); +skel_destroy: + mptcp_subflow__destroy(skel); +close_cgroup: close(cgroup_fd); } @@ -341,4 +442,6 @@ void test_mptcp(void) test_base(); if (test__start_subtest("mptcpify")) test_mptcpify(); + if (test__start_subtest("subflow")) + test_subflow(); } diff --git a/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c b/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c index 4297a2a4cb11..2f52fa2641ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c @@ -26,10 +26,43 @@ static const struct nf_link_test nf_hook_link_tests[] = { { .pf = NFPROTO_INET, .priority = 1, .name = "invalid-inet-not-supported", }, - { .pf = NFPROTO_IPV4, .priority = -10000, .expect_success = true, .name = "attach ipv4", }, - { .pf = NFPROTO_IPV6, .priority = 10001, .expect_success = true, .name = "attach ipv6", }, + { + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = -10000, + .flags = 0, + .expect_success = true, + .name = "attach ipv4", + }, + { + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_FORWARD, + .priority = 10001, + .flags = BPF_F_NETFILTER_IP_DEFRAG, + .expect_success = true, + .name = "attach ipv6", + }, }; +static void verify_netfilter_link_info(struct bpf_link *link, const struct nf_link_test nf_expected) +{ + struct bpf_link_info info; + __u32 len = sizeof(info); + int err, fd; + + memset(&info, 0, len); + + fd = bpf_link__fd(link); + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_OK(err, "get_link_info"); + + ASSERT_EQ(info.type, BPF_LINK_TYPE_NETFILTER, "info link type"); + ASSERT_EQ(info.netfilter.pf, nf_expected.pf, "info nf protocol family"); + ASSERT_EQ(info.netfilter.hooknum, nf_expected.hooknum, "info nf hooknum"); + ASSERT_EQ(info.netfilter.priority, nf_expected.priority, "info nf priority"); + ASSERT_EQ(info.netfilter.flags, nf_expected.flags, "info nf flags"); +} + void test_netfilter_link_attach(void) { struct test_netfilter_link_attach *skel; @@ -64,6 +97,8 @@ void test_netfilter_link_attach(void) if (!ASSERT_OK_PTR(link, "program attach successful")) continue; + verify_netfilter_link_info(link, nf_hook_link_tests[i]); + link2 = bpf_program__attach_netfilter(prog, &opts); ASSERT_ERR_PTR(link2, "attach program with same pf/hook/priority"); @@ -73,6 +108,9 @@ void test_netfilter_link_attach(void) link2 = bpf_program__attach_netfilter(prog, &opts); if (!ASSERT_OK_PTR(link2, "program reattach successful")) continue; + + verify_netfilter_link_info(link2, nf_hook_link_tests[i]); + if (!ASSERT_OK(bpf_link__destroy(link2), "link destroy")) break; } else { diff --git a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c index 71d8f3ba7d6b..ac3c3c097c0e 100644 --- a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c @@ -8,12 +8,16 @@ #define SO_NETNS_COOKIE 71 #endif +#define loopback 1 + static int duration; void test_netns_cookie(void) { + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); int server_fd = -1, client_fd = -1, cgroup_fd = -1; - int err, val, ret, map, verdict; + int err, val, ret, map, verdict, tc_fd; struct netns_cookie_prog *skel; uint64_t cookie_expected_value; socklen_t vallen = sizeof(cookie_expected_value); @@ -38,36 +42,47 @@ void test_netns_cookie(void) if (!ASSERT_OK(err, "prog_attach")) goto done; + tc_fd = bpf_program__fd(skel->progs.get_netns_cookie_tcx); + err = bpf_prog_attach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &opta); + if (!ASSERT_OK(err, "prog_attach")) + goto done; + server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno)) - goto done; + goto cleanup_tc; client_fd = connect_to_fd(server_fd, 0); if (CHECK(client_fd < 0, "connect_to_fd", "errno %d\n", errno)) - goto done; + goto cleanup_tc; ret = send(client_fd, send_msg, sizeof(send_msg), 0); if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n", ret)) - goto done; + goto cleanup_tc; err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sockops_netns_cookies), &client_fd, &val); if (!ASSERT_OK(err, "map_lookup(sockops_netns_cookies)")) - goto done; + goto cleanup_tc; err = getsockopt(client_fd, SOL_SOCKET, SO_NETNS_COOKIE, &cookie_expected_value, &vallen); if (!ASSERT_OK(err, "getsockopt")) - goto done; + goto cleanup_tc; ASSERT_EQ(val, cookie_expected_value, "cookie_value"); err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies), &client_fd, &val); if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)")) - goto done; + goto cleanup_tc; ASSERT_EQ(val, cookie_expected_value, "cookie_value"); + ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value"); + ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value"); + +cleanup_tc: + err = bpf_prog_detach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &optd); + ASSERT_OK(err, "prog_detach"); done: if (server_fd != -1) diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c index c29787e092d6..761ce24bce38 100644 --- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c +++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c @@ -23,7 +23,7 @@ static int get_pid_tgid(pid_t *pid, pid_t *tgid, struct stat st; int err; - *pid = syscall(SYS_gettid); + *pid = sys_gettid(); *tgid = getpid(); err = stat("/proc/self/ns/pid", &st); diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c index 3a25f1c743a1..d940ff87fa08 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_link.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c @@ -4,8 +4,12 @@ #include <pthread.h> #include <sched.h> #include <test_progs.h> +#include "testing_helpers.h" #include "test_perf_link.skel.h" +#define BURN_TIMEOUT_MS 100 +#define BURN_TIMEOUT_NS BURN_TIMEOUT_MS * 1000000 + static void burn_cpu(void) { volatile int j = 0; @@ -32,6 +36,7 @@ void serial_test_perf_link(void) int run_cnt_before, run_cnt_after; struct bpf_link_info info; __u32 info_len = sizeof(info); + __u64 timeout_time_ns; /* create perf event */ memset(&attr, 0, sizeof(attr)); @@ -63,8 +68,14 @@ void serial_test_perf_link(void) ASSERT_GT(info.prog_id, 0, "link_prog_id"); /* ensure we get at least one perf_event prog execution */ - burn_cpu(); - ASSERT_GT(skel->bss->run_cnt, 0, "run_cnt"); + timeout_time_ns = get_time_ns() + BURN_TIMEOUT_NS; + while (true) { + burn_cpu(); + if (skel->bss->run_cnt > 0) + break; + if (!ASSERT_LT(get_time_ns(), timeout_time_ns, "run_cnt_timeout")) + break; + } /* perf_event is still active, but we close link and BPF program * shouldn't be executed anymore diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c new file mode 100644 index 000000000000..6fa19449297e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <test_progs.h> +#include "raw_tp_null.skel.h" + +void test_raw_tp_null(void) +{ + struct raw_tp_null *skel; + + skel = raw_tp_null__open_and_load(); + if (!ASSERT_OK_PTR(skel, "raw_tp_null__open_and_load")) + return; + + skel->bss->tid = sys_gettid(); + + if (!ASSERT_OK(raw_tp_null__attach(skel), "raw_tp_null__attach")) + goto end; + + ASSERT_OK(trigger_module_test_read(2), "trigger testmod read"); + ASSERT_EQ(skel->bss->i, 3, "invocations"); + +end: + raw_tp_null__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c index a1f7e7378a64..ebe0c12b5536 100644 --- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c @@ -21,7 +21,7 @@ static void test_success(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); bpf_program__set_autoload(skel->progs.get_cgroup_id, true); bpf_program__set_autoload(skel->progs.task_succ, true); @@ -58,7 +58,7 @@ static void test_rcuptr_acquire(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); bpf_program__set_autoload(skel->progs.task_acquire, true); err = rcu_read_lock__load(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c index 6cc69900b310..1702aa592c2c 100644 --- a/tools/testing/selftests/bpf/prog_tests/send_signal.c +++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c @@ -3,22 +3,32 @@ #include <sys/time.h> #include <sys/resource.h> #include "test_send_signal_kern.skel.h" +#include "io_helpers.h" static int sigusr1_received; static void sigusr1_handler(int signum) { - sigusr1_received = 1; + sigusr1_received = 8; +} + +static void sigusr1_siginfo_handler(int s, siginfo_t *i, void *v) +{ + sigusr1_received = (int)(long long)i->si_value.sival_ptr; } static void test_send_signal_common(struct perf_event_attr *attr, - bool signal_thread) + bool signal_thread, bool remote) { struct test_send_signal_kern *skel; + struct sigaction sa; int pipe_c2p[2], pipe_p2c[2]; int err = -1, pmu_fd = -1; + volatile int j = 0; + int retry_count; char buf[256]; pid_t pid; + int old_prio; if (!ASSERT_OK(pipe(pipe_c2p), "pipe_c2p")) return; @@ -39,11 +49,14 @@ static void test_send_signal_common(struct perf_event_attr *attr, } if (pid == 0) { - int old_prio; - volatile int j = 0; - /* install signal handler and notify parent */ - ASSERT_NEQ(signal(SIGUSR1, sigusr1_handler), SIG_ERR, "signal"); + if (remote) { + sa.sa_sigaction = sigusr1_siginfo_handler; + sa.sa_flags = SA_RESTART | SA_SIGINFO; + ASSERT_NEQ(sigaction(SIGUSR1, &sa, NULL), -1, "sigaction"); + } else { + ASSERT_NEQ(signal(SIGUSR1, sigusr1_handler), SIG_ERR, "signal"); + } close(pipe_c2p[0]); /* close read */ close(pipe_p2c[1]); /* close write */ @@ -52,10 +65,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, * that if an interrupt happens, the underlying task * is this process. */ - errno = 0; - old_prio = getpriority(PRIO_PROCESS, 0); - ASSERT_OK(errno, "getpriority"); - ASSERT_OK(setpriority(PRIO_PROCESS, 0, -20), "setpriority"); + if (!remote) { + errno = 0; + old_prio = getpriority(PRIO_PROCESS, 0); + ASSERT_OK(errno, "getpriority"); + ASSERT_OK(setpriority(PRIO_PROCESS, 0, -20), "setpriority"); + } /* notify parent signal handler is installed */ ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write"); @@ -66,20 +81,25 @@ static void test_send_signal_common(struct perf_event_attr *attr, /* wait a little for signal handler */ for (int i = 0; i < 1000000000 && !sigusr1_received; i++) { j /= i + j + 1; - if (!attr) - /* trigger the nanosleep tracepoint program. */ - usleep(1); + if (remote) + sleep(1); + else + if (!attr) + /* trigger the nanosleep tracepoint program. */ + usleep(1); } - buf[0] = sigusr1_received ? '2' : '0'; - ASSERT_EQ(sigusr1_received, 1, "sigusr1_received"); + buf[0] = sigusr1_received; + + ASSERT_EQ(sigusr1_received, 8, "sigusr1_received"); ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write"); /* wait for parent notification and exit */ ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read"); /* restore the old priority */ - ASSERT_OK(setpriority(PRIO_PROCESS, 0, old_prio), "setpriority"); + if (!remote) + ASSERT_OK(setpriority(PRIO_PROCESS, 0, old_prio), "setpriority"); close(pipe_c2p[1]); close(pipe_p2c[0]); @@ -93,6 +113,17 @@ static void test_send_signal_common(struct perf_event_attr *attr, if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) goto skel_open_load_failure; + /* boost with a high priority so we got a higher chance + * that if an interrupt happens, the underlying task + * is this process. + */ + if (remote) { + errno = 0; + old_prio = getpriority(PRIO_PROCESS, 0); + ASSERT_OK(errno, "getpriority"); + ASSERT_OK(setpriority(PRIO_PROCESS, 0, -20), "setpriority"); + } + if (!attr) { err = test_send_signal_kern__attach(skel); if (!ASSERT_OK(err, "skel_attach")) { @@ -100,8 +131,12 @@ static void test_send_signal_common(struct perf_event_attr *attr, goto destroy_skel; } } else { - pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1 /* cpu */, - -1 /* group id */, 0 /* flags */); + if (!remote) + pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1 /* cpu */, + -1 /* group id */, 0 /* flags */); + else + pmu_fd = syscall(__NR_perf_event_open, attr, getpid(), -1 /* cpu */, + -1 /* group id */, 0 /* flags */); if (!ASSERT_GE(pmu_fd, 0, "perf_event_open")) { err = -1; goto destroy_skel; @@ -119,13 +154,36 @@ static void test_send_signal_common(struct perf_event_attr *attr, /* trigger the bpf send_signal */ skel->bss->signal_thread = signal_thread; skel->bss->sig = SIGUSR1; - skel->bss->pid = pid; + if (!remote) { + skel->bss->target_pid = 0; + skel->bss->pid = pid; + } else { + skel->bss->target_pid = pid; + skel->bss->pid = getpid(); + } /* notify child that bpf program can send_signal now */ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write"); - /* wait for result */ - err = read(pipe_c2p[0], buf, 1); + for (retry_count = 0;;) { + /* For the remote test, the BPF program is triggered from this + * process but the other process/thread is signaled. + */ + if (remote) { + if (!attr) { + for (int i = 0; i < 10; i++) + usleep(1); + } else { + for (int i = 0; i < 100000000; i++) + j /= i + 1; + } + } + /* wait for result */ + err = read_with_timeout(pipe_c2p[0], buf, 1, 100); + if (err == -EAGAIN && retry_count++ < 10000) + continue; + break; + } if (!ASSERT_GE(err, 0, "reading pipe")) goto disable_pmu; if (!ASSERT_GT(err, 0, "reading pipe error: size 0")) { @@ -133,7 +191,7 @@ static void test_send_signal_common(struct perf_event_attr *attr, goto disable_pmu; } - ASSERT_EQ(buf[0], '2', "incorrect result"); + ASSERT_EQ(buf[0], 8, "incorrect result"); /* notify child safe to exit */ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write"); @@ -142,18 +200,21 @@ disable_pmu: close(pmu_fd); destroy_skel: test_send_signal_kern__destroy(skel); + /* restore the old priority */ + if (remote) + ASSERT_OK(setpriority(PRIO_PROCESS, 0, old_prio), "setpriority"); skel_open_load_failure: close(pipe_c2p[0]); close(pipe_p2c[1]); wait(NULL); } -static void test_send_signal_tracepoint(bool signal_thread) +static void test_send_signal_tracepoint(bool signal_thread, bool remote) { - test_send_signal_common(NULL, signal_thread); + test_send_signal_common(NULL, signal_thread, remote); } -static void test_send_signal_perf(bool signal_thread) +static void test_send_signal_perf(bool signal_thread, bool remote) { struct perf_event_attr attr = { .freq = 1, @@ -162,13 +223,14 @@ static void test_send_signal_perf(bool signal_thread) .config = PERF_COUNT_SW_CPU_CLOCK, }; - test_send_signal_common(&attr, signal_thread); + test_send_signal_common(&attr, signal_thread, remote); } -static void test_send_signal_nmi(bool signal_thread) +static void test_send_signal_nmi(bool signal_thread, bool remote) { struct perf_event_attr attr = { - .sample_period = 1, + .freq = 1, + .sample_freq = 1000, .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, }; @@ -191,21 +253,35 @@ static void test_send_signal_nmi(bool signal_thread) close(pmu_fd); } - test_send_signal_common(&attr, signal_thread); + test_send_signal_common(&attr, signal_thread, remote); } void test_send_signal(void) { if (test__start_subtest("send_signal_tracepoint")) - test_send_signal_tracepoint(false); + test_send_signal_tracepoint(false, false); if (test__start_subtest("send_signal_perf")) - test_send_signal_perf(false); + test_send_signal_perf(false, false); if (test__start_subtest("send_signal_nmi")) - test_send_signal_nmi(false); + test_send_signal_nmi(false, false); if (test__start_subtest("send_signal_tracepoint_thread")) - test_send_signal_tracepoint(true); + test_send_signal_tracepoint(true, false); if (test__start_subtest("send_signal_perf_thread")) - test_send_signal_perf(true); + test_send_signal_perf(true, false); if (test__start_subtest("send_signal_nmi_thread")) - test_send_signal_nmi(true); + test_send_signal_nmi(true, false); + + /* Signal remote thread and thread group */ + if (test__start_subtest("send_signal_tracepoint_remote")) + test_send_signal_tracepoint(false, true); + if (test__start_subtest("send_signal_perf_remote")) + test_send_signal_perf(false, true); + if (test__start_subtest("send_signal_nmi_remote")) + test_send_signal_nmi(false, true); + if (test__start_subtest("send_signal_tracepoint_thread_remote")) + test_send_signal_tracepoint(true, true); + if (test__start_subtest("send_signal_perf_thread_remote")) + test_send_signal_perf(true, true); + if (test__start_subtest("send_signal_nmi_thread_remote")) + test_send_signal_nmi(true, true); } diff --git a/tools/testing/selftests/bpf/prog_tests/sock_addr.c b/tools/testing/selftests/bpf/prog_tests/sock_addr.c index a6ee7f8d4f79..b2efabbed220 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_addr.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_addr.c @@ -23,10 +23,6 @@ #include "getpeername_unix_prog.skel.h" #include "network_helpers.h" -#ifndef ENOTSUPP -# define ENOTSUPP 524 -#endif - #define TEST_NS "sock_addr" #define TEST_IF_PREFIX "test_sock_addr" #define TEST_IPV4 "127.0.0.4" diff --git a/tools/testing/selftests/bpf/prog_tests/sock_create.c b/tools/testing/selftests/bpf/prog_tests/sock_create.c new file mode 100644 index 000000000000..187ffc5e60c4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_create.c @@ -0,0 +1,348 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <test_progs.h> +#include "cgroup_helpers.h" + +static char bpf_log_buf[4096]; +static bool verbose; + +enum sock_create_test_error { + OK = 0, + DENY_CREATE, +}; + +static struct sock_create_test { + const char *descr; + const struct bpf_insn insns[64]; + enum bpf_attach_type attach_type; + enum bpf_attach_type expected_attach_type; + + int domain; + int type; + int protocol; + + int optname; + int optval; + enum sock_create_test_error error; +} tests[] = { + { + .descr = "AF_INET set priority", + .insns = { + /* r3 = 123 (priority) */ + BPF_MOV64_IMM(BPF_REG_3, 123), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, priority)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET, + .type = SOCK_DGRAM, + + .optname = SO_PRIORITY, + .optval = 123, + }, + { + .descr = "AF_INET6 set priority", + .insns = { + /* r3 = 123 (priority) */ + BPF_MOV64_IMM(BPF_REG_3, 123), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, priority)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET6, + .type = SOCK_DGRAM, + + .optname = SO_PRIORITY, + .optval = 123, + }, + { + .descr = "AF_INET set mark", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* get uid of process */ + BPF_EMIT_CALL(BPF_FUNC_get_current_uid_gid), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffffffff), + + /* if uid is 0, use given mark(666), else use uid as the mark */ + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, 666), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, mark)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET, + .type = SOCK_DGRAM, + + .optname = SO_MARK, + .optval = 666, + }, + { + .descr = "AF_INET6 set mark", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + + /* get uid of process */ + BPF_EMIT_CALL(BPF_FUNC_get_current_uid_gid), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffffffff), + + /* if uid is 0, use given mark(666), else use uid as the mark */ + BPF_MOV64_REG(BPF_REG_3, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_MOV64_IMM(BPF_REG_3, 666), + + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, mark)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET6, + .type = SOCK_DGRAM, + + .optname = SO_MARK, + .optval = 666, + }, + { + .descr = "AF_INET bound to iface", + .insns = { + /* r3 = 1 (lo interface) */ + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, bound_dev_if)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET, + .type = SOCK_DGRAM, + + .optname = SO_BINDTOIFINDEX, + .optval = 1, + }, + { + .descr = "AF_INET6 bound to iface", + .insns = { + /* r3 = 1 (lo interface) */ + BPF_MOV64_IMM(BPF_REG_3, 1), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, + offsetof(struct bpf_sock, bound_dev_if)), + + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET6, + .type = SOCK_DGRAM, + + .optname = SO_BINDTOIFINDEX, + .optval = 1, + }, + { + .descr = "block AF_INET, SOCK_DGRAM, IPPROTO_ICMP socket", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ + + /* sock->family == AF_INET */ + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, + offsetof(struct bpf_sock, family)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, AF_INET, 5), + + /* sock->type == SOCK_DGRAM */ + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, + offsetof(struct bpf_sock, type)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, SOCK_DGRAM, 3), + + /* sock->protocol == IPPROTO_ICMP */ + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, + offsetof(struct bpf_sock, protocol)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, IPPROTO_ICMP, 1), + + /* return 0 (block) */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET, + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMP, + + .error = DENY_CREATE, + }, + { + .descr = "block AF_INET6, SOCK_DGRAM, IPPROTO_ICMPV6 socket", + .insns = { + BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ + + /* sock->family == AF_INET6 */ + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, + offsetof(struct bpf_sock, family)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, AF_INET6, 5), + + /* sock->type == SOCK_DGRAM */ + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, + offsetof(struct bpf_sock, type)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, SOCK_DGRAM, 3), + + /* sock->protocol == IPPROTO_ICMPV6 */ + BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, + offsetof(struct bpf_sock, protocol)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_2, IPPROTO_ICMPV6, 1), + + /* return 0 (block) */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET, + .type = SOCK_DGRAM, + .protocol = IPPROTO_ICMPV6, + + .error = DENY_CREATE, + }, + { + .descr = "load w/o expected_attach_type (compat mode)", + .insns = { + /* return 1 */ + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .expected_attach_type = 0, + .attach_type = BPF_CGROUP_INET_SOCK_CREATE, + + .domain = AF_INET, + .type = SOCK_STREAM, + }, +}; + +static int load_prog(const struct bpf_insn *insns, + enum bpf_attach_type expected_attach_type) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .expected_attach_type = expected_attach_type, + .log_level = 2, + .log_buf = bpf_log_buf, + .log_size = sizeof(bpf_log_buf), + ); + int fd, insns_cnt = 0; + + for (; + insns[insns_cnt].code != (BPF_JMP | BPF_EXIT); + insns_cnt++) { + } + insns_cnt++; + + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, + insns_cnt, &opts); + if (verbose && fd < 0) + fprintf(stderr, "%s\n", bpf_log_buf); + + return fd; +} + +static int run_test(int cgroup_fd, struct sock_create_test *test) +{ + int sock_fd, err, prog_fd, optval, ret = -1; + socklen_t optlen = sizeof(optval); + + prog_fd = load_prog(test->insns, test->expected_attach_type); + if (prog_fd < 0) { + log_err("Failed to load BPF program"); + return -1; + } + + err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0); + if (err < 0) { + log_err("Failed to attach BPF program"); + goto close_prog_fd; + } + + sock_fd = socket(test->domain, test->type, test->protocol); + if (sock_fd < 0) { + if (test->error == DENY_CREATE) + ret = 0; + else + log_err("Failed to create socket"); + + goto detach_prog; + } + + if (test->optname) { + err = getsockopt(sock_fd, SOL_SOCKET, test->optname, &optval, &optlen); + if (err) { + log_err("Failed to call getsockopt"); + goto cleanup; + } + + if (optval != test->optval) { + errno = 0; + log_err("getsockopt returned unexpected optval"); + goto cleanup; + } + } + + ret = test->error != OK; + +cleanup: + close(sock_fd); +detach_prog: + bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type); +close_prog_fd: + close(prog_fd); + return ret; +} + +void test_sock_create(void) +{ + int cgroup_fd, i; + + cgroup_fd = test__join_cgroup("/sock_create"); + if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup")) + return; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (!test__start_subtest(tests[i].descr)) + continue; + + ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr); + } + + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/prog_tests/sock_post_bind.c index 810c3740b2cc..788135c9c673 100644 --- a/tools/testing/selftests/bpf/test_sock.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_post_bind.c @@ -1,132 +1,35 @@ // SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2018 Facebook - -#include <stdio.h> -#include <unistd.h> - -#include <arpa/inet.h> -#include <sys/types.h> -#include <sys/socket.h> - -#include <linux/filter.h> - -#include <bpf/bpf.h> - +#include <linux/bpf.h> +#include <test_progs.h> #include "cgroup_helpers.h" -#include <bpf/bpf_endian.h> -#include "bpf_util.h" -#define CG_PATH "/foo" -#define MAX_INSNS 512 +#define TEST_NS "sock_post_bind" -char bpf_log_buf[BPF_LOG_BUF_SIZE]; -static bool verbose = false; +static char bpf_log_buf[4096]; -struct sock_test { - const char *descr; +static struct sock_post_bind_test { + const char *descr; /* BPF prog properties */ - struct bpf_insn insns[MAX_INSNS]; - enum bpf_attach_type expected_attach_type; - enum bpf_attach_type attach_type; + const struct bpf_insn insns[64]; + enum bpf_attach_type attach_type; + enum bpf_attach_type expected_attach_type; /* Socket properties */ - int domain; - int type; + int domain; + int type; /* Endpoint to bind() to */ const char *ip; unsigned short port; unsigned short port_retry; + /* Expected test result */ enum { - LOAD_REJECT, ATTACH_REJECT, BIND_REJECT, SUCCESS, RETRY_SUCCESS, RETRY_REJECT } result; -}; - -static struct sock_test tests[] = { - { - .descr = "bind4 load with invalid access: src_ip6", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock, src_ip6[0])), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, - .attach_type = BPF_CGROUP_INET4_POST_BIND, - .result = LOAD_REJECT, - }, - { - .descr = "bind4 load with invalid access: mark", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock, mark)), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .expected_attach_type = BPF_CGROUP_INET4_POST_BIND, - .attach_type = BPF_CGROUP_INET4_POST_BIND, - .result = LOAD_REJECT, - }, - { - .descr = "bind6 load with invalid access: src_ip4", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock, src_ip4)), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .expected_attach_type = BPF_CGROUP_INET6_POST_BIND, - .attach_type = BPF_CGROUP_INET6_POST_BIND, - .result = LOAD_REJECT, - }, - { - .descr = "sock_create load with invalid access: src_port", - .insns = { - BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), - BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6, - offsetof(struct bpf_sock, src_port)), - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .result = LOAD_REJECT, - }, - { - .descr = "sock_create load w/o expected_attach_type (compat mode)", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .expected_attach_type = 0, - .attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .domain = AF_INET, - .type = SOCK_STREAM, - .ip = "127.0.0.1", - .port = 8097, - .result = SUCCESS, - }, - { - .descr = "sock_create load w/ expected_attach_type", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 1), - BPF_EXIT_INSN(), - }, - .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .attach_type = BPF_CGROUP_INET_SOCK_CREATE, - .domain = AF_INET, - .type = SOCK_STREAM, - .ip = "127.0.0.1", - .port = 8097, - .result = SUCCESS, - }, +} tests[] = { { .descr = "attach type mismatch bind4 vs bind6", .insns = { @@ -374,40 +277,29 @@ static struct sock_test tests[] = { }, }; -static size_t probe_prog_length(const struct bpf_insn *fp) -{ - size_t len; - - for (len = MAX_INSNS - 1; len > 0; --len) - if (fp[len].code != 0 || fp[len].imm != 0) - break; - return len + 1; -} - -static int load_sock_prog(const struct bpf_insn *prog, - enum bpf_attach_type attach_type) +static int load_prog(const struct bpf_insn *insns, + enum bpf_attach_type expected_attach_type) { - LIBBPF_OPTS(bpf_prog_load_opts, opts); - int ret, insn_cnt; - - insn_cnt = probe_prog_length(prog); - - opts.expected_attach_type = attach_type; - opts.log_buf = bpf_log_buf; - opts.log_size = BPF_LOG_BUF_SIZE; - opts.log_level = 2; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .expected_attach_type = expected_attach_type, + .log_level = 2, + .log_buf = bpf_log_buf, + .log_size = sizeof(bpf_log_buf), + ); + int fd, insns_cnt = 0; + + for (; + insns[insns_cnt].code != (BPF_JMP | BPF_EXIT); + insns_cnt++) { + } + insns_cnt++; - ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", prog, insn_cnt, &opts); - if (verbose && ret < 0) + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, + insns_cnt, &opts); + if (fd < 0) fprintf(stderr, "%s\n", bpf_log_buf); - return ret; -} - -static int attach_sock_prog(int cgfd, int progfd, - enum bpf_attach_type attach_type) -{ - return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE); + return fd; } static int bind_sock(int domain, int type, const char *ip, @@ -477,22 +369,16 @@ out: return res; } -static int run_test_case(int cgfd, const struct sock_test *test) +static int run_test(int cgroup_fd, struct sock_post_bind_test *test) { - int progfd = -1; - int err = 0; - int res; - - printf("Test case: %s .. ", test->descr); - progfd = load_sock_prog(test->insns, test->expected_attach_type); - if (progfd < 0) { - if (test->result == LOAD_REJECT) - goto out; - else - goto err; - } + int err, prog_fd, res, ret = 0; - if (attach_sock_prog(cgfd, progfd, test->attach_type) < 0) { + prog_fd = load_prog(test->insns, test->expected_attach_type); + if (prog_fd < 0) + goto err; + + err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0); + if (err < 0) { if (test->result == ATTACH_REJECT) goto out; else @@ -503,54 +389,38 @@ static int run_test_case(int cgfd, const struct sock_test *test) test->port_retry); if (res > 0 && test->result == res) goto out; - err: - err = -1; + ret = -1; out: /* Detaching w/o checking return code: best effort attempt. */ - if (progfd != -1) - bpf_prog_detach(cgfd, test->attach_type); - close(progfd); - printf("[%s]\n", err ? "FAIL" : "PASS"); - return err; + if (prog_fd != -1) + bpf_prog_detach(cgroup_fd, test->attach_type); + close(prog_fd); + return ret; } -static int run_tests(int cgfd) +void test_sock_post_bind(void) { - int passes = 0; - int fails = 0; + struct netns_obj *ns; + int cgroup_fd; int i; - for (i = 0; i < ARRAY_SIZE(tests); ++i) { - if (run_test_case(cgfd, &tests[i])) - ++fails; - else - ++passes; - } - printf("Summary: %d PASSED, %d FAILED\n", passes, fails); - return fails ? -1 : 0; -} - -int main(int argc, char **argv) -{ - int cgfd = -1; - int err = 0; + cgroup_fd = test__join_cgroup("/post_bind"); + if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup")) + return; - cgfd = cgroup_setup_and_join(CG_PATH); - if (cgfd < 0) - goto err; + ns = netns_new(TEST_NS, true); + if (!ASSERT_OK_PTR(ns, "netns_new")) + goto cleanup; - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (!test__start_subtest(tests[i].descr)) + continue; - if (run_tests(cgfd)) - goto err; + ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr); + } - goto out; -err: - err = -1; -out: - close(cgfd); - cleanup_cgroup_environment(); - return err; +cleanup: + netns_free(ns); + close(cgroup_fd); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 82bfb266741c..a2041f8e32eb 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -501,6 +501,58 @@ out: test_sockmap_pass_prog__destroy(skel); } +static void test_sockmap_stream_pass(void) +{ + int zero = 0, sent, recvd; + int verdict, parser; + int err, map; + int c = -1, p = -1; + struct test_sockmap_pass_prog *pass = NULL; + char snd[256] = "0123456789"; + char rcv[256] = "0"; + + pass = test_sockmap_pass_prog__open_and_load(); + verdict = bpf_program__fd(pass->progs.prog_skb_verdict); + parser = bpf_program__fd(pass->progs.prog_skb_parser); + map = bpf_map__fd(pass->maps.sock_map_rx); + + err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream parser")) + goto out; + + err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream verdict")) + goto out; + + err = create_pair(AF_INET, SOCK_STREAM, &c, &p); + if (err) + goto out; + + /* sk_data_ready of 'p' will be replaced by strparser handler */ + err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(p)")) + goto out_close; + + /* + * as 'prog_skb_parser' return the original skb len and + * 'prog_skb_verdict' return SK_PASS, the kernel will just + * pass it through to original socket 'p' + */ + sent = xsend(c, snd, sizeof(snd), 0); + ASSERT_EQ(sent, sizeof(snd), "xsend(c)"); + + recvd = recv_timeout(p, rcv, sizeof(rcv), SOCK_NONBLOCK, + IO_TIMEOUT_SEC); + ASSERT_EQ(recvd, sizeof(rcv), "recv_timeout(p)"); + +out_close: + close(c); + close(p); + +out: + test_sockmap_pass_prog__destroy(pass); +} + static void test_sockmap_skb_verdict_fionread(bool pass_prog) { int err, map, verdict, c0 = -1, c1 = -1, p0 = -1, p1 = -1; @@ -923,6 +975,8 @@ void test_sockmap_basic(void) test_sockmap_progs_query(BPF_SK_SKB_VERDICT); if (test__start_subtest("sockmap skb_verdict shutdown")) test_sockmap_skb_verdict_shutdown(); + if (test__start_subtest("sockmap stream parser and verdict pass")) + test_sockmap_stream_pass(); if (test__start_subtest("sockmap skb_verdict fionread")) test_sockmap_skb_verdict_fionread(true); if (test__start_subtest("sockmap skb_verdict fionread on drop")) diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c new file mode 100644 index 000000000000..4006879ca3fe --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_private_stack.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> +#include "struct_ops_private_stack.skel.h" +#include "struct_ops_private_stack_fail.skel.h" +#include "struct_ops_private_stack_recur.skel.h" + +static void test_private_stack(void) +{ + struct struct_ops_private_stack *skel; + struct bpf_link *link; + int err; + + skel = struct_ops_private_stack__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack__open")) + return; + + if (skel->data->skip) { + test__skip(); + goto cleanup; + } + + err = struct_ops_private_stack__load(skel); + if (!ASSERT_OK(err, "struct_ops_private_stack__load")) + goto cleanup; + + link = bpf_map__attach_struct_ops(skel->maps.testmod_1); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto cleanup; + + ASSERT_OK(trigger_module_test_read(256), "trigger_read"); + + ASSERT_EQ(skel->bss->val_i, 3, "val_i"); + ASSERT_EQ(skel->bss->val_j, 8, "val_j"); + + bpf_link__destroy(link); + +cleanup: + struct_ops_private_stack__destroy(skel); +} + +static void test_private_stack_fail(void) +{ + struct struct_ops_private_stack_fail *skel; + int err; + + skel = struct_ops_private_stack_fail__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack_fail__open")) + return; + + if (skel->data->skip) { + test__skip(); + goto cleanup; + } + + err = struct_ops_private_stack_fail__load(skel); + if (!ASSERT_ERR(err, "struct_ops_private_stack_fail__load")) + goto cleanup; + return; + +cleanup: + struct_ops_private_stack_fail__destroy(skel); +} + +static void test_private_stack_recur(void) +{ + struct struct_ops_private_stack_recur *skel; + struct bpf_link *link; + int err; + + skel = struct_ops_private_stack_recur__open(); + if (!ASSERT_OK_PTR(skel, "struct_ops_private_stack_recur__open")) + return; + + if (skel->data->skip) { + test__skip(); + goto cleanup; + } + + err = struct_ops_private_stack_recur__load(skel); + if (!ASSERT_OK(err, "struct_ops_private_stack_recur__load")) + goto cleanup; + + link = bpf_map__attach_struct_ops(skel->maps.testmod_1); + if (!ASSERT_OK_PTR(link, "attach_struct_ops")) + goto cleanup; + + ASSERT_OK(trigger_module_test_read(256), "trigger_read"); + + ASSERT_EQ(skel->bss->val_j, 3, "val_j"); + + bpf_link__destroy(link); + +cleanup: + struct_ops_private_stack_recur__destroy(skel); +} + +void test_struct_ops_private_stack(void) +{ + if (test__start_subtest("private_stack")) + test_private_stack(); + if (test__start_subtest("private_stack_fail")) + test_private_stack_fail(); + if (test__start_subtest("private_stack_recur")) + test_private_stack_recur(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/subskeleton.c b/tools/testing/selftests/bpf/prog_tests/subskeleton.c index 9c31b7004f9c..fdf13ed0152a 100644 --- a/tools/testing/selftests/bpf/prog_tests/subskeleton.c +++ b/tools/testing/selftests/bpf/prog_tests/subskeleton.c @@ -46,7 +46,8 @@ static int subskeleton_lib_subresult(struct bpf_object *obj) return result; } -void test_subskeleton(void) +/* initialize and load through skeleton, then instantiate subskeleton out of it */ +static void subtest_skel_subskeleton(void) { int err, result; struct test_subskeleton *skel; @@ -76,3 +77,76 @@ void test_subskeleton(void) cleanup: test_subskeleton__destroy(skel); } + +/* initialize and load through generic bpf_object API, then instantiate subskeleton out of it */ +static void subtest_obj_subskeleton(void) +{ + int err, result; + const void *elf_bytes; + size_t elf_bytes_sz = 0, rodata_sz = 0, bss_sz = 0; + struct bpf_object *obj; + const struct bpf_map *map; + const struct bpf_program *prog; + struct bpf_link *link = NULL; + struct test_subskeleton__rodata *rodata; + struct test_subskeleton__bss *bss; + + elf_bytes = test_subskeleton__elf_bytes(&elf_bytes_sz); + if (!ASSERT_OK_PTR(elf_bytes, "elf_bytes")) + return; + + obj = bpf_object__open_mem(elf_bytes, elf_bytes_sz, NULL); + if (!ASSERT_OK_PTR(obj, "obj_open_mem")) + return; + + map = bpf_object__find_map_by_name(obj, ".rodata"); + if (!ASSERT_OK_PTR(map, "rodata_map_by_name")) + goto cleanup; + + rodata = bpf_map__initial_value(map, &rodata_sz); + if (!ASSERT_OK_PTR(rodata, "rodata_get")) + goto cleanup; + + rodata->rovar1 = 10; + rodata->var1 = 1; + subskeleton_lib_setup(obj); + + err = bpf_object__load(obj); + if (!ASSERT_OK(err, "obj_load")) + goto cleanup; + + prog = bpf_object__find_program_by_name(obj, "handler1"); + if (!ASSERT_OK_PTR(prog, "prog_by_name")) + goto cleanup; + + link = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(link, "prog_attach")) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + map = bpf_object__find_map_by_name(obj, ".bss"); + if (!ASSERT_OK_PTR(map, "bss_map_by_name")) + goto cleanup; + + bss = bpf_map__initial_value(map, &bss_sz); + if (!ASSERT_OK_PTR(rodata, "rodata_get")) + goto cleanup; + + result = subskeleton_lib_subresult(obj) * 10; + ASSERT_EQ(bss->out1, result, "out1"); + +cleanup: + bpf_link__destroy(link); + bpf_object__close(obj); +} + + +void test_subskeleton(void) +{ + if (test__start_subtest("skel_subskel")) + subtest_skel_subskeleton(); + if (test__start_subtest("obj_subskel")) + subtest_obj_subskeleton(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 21c5a37846ad..544144620ca6 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -7,6 +7,7 @@ #include "tailcall_bpf2bpf_hierarchy3.skel.h" #include "tailcall_freplace.skel.h" #include "tc_bpf2bpf.skel.h" +#include "tailcall_fail.skel.h" /* test_tailcall_1 checks basic functionality by patching multiple locations * in a single program for a single tail call slot with nop->jmp, jmp->nop @@ -1496,8 +1497,8 @@ static void test_tailcall_bpf2bpf_hierarchy_3(void) RUN_TESTS(tailcall_bpf2bpf_hierarchy3); } -/* test_tailcall_freplace checks that the attached freplace prog is OK to - * update the prog_array map. +/* test_tailcall_freplace checks that the freplace prog fails to update the + * prog_array map, no matter whether the freplace prog attaches to its target. */ static void test_tailcall_freplace(void) { @@ -1505,7 +1506,7 @@ static void test_tailcall_freplace(void) struct bpf_link *freplace_link = NULL; struct bpf_program *freplace_prog; struct tc_bpf2bpf *tc_skel = NULL; - int prog_fd, map_fd; + int prog_fd, tc_prog_fd, map_fd; char buff[128] = {}; int err, key; @@ -1523,9 +1524,10 @@ static void test_tailcall_freplace(void) if (!ASSERT_OK_PTR(tc_skel, "tc_bpf2bpf__open_and_load")) goto out; - prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); + tc_prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); freplace_prog = freplace_skel->progs.entry_freplace; - err = bpf_program__set_attach_target(freplace_prog, prog_fd, "subprog"); + err = bpf_program__set_attach_target(freplace_prog, tc_prog_fd, + "subprog_tc"); if (!ASSERT_OK(err, "set_attach_target")) goto out; @@ -1533,27 +1535,121 @@ static void test_tailcall_freplace(void) if (!ASSERT_OK(err, "tailcall_freplace__load")) goto out; - freplace_link = bpf_program__attach_freplace(freplace_prog, prog_fd, - "subprog"); + map_fd = bpf_map__fd(freplace_skel->maps.jmp_table); + prog_fd = bpf_program__fd(freplace_prog); + key = 0; + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + ASSERT_ERR(err, "update jmp_table failure"); + + freplace_link = bpf_program__attach_freplace(freplace_prog, tc_prog_fd, + "subprog_tc"); if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) goto out; - map_fd = bpf_map__fd(freplace_skel->maps.jmp_table); - prog_fd = bpf_program__fd(freplace_prog); + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + ASSERT_ERR(err, "update jmp_table failure"); + +out: + bpf_link__destroy(freplace_link); + tailcall_freplace__destroy(freplace_skel); + tc_bpf2bpf__destroy(tc_skel); +} + +/* test_tailcall_bpf2bpf_freplace checks the failure that fails to attach a tail + * callee prog with freplace prog or fails to update an extended prog to + * prog_array map. + */ +static void test_tailcall_bpf2bpf_freplace(void) +{ + struct tailcall_freplace *freplace_skel = NULL; + struct bpf_link *freplace_link = NULL; + struct tc_bpf2bpf *tc_skel = NULL; + char buff[128] = {}; + int prog_fd, map_fd; + int err, key; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = buff, + .data_size_in = sizeof(buff), + .repeat = 1, + ); + + tc_skel = tc_bpf2bpf__open_and_load(); + if (!ASSERT_OK_PTR(tc_skel, "tc_bpf2bpf__open_and_load")) + goto out; + + prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); + freplace_skel = tailcall_freplace__open(); + if (!ASSERT_OK_PTR(freplace_skel, "tailcall_freplace__open")) + goto out; + + err = bpf_program__set_attach_target(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_OK(err, "set_attach_target")) + goto out; + + err = tailcall_freplace__load(freplace_skel); + if (!ASSERT_OK(err, "tailcall_freplace__load")) + goto out; + + /* OK to attach then detach freplace prog. */ + + freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) + goto out; + + err = bpf_link__destroy(freplace_link); + if (!ASSERT_OK(err, "destroy link")) + goto out; + + /* OK to update prog_array map then delete element from the map. */ + key = 0; + map_fd = bpf_map__fd(freplace_skel->maps.jmp_table); err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); if (!ASSERT_OK(err, "update jmp_table")) goto out; - prog_fd = bpf_program__fd(tc_skel->progs.entry_tc); - err = bpf_prog_test_run_opts(prog_fd, &topts); - ASSERT_OK(err, "test_run"); - ASSERT_EQ(topts.retval, 34, "test_run retval"); + err = bpf_map_delete_elem(map_fd, &key); + if (!ASSERT_OK(err, "delete_elem from jmp_table")) + goto out; + + /* Fail to attach a tail callee prog with freplace prog. */ + + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + if (!ASSERT_OK(err, "update jmp_table")) + goto out; + + freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_ERR_PTR(freplace_link, "attach_freplace failure")) + goto out; + + err = bpf_map_delete_elem(map_fd, &key); + if (!ASSERT_OK(err, "delete_elem from jmp_table")) + goto out; + + /* Fail to update an extended prog to prog_array map. */ + + freplace_link = bpf_program__attach_freplace(freplace_skel->progs.entry_freplace, + prog_fd, "subprog_tc"); + if (!ASSERT_OK_PTR(freplace_link, "attach_freplace")) + goto out; + + err = bpf_map_update_elem(map_fd, &key, &prog_fd, BPF_ANY); + if (!ASSERT_ERR(err, "update jmp_table failure")) + goto out; out: bpf_link__destroy(freplace_link); - tc_bpf2bpf__destroy(tc_skel); tailcall_freplace__destroy(freplace_skel); + tc_bpf2bpf__destroy(tc_skel); +} + +static void test_tailcall_failure() +{ + RUN_TESTS(tailcall_fail); } void test_tailcalls(void) @@ -1606,4 +1702,8 @@ void test_tailcalls(void) test_tailcall_bpf2bpf_hierarchy_3(); if (test__start_subtest("tailcall_freplace")) test_tailcall_freplace(); + if (test__start_subtest("tailcall_bpf2bpf_freplace")) + test_tailcall_bpf2bpf_freplace(); + if (test__start_subtest("tailcall_failure")) + test_tailcall_failure(); } diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c index d4579f735398..83b90335967a 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c +++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c @@ -68,6 +68,74 @@ cleanup: task_kfunc_success__destroy(skel); } +static int run_vpid_test(void *prog_name) +{ + struct task_kfunc_success *skel; + struct bpf_program *prog; + int prog_fd, err = 0; + + if (getpid() != 1) + return 1; + + skel = open_load_task_kfunc_skel(); + if (!skel) + return 2; + + if (skel->bss->err) { + err = 3; + goto cleanup; + } + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!prog) { + err = 4; + goto cleanup; + } + + prog_fd = bpf_program__fd(prog); + if (prog_fd < 0) { + err = 5; + goto cleanup; + } + + if (bpf_prog_test_run_opts(prog_fd, NULL)) { + err = 6; + goto cleanup; + } + + if (skel->bss->err) + err = 7 + skel->bss->err; +cleanup: + task_kfunc_success__destroy(skel); + return err; +} + +static void run_vpid_success_test(const char *prog_name) +{ + const int stack_size = 1024 * 1024; + int child_pid, wstatus; + char *stack; + + stack = (char *)malloc(stack_size); + if (!ASSERT_OK_PTR(stack, "clone_stack")) + return; + + child_pid = clone(run_vpid_test, stack + stack_size, + CLONE_NEWPID | SIGCHLD, (void *)prog_name); + if (!ASSERT_GT(child_pid, -1, "child_pid")) + goto cleanup; + + if (!ASSERT_GT(waitpid(child_pid, &wstatus, 0), -1, "waitpid")) + goto cleanup; + + if (WEXITSTATUS(wstatus) > 7) + ASSERT_OK(WEXITSTATUS(wstatus) - 7, "vpid_test_failure"); + else + ASSERT_OK(WEXITSTATUS(wstatus), "run_vpid_test_err"); +cleanup: + free(stack); +} + static const char * const success_tests[] = { "test_task_acquire_release_argument", "test_task_acquire_release_current", @@ -83,6 +151,11 @@ static const char * const success_tests[] = { "test_task_kfunc_flavor_relo_not_found", }; +static const char * const vpid_success_tests[] = { + "test_task_from_vpid_current", + "test_task_from_vpid_invalid", +}; + void test_task_kfunc(void) { int i; @@ -94,5 +167,12 @@ void test_task_kfunc(void) run_success_test(success_tests[i]); } + for (i = 0; i < ARRAY_SIZE(vpid_success_tests); i++) { + if (!test__start_subtest(vpid_success_tests[i])) + continue; + + run_vpid_success_test(vpid_success_tests[i]); + } + RUN_TESTS(task_kfunc_failure); } diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c index c33c05161a9e..60f474d965a9 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c @@ -7,12 +7,20 @@ #include <pthread.h> #include <sys/syscall.h> /* For SYS_xxx definitions */ #include <sys/types.h> +#include <sys/eventfd.h> +#include <sys/mman.h> #include <test_progs.h> +#include <bpf/btf.h> #include "task_local_storage_helpers.h" #include "task_local_storage.skel.h" #include "task_local_storage_exit_creds.skel.h" #include "task_ls_recursion.skel.h" #include "task_storage_nodeadlock.skel.h" +#include "uptr_test_common.h" +#include "task_ls_uptr.skel.h" +#include "uptr_update_failure.skel.h" +#include "uptr_failure.skel.h" +#include "uptr_map_failure.skel.h" static void test_sys_enter_exit(void) { @@ -23,14 +31,14 @@ static void test_sys_enter_exit(void) if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; - skel->bss->target_pid = syscall(SYS_gettid); + skel->bss->target_pid = sys_gettid(); err = task_local_storage__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; - syscall(SYS_gettid); - syscall(SYS_gettid); + sys_gettid(); + sys_gettid(); /* 3x syscalls: 1x attach and 2x gettid */ ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt"); @@ -99,7 +107,7 @@ static void test_recursion(void) /* trigger sys_enter, make sure it does not cause deadlock */ skel->bss->test_pid = getpid(); - syscall(SYS_gettid); + sys_gettid(); skel->bss->test_pid = 0; task_ls_recursion__detach(skel); @@ -227,6 +235,259 @@ done: sched_setaffinity(getpid(), sizeof(old), &old); } +static struct user_data udata __attribute__((aligned(16))) = { + .a = 1, + .b = 2, +}; + +static struct user_data udata2 __attribute__((aligned(16))) = { + .a = 3, + .b = 4, +}; + +static void check_udata2(int expected) +{ + udata2.result = udata2.nested_result = 0; + usleep(1); + ASSERT_EQ(udata2.result, expected, "udata2.result"); + ASSERT_EQ(udata2.nested_result, expected, "udata2.nested_result"); +} + +static void test_uptr_basic(void) +{ + int map_fd, parent_task_fd, ev_fd; + struct value_type value = {}; + struct task_ls_uptr *skel; + pid_t child_pid, my_tid; + __u64 ev_dummy_data = 1; + int err; + + my_tid = sys_gettid(); + parent_task_fd = sys_pidfd_open(my_tid, 0); + if (!ASSERT_OK_FD(parent_task_fd, "parent_task_fd")) + return; + + ev_fd = eventfd(0, 0); + if (!ASSERT_OK_FD(ev_fd, "ev_fd")) { + close(parent_task_fd); + return; + } + + skel = task_ls_uptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + goto out; + + map_fd = bpf_map__fd(skel->maps.datamap); + value.udata = &udata; + value.nested.udata = &udata; + err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_NOEXIST); + if (!ASSERT_OK(err, "update_elem(udata)")) + goto out; + + err = task_ls_uptr__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + + child_pid = fork(); + if (!ASSERT_NEQ(child_pid, -1, "fork")) + goto out; + + /* Call syscall in the child process, but access the map value of + * the parent process in the BPF program to check if the user kptr + * is translated/mapped correctly. + */ + if (child_pid == 0) { + /* child */ + + /* Overwrite the user_data in the child process to check if + * the BPF program accesses the user_data of the parent. + */ + udata.a = 0; + udata.b = 0; + + /* Wait for the parent to set child_pid */ + read(ev_fd, &ev_dummy_data, sizeof(ev_dummy_data)); + exit(0); + } + + skel->bss->parent_pid = my_tid; + skel->bss->target_pid = child_pid; + + write(ev_fd, &ev_dummy_data, sizeof(ev_dummy_data)); + + err = waitpid(child_pid, NULL, 0); + ASSERT_EQ(err, child_pid, "waitpid"); + ASSERT_EQ(udata.result, MAGIC_VALUE + udata.a + udata.b, "udata.result"); + ASSERT_EQ(udata.nested_result, MAGIC_VALUE + udata.a + udata.b, "udata.nested_result"); + + skel->bss->target_pid = my_tid; + + /* update_elem: uptr changes from udata1 to udata2 */ + value.udata = &udata2; + value.nested.udata = &udata2; + err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST); + if (!ASSERT_OK(err, "update_elem(udata2)")) + goto out; + check_udata2(MAGIC_VALUE + udata2.a + udata2.b); + + /* update_elem: uptr changes from udata2 uptr to NULL */ + memset(&value, 0, sizeof(value)); + err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST); + if (!ASSERT_OK(err, "update_elem(udata2)")) + goto out; + check_udata2(0); + + /* update_elem: uptr changes from NULL to udata2 */ + value.udata = &udata2; + value.nested.udata = &udata2; + err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_EXIST); + if (!ASSERT_OK(err, "update_elem(udata2)")) + goto out; + check_udata2(MAGIC_VALUE + udata2.a + udata2.b); + + /* Check if user programs can access the value of user kptrs + * through bpf_map_lookup_elem(). Make sure the kernel value is not + * leaked. + */ + err = bpf_map_lookup_elem(map_fd, &parent_task_fd, &value); + if (!ASSERT_OK(err, "bpf_map_lookup_elem")) + goto out; + ASSERT_EQ(value.udata, NULL, "value.udata"); + ASSERT_EQ(value.nested.udata, NULL, "value.nested.udata"); + + /* delete_elem */ + err = bpf_map_delete_elem(map_fd, &parent_task_fd); + ASSERT_OK(err, "delete_elem(udata2)"); + check_udata2(0); + + /* update_elem: add uptr back to test map_free */ + value.udata = &udata2; + value.nested.udata = &udata2; + err = bpf_map_update_elem(map_fd, &parent_task_fd, &value, BPF_NOEXIST); + ASSERT_OK(err, "update_elem(udata2)"); + +out: + task_ls_uptr__destroy(skel); + close(ev_fd); + close(parent_task_fd); +} + +static void test_uptr_across_pages(void) +{ + int page_size = getpagesize(); + struct value_type value = {}; + struct task_ls_uptr *skel; + int err, task_fd, map_fd; + void *mem; + + task_fd = sys_pidfd_open(getpid(), 0); + if (!ASSERT_OK_FD(task_fd, "task_fd")) + return; + + mem = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (!ASSERT_OK_PTR(mem, "mmap(page_size * 2)")) { + close(task_fd); + return; + } + + skel = task_ls_uptr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + goto out; + + map_fd = bpf_map__fd(skel->maps.datamap); + value.udata = mem + page_size - offsetof(struct user_data, b); + err = bpf_map_update_elem(map_fd, &task_fd, &value, 0); + if (!ASSERT_ERR(err, "update_elem(udata)")) + goto out; + ASSERT_EQ(errno, EOPNOTSUPP, "errno"); + + value.udata = mem + page_size - sizeof(struct user_data); + err = bpf_map_update_elem(map_fd, &task_fd, &value, 0); + ASSERT_OK(err, "update_elem(udata)"); + +out: + task_ls_uptr__destroy(skel); + close(task_fd); + munmap(mem, page_size * 2); +} + +static void test_uptr_update_failure(void) +{ + struct value_lock_type value = {}; + struct uptr_update_failure *skel; + int err, task_fd, map_fd; + + task_fd = sys_pidfd_open(getpid(), 0); + if (!ASSERT_OK_FD(task_fd, "task_fd")) + return; + + skel = uptr_update_failure__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + goto out; + + map_fd = bpf_map__fd(skel->maps.datamap); + + value.udata = &udata; + err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_F_LOCK); + if (!ASSERT_ERR(err, "update_elem(udata, BPF_F_LOCK)")) + goto out; + ASSERT_EQ(errno, EOPNOTSUPP, "errno"); + + err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_EXIST); + if (!ASSERT_ERR(err, "update_elem(udata, BPF_EXIST)")) + goto out; + ASSERT_EQ(errno, ENOENT, "errno"); + + err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_NOEXIST); + if (!ASSERT_OK(err, "update_elem(udata, BPF_NOEXIST)")) + goto out; + + value.udata = &udata2; + err = bpf_map_update_elem(map_fd, &task_fd, &value, BPF_NOEXIST); + if (!ASSERT_ERR(err, "update_elem(udata2, BPF_NOEXIST)")) + goto out; + ASSERT_EQ(errno, EEXIST, "errno"); + +out: + uptr_update_failure__destroy(skel); + close(task_fd); +} + +static void test_uptr_map_failure(const char *map_name, int expected_errno) +{ + LIBBPF_OPTS(bpf_map_create_opts, create_attr); + struct uptr_map_failure *skel; + struct bpf_map *map; + struct btf *btf; + int map_fd, err; + + skel = uptr_map_failure__open(); + if (!ASSERT_OK_PTR(skel, "uptr_map_failure__open")) + return; + + map = bpf_object__find_map_by_name(skel->obj, map_name); + btf = bpf_object__btf(skel->obj); + err = btf__load_into_kernel(btf); + if (!ASSERT_OK(err, "btf__load_into_kernel")) + goto done; + + create_attr.map_flags = bpf_map__map_flags(map); + create_attr.btf_fd = btf__fd(btf); + create_attr.btf_key_type_id = bpf_map__btf_key_type_id(map); + create_attr.btf_value_type_id = bpf_map__btf_value_type_id(map); + map_fd = bpf_map_create(bpf_map__type(map), map_name, + bpf_map__key_size(map), bpf_map__value_size(map), + 0, &create_attr); + if (ASSERT_ERR_FD(map_fd, "map_create")) + ASSERT_EQ(errno, expected_errno, "errno"); + else + close(map_fd); + +done: + uptr_map_failure__destroy(skel); +} + void test_task_local_storage(void) { if (test__start_subtest("sys_enter_exit")) @@ -237,4 +498,21 @@ void test_task_local_storage(void) test_recursion(); if (test__start_subtest("nodeadlock")) test_nodeadlock(); + if (test__start_subtest("uptr_basic")) + test_uptr_basic(); + if (test__start_subtest("uptr_across_pages")) + test_uptr_across_pages(); + if (test__start_subtest("uptr_update_failure")) + test_uptr_update_failure(); + if (test__start_subtest("uptr_map_failure_e2big")) { + if (getpagesize() == PAGE_SIZE) + test_uptr_map_failure("large_uptr_map", E2BIG); + else + test__skip(); + } + if (test__start_subtest("uptr_map_failure_size0")) + test_uptr_map_failure("empty_uptr_map", EINVAL); + if (test__start_subtest("uptr_map_failure_kstruct")) + test_uptr_map_failure("kstruct_uptr_map", EINVAL); + RUN_TESTS(uptr_failure); } diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c index b9135720024c..151a4210028f 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c @@ -14,7 +14,9 @@ #include "netlink_helpers.h" #include "tc_helpers.h" -#define ICMP_ECHO 8 +#define MARK 42 +#define PRIO 0xeb9f +#define ICMP_ECHO 8 struct icmphdr { __u8 type; @@ -33,7 +35,7 @@ struct iplink_req { }; static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, - bool same_netns) + bool same_netns, int scrub, int peer_scrub) { struct rtnl_handle rth = { .fd = -1 }; struct iplink_req req = {}; @@ -58,6 +60,8 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex, data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA); addattr32(&req.n, sizeof(req), IFLA_NETKIT_POLICY, policy); addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_POLICY, peer_policy); + addattr32(&req.n, sizeof(req), IFLA_NETKIT_SCRUB, scrub); + addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_SCRUB, peer_scrub); addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode); addattr_nest_end(&req.n, data); addattr_nest_end(&req.n, linkinfo); @@ -118,9 +122,9 @@ static void destroy_netkit(void) static int __send_icmp(__u32 dest) { + int sock, ret, mark = MARK, prio = PRIO; struct sockaddr_in addr; struct icmphdr icmp; - int sock, ret; ret = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0"); if (!ASSERT_OK(ret, "write_sysctl(net.ipv4.ping_group_range)")) @@ -135,6 +139,15 @@ static int __send_icmp(__u32 dest) if (!ASSERT_OK(ret, "setsockopt(SO_BINDTODEVICE)")) goto out; + ret = setsockopt(sock, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); + if (!ASSERT_OK(ret, "setsockopt(SO_MARK)")) + goto out; + + ret = setsockopt(sock, SOL_SOCKET, SO_PRIORITY, + &prio, sizeof(prio)); + if (!ASSERT_OK(ret, "setsockopt(SO_PRIORITY)")) + goto out; + memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(dest); @@ -171,7 +184,8 @@ void serial_test_tc_netkit_basic(void) int err, ifindex; err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, - &ifindex, false); + &ifindex, false, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT); if (err) return; @@ -285,7 +299,8 @@ static void serial_test_tc_netkit_multi_links_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false); + &ifindex, false, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT); if (err) return; @@ -413,7 +428,8 @@ static void serial_test_tc_netkit_multi_opts_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false); + &ifindex, false, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT); if (err) return; @@ -527,7 +543,8 @@ void serial_test_tc_netkit_device(void) int err, ifindex, ifindex2; err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS, - &ifindex, true); + &ifindex, true, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT); if (err) return; @@ -638,7 +655,8 @@ static void serial_test_tc_netkit_neigh_links_target(int mode, int target) int err, ifindex; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, false); + &ifindex, false, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT); if (err) return; @@ -715,7 +733,8 @@ static void serial_test_tc_netkit_pkt_type_mode(int mode) struct bpf_link *link; err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS, - &ifindex, true); + &ifindex, true, NETKIT_SCRUB_DEFAULT, + NETKIT_SCRUB_DEFAULT); if (err) return; @@ -779,3 +798,60 @@ void serial_test_tc_netkit_pkt_type(void) serial_test_tc_netkit_pkt_type_mode(NETKIT_L2); serial_test_tc_netkit_pkt_type_mode(NETKIT_L3); } + +static void serial_test_tc_netkit_scrub_type(int scrub) +{ + LIBBPF_OPTS(bpf_netkit_opts, optl); + struct test_tc_link *skel; + struct bpf_link *link; + int err, ifindex; + + err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS, + &ifindex, false, scrub, scrub); + if (err) + return; + + skel = test_tc_link__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + goto cleanup; + + ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc8, + BPF_NETKIT_PRIMARY), 0, "tc8_attach_type"); + + err = test_tc_link__load(skel); + if (!ASSERT_OK(err, "skel_load")) + goto cleanup; + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + + ASSERT_EQ(skel->bss->seen_tc8, false, "seen_tc8"); + + link = bpf_program__attach_netkit(skel->progs.tc8, ifindex, &optl); + if (!ASSERT_OK_PTR(link, "link_attach")) + goto cleanup; + + skel->links.tc8 = link; + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + + tc_skel_reset_all_seen(skel); + ASSERT_EQ(send_icmp(), 0, "icmp_pkt"); + + ASSERT_EQ(skel->bss->seen_tc8, true, "seen_tc8"); + ASSERT_EQ(skel->bss->mark, scrub == NETKIT_SCRUB_NONE ? MARK : 0, "mark"); + ASSERT_EQ(skel->bss->prio, scrub == NETKIT_SCRUB_NONE ? PRIO : 0, "prio"); +cleanup: + test_tc_link__destroy(skel); + + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0); + assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0); + destroy_netkit(); +} + +void serial_test_tc_netkit_scrub(void) +{ + serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT); + serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_csum_diff.c b/tools/testing/selftests/bpf/prog_tests/test_csum_diff.c new file mode 100644 index 000000000000..107b20d43e83 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_csum_diff.c @@ -0,0 +1,408 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates */ +#include <test_progs.h> +#include "csum_diff_test.skel.h" + +#define BUFF_SZ 512 + +struct testcase { + unsigned long long to_buff[BUFF_SZ / 8]; + unsigned int to_buff_len; + unsigned long long from_buff[BUFF_SZ / 8]; + unsigned int from_buff_len; + unsigned short seed; + unsigned short result; +}; + +#define NUM_PUSH_TESTS 4 + +struct testcase push_tests[NUM_PUSH_TESTS] = { + { + .to_buff = { + 0xdeadbeefdeadbeef, + }, + .to_buff_len = 8, + .from_buff = {}, + .from_buff_len = 0, + .seed = 0, + .result = 0x3b3b + }, + { + .to_buff = { + 0xdeadbeefdeadbeef, + 0xbeefdeadbeefdead, + }, + .to_buff_len = 16, + .from_buff = {}, + .from_buff_len = 0, + .seed = 0x1234, + .result = 0x88aa + }, + { + .to_buff = { + 0xdeadbeefdeadbeef, + 0xbeefdeadbeefdead, + }, + .to_buff_len = 15, + .from_buff = {}, + .from_buff_len = 0, + .seed = 0x1234, +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + .result = 0xcaa9 +#else + .result = 0x87fd +#endif + }, + { + .to_buff = { + 0x327b23c66b8b4567, + 0x66334873643c9869, + 0x19495cff74b0dc51, + 0x625558ec2ae8944a, + 0x46e87ccd238e1f29, + 0x507ed7ab3d1b58ba, + 0x41b71efb2eb141f2, + 0x7545e14679e2a9e3, + 0x5bd062c2515f007c, + 0x4db127f812200854, + 0x1f16e9e80216231b, + 0x66ef438d1190cde7, + 0x3352255a140e0f76, + 0x0ded7263109cf92e, + 0x1befd79f7fdcc233, + 0x6b68079a41a7c4c9, + 0x25e45d324e6afb66, + 0x431bd7b7519b500d, + 0x7c83e4583f2dba31, + 0x62bbd95a257130a3, + 0x628c895d436c6125, + 0x721da317333ab105, + 0x2d1d5ae92443a858, + 0x75a2a8d46763845e, + 0x79838cb208edbdab, + 0x0b03e0c64353d0cd, + 0x54e49eb4189a769b, + 0x2ca8861171f32454, + 0x02901d820836c40e, + 0x081386413a95f874, + 0x7c3dbd3d1e7ff521, + 0x6ceaf087737b8ddc, + 0x4516dde922221a70, + 0x614fd4a13006c83e, + 0x5577f8e1419ac241, + 0x05072367440badfc, + 0x77465f013804823e, + 0x5c482a977724c67e, + 0x5e884adc2463b9ea, + 0x2d51779651ead36b, + 0x153ea438580bd78f, + 0x70a64e2a3855585c, + 0x2a487cb06a2342ec, + 0x725a06fb1d4ed43b, + 0x57e4ccaf2cd89a32, + 0x4b588f547a6d8d3c, + 0x6de91b18542289ec, + 0x7644a45c38437fdb, + 0x684a481a32fff902, + 0x749abb43579478fe, + 0x1ba026fa3dc240fb, + 0x75c6c33a79a1deaa, + 0x70c6a52912e685fb, + 0x374a3fe6520eedd1, + 0x23f9c13c4f4ef005, + 0x275ac794649bb77c, + 0x1cf10fd839386575, + 0x235ba861180115be, + 0x354fe9f947398c89, + 0x741226bb15b5af5c, + 0x10233c990d34b6a8, + 0x615740953f6ab60f, + 0x77ae35eb7e0c57b1, + 0x310c50b3579be4f1, + }, + .to_buff_len = 512, + .from_buff = {}, + .from_buff_len = 0, + .seed = 0xffff, + .result = 0xca45 + }, +}; + +#define NUM_PULL_TESTS 4 + +struct testcase pull_tests[NUM_PULL_TESTS] = { + { + .from_buff = { + 0xdeadbeefdeadbeef, + }, + .from_buff_len = 8, + .to_buff = {}, + .to_buff_len = 0, + .seed = 0, + .result = 0xc4c4 + }, + { + .from_buff = { + 0xdeadbeefdeadbeef, + 0xbeefdeadbeefdead, + }, + .from_buff_len = 16, + .to_buff = {}, + .to_buff_len = 0, + .seed = 0x1234, + .result = 0x9bbd + }, + { + .from_buff = { + 0xdeadbeefdeadbeef, + 0xbeefdeadbeefdead, + }, + .from_buff_len = 15, + .to_buff = {}, + .to_buff_len = 0, + .seed = 0x1234, +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + .result = 0x59be +#else + .result = 0x9c6a +#endif + }, + { + .from_buff = { + 0x327b23c66b8b4567, + 0x66334873643c9869, + 0x19495cff74b0dc51, + 0x625558ec2ae8944a, + 0x46e87ccd238e1f29, + 0x507ed7ab3d1b58ba, + 0x41b71efb2eb141f2, + 0x7545e14679e2a9e3, + 0x5bd062c2515f007c, + 0x4db127f812200854, + 0x1f16e9e80216231b, + 0x66ef438d1190cde7, + 0x3352255a140e0f76, + 0x0ded7263109cf92e, + 0x1befd79f7fdcc233, + 0x6b68079a41a7c4c9, + 0x25e45d324e6afb66, + 0x431bd7b7519b500d, + 0x7c83e4583f2dba31, + 0x62bbd95a257130a3, + 0x628c895d436c6125, + 0x721da317333ab105, + 0x2d1d5ae92443a858, + 0x75a2a8d46763845e, + 0x79838cb208edbdab, + 0x0b03e0c64353d0cd, + 0x54e49eb4189a769b, + 0x2ca8861171f32454, + 0x02901d820836c40e, + 0x081386413a95f874, + 0x7c3dbd3d1e7ff521, + 0x6ceaf087737b8ddc, + 0x4516dde922221a70, + 0x614fd4a13006c83e, + 0x5577f8e1419ac241, + 0x05072367440badfc, + 0x77465f013804823e, + 0x5c482a977724c67e, + 0x5e884adc2463b9ea, + 0x2d51779651ead36b, + 0x153ea438580bd78f, + 0x70a64e2a3855585c, + 0x2a487cb06a2342ec, + 0x725a06fb1d4ed43b, + 0x57e4ccaf2cd89a32, + 0x4b588f547a6d8d3c, + 0x6de91b18542289ec, + 0x7644a45c38437fdb, + 0x684a481a32fff902, + 0x749abb43579478fe, + 0x1ba026fa3dc240fb, + 0x75c6c33a79a1deaa, + 0x70c6a52912e685fb, + 0x374a3fe6520eedd1, + 0x23f9c13c4f4ef005, + 0x275ac794649bb77c, + 0x1cf10fd839386575, + 0x235ba861180115be, + 0x354fe9f947398c89, + 0x741226bb15b5af5c, + 0x10233c990d34b6a8, + 0x615740953f6ab60f, + 0x77ae35eb7e0c57b1, + 0x310c50b3579be4f1, + }, + .from_buff_len = 512, + .to_buff = {}, + .to_buff_len = 0, + .seed = 0xffff, + .result = 0x35ba + }, +}; + +#define NUM_DIFF_TESTS 4 + +struct testcase diff_tests[NUM_DIFF_TESTS] = { + { + .from_buff = { + 0xdeadbeefdeadbeef, + }, + .from_buff_len = 8, + .to_buff = { + 0xabababababababab, + }, + .to_buff_len = 8, + .seed = 0, + .result = 0x7373 + }, + { + .from_buff = { + 0xdeadbeefdeadbeef, + }, + .from_buff_len = 7, + .to_buff = { + 0xabababababababab, + }, + .to_buff_len = 7, + .seed = 0, +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + .result = 0xa673 +#else + .result = 0x73b7 +#endif + }, + { + .from_buff = { + 0, + }, + .from_buff_len = 8, + .to_buff = { + 0xabababababababab, + }, + .to_buff_len = 8, + .seed = 0, + .result = 0xaeae + }, + { + .from_buff = { + 0xdeadbeefdeadbeef + }, + .from_buff_len = 8, + .to_buff = { + 0, + }, + .to_buff_len = 8, + .seed = 0xffff, + .result = 0xc4c4 + }, +}; + +#define NUM_EDGE_TESTS 4 + +struct testcase edge_tests[NUM_EDGE_TESTS] = { + { + .from_buff = {}, + .from_buff_len = 0, + .to_buff = {}, + .to_buff_len = 0, + .seed = 0, + .result = 0 + }, + { + .from_buff = { + 0x1234 + }, + .from_buff_len = 0, + .to_buff = { + 0x1234 + }, + .to_buff_len = 0, + .seed = 0, + .result = 0 + }, + { + .from_buff = {}, + .from_buff_len = 0, + .to_buff = {}, + .to_buff_len = 0, + .seed = 0x1234, + .result = 0x1234 + }, + { + .from_buff = {}, + .from_buff_len = 512, + .to_buff = {}, + .to_buff_len = 0, + .seed = 0xffff, + .result = 0xffff + }, +}; + +static unsigned short trigger_csum_diff(const struct csum_diff_test *skel) +{ + u8 tmp_out[64 << 2] = {}; + u8 tmp_in[64] = {}; + int err; + int pfd; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = tmp_in, + .data_size_in = sizeof(tmp_in), + .data_out = tmp_out, + .data_size_out = sizeof(tmp_out), + .repeat = 1, + ); + pfd = bpf_program__fd(skel->progs.compute_checksum); + err = bpf_prog_test_run_opts(pfd, &topts); + if (err) + return -1; + + return skel->bss->result; +} + +static void test_csum_diff(struct testcase *tests, int num_tests) +{ + struct csum_diff_test *skel; + unsigned short got; + int err; + + for (int i = 0; i < num_tests; i++) { + skel = csum_diff_test__open(); + if (!ASSERT_OK_PTR(skel, "csum_diff_test open")) + return; + + skel->rodata->to_buff_len = tests[i].to_buff_len; + skel->rodata->from_buff_len = tests[i].from_buff_len; + + err = csum_diff_test__load(skel); + if (!ASSERT_EQ(err, 0, "csum_diff_test load")) + goto out; + + memcpy(skel->bss->to_buff, tests[i].to_buff, tests[i].to_buff_len); + memcpy(skel->bss->from_buff, tests[i].from_buff, tests[i].from_buff_len); + skel->bss->seed = tests[i].seed; + + got = trigger_csum_diff(skel); + ASSERT_EQ(got, tests[i].result, "csum_diff result"); + + csum_diff_test__destroy(skel); + } + + return; +out: + csum_diff_test__destroy(skel); +} + +void test_test_csum_diff(void) +{ + if (test__start_subtest("csum_diff_push")) + test_csum_diff(push_tests, NUM_PUSH_TESTS); + if (test__start_subtest("csum_diff_pull")) + test_csum_diff(pull_tests, NUM_PULL_TESTS); + if (test__start_subtest("csum_diff_diff")) + test_csum_diff(diff_tests, NUM_DIFF_TESTS); + if (test__start_subtest("csum_diff_edge")) + test_csum_diff(edge_tests, NUM_EDGE_TESTS); +} diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c index 871d16cb95cf..1a2f99596916 100644 --- a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c +++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c @@ -5,6 +5,7 @@ #include <test_progs.h> #include <pthread.h> #include <network_helpers.h> +#include <sys/sysinfo.h> #include "timer_lockup.skel.h" @@ -52,6 +53,11 @@ void test_timer_lockup(void) pthread_t thrds[2]; void *ret; + if (get_nprocs() < 2) { + test__skip(); + return; + } + skel = timer_lockup__open_and_load(); if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index fe86e4fdb89c..c3ab9b6fb069 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -828,8 +828,12 @@ static int userns_obj_priv_btf_success(int mnt_fd, struct token_lsm *lsm_skel) return validate_struct_ops_load(mnt_fd, true /* should succeed */); } +static const char *token_bpffs_custom_dir() +{ + return getenv("BPF_SELFTESTS_BPF_TOKEN_DIR") ?: "/tmp/bpf-token-fs"; +} + #define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH" -#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs" static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel) { @@ -892,6 +896,7 @@ static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *lsm_skel) { + const char *custom_dir = token_bpffs_custom_dir(); LIBBPF_OPTS(bpf_object_open_opts, opts); struct dummy_st_ops_success *skel; int err; @@ -909,10 +914,10 @@ static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *l * BPF token implicitly, unless pointed to it through * LIBBPF_BPF_TOKEN_PATH envvar */ - rmdir(TOKEN_BPFFS_CUSTOM); - if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom")) + rmdir(custom_dir); + if (!ASSERT_OK(mkdir(custom_dir, 0777), "mkdir_bpffs_custom")) goto err_out; - err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH); + err = sys_move_mount(mnt_fd, "", AT_FDCWD, custom_dir, MOVE_MOUNT_F_EMPTY_PATH); if (!ASSERT_OK(err, "move_mount_bpffs")) goto err_out; @@ -925,7 +930,7 @@ static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *l goto err_out; } - err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/); + err = setenv(TOKEN_ENVVAR, custom_dir, 1 /*overwrite*/); if (!ASSERT_OK(err, "setenv_token_path")) goto err_out; @@ -951,11 +956,11 @@ static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *l if (!ASSERT_ERR(err, "obj_empty_token_path_load")) goto err_out; - rmdir(TOKEN_BPFFS_CUSTOM); + rmdir(custom_dir); unsetenv(TOKEN_ENVVAR); return 0; err_out: - rmdir(TOKEN_BPFFS_CUSTOM); + rmdir(custom_dir); unsetenv(TOKEN_ENVVAR); return -EINVAL; } diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index 844f6fc8487b..2ee17ef1dae2 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -8,6 +8,11 @@ #include "uprobe_multi_usdt.skel.h" #include "uprobe_multi_consumers.skel.h" #include "uprobe_multi_pid_filter.skel.h" +#include "uprobe_multi_session.skel.h" +#include "uprobe_multi_session_single.skel.h" +#include "uprobe_multi_session_cookie.skel.h" +#include "uprobe_multi_session_recursive.skel.h" +#include "uprobe_multi_verifier.skel.h" #include "bpf/libbpf_internal.h" #include "testing_helpers.h" #include "../sdt.h" @@ -34,6 +39,12 @@ noinline void usdt_trigger(void) STAP_PROBE(test, pid_filter_usdt); } +noinline void uprobe_session_recursive(int i) +{ + if (i) + uprobe_session_recursive(i - 1); +} + struct child { int go[2]; int c2p[2]; /* child -> parent channel */ @@ -125,7 +136,7 @@ static void *child_thread(void *ctx) struct child *child = ctx; int c = 0, err; - child->tid = syscall(SYS_gettid); + child->tid = sys_gettid(); /* let parent know we are ready */ err = write(child->c2p[1], &c, 1); @@ -778,7 +789,7 @@ get_link(struct uprobe_multi_consumers *skel, int link) } } -static int uprobe_attach(struct uprobe_multi_consumers *skel, int idx) +static int uprobe_attach(struct uprobe_multi_consumers *skel, int idx, unsigned long offset) { struct bpf_program *prog = get_program(skel, idx); struct bpf_link **link = get_link(skel, idx); @@ -787,15 +798,19 @@ static int uprobe_attach(struct uprobe_multi_consumers *skel, int idx) if (!prog || !link) return -1; + opts.offsets = &offset; + opts.cnt = 1; + /* - * bit/prog: 0,1 uprobe entry - * bit/prog: 2,3 uprobe return + * bit/prog: 0 uprobe entry + * bit/prog: 1 uprobe return + * bit/prog: 2 uprobe session without return + * bit/prog: 3 uprobe session with return */ - opts.retprobe = idx == 2 || idx == 3; + opts.retprobe = idx == 1; + opts.session = idx == 2 || idx == 3; - *link = bpf_program__attach_uprobe_multi(prog, 0, "/proc/self/exe", - "uprobe_consumer_test", - &opts); + *link = bpf_program__attach_uprobe_multi(prog, 0, "/proc/self/exe", NULL, &opts); if (!ASSERT_OK_PTR(*link, "bpf_program__attach_uprobe_multi")) return -1; return 0; @@ -816,7 +831,8 @@ static bool test_bit(int bit, unsigned long val) noinline int uprobe_consumer_test(struct uprobe_multi_consumers *skel, - unsigned long before, unsigned long after) + unsigned long before, unsigned long after, + unsigned long offset) { int idx; @@ -829,89 +845,174 @@ uprobe_consumer_test(struct uprobe_multi_consumers *skel, /* ... and attach all new programs in 'after' state */ for (idx = 0; idx < 4; idx++) { if (!test_bit(idx, before) && test_bit(idx, after)) { - if (!ASSERT_OK(uprobe_attach(skel, idx), "uprobe_attach_after")) + if (!ASSERT_OK(uprobe_attach(skel, idx, offset), "uprobe_attach_after")) return -1; } } return 0; } -static void consumer_test(struct uprobe_multi_consumers *skel, - unsigned long before, unsigned long after) +/* + * We generate 16 consumer_testX functions that will have uprobe installed on + * and will be called in separate threads. All function pointer are stored in + * "consumers" section and each thread will pick one function based on index. + */ + +extern const void *__start_consumers; + +#define __CONSUMER_TEST(func) \ +noinline int func(struct uprobe_multi_consumers *skel, unsigned long before, \ + unsigned long after, unsigned long offset) \ +{ \ + return uprobe_consumer_test(skel, before, after, offset); \ +} \ +void *__ ## func __used __attribute__((section("consumers"))) = (void *) func; + +#define CONSUMER_TEST(func) __CONSUMER_TEST(func) + +#define C1 CONSUMER_TEST(__PASTE(consumer_test, __COUNTER__)) +#define C4 C1 C1 C1 C1 +#define C16 C4 C4 C4 C4 + +C16 + +typedef int (*test_t)(struct uprobe_multi_consumers *, unsigned long, + unsigned long, unsigned long); + +static int consumer_test(struct uprobe_multi_consumers *skel, + unsigned long before, unsigned long after, + test_t test, unsigned long offset) { - int err, idx; + int err, idx, ret = -1; printf("consumer_test before %lu after %lu\n", before, after); /* 'before' is each, we attach uprobe for every set idx */ for (idx = 0; idx < 4; idx++) { if (test_bit(idx, before)) { - if (!ASSERT_OK(uprobe_attach(skel, idx), "uprobe_attach_before")) + if (!ASSERT_OK(uprobe_attach(skel, idx, offset), "uprobe_attach_before")) goto cleanup; } } - err = uprobe_consumer_test(skel, before, after); + err = test(skel, before, after, offset); if (!ASSERT_EQ(err, 0, "uprobe_consumer_test")) goto cleanup; for (idx = 0; idx < 4; idx++) { + bool uret_stays, uret_survives; const char *fmt = "BUG"; __u64 val = 0; - if (idx < 2) { + switch (idx) { + case 0: /* * uprobe entry * +1 if define in 'before' */ if (test_bit(idx, before)) val++; - fmt = "prog 0/1: uprobe"; - } else { + fmt = "prog 0: uprobe"; + break; + case 1: /* - * uprobe return is tricky ;-) - * - * to trigger uretprobe consumer, the uretprobe needs to be installed, - * which means one of the 'return' uprobes was alive when probe was hit: - * - * idxs: 2/3 uprobe return in 'installed' mask - * - * in addition if 'after' state removes everything that was installed in - * 'before' state, then uprobe kernel object goes away and return uprobe - * is not installed and we won't hit it even if it's in 'after' state. + * To trigger uretprobe consumer, the uretprobe under test either stayed from + * before to after (uret_stays + test_bit) or uretprobe instance survived and + * we have uretprobe active in after (uret_survives + test_bit) */ - unsigned long had_uretprobes = before & 0b1100; /* is uretprobe installed */ - unsigned long probe_preserved = before & after; /* did uprobe go away */ + uret_stays = before & after & 0b0110; + uret_survives = ((before & 0b0110) && (after & 0b0110) && (before & 0b1001)); - if (had_uretprobes && probe_preserved && test_bit(idx, after)) + if ((uret_stays || uret_survives) && test_bit(idx, after)) val++; - fmt = "idx 2/3: uretprobe"; + fmt = "prog 1: uretprobe"; + break; + case 2: + /* + * session with return + * +1 if defined in 'before' + * +1 if defined in 'after' + */ + if (test_bit(idx, before)) { + val++; + if (test_bit(idx, after)) + val++; + } + fmt = "prog 2: session with return"; + break; + case 3: + /* + * session without return + * +1 if defined in 'before' + */ + if (test_bit(idx, before)) + val++; + fmt = "prog 3: session with NO return"; + break; } - ASSERT_EQ(skel->bss->uprobe_result[idx], val, fmt); + if (!ASSERT_EQ(skel->bss->uprobe_result[idx], val, fmt)) + goto cleanup; skel->bss->uprobe_result[idx] = 0; } + ret = 0; + cleanup: for (idx = 0; idx < 4; idx++) uprobe_detach(skel, idx); + return ret; } -static void test_consumers(void) +#define CONSUMER_MAX 16 + +/* + * Each thread runs 1/16 of the load by running test for single + * 'before' number (based on thread index) and full scale of + * 'after' numbers. + */ +static void *consumer_thread(void *arg) { + unsigned long idx = (unsigned long) arg; struct uprobe_multi_consumers *skel; - int before, after; + unsigned long offset; + const void *func; + int after; skel = uprobe_multi_consumers__open_and_load(); if (!ASSERT_OK_PTR(skel, "uprobe_multi_consumers__open_and_load")) - return; + return NULL; + + func = *((&__start_consumers) + idx); + + offset = get_uprobe_offset(func); + if (!ASSERT_GE(offset, 0, "uprobe_offset")) + goto out; + + for (after = 0; after < CONSUMER_MAX; after++) + if (consumer_test(skel, idx, after, func, offset)) + goto out; + +out: + uprobe_multi_consumers__destroy(skel); + return NULL; +} + + +static void test_consumers(void) +{ + pthread_t pt[CONSUMER_MAX]; + unsigned long idx; + int err; /* * The idea of this test is to try all possible combinations of * uprobes consumers attached on single function. * - * - 2 uprobe entry consumer - * - 2 uprobe exit consumers + * - 1 uprobe entry consumer + * - 1 uprobe exit consumer + * - 1 uprobe session with return + * - 1 uprobe session without return * * The test uses 4 uprobes attached on single function, but that * translates into single uprobe with 4 consumers in kernel. @@ -919,37 +1020,38 @@ static void test_consumers(void) * The before/after values present the state of attached consumers * before and after the probed function: * - * bit/prog 0,1 : uprobe entry - * bit/prog 2,3 : uprobe return + * bit/prog 0 : uprobe entry + * bit/prog 1 : uprobe return * * For example for: * - * before = 0b0101 - * after = 0b0110 + * before = 0b01 + * after = 0b10 * * it means that before we call 'uprobe_consumer_test' we attach * uprobes defined in 'before' value: * - * - bit/prog 0: uprobe entry - * - bit/prog 2: uprobe return + * - bit/prog 1: uprobe entry * * uprobe_consumer_test is called and inside it we attach and detach * uprobes based on 'after' value: * - * - bit/prog 0: stays untouched - * - bit/prog 2: uprobe return is detached + * - bit/prog 0: is detached + * - bit/prog 1: is attached * * uprobe_consumer_test returns and we check counters values increased * by bpf programs on each uprobe to match the expected count based on * before/after bits. */ - for (before = 0; before < 16; before++) { - for (after = 0; after < 16; after++) - consumer_test(skel, before, after); + for (idx = 0; idx < CONSUMER_MAX; idx++) { + err = pthread_create(&pt[idx], NULL, consumer_thread, (void *) idx); + if (!ASSERT_OK(err, "pthread_create")) + break; } - uprobe_multi_consumers__destroy(skel); + while (idx) + pthread_join(pt[--idx], NULL); } static struct bpf_program *uprobe_multi_program(struct uprobe_multi_pid_filter *skel, int idx) @@ -1016,6 +1118,156 @@ static void test_pid_filter_process(bool clone_vm) uprobe_multi_pid_filter__destroy(skel); } +static void test_session_skel_api(void) +{ + struct uprobe_multi_session *skel = NULL; + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + struct bpf_link *link = NULL; + int err; + + skel = uprobe_multi_session__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi_session__open_and_load")) + goto cleanup; + + skel->bss->pid = getpid(); + skel->bss->user_ptr = test_data; + + err = uprobe_multi_session__attach(skel); + if (!ASSERT_OK(err, "uprobe_multi_session__attach")) + goto cleanup; + + /* trigger all probes */ + skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1; + skel->bss->uprobe_multi_func_2_addr = (__u64) uprobe_multi_func_2; + skel->bss->uprobe_multi_func_3_addr = (__u64) uprobe_multi_func_3; + + uprobe_multi_func_1(); + uprobe_multi_func_2(); + uprobe_multi_func_3(); + + /* + * We expect 2 for uprobe_multi_func_2 because it runs both entry/return probe, + * uprobe_multi_func_[13] run just the entry probe. All expected numbers are + * doubled, because we run extra test for sleepable session. + */ + ASSERT_EQ(skel->bss->uprobe_session_result[0], 2, "uprobe_multi_func_1_result"); + ASSERT_EQ(skel->bss->uprobe_session_result[1], 4, "uprobe_multi_func_2_result"); + ASSERT_EQ(skel->bss->uprobe_session_result[2], 2, "uprobe_multi_func_3_result"); + + /* We expect increase in 3 entry and 1 return session calls -> 4 */ + ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 4, "uprobe_multi_sleep_result"); + +cleanup: + bpf_link__destroy(link); + uprobe_multi_session__destroy(skel); +} + +static void test_session_single_skel_api(void) +{ + struct uprobe_multi_session_single *skel = NULL; + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); + int err; + + skel = uprobe_multi_session_single__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi_session_single__open_and_load")) + goto cleanup; + + skel->bss->pid = getpid(); + + err = uprobe_multi_session_single__attach(skel); + if (!ASSERT_OK(err, "uprobe_multi_session_single__attach")) + goto cleanup; + + uprobe_multi_func_1(); + + /* + * We expect consumer 0 and 2 to trigger just entry handler (value 1) + * and consumer 1 to hit both (value 2). + */ + ASSERT_EQ(skel->bss->uprobe_session_result[0], 1, "uprobe_session_result_0"); + ASSERT_EQ(skel->bss->uprobe_session_result[1], 2, "uprobe_session_result_1"); + ASSERT_EQ(skel->bss->uprobe_session_result[2], 1, "uprobe_session_result_2"); + +cleanup: + uprobe_multi_session_single__destroy(skel); +} + +static void test_session_cookie_skel_api(void) +{ + struct uprobe_multi_session_cookie *skel = NULL; + int err; + + skel = uprobe_multi_session_cookie__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi_session_cookie__open_and_load")) + goto cleanup; + + skel->bss->pid = getpid(); + + err = uprobe_multi_session_cookie__attach(skel); + if (!ASSERT_OK(err, "uprobe_multi_session_cookie__attach")) + goto cleanup; + + /* trigger all probes */ + uprobe_multi_func_1(); + uprobe_multi_func_2(); + uprobe_multi_func_3(); + + ASSERT_EQ(skel->bss->test_uprobe_1_result, 1, "test_uprobe_1_result"); + ASSERT_EQ(skel->bss->test_uprobe_2_result, 2, "test_uprobe_2_result"); + ASSERT_EQ(skel->bss->test_uprobe_3_result, 3, "test_uprobe_3_result"); + +cleanup: + uprobe_multi_session_cookie__destroy(skel); +} + +static void test_session_recursive_skel_api(void) +{ + struct uprobe_multi_session_recursive *skel = NULL; + int i, err; + + skel = uprobe_multi_session_recursive__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi_session_recursive__open_and_load")) + goto cleanup; + + skel->bss->pid = getpid(); + + err = uprobe_multi_session_recursive__attach(skel); + if (!ASSERT_OK(err, "uprobe_multi_session_recursive__attach")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(skel->bss->test_uprobe_cookie_entry); i++) + skel->bss->test_uprobe_cookie_entry[i] = i + 1; + + uprobe_session_recursive(5); + + /* + * entry uprobe: + * uprobe_session_recursive(5) { *cookie = 1, return 0 + * uprobe_session_recursive(4) { *cookie = 2, return 1 + * uprobe_session_recursive(3) { *cookie = 3, return 0 + * uprobe_session_recursive(2) { *cookie = 4, return 1 + * uprobe_session_recursive(1) { *cookie = 5, return 0 + * uprobe_session_recursive(0) { *cookie = 6, return 1 + * return uprobe: + * } i = 0 not executed + * } i = 1 test_uprobe_cookie_return[0] = 5 + * } i = 2 not executed + * } i = 3 test_uprobe_cookie_return[1] = 3 + * } i = 4 not executed + * } i = 5 test_uprobe_cookie_return[2] = 1 + */ + + ASSERT_EQ(skel->bss->idx_entry, 6, "idx_entry"); + ASSERT_EQ(skel->bss->idx_return, 3, "idx_return"); + + ASSERT_EQ(skel->bss->test_uprobe_cookie_return[0], 5, "test_uprobe_cookie_return[0]"); + ASSERT_EQ(skel->bss->test_uprobe_cookie_return[1], 3, "test_uprobe_cookie_return[1]"); + ASSERT_EQ(skel->bss->test_uprobe_cookie_return[2], 1, "test_uprobe_cookie_return[2]"); + +cleanup: + uprobe_multi_session_recursive__destroy(skel); +} + static void test_bench_attach_uprobe(void) { long attach_start_ns = 0, attach_end_ns = 0; @@ -1112,4 +1364,13 @@ void test_uprobe_multi_test(void) test_pid_filter_process(false); if (test__start_subtest("filter_clone_vm")) test_pid_filter_process(true); + if (test__start_subtest("session")) + test_session_skel_api(); + if (test__start_subtest("session_single")) + test_session_single_skel_api(); + if (test__start_subtest("session_cookie")) + test_session_cookie_skel_api(); + if (test__start_subtest("session_cookie_recursive")) + test_session_recursive_skel_api(); + RUN_TESTS(uprobe_multi_verifier); } diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index e26b5150fc43..d9f65adb456b 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -44,6 +44,7 @@ #include "verifier_ld_ind.skel.h" #include "verifier_ldsx.skel.h" #include "verifier_leak_ptr.skel.h" +#include "verifier_linked_scalars.skel.h" #include "verifier_loops1.skel.h" #include "verifier_lwt.skel.h" #include "verifier_map_in_map.skel.h" @@ -53,12 +54,14 @@ #include "verifier_masking.skel.h" #include "verifier_meta_access.skel.h" #include "verifier_movsx.skel.h" +#include "verifier_mtu.skel.h" #include "verifier_netfilter_ctx.skel.h" #include "verifier_netfilter_retcode.skel.h" #include "verifier_bpf_fastcall.skel.h" #include "verifier_or_jmp32_k.skel.h" #include "verifier_precision.skel.h" #include "verifier_prevent_map_lookup.skel.h" +#include "verifier_private_stack.skel.h" #include "verifier_raw_stack.skel.h" #include "verifier_raw_tp_writable.skel.h" #include "verifier_reg_equal.skel.h" @@ -170,6 +173,7 @@ void test_verifier_jit_convergence(void) { RUN(verifier_jit_convergence); } void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); } void test_verifier_ldsx(void) { RUN(verifier_ldsx); } void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); } +void test_verifier_linked_scalars(void) { RUN(verifier_linked_scalars); } void test_verifier_loops1(void) { RUN(verifier_loops1); } void test_verifier_lwt(void) { RUN(verifier_lwt); } void test_verifier_map_in_map(void) { RUN(verifier_map_in_map); } @@ -185,6 +189,7 @@ void test_verifier_bpf_fastcall(void) { RUN(verifier_bpf_fastcall); } void test_verifier_or_jmp32_k(void) { RUN(verifier_or_jmp32_k); } void test_verifier_precision(void) { RUN(verifier_precision); } void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); } +void test_verifier_private_stack(void) { RUN(verifier_private_stack); } void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); } void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); } void test_verifier_reg_equal(void) { RUN(verifier_reg_equal); } @@ -221,6 +226,24 @@ void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_pack void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); } void test_verifier_lsm(void) { RUN(verifier_lsm); } +void test_verifier_mtu(void) +{ + __u64 caps = 0; + int ret; + + /* In case CAP_BPF and CAP_PERFMON is not set */ + ret = cap_enable_effective(1ULL << CAP_BPF | 1ULL << CAP_NET_ADMIN, &caps); + if (!ASSERT_OK(ret, "set_cap_bpf_cap_net_admin")) + return; + ret = cap_disable_effective(1ULL << CAP_SYS_ADMIN | 1ULL << CAP_PERFMON, NULL); + if (!ASSERT_OK(ret, "disable_cap_sys_admin")) + goto restore_cap; + RUN(verifier_mtu); +restore_cap: + if (caps) + cap_enable_effective(caps, NULL); +} + static int init_test_val_map(struct bpf_object *obj, char *map_name) { struct test_val value = { diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c index 481626a875d1..c7f74f068e78 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c @@ -2,35 +2,41 @@ #include <uapi/linux/bpf.h> #include <linux/if_link.h> #include <test_progs.h> +#include <network_helpers.h> #include "test_xdp_with_cpumap_frags_helpers.skel.h" #include "test_xdp_with_cpumap_helpers.skel.h" #define IFINDEX_LO 1 +#define TEST_NS "cpu_attach_ns" static void test_xdp_with_cpumap_helpers(void) { - struct test_xdp_with_cpumap_helpers *skel; + struct test_xdp_with_cpumap_helpers *skel = NULL; struct bpf_prog_info info = {}; __u32 len = sizeof(info); struct bpf_cpumap_val val = { .qsize = 192, }; - int err, prog_fd, map_fd; + int err, prog_fd, prog_redir_fd, map_fd; + struct nstoken *nstoken = NULL; __u32 idx = 0; + SYS(out_close, "ip netns add %s", TEST_NS); + nstoken = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto out_close; + SYS(out_close, "ip link set dev lo up"); + skel = test_xdp_with_cpumap_helpers__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load")) return; - prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog); - err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL); + prog_redir_fd = bpf_program__fd(skel->progs.xdp_redir_prog); + err = bpf_xdp_attach(IFINDEX_LO, prog_redir_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP")) goto out_close; - err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); - ASSERT_OK(err, "XDP program detach"); - prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm); map_fd = bpf_map__fd(skel->maps.cpu_map); err = bpf_prog_get_info_by_fd(prog_fd, &info, &len); @@ -45,6 +51,26 @@ static void test_xdp_with_cpumap_helpers(void) ASSERT_OK(err, "Read cpumap entry"); ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id"); + /* send a packet to trigger any potential bugs in there */ + char data[10] = {}; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data, + .data_size_in = 10, + .flags = BPF_F_TEST_XDP_LIVE_FRAMES, + .repeat = 1, + ); + err = bpf_prog_test_run_opts(prog_redir_fd, &opts); + ASSERT_OK(err, "XDP test run"); + + /* wait for the packets to be flushed, then check that redirect has been + * performed + */ + kern_sync_rcu(); + ASSERT_NEQ(skel->bss->redirect_count, 0, "redirected packets"); + + err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); + ASSERT_OK(err, "XDP program detach"); + /* can not attach BPF_XDP_CPUMAP program to a device */ err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program")) @@ -65,6 +91,8 @@ static void test_xdp_with_cpumap_helpers(void) ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry"); out_close: + close_netns(nstoken); + SYS_NOFAIL("ip netns del %s", TEST_NS); test_xdp_with_cpumap_helpers__destroy(skel); } @@ -111,7 +139,7 @@ out_close: test_xdp_with_cpumap_frags_helpers__destroy(skel); } -void serial_test_xdp_cpumap_attach(void) +void test_xdp_cpumap_attach(void) { if (test__start_subtest("CPUMAP with programs in entries")) test_xdp_with_cpumap_helpers(); diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c index ce6812558287..27ffed17d4be 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c @@ -1,6 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 +#include <arpa/inet.h> #include <uapi/linux/bpf.h> #include <linux/if_link.h> +#include <network_helpers.h> +#include <net/if.h> #include <test_progs.h> #include "test_xdp_devmap_helpers.skel.h" @@ -8,31 +11,36 @@ #include "test_xdp_with_devmap_helpers.skel.h" #define IFINDEX_LO 1 +#define TEST_NS "devmap_attach_ns" static void test_xdp_with_devmap_helpers(void) { - struct test_xdp_with_devmap_helpers *skel; + struct test_xdp_with_devmap_helpers *skel = NULL; struct bpf_prog_info info = {}; struct bpf_devmap_val val = { .ifindex = IFINDEX_LO, }; __u32 len = sizeof(info); - int err, dm_fd, map_fd; + int err, dm_fd, dm_fd_redir, map_fd; + struct nstoken *nstoken = NULL; + char data[10] = {}; __u32 idx = 0; + SYS(out_close, "ip netns add %s", TEST_NS); + nstoken = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto out_close; + SYS(out_close, "ip link set dev lo up"); skel = test_xdp_with_devmap_helpers__open_and_load(); if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load")) - return; + goto out_close; - dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog); - err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL); + dm_fd_redir = bpf_program__fd(skel->progs.xdp_redir_prog); + err = bpf_xdp_attach(IFINDEX_LO, dm_fd_redir, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap")) goto out_close; - err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); - ASSERT_OK(err, "XDP program detach"); - dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm); map_fd = bpf_map__fd(skel->maps.dm_ports); err = bpf_prog_get_info_by_fd(dm_fd, &info, &len); @@ -47,6 +55,22 @@ static void test_xdp_with_devmap_helpers(void) ASSERT_OK(err, "Read devmap entry"); ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id"); + /* send a packet to trigger any potential bugs in there */ + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data, + .data_size_in = 10, + .flags = BPF_F_TEST_XDP_LIVE_FRAMES, + .repeat = 1, + ); + err = bpf_prog_test_run_opts(dm_fd_redir, &opts); + ASSERT_OK(err, "XDP test run"); + + /* wait for the packets to be flushed */ + kern_sync_rcu(); + + err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL); + ASSERT_OK(err, "XDP program detach"); + /* can not attach BPF_XDP_DEVMAP program to a device */ err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL); if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program")) @@ -67,6 +91,8 @@ static void test_xdp_with_devmap_helpers(void) ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to devmap entry"); out_close: + close_netns(nstoken); + SYS_NOFAIL("ip netns del %s", TEST_NS); test_xdp_with_devmap_helpers__destroy(skel); } @@ -124,6 +150,86 @@ out_close: test_xdp_with_devmap_frags_helpers__destroy(skel); } +static void test_xdp_with_devmap_helpers_veth(void) +{ + struct test_xdp_with_devmap_helpers *skel = NULL; + struct bpf_prog_info info = {}; + struct bpf_devmap_val val = {}; + struct nstoken *nstoken = NULL; + __u32 len = sizeof(info); + int err, dm_fd, dm_fd_redir, map_fd, ifindex_dst; + char data[10] = {}; + __u32 idx = 0; + + SYS(out_close, "ip netns add %s", TEST_NS); + nstoken = open_netns(TEST_NS); + if (!ASSERT_OK_PTR(nstoken, "open_netns")) + goto out_close; + + SYS(out_close, "ip link add veth_src type veth peer name veth_dst"); + SYS(out_close, "ip link set dev veth_src up"); + SYS(out_close, "ip link set dev veth_dst up"); + + val.ifindex = if_nametoindex("veth_src"); + ifindex_dst = if_nametoindex("veth_dst"); + if (!ASSERT_NEQ(val.ifindex, 0, "val.ifindex") || + !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst")) + goto out_close; + + skel = test_xdp_with_devmap_helpers__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load")) + goto out_close; + + dm_fd_redir = bpf_program__fd(skel->progs.xdp_redir_prog); + err = bpf_xdp_attach(val.ifindex, dm_fd_redir, XDP_FLAGS_DRV_MODE, NULL); + if (!ASSERT_OK(err, "Attach of program with 8-byte devmap")) + goto out_close; + + dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm); + map_fd = bpf_map__fd(skel->maps.dm_ports); + err = bpf_prog_get_info_by_fd(dm_fd, &info, &len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) + goto out_close; + + val.bpf_prog.fd = dm_fd; + err = bpf_map_update_elem(map_fd, &idx, &val, 0); + ASSERT_OK(err, "Add program to devmap entry"); + + err = bpf_map_lookup_elem(map_fd, &idx, &val); + ASSERT_OK(err, "Read devmap entry"); + ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id"); + + /* attach dummy to other side to enable reception */ + dm_fd = bpf_program__fd(skel->progs.xdp_dummy_prog); + err = bpf_xdp_attach(ifindex_dst, dm_fd, XDP_FLAGS_DRV_MODE, NULL); + if (!ASSERT_OK(err, "Attach of dummy XDP")) + goto out_close; + + /* send a packet to trigger any potential bugs in there */ + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &data, + .data_size_in = 10, + .flags = BPF_F_TEST_XDP_LIVE_FRAMES, + .repeat = 1, + ); + err = bpf_prog_test_run_opts(dm_fd_redir, &opts); + ASSERT_OK(err, "XDP test run"); + + /* wait for the packets to be flushed */ + kern_sync_rcu(); + + err = bpf_xdp_detach(val.ifindex, XDP_FLAGS_DRV_MODE, NULL); + ASSERT_OK(err, "XDP program detach"); + + err = bpf_xdp_detach(ifindex_dst, XDP_FLAGS_DRV_MODE, NULL); + ASSERT_OK(err, "XDP program detach"); + +out_close: + close_netns(nstoken); + SYS_NOFAIL("ip netns del %s", TEST_NS); + test_xdp_with_devmap_helpers__destroy(skel); +} + void serial_test_xdp_devmap_attach(void) { if (test__start_subtest("DEVMAP with programs in entries")) @@ -134,4 +240,7 @@ void serial_test_xdp_devmap_attach(void) if (test__start_subtest("Verifier check of DEVMAP programs")) test_neg_xdp_devmap_helpers(); + + if (test__start_subtest("DEVMAP with programs in entries on veth")) + test_xdp_with_devmap_helpers_veth(); } diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h deleted file mode 100644 index c41ee80533ca..000000000000 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ /dev/null @@ -1,167 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used -#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used -#define bpf_iter__netlink bpf_iter__netlink___not_used -#define bpf_iter__task bpf_iter__task___not_used -#define bpf_iter__task_file bpf_iter__task_file___not_used -#define bpf_iter__task_vma bpf_iter__task_vma___not_used -#define bpf_iter__tcp bpf_iter__tcp___not_used -#define tcp6_sock tcp6_sock___not_used -#define bpf_iter__udp bpf_iter__udp___not_used -#define udp6_sock udp6_sock___not_used -#define bpf_iter__unix bpf_iter__unix___not_used -#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used -#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used -#define bpf_iter__sockmap bpf_iter__sockmap___not_used -#define bpf_iter__bpf_link bpf_iter__bpf_link___not_used -#define bpf_iter__cgroup bpf_iter__cgroup___not_used -#define btf_ptr btf_ptr___not_used -#define BTF_F_COMPACT BTF_F_COMPACT___not_used -#define BTF_F_NONAME BTF_F_NONAME___not_used -#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used -#define BTF_F_ZERO BTF_F_ZERO___not_used -#define bpf_iter__ksym bpf_iter__ksym___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__bpf_map -#undef bpf_iter__ipv6_route -#undef bpf_iter__netlink -#undef bpf_iter__task -#undef bpf_iter__task_file -#undef bpf_iter__task_vma -#undef bpf_iter__tcp -#undef tcp6_sock -#undef bpf_iter__udp -#undef udp6_sock -#undef bpf_iter__unix -#undef bpf_iter__bpf_map_elem -#undef bpf_iter__bpf_sk_storage_map -#undef bpf_iter__sockmap -#undef bpf_iter__bpf_link -#undef bpf_iter__cgroup -#undef btf_ptr -#undef BTF_F_COMPACT -#undef BTF_F_NONAME -#undef BTF_F_PTR_RAW -#undef BTF_F_ZERO -#undef bpf_iter__ksym - -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__ipv6_route { - struct bpf_iter_meta *meta; - struct fib6_info *rt; -} __attribute__((preserve_access_index)); - -struct bpf_iter__netlink { - struct bpf_iter_meta *meta; - struct netlink_sock *sk; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task { - struct bpf_iter_meta *meta; - struct task_struct *task; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task_file { - struct bpf_iter_meta *meta; - struct task_struct *task; - __u32 fd; - struct file *file; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task_vma { - struct bpf_iter_meta *meta; - struct task_struct *task; - struct vm_area_struct *vma; -} __attribute__((preserve_access_index)); - -struct bpf_iter__bpf_map { - struct bpf_iter_meta *meta; - struct bpf_map *map; -} __attribute__((preserve_access_index)); - -struct bpf_iter__tcp { - struct bpf_iter_meta *meta; - struct sock_common *sk_common; - uid_t uid; -} __attribute__((preserve_access_index)); - -struct tcp6_sock { - struct tcp_sock tcp; - struct ipv6_pinfo inet6; -} __attribute__((preserve_access_index)); - -struct bpf_iter__udp { - struct bpf_iter_meta *meta; - struct udp_sock *udp_sk; - uid_t uid __attribute__((aligned(8))); - int bucket __attribute__((aligned(8))); -} __attribute__((preserve_access_index)); - -struct udp6_sock { - struct udp_sock udp; - struct ipv6_pinfo inet6; -} __attribute__((preserve_access_index)); - -struct bpf_iter__unix { - struct bpf_iter_meta *meta; - struct unix_sock *unix_sk; - uid_t uid; -} __attribute__((preserve_access_index)); - -struct bpf_iter__bpf_map_elem { - struct bpf_iter_meta *meta; - struct bpf_map *map; - void *key; - void *value; -}; - -struct bpf_iter__bpf_sk_storage_map { - struct bpf_iter_meta *meta; - struct bpf_map *map; - struct sock *sk; - void *value; -}; - -struct bpf_iter__sockmap { - struct bpf_iter_meta *meta; - struct bpf_map *map; - void *key; - struct sock *sk; -}; - -struct bpf_iter__bpf_link { - struct bpf_iter_meta *meta; - struct bpf_link *link; -}; - -struct bpf_iter__cgroup { - struct bpf_iter_meta *meta; - struct cgroup *cgroup; -} __attribute__((preserve_access_index)); - -struct btf_ptr { - void *ptr; - __u32 type_id; - __u32 flags; -}; - -enum { - BTF_F_COMPACT = (1ULL << 0), - BTF_F_NONAME = (1ULL << 1), - BTF_F_PTR_RAW = (1ULL << 2), - BTF_F_ZERO = (1ULL << 3), -}; - -struct bpf_iter__ksym { - struct bpf_iter_meta *meta; - struct kallsym_iter *ksym; -}; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c index 564835ba7d51..19710cc0f250 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c index d7a69217fb68..f47da665f7e0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c index e1af2f8f75a6..7b69e1887705 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022 Red Hat, Inc. */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c index 6c39e86b666f..c868ffb8080f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c index 9f0e0705b2bf..9fdea8cd4c6f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c index 5014a17d6c02..aa529f76c7fc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c index 6cecab2b32ba..e88dab196e0f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Google LLC. */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c index c7b8e006b171..eb9642923e1c 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c index 784a610ce039..73a5cf3ba3d3 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c index 521267818f4d..3e725b1fce37 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022, Oracle and/or its affiliates. */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c index a28e51e2dcee..00b2ceae81fb 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c index ec7f91850dec..774d4dbe8189 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2021 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c index eafc877ea460..d92631ec6161 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright Amazon.com Inc. or its affiliates. */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <limits.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c index f3af0e30cead..317fe49760cc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Cloudflare */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c index bca8b889cb10..ef2f7c8d9373 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020, Oracle and/or its affiliates. */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c index b0255080662d..959a8d899eaf 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c index 442f4ca39fd7..f5a309455490 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c index 423b39e60b6f..d64ba7ddaed5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c index 6cbb3393f243..bc10c4e4b4fa 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c index 92267abb462f..d22449c69363 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c index 943f7bba180e..8b072666f9d9 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c index 2a4647f20c46..6b17e7e86a48 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c index dbf61c44acac..56177508798f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c index e3a7575e81d2..9d8b7310d2c2 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c index 1c7304f56b1e..b150bd468824 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h index d5e3df66ad9a..6a4c50497c5e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c index cf0c485b1ed7..ffbd4b116d17 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c index 5031e21c433f..47ff7754f4fd 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c index e6aefae38894..fea275df9e22 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright Amazon.com Inc. or its affiliates. */ -#include "bpf_iter.h" +#include <vmlinux.h> #include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c b/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c index ee7455d2623a..174298e122d3 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/cgroup_iter.c b/tools/testing/selftests/bpf/progs/cgroup_iter.c index de03997322a7..f30841997a8d 100644 --- a/tools/testing/selftests/bpf/progs/cgroup_iter.c +++ b/tools/testing/selftests/bpf/progs/cgroup_iter.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022 Google */ - -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c index 5e282c16eadc..a2de95f85648 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */ - -#include "bpf_iter.h" +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include "bpf_misc.h" diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index b979e91f55f0..4ece7873ba60 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -7,6 +7,11 @@ #include "errno.h" #include <stdbool.h> +/* Should use BTF_FIELDS_MAX, but it is not always available in vmlinux.h, + * so use the hard-coded number as a workaround. + */ +#define CPUMASK_KPTR_FIELDS_MAX 11 + int err; #define private(name) SEC(".bss." #name) __attribute__((aligned(8))) diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c index a988d2823b52..b40b52548ffb 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_failure.c +++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c @@ -10,6 +10,21 @@ char _license[] SEC("license") = "GPL"; +struct kptr_nested_array_2 { + struct bpf_cpumask __kptr * mask; +}; + +struct kptr_nested_array_1 { + /* Make btf_parse_fields() in map_create() return -E2BIG */ + struct kptr_nested_array_2 d_2[CPUMASK_KPTR_FIELDS_MAX + 1]; +}; + +struct kptr_nested_array { + struct kptr_nested_array_1 d_1; +}; + +private(MASK_NESTED) static struct kptr_nested_array global_mask_nested_arr; + /* Prototype for all of the program trace events below: * * TRACE_EVENT(task_newtask, @@ -187,3 +202,23 @@ int BPF_PROG(test_global_mask_rcu_no_null_check, struct task_struct *task, u64 c return 0; } + +SEC("tp_btf/task_newtask") +__failure __msg("has no valid kptr") +int BPF_PROG(test_invalid_nested_array, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local, *prev; + + local = create_cpumask(); + if (!local) + return 0; + + prev = bpf_kptr_xchg(&global_mask_nested_arr.d_1.d_2[CPUMASK_KPTR_FIELDS_MAX].mask, local); + if (prev) { + bpf_cpumask_release(prev); + err = 3; + return 0; + } + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index fd8106831c32..80ee469b0b60 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -31,11 +31,59 @@ struct kptr_nested_deep { struct kptr_nested_pair ptr_pairs[3]; }; +struct kptr_nested_deep_array_1_2 { + int dummy; + struct bpf_cpumask __kptr * mask[CPUMASK_KPTR_FIELDS_MAX]; +}; + +struct kptr_nested_deep_array_1_1 { + int dummy; + struct kptr_nested_deep_array_1_2 d_2; +}; + +struct kptr_nested_deep_array_1 { + long dummy; + struct kptr_nested_deep_array_1_1 d_1; +}; + +struct kptr_nested_deep_array_2_2 { + long dummy[2]; + struct bpf_cpumask __kptr * mask; +}; + +struct kptr_nested_deep_array_2_1 { + int dummy; + struct kptr_nested_deep_array_2_2 d_2[CPUMASK_KPTR_FIELDS_MAX]; +}; + +struct kptr_nested_deep_array_2 { + long dummy; + struct kptr_nested_deep_array_2_1 d_1; +}; + +struct kptr_nested_deep_array_3_2 { + long dummy[2]; + struct bpf_cpumask __kptr * mask; +}; + +struct kptr_nested_deep_array_3_1 { + int dummy; + struct kptr_nested_deep_array_3_2 d_2; +}; + +struct kptr_nested_deep_array_3 { + long dummy; + struct kptr_nested_deep_array_3_1 d_1[CPUMASK_KPTR_FIELDS_MAX]; +}; + private(MASK) static struct bpf_cpumask __kptr * global_mask_array[2]; private(MASK) static struct bpf_cpumask __kptr * global_mask_array_l2[2][1]; private(MASK) static struct bpf_cpumask __kptr * global_mask_array_one[1]; private(MASK) static struct kptr_nested global_mask_nested[2]; private(MASK_DEEP) static struct kptr_nested_deep global_mask_nested_deep; +private(MASK_1) static struct kptr_nested_deep_array_1 global_mask_nested_deep_array_1; +private(MASK_2) static struct kptr_nested_deep_array_2 global_mask_nested_deep_array_2; +private(MASK_3) static struct kptr_nested_deep_array_3 global_mask_nested_deep_array_3; static bool is_test_task(void) { @@ -543,12 +591,21 @@ static int _global_mask_array_rcu(struct bpf_cpumask **mask0, goto err_exit; } - /* [<mask 0>, NULL] */ - if (!*mask0 || *mask1) { + /* [<mask 0>, *] */ + if (!*mask0) { err = 2; goto err_exit; } + if (!mask1) + goto err_exit; + + /* [*, NULL] */ + if (*mask1) { + err = 3; + goto err_exit; + } + local = create_cpumask(); if (!local) { err = 9; @@ -632,6 +689,23 @@ int BPF_PROG(test_global_mask_nested_deep_rcu, struct task_struct *task, u64 clo } SEC("tp_btf/task_newtask") +int BPF_PROG(test_global_mask_nested_deep_array_rcu, struct task_struct *task, u64 clone_flags) +{ + int i; + + for (i = 0; i < CPUMASK_KPTR_FIELDS_MAX; i++) + _global_mask_array_rcu(&global_mask_nested_deep_array_1.d_1.d_2.mask[i], NULL); + + for (i = 0; i < CPUMASK_KPTR_FIELDS_MAX; i++) + _global_mask_array_rcu(&global_mask_nested_deep_array_2.d_1.d_2[i].mask, NULL); + + for (i = 0; i < CPUMASK_KPTR_FIELDS_MAX; i++) + _global_mask_array_rcu(&global_mask_nested_deep_array_3.d_1[i].d_2.mask, NULL); + + return 0; +} + +SEC("tp_btf/task_newtask") int BPF_PROG(test_cpumask_weight, struct task_struct *task, u64 clone_flags) { struct bpf_cpumask *local; diff --git a/tools/testing/selftests/bpf/progs/csum_diff_test.c b/tools/testing/selftests/bpf/progs/csum_diff_test.c new file mode 100644 index 000000000000..9438f1773a58 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/csum_diff_test.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates */ +#include <linux/types.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#define BUFF_SZ 512 + +/* Will be updated by benchmark before program loading */ +char to_buff[BUFF_SZ]; +const volatile unsigned int to_buff_len = 0; +char from_buff[BUFF_SZ]; +const volatile unsigned int from_buff_len = 0; +unsigned short seed = 0; + +short result; + +char _license[] SEC("license") = "GPL"; + +SEC("tc") +int compute_checksum(void *ctx) +{ + int to_len_half = to_buff_len / 2; + int from_len_half = from_buff_len / 2; + short result2; + + /* Calculate checksum in one go */ + result2 = bpf_csum_diff((void *)from_buff, from_buff_len, + (void *)to_buff, to_buff_len, seed); + + /* Calculate checksum by concatenating bpf_csum_diff()*/ + result = bpf_csum_diff((void *)from_buff, from_buff_len - from_len_half, + (void *)to_buff, to_buff_len - to_len_half, seed); + + result = bpf_csum_diff((void *)from_buff + (from_buff_len - from_len_half), from_len_half, + (void *)to_buff + (to_buff_len - to_len_half), to_len_half, result); + + result = (result == result2) ? result : 0; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 9cceb6521143..fe0f3fa5aab6 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -131,7 +131,7 @@ int reject_subprog_with_lock(void *ctx) } SEC("?tc") -__failure __msg("bpf_rcu_read_unlock is missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_rcu_read_lock-ed region") int reject_with_rcu_read_lock(void *ctx) { bpf_rcu_read_lock(); @@ -147,7 +147,7 @@ __noinline static int throwing_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("bpf_rcu_read_unlock is missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_rcu_read_lock-ed region") int reject_subprog_with_rcu_read_lock(void *ctx) { bpf_rcu_read_lock(); diff --git a/tools/testing/selftests/bpf/progs/kfunc_module_order.c b/tools/testing/selftests/bpf/progs/kfunc_module_order.c new file mode 100644 index 000000000000..76003d04c95f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kfunc_module_order.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +extern int bpf_test_modorder_retx(void) __ksym; +extern int bpf_test_modorder_rety(void) __ksym; + +SEC("classifier") +int call_kfunc_xy(struct __sk_buff *skb) +{ + int ret1, ret2; + + ret1 = bpf_test_modorder_retx(); + ret2 = bpf_test_modorder_rety(); + + return ret1 == 'x' && ret2 == 'y' ? 0 : -1; +} + +SEC("classifier") +int call_kfunc_yx(struct __sk_buff *skb) +{ + int ret1, ret2; + + ret1 = bpf_test_modorder_rety(); + ret2 = bpf_test_modorder_retx(); + + return ret1 == 'y' && ret2 == 'x' ? 0 : -1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/kmem_cache_iter.c b/tools/testing/selftests/bpf/progs/kmem_cache_iter.c new file mode 100644 index 000000000000..b9c8f9457492 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kmem_cache_iter.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Google */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_experimental.h" + +char _license[] SEC("license") = "GPL"; + +#define SLAB_NAME_MAX 32 + +struct kmem_cache_result { + char name[SLAB_NAME_MAX]; + long obj_size; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(void *)); + __uint(value_size, SLAB_NAME_MAX); + __uint(max_entries, 1); +} slab_hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(struct kmem_cache_result)); + __uint(max_entries, 1024); +} slab_result SEC(".maps"); + +extern struct kmem_cache *bpf_get_kmem_cache(u64 addr) __ksym; + +/* Result, will be checked by userspace */ +int task_struct_found; +int kmem_cache_seen; +int open_coded_seen; + +SEC("iter/kmem_cache") +int slab_info_collector(struct bpf_iter__kmem_cache *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct kmem_cache *s = ctx->s; + struct kmem_cache_result *r; + int idx; + + if (s) { + /* To make sure if the slab_iter implements the seq interface + * properly and it's also useful for debugging. + */ + BPF_SEQ_PRINTF(seq, "%s: %u\n", s->name, s->size); + + idx = kmem_cache_seen; + r = bpf_map_lookup_elem(&slab_result, &idx); + if (r == NULL) + return 0; + + kmem_cache_seen++; + + /* Save name and size to match /proc/slabinfo */ + bpf_probe_read_kernel_str(r->name, sizeof(r->name), s->name); + r->obj_size = s->size; + + if (!bpf_strncmp(r->name, 11, "task_struct")) + bpf_map_update_elem(&slab_hash, &s, r->name, BPF_NOEXIST); + } + + return 0; +} + +SEC("raw_tp/bpf_test_finish") +int BPF_PROG(check_task_struct) +{ + u64 curr = bpf_get_current_task(); + struct kmem_cache *s; + char *name; + + s = bpf_get_kmem_cache(curr); + if (s == NULL) { + task_struct_found = -1; + return 0; + } + name = bpf_map_lookup_elem(&slab_hash, &s); + if (name && !bpf_strncmp(name, 11, "task_struct")) + task_struct_found = 1; + else + task_struct_found = -2; + return 0; +} + +SEC("syscall") +int open_coded_iter(const void *ctx) +{ + struct kmem_cache *s; + + bpf_for_each(kmem_cache, s) { + struct kmem_cache_result *r; + + r = bpf_map_lookup_elem(&slab_result, &open_coded_seen); + if (!r) + break; + + if (r->obj_size != s->size) + break; + + open_coded_seen++; + } + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_verifier.c b/tools/testing/selftests/bpf/progs/kprobe_multi_verifier.c new file mode 100644 index 000000000000..288577e81deb --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kprobe_multi_verifier.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/usdt.bpf.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + + +SEC("kprobe.session") +__success +int kprobe_session_return_0(struct pt_regs *ctx) +{ + return 0; +} + +SEC("kprobe.session") +__success +int kprobe_session_return_1(struct pt_regs *ctx) +{ + return 1; +} + +SEC("kprobe.session") +__failure +__msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]") +int kprobe_session_return_2(struct pt_regs *ctx) +{ + return 2; +} diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c index cc79dddac182..049a1f78de3f 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs1.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c @@ -63,6 +63,8 @@ extern int set_output_val2(int x); /* here we'll force set_output_ctx2() to be __hidden in the final obj file */ __hidden extern void set_output_ctx2(__u64 *ctx); +void *bpf_cast_to_kern_ctx(void *obj) __ksym; + SEC("?raw_tp/sys_enter") int BPF_PROG(handler1, struct pt_regs *regs, long id) { @@ -86,4 +88,10 @@ int BPF_PROG(handler1, struct pt_regs *regs, long id) return 0; } +/* Generate BTF FUNC record and test linking with duplicate extern functions */ +void kfunc_gen1(void) +{ + bpf_cast_to_kern_ctx(0); +} + char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c index 942cc5526ddf..96850759fd8d 100644 --- a/tools/testing/selftests/bpf/progs/linked_funcs2.c +++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c @@ -63,6 +63,8 @@ extern int set_output_val1(int x); /* here we'll force set_output_ctx1() to be __hidden in the final obj file */ __hidden extern void set_output_ctx1(__u64 *ctx); +void *bpf_cast_to_kern_ctx(void *obj) __ksym; + SEC("?raw_tp/sys_enter") int BPF_PROG(handler2, struct pt_regs *regs, long id) { @@ -86,4 +88,10 @@ int BPF_PROG(handler2, struct pt_regs *regs, long id) return 0; } +/* Generate BTF FUNC record and test linking with duplicate extern functions */ +void kfunc_gen2(void) +{ + bpf_cast_to_kern_ctx(0); +} + char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/mptcp_bpf.h b/tools/testing/selftests/bpf/progs/mptcp_bpf.h new file mode 100644 index 000000000000..3b188ccdcc40 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_bpf.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __MPTCP_BPF_H__ +#define __MPTCP_BPF_H__ + +#include "bpf_experimental.h" + +/* list helpers from include/linux/list.h */ +static inline int list_is_head(const struct list_head *list, + const struct list_head *head) +{ + return list == head; +} + +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +#define list_first_entry(ptr, type, member) \ + list_entry((ptr)->next, type, member) + +#define list_next_entry(pos, member) \ + list_entry((pos)->member.next, typeof(*(pos)), member) + +#define list_entry_is_head(pos, head, member) \ + list_is_head(&pos->member, (head)) + +/* small difference: 'can_loop' has been added in the conditions */ +#define list_for_each_entry(pos, head, member) \ + for (pos = list_first_entry(head, typeof(*pos), member); \ + !list_entry_is_head(pos, head, member) && can_loop; \ + pos = list_next_entry(pos, member)) + +/* mptcp helpers from protocol.h */ +#define mptcp_for_each_subflow(__msk, __subflow) \ + list_for_each_entry(__subflow, &((__msk)->conn_list), node) + +static __always_inline struct sock * +mptcp_subflow_tcp_sock(const struct mptcp_subflow_context *subflow) +{ + return subflow->tcp_sock; +} + +#endif diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c new file mode 100644 index 000000000000..70302477e326 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Tessares SA. */ +/* Copyright (c) 2024, Kylin Software */ + +/* vmlinux.h, bpf_helpers.h and other 'define' */ +#include "bpf_tracing_net.h" +#include "mptcp_bpf.h" + +char _license[] SEC("license") = "GPL"; + +char cc[TCP_CA_NAME_MAX] = "reno"; +int pid; + +/* Associate a subflow counter to each token */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 100); +} mptcp_sf SEC(".maps"); + +SEC("sockops") +int mptcp_subflow(struct bpf_sock_ops *skops) +{ + __u32 init = 1, key, mark, *cnt; + struct mptcp_sock *msk; + struct bpf_sock *sk; + int err; + + if (skops->op != BPF_SOCK_OPS_TCP_CONNECT_CB) + return 1; + + sk = skops->sk; + if (!sk) + return 1; + + msk = bpf_skc_to_mptcp_sock(sk); + if (!msk) + return 1; + + key = msk->token; + cnt = bpf_map_lookup_elem(&mptcp_sf, &key); + if (cnt) { + /* A new subflow is added to an existing MPTCP connection */ + __sync_fetch_and_add(cnt, 1); + mark = *cnt; + } else { + /* A new MPTCP connection is just initiated and this is its primary subflow */ + bpf_map_update_elem(&mptcp_sf, &key, &init, BPF_ANY); + mark = init; + } + + /* Set the mark of the subflow's socket based on appearance order */ + err = bpf_setsockopt(skops, SOL_SOCKET, SO_MARK, &mark, sizeof(mark)); + if (err < 0) + return 1; + if (mark == 2) + err = bpf_setsockopt(skops, SOL_TCP, TCP_CONGESTION, cc, TCP_CA_NAME_MAX); + + return 1; +} + +static int _check_getsockopt_subflow_mark(struct mptcp_sock *msk, struct bpf_sockopt *ctx) +{ + struct mptcp_subflow_context *subflow; + int i = 0; + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk; + + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, + struct mptcp_subflow_context)); + + if (ssk->sk_mark != ++i) { + ctx->retval = -2; + break; + } + } + + return 1; +} + +static int _check_getsockopt_subflow_cc(struct mptcp_sock *msk, struct bpf_sockopt *ctx) +{ + struct mptcp_subflow_context *subflow; + + mptcp_for_each_subflow(msk, subflow) { + struct inet_connection_sock *icsk; + struct sock *ssk; + + ssk = mptcp_subflow_tcp_sock(bpf_core_cast(subflow, + struct mptcp_subflow_context)); + icsk = bpf_core_cast(ssk, struct inet_connection_sock); + + if (ssk->sk_mark == 2 && + __builtin_memcmp(icsk->icsk_ca_ops->name, cc, TCP_CA_NAME_MAX)) { + ctx->retval = -2; + break; + } + } + + return 1; +} + +SEC("cgroup/getsockopt") +int _getsockopt_subflow(struct bpf_sockopt *ctx) +{ + struct bpf_sock *sk = ctx->sk; + struct mptcp_sock *msk; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 1; + + if (!sk || sk->protocol != IPPROTO_MPTCP || + (!(ctx->level == SOL_SOCKET && ctx->optname == SO_MARK) && + !(ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION))) + return 1; + + msk = bpf_core_cast(sk, struct mptcp_sock); + if (msk->pm.subflows != 1) { + ctx->retval = -1; + return 1; + } + + if (ctx->optname == SO_MARK) + return _check_getsockopt_subflow_mark(msk, ctx); + return _check_getsockopt_subflow_cc(msk, ctx); +} diff --git a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c index aeff3a4f9287..c6edf8dbefeb 100644 --- a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c +++ b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c @@ -27,6 +27,8 @@ struct { __type(value, __u64); } sock_map SEC(".maps"); +int tcx_init_netns_cookie, tcx_netns_cookie; + SEC("sockops") int get_netns_cookie_sockops(struct bpf_sock_ops *ctx) { @@ -81,4 +83,12 @@ int get_netns_cookie_sk_msg(struct sk_msg_md *msg) return 1; } +SEC("tcx/ingress") +int get_netns_cookie_tcx(struct __sk_buff *skb) +{ + tcx_init_netns_cookie = bpf_get_netns_cookie(NULL); + tcx_netns_cookie = bpf_get_netns_cookie(skb); + return TCX_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c index 672fc368d9c4..885377e83607 100644 --- a/tools/testing/selftests/bpf/progs/preempt_lock.c +++ b/tools/testing/selftests/bpf/progs/preempt_lock.c @@ -6,7 +6,7 @@ #include "bpf_experimental.h" SEC("?tc") -__failure __msg("1 bpf_preempt_enable is missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_1(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -14,7 +14,7 @@ int preempt_lock_missing_1(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("2 bpf_preempt_enable(s) are missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_2(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -23,7 +23,7 @@ int preempt_lock_missing_2(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("3 bpf_preempt_enable(s) are missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_3(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -33,7 +33,7 @@ int preempt_lock_missing_3(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("1 bpf_preempt_enable is missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_3_minus_2(struct __sk_buff *ctx) { bpf_preempt_disable(); @@ -55,7 +55,7 @@ static __noinline void preempt_enable(void) } SEC("?tc") -__failure __msg("1 bpf_preempt_enable is missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_1_subprog(struct __sk_buff *ctx) { preempt_disable(); @@ -63,7 +63,7 @@ int preempt_lock_missing_1_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("2 bpf_preempt_enable(s) are missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_2_subprog(struct __sk_buff *ctx) { preempt_disable(); @@ -72,7 +72,7 @@ int preempt_lock_missing_2_subprog(struct __sk_buff *ctx) } SEC("?tc") -__failure __msg("1 bpf_preempt_enable is missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region") int preempt_lock_missing_2_minus_1_subprog(struct __sk_buff *ctx) { preempt_disable(); diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null.c b/tools/testing/selftests/bpf/progs/raw_tp_null.c new file mode 100644 index 000000000000..457f34c151e3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/raw_tp_null.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +int tid; +int i; + +SEC("tp_btf/bpf_testmod_test_raw_tp_null") +int BPF_PROG(test_raw_tp_null, struct sk_buff *skb) +{ + struct task_struct *task = bpf_get_current_task_btf(); + + if (task->pid != tid) + return 0; + + i = i + skb->mark + 1; + /* The compiler may move the NULL check before this deref, which causes + * the load to fail as deref of scalar. Prevent that by using a barrier. + */ + barrier(); + /* If dead code elimination kicks in, the increment below will + * be removed. For raw_tp programs, we mark input arguments as + * PTR_MAYBE_NULL, so branch prediction should never kick in. + */ + if (!skb) + i += 2; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/struct_ops_detach.c b/tools/testing/selftests/bpf/progs/struct_ops_detach.c index 56b787a89876..d7fdcabe7d90 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_detach.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_detach.c @@ -6,5 +6,17 @@ char _license[] SEC("license") = "GPL"; +/* + * This subprogram validates that libbpf handles the situation in which BPF + * object has subprograms in .text section, but has no entry BPF programs. + * At some point that was causing issues due to legacy logic of treating such + * subprogram as entry program (with unknown program type, which would fail). + */ +int dangling_subprog(void) +{ + /* do nothing, just be here */ + return 0; +} + SEC(".struct_ops.link") struct bpf_testmod_ops testmod_do_detach; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c new file mode 100644 index 000000000000..8ea57e5348ab --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +#if defined(__TARGET_ARCH_x86) +bool skip __attribute((__section__(".data"))) = false; +#else +bool skip = true; +#endif + +void bpf_testmod_ops3_call_test_2(void) __ksym; + +int val_i, val_j; + +__noinline static int subprog2(int *a, int *b) +{ + return val_i + a[10] + b[20]; +} + +__noinline static int subprog1(int *a) +{ + /* stack size 200 bytes */ + int b[50] = {}; + + b[20] = 2; + return subprog2(a, b); +} + + +SEC("struct_ops") +int BPF_PROG(test_1) +{ + /* stack size 400 bytes */ + int a[100] = {}; + + a[10] = 1; + val_i = subprog1(a); + bpf_testmod_ops3_call_test_2(); + return 0; +} + +SEC("struct_ops") +int BPF_PROG(test_2) +{ + /* stack size 200 bytes */ + int a[50] = {}; + + a[10] = 3; + val_j = subprog1(a); + return 0; +} + +SEC(".struct_ops") +struct bpf_testmod_ops3 testmod_1 = { + .test_1 = (void *)test_1, + .test_2 = (void *)test_2, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c new file mode 100644 index 000000000000..1f55ec4cee37 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +#if defined(__TARGET_ARCH_x86) +bool skip __attribute((__section__(".data"))) = false; +#else +bool skip = true; +#endif + +void bpf_testmod_ops3_call_test_2(void) __ksym; + +int val_i, val_j; + +__noinline static int subprog2(int *a, int *b) +{ + return val_i + a[10] + b[20]; +} + +__noinline static int subprog1(int *a) +{ + /* stack size 200 bytes */ + int b[50] = {}; + + b[20] = 2; + return subprog2(a, b); +} + + +SEC("struct_ops") +int BPF_PROG(test_1) +{ + /* stack size 100 bytes */ + int a[25] = {}; + + a[10] = 1; + val_i = subprog1(a); + bpf_testmod_ops3_call_test_2(); + return 0; +} + +SEC("struct_ops") +int BPF_PROG(test_2) +{ + /* stack size 400 bytes */ + int a[100] = {}; + + a[10] = 3; + val_j = subprog1(a); + return 0; +} + +SEC(".struct_ops") +struct bpf_testmod_ops3 testmod_1 = { + .test_1 = (void *)test_1, + .test_2 = (void *)test_2, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c new file mode 100644 index 000000000000..f2f300d50988 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +#if defined(__TARGET_ARCH_x86) +bool skip __attribute((__section__(".data"))) = false; +#else +bool skip = true; +#endif + +void bpf_testmod_ops3_call_test_1(void) __ksym; + +int val_i, val_j; + +__noinline static int subprog2(int *a, int *b) +{ + return val_i + a[1] + b[20]; +} + +__noinline static int subprog1(int *a) +{ + /* stack size 400 bytes */ + int b[100] = {}; + + b[20] = 2; + return subprog2(a, b); +} + + +SEC("struct_ops") +int BPF_PROG(test_1) +{ + /* stack size 20 bytes */ + int a[5] = {}; + + a[1] = 1; + val_j += subprog1(a); + bpf_testmod_ops3_call_test_1(); + return 0; +} + +SEC(".struct_ops") +struct bpf_testmod_ops3 testmod_1 = { + .test_1 = (void *)test_1, +}; diff --git a/tools/testing/selftests/bpf/progs/tailcall_fail.c b/tools/testing/selftests/bpf/progs/tailcall_fail.c new file mode 100644 index 000000000000..bc77921d2bb0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_fail.c @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +#include "bpf_misc.h" +#include "bpf_experimental.h" + +extern void bpf_rcu_read_lock(void) __ksym; +extern void bpf_rcu_read_unlock(void) __ksym; + +#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8))) + +private(A) struct bpf_spin_lock lock; + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 3); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +SEC("?tc") +__failure __msg("function calls are not allowed while holding a lock") +int reject_tail_call_spin_lock(struct __sk_buff *ctx) +{ + bpf_spin_lock(&lock); + bpf_tail_call_static(ctx, &jmp_table, 0); + return 0; +} + +SEC("?tc") +__failure __msg("tail_call cannot be used inside bpf_rcu_read_lock-ed region") +int reject_tail_call_rcu_lock(struct __sk_buff *ctx) +{ + bpf_rcu_read_lock(); + bpf_tail_call_static(ctx, &jmp_table, 0); + bpf_rcu_read_unlock(); + return 0; +} + +SEC("?tc") +__failure __msg("tail_call cannot be used inside bpf_preempt_disable-ed region") +int reject_tail_call_preempt_lock(struct __sk_buff *ctx) +{ + bpf_guard_preempt(); + bpf_tail_call_static(ctx, &jmp_table, 0); + return 0; +} + +SEC("?tc") +__failure __msg("tail_call would lead to reference leak") +int reject_tail_call_ref(struct __sk_buff *ctx) +{ + struct foo { int i; } *p; + + p = bpf_obj_new(typeof(*p)); + bpf_tail_call_static(ctx, &jmp_table, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h index 6720c4b5be41..e9c4fea7a4bb 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h +++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h @@ -23,6 +23,7 @@ struct { struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym; void bpf_task_release(struct task_struct *p) __ksym; struct task_struct *bpf_task_from_pid(s32 pid) __ksym; +struct task_struct *bpf_task_from_vpid(s32 vpid) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c index ad88a3796ddf..4c07ea193f72 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c @@ -247,6 +247,20 @@ int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 cl return 0; } +SEC("tp_btf/task_newtask") +__failure __msg("Possibly NULL pointer passed to trusted arg0") +int BPF_PROG(task_kfunc_from_vpid_no_null_check, struct task_struct *task, u64 clone_flags) +{ + struct task_struct *acquired; + + acquired = bpf_task_from_vpid(task->pid); + + /* Releasing bpf_task_from_vpid() lookup without a NULL check. */ + bpf_task_release(acquired); + + return 0; +} + SEC("lsm/task_free") __failure __msg("R1 must be a rcu pointer") int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task) diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c index a55149015063..5fb4fc19d26a 100644 --- a/tools/testing/selftests/bpf/progs/task_kfunc_success.c +++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c @@ -366,3 +366,54 @@ int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 cl return 0; } + +SEC("syscall") +int test_task_from_vpid_current(const void *ctx) +{ + struct task_struct *current, *v_task; + + v_task = bpf_task_from_vpid(1); + if (!v_task) { + err = 1; + return 0; + } + + current = bpf_get_current_task_btf(); + + /* The current process should be the init process (pid 1) in the new pid namespace. */ + if (current != v_task) + err = 2; + + bpf_task_release(v_task); + return 0; +} + +SEC("syscall") +int test_task_from_vpid_invalid(const void *ctx) +{ + struct task_struct *v_task; + + v_task = bpf_task_from_vpid(-1); + if (v_task) { + err = 1; + goto err; + } + + /* There should be only one process (current process) in the new pid namespace. */ + v_task = bpf_task_from_vpid(2); + if (v_task) { + err = 2; + goto err; + } + + v_task = bpf_task_from_vpid(9999); + if (v_task) { + err = 3; + goto err; + } + + return 0; +err: + bpf_task_release(v_task); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/task_ls_uptr.c b/tools/testing/selftests/bpf/progs/task_ls_uptr.c new file mode 100644 index 000000000000..ddbe11b46eef --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_ls_uptr.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "uptr_test_common.h" + +struct task_struct *bpf_task_from_pid(s32 pid) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct value_type); +} datamap SEC(".maps"); + +pid_t target_pid = 0; +pid_t parent_pid = 0; + +SEC("tp_btf/sys_enter") +int on_enter(__u64 *ctx) +{ + struct task_struct *task, *data_task; + struct value_type *ptr; + struct user_data *udata; + struct cgroup *cgrp; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + data_task = bpf_task_from_pid(parent_pid); + if (!data_task) + return 0; + + ptr = bpf_task_storage_get(&datamap, data_task, 0, 0); + bpf_task_release(data_task); + if (!ptr) + return 0; + + cgrp = bpf_kptr_xchg(&ptr->cgrp, NULL); + if (cgrp) { + int lvl = cgrp->level; + + bpf_cgroup_release(cgrp); + return lvl; + } + + udata = ptr->udata; + if (!udata || udata->result) + return 0; + udata->result = MAGIC_VALUE + udata->a + udata->b; + + udata = ptr->nested.udata; + if (udata && !udata->nested_result) + udata->nested_result = udata->result; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c index 8a0632c37839..d1a57f7d09bd 100644 --- a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c @@ -5,10 +5,11 @@ #include "bpf_misc.h" __noinline -int subprog(struct __sk_buff *skb) +int subprog_tc(struct __sk_buff *skb) { int ret = 1; + __sink(skb); __sink(ret); return ret; } @@ -16,7 +17,7 @@ int subprog(struct __sk_buff *skb) SEC("tc") int entry_tc(struct __sk_buff *skb) { - return subprog(skb); + return subprog_tc(skb); } char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c index f0759efff6ef..1cd1a1b72cb5 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c +++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c @@ -10,16 +10,18 @@ #endif struct sockaddr_in6 srv_sa6 = {}; +struct sockaddr_in srv_sa4 = {}; __u16 listen_tp_sport = 0; __u16 req_sk_sport = 0; __u32 recv_cookie = 0; __u32 gen_cookie = 0; +__u32 mss = 0; __u32 linum = 0; #define LOG() ({ if (!linum) linum = __LINE__; }) -static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th, - struct tcp_sock *tp, +static void test_syncookie_helper(void *iphdr, int iphdr_size, + struct tcphdr *th, struct tcp_sock *tp, struct __sk_buff *skb) { if (th->syn) { @@ -38,17 +40,18 @@ static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th, return; } - mss_cookie = bpf_tcp_gen_syncookie(tp, ip6h, sizeof(*ip6h), + mss_cookie = bpf_tcp_gen_syncookie(tp, iphdr, iphdr_size, th, 40); if (mss_cookie < 0) { if (mss_cookie != -ENOENT) LOG(); } else { gen_cookie = (__u32)mss_cookie; + mss = mss_cookie >> 32; } } else if (gen_cookie) { /* It was in cookie mode */ - int ret = bpf_tcp_check_syncookie(tp, ip6h, sizeof(*ip6h), + int ret = bpf_tcp_check_syncookie(tp, iphdr, iphdr_size, th, sizeof(*th)); if (ret < 0) { @@ -60,26 +63,58 @@ static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th, } } -static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb) +static int handle_ip_tcp(struct ethhdr *eth, struct __sk_buff *skb) { - struct bpf_sock_tuple *tuple; + struct bpf_sock_tuple *tuple = NULL; + unsigned int tuple_len = 0; struct bpf_sock *bpf_skc; - unsigned int tuple_len; + void *data_end, *iphdr; + struct ipv6hdr *ip6h; + struct iphdr *ip4h; struct tcphdr *th; - void *data_end; + int iphdr_size; data_end = (void *)(long)(skb->data_end); - th = (struct tcphdr *)(ip6h + 1); - if (th + 1 > data_end) - return TC_ACT_OK; - - /* Is it the testing traffic? */ - if (th->dest != srv_sa6.sin6_port) + switch (eth->h_proto) { + case bpf_htons(ETH_P_IP): + ip4h = (struct iphdr *)(eth + 1); + if (ip4h + 1 > data_end) + return TC_ACT_OK; + if (ip4h->protocol != IPPROTO_TCP) + return TC_ACT_OK; + th = (struct tcphdr *)(ip4h + 1); + if (th + 1 > data_end) + return TC_ACT_OK; + /* Is it the testing traffic? */ + if (th->dest != srv_sa4.sin_port) + return TC_ACT_OK; + tuple_len = sizeof(tuple->ipv4); + tuple = (struct bpf_sock_tuple *)&ip4h->saddr; + iphdr = ip4h; + iphdr_size = sizeof(*ip4h); + break; + case bpf_htons(ETH_P_IPV6): + ip6h = (struct ipv6hdr *)(eth + 1); + if (ip6h + 1 > data_end) + return TC_ACT_OK; + if (ip6h->nexthdr != IPPROTO_TCP) + return TC_ACT_OK; + th = (struct tcphdr *)(ip6h + 1); + if (th + 1 > data_end) + return TC_ACT_OK; + /* Is it the testing traffic? */ + if (th->dest != srv_sa6.sin6_port) + return TC_ACT_OK; + tuple_len = sizeof(tuple->ipv6); + tuple = (struct bpf_sock_tuple *)&ip6h->saddr; + iphdr = ip6h; + iphdr_size = sizeof(*ip6h); + break; + default: return TC_ACT_OK; + } - tuple_len = sizeof(tuple->ipv6); - tuple = (struct bpf_sock_tuple *)&ip6h->saddr; if ((void *)tuple + tuple_len > data_end) { LOG(); return TC_ACT_OK; @@ -126,7 +161,7 @@ static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb) listen_tp_sport = tp->inet_conn.icsk_inet.sk.__sk_common.skc_num; - test_syncookie_helper(ip6h, th, tp, skb); + test_syncookie_helper(iphdr, iphdr_size, th, tp, skb); bpf_sk_release(tp); return TC_ACT_OK; } @@ -142,7 +177,6 @@ release: SEC("tc") int cls_ingress(struct __sk_buff *skb) { - struct ipv6hdr *ip6h; struct ethhdr *eth; void *data_end; @@ -152,17 +186,11 @@ int cls_ingress(struct __sk_buff *skb) if (eth + 1 > data_end) return TC_ACT_OK; - if (eth->h_proto != bpf_htons(ETH_P_IPV6)) - return TC_ACT_OK; - - ip6h = (struct ipv6hdr *)(eth + 1); - if (ip6h + 1 > data_end) + if (eth->h_proto != bpf_htons(ETH_P_IP) && + eth->h_proto != bpf_htons(ETH_P_IPV6)) return TC_ACT_OK; - if (ip6h->nexthdr == IPPROTO_TCP) - return handle_ip6_tcp(ip6h, skb); - - return TC_ACT_OK; + return handle_ip_tcp(eth, skb); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c index 92354cd72044..176a355e3062 100644 --- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c +++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c @@ -1,27 +1,50 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook -#include <linux/bpf.h> +#include <vmlinux.h> #include <linux/version.h> #include <bpf/bpf_helpers.h> -__u32 sig = 0, pid = 0, status = 0, signal_thread = 0; +struct task_struct *bpf_task_from_pid(int pid) __ksym; +void bpf_task_release(struct task_struct *p) __ksym; +int bpf_send_signal_task(struct task_struct *task, int sig, enum pid_type type, u64 value) __ksym; + +__u32 sig = 0, pid = 0, status = 0, signal_thread = 0, target_pid = 0; static __always_inline int bpf_send_signal_test(void *ctx) { + struct task_struct *target_task = NULL; int ret; + u64 value; if (status != 0 || pid == 0) return 0; if ((bpf_get_current_pid_tgid() >> 32) == pid) { - if (signal_thread) - ret = bpf_send_signal_thread(sig); - else - ret = bpf_send_signal(sig); + if (target_pid) { + target_task = bpf_task_from_pid(target_pid); + if (!target_task) + return 0; + value = 8; + } + + if (signal_thread) { + if (target_pid) + ret = bpf_send_signal_task(target_task, sig, PIDTYPE_PID, value); + else + ret = bpf_send_signal_thread(sig); + } else { + if (target_pid) + ret = bpf_send_signal_task(target_task, sig, PIDTYPE_TGID, value); + else + ret = bpf_send_signal(sig); + } if (ret == 0) status = 1; } + if (target_task) + bpf_task_release(target_task); + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c index 43f40c4fe241..1c8b678e2e9a 100644 --- a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c +++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c @@ -28,8 +28,8 @@ struct { }, }; -SEC(".data.A") struct bpf_spin_lock lockA; -SEC(".data.B") struct bpf_spin_lock lockB; +static struct bpf_spin_lock lockA SEC(".data.A"); +static struct bpf_spin_lock lockB SEC(".data.B"); SEC("?tc") int lock_id_kptr_preserve(void *ctx) diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c index ab3eae3d6af8..10d825928499 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_link.c +++ b/tools/testing/selftests/bpf/progs/test_tc_link.c @@ -18,6 +18,7 @@ bool seen_tc4; bool seen_tc5; bool seen_tc6; bool seen_tc7; +bool seen_tc8; bool set_type; @@ -25,6 +26,8 @@ bool seen_eth; bool seen_host; bool seen_mcast; +int mark, prio; + SEC("tc/ingress") int tc1(struct __sk_buff *skb) { @@ -100,3 +103,12 @@ out: seen_tc7 = true; return TCX_PASS; } + +SEC("tc/egress") +int tc8(struct __sk_buff *skb) +{ + seen_tc8 = true; + mark = skb->mark; + prio = skb->priority; + return TCX_PASS; +} diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c deleted file mode 100644 index 6edebce563b5..000000000000 --- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c +++ /dev/null @@ -1,167 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2018 Facebook -// Copyright (c) 2019 Cloudflare - -#include <string.h> - -#include <linux/bpf.h> -#include <linux/pkt_cls.h> -#include <linux/if_ether.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <sys/socket.h> -#include <linux/tcp.h> - -#include <bpf/bpf_helpers.h> -#include <bpf/bpf_endian.h> - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __type(key, __u32); - __type(value, __u32); - __uint(max_entries, 3); -} results SEC(".maps"); - -static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk, - void *iph, __u32 ip_size, - struct tcphdr *tcph) -{ - __u32 thlen = tcph->doff * 4; - - if (tcph->syn && !tcph->ack) { - // packet should only have an MSS option - if (thlen != 24) - return 0; - - if ((void *)tcph + thlen > data_end) - return 0; - - return bpf_tcp_gen_syncookie(sk, iph, ip_size, tcph, thlen); - } - return 0; -} - -static __always_inline void check_syncookie(void *ctx, void *data, - void *data_end) -{ - struct bpf_sock_tuple tup; - struct bpf_sock *sk; - struct ethhdr *ethh; - struct iphdr *ipv4h; - struct ipv6hdr *ipv6h; - struct tcphdr *tcph; - int ret; - __u32 key_mss = 2; - __u32 key_gen = 1; - __u32 key = 0; - __s64 seq_mss; - - ethh = data; - if (ethh + 1 > data_end) - return; - - switch (bpf_ntohs(ethh->h_proto)) { - case ETH_P_IP: - ipv4h = data + sizeof(struct ethhdr); - if (ipv4h + 1 > data_end) - return; - - if (ipv4h->ihl != 5) - return; - - tcph = data + sizeof(struct ethhdr) + sizeof(struct iphdr); - if (tcph + 1 > data_end) - return; - - tup.ipv4.saddr = ipv4h->saddr; - tup.ipv4.daddr = ipv4h->daddr; - tup.ipv4.sport = tcph->source; - tup.ipv4.dport = tcph->dest; - - sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv4), - BPF_F_CURRENT_NETNS, 0); - if (!sk) - return; - - if (sk->state != BPF_TCP_LISTEN) - goto release; - - seq_mss = gen_syncookie(data_end, sk, ipv4h, sizeof(*ipv4h), - tcph); - - ret = bpf_tcp_check_syncookie(sk, ipv4h, sizeof(*ipv4h), - tcph, sizeof(*tcph)); - break; - - case ETH_P_IPV6: - ipv6h = data + sizeof(struct ethhdr); - if (ipv6h + 1 > data_end) - return; - - if (ipv6h->nexthdr != IPPROTO_TCP) - return; - - tcph = data + sizeof(struct ethhdr) + sizeof(struct ipv6hdr); - if (tcph + 1 > data_end) - return; - - memcpy(tup.ipv6.saddr, &ipv6h->saddr, sizeof(tup.ipv6.saddr)); - memcpy(tup.ipv6.daddr, &ipv6h->daddr, sizeof(tup.ipv6.daddr)); - tup.ipv6.sport = tcph->source; - tup.ipv6.dport = tcph->dest; - - sk = bpf_skc_lookup_tcp(ctx, &tup, sizeof(tup.ipv6), - BPF_F_CURRENT_NETNS, 0); - if (!sk) - return; - - if (sk->state != BPF_TCP_LISTEN) - goto release; - - seq_mss = gen_syncookie(data_end, sk, ipv6h, sizeof(*ipv6h), - tcph); - - ret = bpf_tcp_check_syncookie(sk, ipv6h, sizeof(*ipv6h), - tcph, sizeof(*tcph)); - break; - - default: - return; - } - - if (seq_mss > 0) { - __u32 cookie = (__u32)seq_mss; - __u32 mss = seq_mss >> 32; - - bpf_map_update_elem(&results, &key_gen, &cookie, 0); - bpf_map_update_elem(&results, &key_mss, &mss, 0); - } - - if (ret == 0) { - __u32 cookie = bpf_ntohl(tcph->ack_seq) - 1; - - bpf_map_update_elem(&results, &key, &cookie, 0); - } - -release: - bpf_sk_release(sk); -} - -SEC("tc") -int check_syncookie_clsact(struct __sk_buff *skb) -{ - check_syncookie(skb, (void *)(long)skb->data, - (void *)(long)skb->data_end); - return TC_ACT_OK; -} - -SEC("xdp") -int check_syncookie_xdp(struct xdp_md *ctx) -{ - check_syncookie(ctx, (void *)(long)ctx->data, - (void *)(long)ctx->data_end); - return XDP_PASS; -} - -char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h index f8b1b7e68d2e..34024de6337e 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h @@ -22,7 +22,7 @@ __builtin_memcpy(b, __tmp, sizeof(a)); \ } while (0) -/* asm-generic/unaligned.h */ +/* linux/unaligned.h */ #define __get_unaligned_t(type, ptr) ({ \ const struct { type x; } __packed * __pptr = (typeof(__pptr))(ptr); \ __pptr->x; \ diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c index bba3e37f749b..5aaf2b065f86 100644 --- a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c +++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c @@ -7,7 +7,11 @@ #include "bpf_misc.h" SEC("tp_btf/bpf_testmod_test_nullable_bare") -__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'") +/* This used to be a failure test, but raw_tp nullable arguments can now + * directly be dereferenced, whether they have nullable annotation or not, + * and don't need to be explicitly checked. + */ +__success int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx) { return nullable_ctx->len; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c index 20ec6723df18..3619239b01b7 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c @@ -12,10 +12,12 @@ struct { __uint(max_entries, 4); } cpu_map SEC(".maps"); +__u32 redirect_count = 0; + SEC("xdp") int xdp_redir_prog(struct xdp_md *ctx) { - return bpf_redirect_map(&cpu_map, 1, 0); + return bpf_redirect_map(&cpu_map, 0, 0); } SEC("xdp") @@ -27,6 +29,9 @@ int xdp_dummy_prog(struct xdp_md *ctx) SEC("xdp/cpumap") int xdp_dummy_cm(struct xdp_md *ctx) { + if (bpf_get_smp_processor_id() == 0) + redirect_count++; + if (ctx->ingress_ifindex == IFINDEX_LO) return XDP_DROP; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c index 4139a14f9996..92b65a485d4a 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c @@ -12,7 +12,7 @@ struct { SEC("xdp") int xdp_redir_prog(struct xdp_md *ctx) { - return bpf_redirect_map(&dm_ports, 1, 0); + return bpf_redirect_map(&dm_ports, 0, 0); } /* invalid program on DEVMAP entry; diff --git a/tools/testing/selftests/bpf/progs/update_map_in_htab.c b/tools/testing/selftests/bpf/progs/update_map_in_htab.c new file mode 100644 index 000000000000..c2066247cd9c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/update_map_in_htab.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2024. Huawei Technologies Co., Ltd */ +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct inner_map_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); +} inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 2); + __array(values, struct inner_map_type); +} outer_htab_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, int); + __uint(max_entries, 2); + __array(values, struct inner_map_type); +} outer_alloc_htab_map SEC(".maps"); + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_consumers.c b/tools/testing/selftests/bpf/progs/uprobe_multi_consumers.c index 7e0fdcbbd242..93752bb5690b 100644 --- a/tools/testing/selftests/bpf/progs/uprobe_multi_consumers.c +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_consumers.c @@ -24,16 +24,16 @@ int uprobe_1(struct pt_regs *ctx) return 0; } -SEC("uprobe.multi") +SEC("uprobe.session") int uprobe_2(struct pt_regs *ctx) { uprobe_result[2]++; return 0; } -SEC("uprobe.multi") +SEC("uprobe.session") int uprobe_3(struct pt_regs *ctx) { uprobe_result[3]++; - return 0; + return 1; } diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session.c new file mode 100644 index 000000000000..30bff90b68dc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> +#include "bpf_kfuncs.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +__u64 uprobe_multi_func_1_addr = 0; +__u64 uprobe_multi_func_2_addr = 0; +__u64 uprobe_multi_func_3_addr = 0; + +__u64 uprobe_session_result[3] = {}; +__u64 uprobe_multi_sleep_result = 0; + +void *user_ptr = 0; +int pid = 0; + +static int uprobe_multi_check(void *ctx, bool is_return) +{ + const __u64 funcs[] = { + uprobe_multi_func_1_addr, + uprobe_multi_func_2_addr, + uprobe_multi_func_3_addr, + }; + unsigned int i; + __u64 addr; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 1; + + addr = bpf_get_func_ip(ctx); + + for (i = 0; i < ARRAY_SIZE(funcs); i++) { + if (funcs[i] == addr) { + uprobe_session_result[i]++; + break; + } + } + + /* only uprobe_multi_func_2 executes return probe */ + if ((addr == uprobe_multi_func_1_addr) || + (addr == uprobe_multi_func_3_addr)) + return 1; + + return 0; +} + +SEC("uprobe.session//proc/self/exe:uprobe_multi_func_*") +int uprobe(struct pt_regs *ctx) +{ + return uprobe_multi_check(ctx, bpf_session_is_return()); +} + +static __always_inline bool verify_sleepable_user_copy(void) +{ + char data[9]; + + bpf_copy_from_user(data, sizeof(data), user_ptr); + return bpf_strncmp(data, sizeof(data), "test_data") == 0; +} + +SEC("uprobe.session.s//proc/self/exe:uprobe_multi_func_*") +int uprobe_sleepable(struct pt_regs *ctx) +{ + if (verify_sleepable_user_copy()) + uprobe_multi_sleep_result++; + return uprobe_multi_check(ctx, bpf_session_is_return()); +} diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c new file mode 100644 index 000000000000..5befdf944dc6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session_cookie.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +int pid = 0; + +__u64 test_uprobe_1_result = 0; +__u64 test_uprobe_2_result = 0; +__u64 test_uprobe_3_result = 0; + +static int check_cookie(__u64 val, __u64 *result) +{ + __u64 *cookie; + + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 1; + + cookie = bpf_session_cookie(); + + if (bpf_session_is_return()) + *result = *cookie == val ? val : 0; + else + *cookie = val; + return 0; +} + +SEC("uprobe.session//proc/self/exe:uprobe_multi_func_1") +int uprobe_1(struct pt_regs *ctx) +{ + return check_cookie(1, &test_uprobe_1_result); +} + +SEC("uprobe.session//proc/self/exe:uprobe_multi_func_2") +int uprobe_2(struct pt_regs *ctx) +{ + return check_cookie(2, &test_uprobe_2_result); +} + +SEC("uprobe.session//proc/self/exe:uprobe_multi_func_3") +int uprobe_3(struct pt_regs *ctx) +{ + return check_cookie(3, &test_uprobe_3_result); +} diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c new file mode 100644 index 000000000000..8fbcd69fae22 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session_recursive.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> +#include "bpf_kfuncs.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +int pid = 0; + +int idx_entry = 0; +int idx_return = 0; + +__u64 test_uprobe_cookie_entry[6]; +__u64 test_uprobe_cookie_return[3]; + +static int check_cookie(void) +{ + __u64 *cookie = bpf_session_cookie(); + + if (bpf_session_is_return()) { + if (idx_return >= ARRAY_SIZE(test_uprobe_cookie_return)) + return 1; + test_uprobe_cookie_return[idx_return++] = *cookie; + return 0; + } + + if (idx_entry >= ARRAY_SIZE(test_uprobe_cookie_entry)) + return 1; + *cookie = test_uprobe_cookie_entry[idx_entry]; + return idx_entry++ % 2; +} + + +SEC("uprobe.session//proc/self/exe:uprobe_session_recursive") +int uprobe_recursive(struct pt_regs *ctx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 1; + + return check_cookie(); +} diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_session_single.c b/tools/testing/selftests/bpf/progs/uprobe_multi_session_single.c new file mode 100644 index 000000000000..7c960376ae97 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_session_single.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <stdbool.h> +#include "bpf_kfuncs.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +__u64 uprobe_session_result[3] = {}; +int pid = 0; + +static int uprobe_multi_check(void *ctx, int idx) +{ + if (bpf_get_current_pid_tgid() >> 32 != pid) + return 1; + + uprobe_session_result[idx]++; + + /* only consumer 1 executes return probe */ + if (idx == 0 || idx == 2) + return 1; + + return 0; +} + +SEC("uprobe.session//proc/self/exe:uprobe_multi_func_1") +int uprobe_0(struct pt_regs *ctx) +{ + return uprobe_multi_check(ctx, 0); +} + +SEC("uprobe.session//proc/self/exe:uprobe_multi_func_1") +int uprobe_1(struct pt_regs *ctx) +{ + return uprobe_multi_check(ctx, 1); +} + +SEC("uprobe.session//proc/self/exe:uprobe_multi_func_1") +int uprobe_2(struct pt_regs *ctx) +{ + return uprobe_multi_check(ctx, 2); +} diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_verifier.c b/tools/testing/selftests/bpf/progs/uprobe_multi_verifier.c new file mode 100644 index 000000000000..fe49f2cb5360 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uprobe_multi_verifier.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/usdt.bpf.h> +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + + +SEC("uprobe.session") +__success +int uprobe_sesison_return_0(struct pt_regs *ctx) +{ + return 0; +} + +SEC("uprobe.session") +__success +int uprobe_sesison_return_1(struct pt_regs *ctx) +{ + return 1; +} + +SEC("uprobe.session") +__failure +__msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]") +int uprobe_sesison_return_2(struct pt_regs *ctx) +{ + return 2; +} diff --git a/tools/testing/selftests/bpf/progs/uptr_failure.c b/tools/testing/selftests/bpf/progs/uptr_failure.c new file mode 100644 index 000000000000..0cfa1fd61440 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uptr_failure.c @@ -0,0 +1,105 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_experimental.h" +#include "bpf_misc.h" +#include "uptr_test_common.h" + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct value_type); +} datamap SEC(".maps"); + +SEC("?syscall") +__failure __msg("store to uptr disallowed") +int uptr_write(const void *ctx) +{ + struct task_struct *task; + struct value_type *v; + + task = bpf_get_current_task_btf(); + v = bpf_task_storage_get(&datamap, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!v) + return 0; + + v->udata = NULL; + return 0; +} + +SEC("?syscall") +__failure __msg("store to uptr disallowed") +int uptr_write_nested(const void *ctx) +{ + struct task_struct *task; + struct value_type *v; + + task = bpf_get_current_task_btf(); + v = bpf_task_storage_get(&datamap, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!v) + return 0; + + v->nested.udata = NULL; + return 0; +} + +SEC("?syscall") +__failure __msg("R1 invalid mem access 'mem_or_null'") +int uptr_no_null_check(const void *ctx) +{ + struct task_struct *task; + struct value_type *v; + + task = bpf_get_current_task_btf(); + v = bpf_task_storage_get(&datamap, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!v) + return 0; + + v->udata->result = 0; + + return 0; +} + +SEC("?syscall") +__failure __msg("doesn't point to kptr") +int uptr_kptr_xchg(const void *ctx) +{ + struct task_struct *task; + struct value_type *v; + + task = bpf_get_current_task_btf(); + v = bpf_task_storage_get(&datamap, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!v) + return 0; + + bpf_kptr_xchg(&v->udata, NULL); + + return 0; +} + +SEC("?syscall") +__failure __msg("invalid mem access 'scalar'") +int uptr_obj_new(const void *ctx) +{ + struct value_type *v; + + v = bpf_obj_new(typeof(*v)); + if (!v) + return 0; + + if (v->udata) + v->udata->result = 0; + + bpf_obj_drop(v); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/uptr_map_failure.c b/tools/testing/selftests/bpf/progs/uptr_map_failure.c new file mode 100644 index 000000000000..417b763d76b4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uptr_map_failure.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "uptr_test_common.h" + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct large_uptr); +} large_uptr_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct empty_uptr); +} empty_uptr_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct kstruct_uptr); +} kstruct_uptr_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/uptr_update_failure.c b/tools/testing/selftests/bpf/progs/uptr_update_failure.c new file mode 100644 index 000000000000..86c3bb954abc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/uptr_update_failure.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "uptr_test_common.h" + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct value_lock_type); +} datamap SEC(".maps"); + +/* load test only. not used */ +SEC("syscall") +int not_used(void *ctx) +{ + struct value_lock_type *ptr; + struct task_struct *task; + struct user_data *udata; + + task = bpf_get_current_task_btf(); + ptr = bpf_task_storage_get(&datamap, task, 0, 0); + if (!ptr) + return 0; + + bpf_spin_lock(&ptr->lock); + + udata = ptr->udata; + if (!udata) { + bpf_spin_unlock(&ptr->lock); + return 0; + } + udata->result = MAGIC_VALUE + udata->a + udata->b; + + bpf_spin_unlock(&ptr->lock); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c index 6065f862d964..f94f30cf1bb8 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c @@ -29,12 +29,12 @@ int big_alloc1(void *ctx) if (!page1) return 1; *page1 = 1; - page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE, + page2 = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE * 2, 1, NUMA_NO_NODE, 0); if (!page2) return 2; *page2 = 2; - no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE, + no_page = bpf_arena_alloc_pages(&arena, base + ARENA_SIZE - PAGE_SIZE, 1, NUMA_NO_NODE, 0); if (no_page) return 3; @@ -66,4 +66,110 @@ int big_alloc1(void *ctx) #endif return 0; } + +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) +#define PAGE_CNT 100 +__u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */ +__u8 __arena *base; + +/* + * Check that arena's range_tree algorithm allocates pages sequentially + * on the first pass and then fills in all gaps on the second pass. + */ +__noinline int alloc_pages(int page_cnt, int pages_atonce, bool first_pass, + int max_idx, int step) +{ + __u8 __arena *pg; + int i, pg_idx; + + for (i = 0; i < page_cnt; i++) { + pg = bpf_arena_alloc_pages(&arena, NULL, pages_atonce, + NUMA_NO_NODE, 0); + if (!pg) + return step; + pg_idx = (unsigned long) (pg - base) / PAGE_SIZE; + if (first_pass) { + /* Pages must be allocated sequentially */ + if (pg_idx != i) + return step + 100; + } else { + /* Allocator must fill into gaps */ + if (pg_idx >= max_idx || (pg_idx & 1)) + return step + 200; + } + *pg = pg_idx; + page[pg_idx] = pg; + cond_break; + } + return 0; +} + +SEC("syscall") +__success __retval(0) +int big_alloc2(void *ctx) +{ + __u8 __arena *pg; + int i, err; + + base = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!base) + return 1; + bpf_arena_free_pages(&arena, (void __arena *)base, 1); + + err = alloc_pages(PAGE_CNT, 1, true, PAGE_CNT, 2); + if (err) + return err; + + /* Clear all even pages */ + for (i = 0; i < PAGE_CNT; i += 2) { + pg = page[i]; + if (*pg != i) + return 3; + bpf_arena_free_pages(&arena, (void __arena *)pg, 1); + page[i] = NULL; + cond_break; + } + + /* Allocate into freed gaps */ + err = alloc_pages(PAGE_CNT / 2, 1, false, PAGE_CNT, 4); + if (err) + return err; + + /* Free pairs of pages */ + for (i = 0; i < PAGE_CNT; i += 4) { + pg = page[i]; + if (*pg != i) + return 5; + bpf_arena_free_pages(&arena, (void __arena *)pg, 2); + page[i] = NULL; + page[i + 1] = NULL; + cond_break; + } + + /* Allocate 2 pages at a time into freed gaps */ + err = alloc_pages(PAGE_CNT / 4, 2, false, PAGE_CNT, 6); + if (err) + return err; + + /* Check pages without freeing */ + for (i = 0; i < PAGE_CNT; i += 2) { + pg = page[i]; + if (*pg != i) + return 7; + cond_break; + } + + pg = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + + if (!pg) + return 8; + /* + * The first PAGE_CNT pages are occupied. The new page + * must be above. + */ + if ((pg - base) / PAGE_SIZE < PAGE_CNT) + return 9; + return 0; +} +#endif char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c index 95d7ecc12963..4195aa824ba5 100644 --- a/tools/testing/selftests/bpf/progs/verifier_array_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_array_access.c @@ -368,8 +368,7 @@ __naked void a_read_only_array_2_1(void) r4 = 0; \ r5 = 0; \ call %[bpf_csum_diff]; \ -l0_%=: r0 &= 0xffff; \ - exit; \ +l0_%=: exit; \ " : : __imm(bpf_csum_diff), __imm(bpf_map_lookup_elem), diff --git a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c index f4da4d508ddb..7c881bca9af5 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c +++ b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c @@ -15,6 +15,8 @@ int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign, int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym __weak; void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym __weak; +u64 bits_array[511] = {}; + SEC("iter.s/cgroup") __description("bits iter without destroy") __failure __msg("Unreleased reference") @@ -55,9 +57,15 @@ __description("null pointer") __success __retval(0) int null_pointer(void) { - int nr = 0; + struct bpf_iter_bits iter; + int err, nr = 0; int *bit; + err = bpf_iter_bits_new(&iter, NULL, 1); + bpf_iter_bits_destroy(&iter); + if (err != -EINVAL) + return 1; + bpf_for_each(bits, bit, NULL, 1) nr++; return nr; @@ -110,16 +118,16 @@ int bit_index(void) } SEC("syscall") -__description("bits nomem") +__description("bits too big") __success __retval(0) -int bits_nomem(void) +int bits_too_big(void) { u64 data[4]; int nr = 0; int *bit; __builtin_memset(&data, 0xff, sizeof(data)); - bpf_for_each(bits, bit, &data[0], 513) /* Be greater than 512 */ + bpf_for_each(bits, bit, &data[0], 512) /* Be greater than 511 */ nr++; return nr; } @@ -151,3 +159,74 @@ int zero_words(void) nr++; return nr; } + +SEC("syscall") +__description("huge words") +__success __retval(0) +int huge_words(void) +{ + u64 data[8] = {0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1, 0x1}; + int nr = 0; + int *bit; + + bpf_for_each(bits, bit, &data[0], 67108865) + nr++; + return nr; +} + +SEC("syscall") +__description("max words") +__success __retval(4) +int max_words(void) +{ + volatile int nr = 0; + int *bit; + + bits_array[0] = (1ULL << 63) | 1U; + bits_array[510] = (1ULL << 33) | (1ULL << 32); + + bpf_for_each(bits, bit, bits_array, 511) { + if (nr == 0 && *bit != 0) + break; + if (nr == 2 && *bit != 32672) + break; + nr++; + } + return nr; +} + +SEC("syscall") +__description("bad words") +__success __retval(0) +int bad_words(void) +{ + void *bad_addr = (void *)-4095; + struct bpf_iter_bits iter; + volatile int nr; + int *bit; + int err; + + err = bpf_iter_bits_new(&iter, bad_addr, 1); + bpf_iter_bits_destroy(&iter); + if (err != -EFAULT) + return 1; + + nr = 0; + bpf_for_each(bits, bit, bad_addr, 1) + nr++; + if (nr != 0) + return 2; + + err = bpf_iter_bits_new(&iter, bad_addr, 4); + bpf_iter_bits_destroy(&iter); + if (err != -EFAULT) + return 3; + + nr = 0; + bpf_for_each(bits, bit, bad_addr, 4) + nr++; + if (nr != 0) + return 4; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c index 9da97d2efcd9..5094c288cfd7 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c +++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c @@ -790,61 +790,6 @@ __naked static void cumulative_stack_depth_subprog(void) :: __imm(bpf_get_smp_processor_id) : __clobber_all); } -SEC("raw_tp") -__arch_x86_64 -__log_level(4) -__msg("stack depth 512") -__xlated("0: r1 = 42") -__xlated("1: *(u64 *)(r10 -512) = r1") -__xlated("2: w0 = ") -__xlated("3: r0 = &(void __percpu *)(r0)") -__xlated("4: r0 = *(u32 *)(r0 +0)") -__xlated("5: exit") -__success -__naked int bpf_fastcall_max_stack_ok(void) -{ - asm volatile( - "r1 = 42;" - "*(u64 *)(r10 - %[max_bpf_stack]) = r1;" - "*(u64 *)(r10 - %[max_bpf_stack_8]) = r1;" - "call %[bpf_get_smp_processor_id];" - "r1 = *(u64 *)(r10 - %[max_bpf_stack_8]);" - "exit;" - : - : __imm_const(max_bpf_stack, MAX_BPF_STACK), - __imm_const(max_bpf_stack_8, MAX_BPF_STACK + 8), - __imm(bpf_get_smp_processor_id) - : __clobber_all - ); -} - -SEC("raw_tp") -__arch_x86_64 -__log_level(4) -__msg("stack depth 520") -__failure -__naked int bpf_fastcall_max_stack_fail(void) -{ - asm volatile( - "r1 = 42;" - "*(u64 *)(r10 - %[max_bpf_stack]) = r1;" - "*(u64 *)(r10 - %[max_bpf_stack_8]) = r1;" - "call %[bpf_get_smp_processor_id];" - "r1 = *(u64 *)(r10 - %[max_bpf_stack_8]);" - /* call to prandom blocks bpf_fastcall rewrite */ - "*(u64 *)(r10 - %[max_bpf_stack_8]) = r1;" - "call %[bpf_get_prandom_u32];" - "r1 = *(u64 *)(r10 - %[max_bpf_stack_8]);" - "exit;" - : - : __imm_const(max_bpf_stack, MAX_BPF_STACK), - __imm_const(max_bpf_stack_8, MAX_BPF_STACK + 8), - __imm(bpf_get_smp_processor_id), - __imm(bpf_get_prandom_u32) - : __clobber_all - ); -} - SEC("cgroup/getsockname_unix") __xlated("0: r2 = 1") /* bpf_cast_to_kern_ctx is replaced by a single assignment */ diff --git a/tools/testing/selftests/bpf/progs/verifier_const.c b/tools/testing/selftests/bpf/progs/verifier_const.c index 2e533d7eec2f..e118dbb768bf 100644 --- a/tools/testing/selftests/bpf/progs/verifier_const.c +++ b/tools/testing/selftests/bpf/progs/verifier_const.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2024 Isovalent */ -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> #include "bpf_misc.h" const volatile long foo = 42; @@ -66,4 +67,32 @@ int tcx6(struct __sk_buff *skb) return TCX_PASS; } +static inline void write_fixed(volatile void *p, __u32 val) +{ + *(volatile __u32 *)p = val; +} + +static inline void write_dyn(void *p, void *val, int len) +{ + bpf_copy_from_user(p, len, val); +} + +SEC("tc/ingress") +__description("rodata/mark: write with unknown reg rejected") +__failure __msg("write into map forbidden") +int tcx7(struct __sk_buff *skb) +{ + write_fixed((void *)&foo, skb->mark); + return TCX_PASS; +} + +SEC("lsm.s/bprm_committed_creds") +__description("rodata/mark: write with unknown reg rejected") +__failure __msg("write into map forbidden") +int BPF_PROG(bprm, struct linux_binprm *bprm) +{ + write_dyn((void *)&foo, &bart, bpf_get_prandom_u32() & 3); + return 0; +} + char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c new file mode 100644 index 000000000000..8f755d2464cf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_linked_scalars.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("socket") +__description("scalars: find linked scalars") +__failure +__msg("math between fp pointer and 2147483647 is not allowed") +__naked void scalars(void) +{ + asm volatile (" \ + r0 = 0; \ + r1 = 0x80000001 ll; \ + r1 /= 1; \ + r2 = r1; \ + r4 = r1; \ + w2 += 0x7FFFFFFF; \ + w4 += 0; \ + if r2 == 0 goto l1; \ + exit; \ +l1: \ + r4 >>= 63; \ + r3 = 1; \ + r3 -= r4; \ + r3 *= 0x7FFFFFFF; \ + r3 += r10; \ + *(u8*)(r3 - 1) = r0; \ + exit; \ +" ::: __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index 028ec855587b..994bbc346d25 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -287,6 +287,46 @@ l0_%=: \ : __clobber_all); } +SEC("socket") +__description("MOV64SX, S8, unsigned range_check") +__success __retval(0) +__naked void mov64sx_s8_range_check(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x1; \ + r0 += 0xfe; \ + r0 = (s8)r0; \ + if r0 < 0xfffffffffffffffe goto label_%=; \ + r0 = 0; \ + exit; \ +label_%=: \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("MOV32SX, S8, unsigned range_check") +__success __retval(0) +__naked void mov32sx_s8_range_check(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + w0 &= 0x1; \ + w0 += 0xfe; \ + w0 = (s8)w0; \ + if w0 < 0xfffffffe goto label_%=; \ + r0 = 0; \ + exit; \ +label_%=: \ + exit; \ + " : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + #else SEC("socket") diff --git a/tools/testing/selftests/bpf/progs/verifier_mtu.c b/tools/testing/selftests/bpf/progs/verifier_mtu.c new file mode 100644 index 000000000000..70c7600a26a0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_mtu.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +SEC("tc/ingress") +__description("uninit/mtu: write rejected") +__failure __msg("invalid indirect read from stack") +int tc_uninit_mtu(struct __sk_buff *ctx) +{ + __u32 mtu; + + bpf_check_mtu(ctx, 0, &mtu, 0, 0); + return TCX_PASS; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c new file mode 100644 index 000000000000..b1fbdf119553 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c @@ -0,0 +1,272 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +/* From include/linux/filter.h */ +#define MAX_BPF_STACK 512 + +#if defined(__TARGET_ARCH_x86) + +struct elem { + struct bpf_timer t; + char pad[256]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct elem); +} array SEC(".maps"); + +SEC("kprobe") +__description("Private stack, single prog") +__success +__arch_x86_64 +__jited(" movabsq $0x{{.*}}, %r9") +__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" movl $0x2a, %edi") +__jited(" movq %rdi, -0x100(%r9)") +__naked void private_stack_single_prog(void) +{ + asm volatile (" \ + r1 = 42; \ + *(u64 *)(r10 - 256) = r1; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("raw_tp") +__description("No private stack") +__success +__arch_x86_64 +__jited(" subq $0x8, %rsp") +__naked void no_private_stack_nested(void) +{ + asm volatile (" \ + r1 = 42; \ + *(u64 *)(r10 - 8) = r1; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +__used +__naked static void cumulative_stack_depth_subprog(void) +{ + asm volatile (" \ + r1 = 41; \ + *(u64 *)(r10 - 32) = r1; \ + call %[bpf_get_smp_processor_id]; \ + exit; \ +" : + : __imm(bpf_get_smp_processor_id) + : __clobber_all); +} + +SEC("kprobe") +__description("Private stack, subtree > MAX_BPF_STACK") +__success +__arch_x86_64 +/* private stack fp for the main prog */ +__jited(" movabsq $0x{{.*}}, %r9") +__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" movl $0x2a, %edi") +__jited(" movq %rdi, -0x200(%r9)") +__jited(" pushq %r9") +__jited(" callq 0x{{.*}}") +__jited(" popq %r9") +__jited(" xorl %eax, %eax") +__naked void private_stack_nested_1(void) +{ + asm volatile (" \ + r1 = 42; \ + *(u64 *)(r10 - %[max_bpf_stack]) = r1; \ + call cumulative_stack_depth_subprog; \ + r0 = 0; \ + exit; \ +" : + : __imm_const(max_bpf_stack, MAX_BPF_STACK) + : __clobber_all); +} + +__naked __noinline __used +static unsigned long loop_callback(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r1 = 42; \ + *(u64 *)(r10 - 512) = r1; \ + call cumulative_stack_depth_subprog; \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_common); +} + +SEC("raw_tp") +__description("Private stack, callback") +__success +__arch_x86_64 +/* for func loop_callback */ +__jited("func #1") +__jited(" endbr64") +__jited(" nopl (%rax,%rax)") +__jited(" nopl (%rax)") +__jited(" pushq %rbp") +__jited(" movq %rsp, %rbp") +__jited(" endbr64") +__jited(" movabsq $0x{{.*}}, %r9") +__jited(" addq %gs:0x{{.*}}, %r9") +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +__jited(" movl $0x2a, %edi") +__jited(" movq %rdi, -0x200(%r9)") +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +__naked void private_stack_callback(void) +{ + asm volatile (" \ + r1 = 1; \ + r2 = %[loop_callback]; \ + r3 = 0; \ + r4 = 0; \ + call %[bpf_loop]; \ + r0 = 0; \ + exit; \ +" : + : __imm_ptr(loop_callback), + __imm(bpf_loop) + : __clobber_common); +} + +SEC("fentry/bpf_fentry_test9") +__description("Private stack, exception in main prog") +__success __retval(0) +__arch_x86_64 +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +int private_stack_exception_main_prog(void) +{ + asm volatile (" \ + r1 = 42; \ + *(u64 *)(r10 - 512) = r1; \ +" ::: __clobber_common); + + bpf_throw(0); + return 0; +} + +__used static int subprog_exception(void) +{ + bpf_throw(0); + return 0; +} + +SEC("fentry/bpf_fentry_test9") +__description("Private stack, exception in subprog") +__success __retval(0) +__arch_x86_64 +__jited(" movq %rdi, -0x200(%r9)") +__jited(" pushq %r9") +__jited(" callq") +__jited(" popq %r9") +int private_stack_exception_sub_prog(void) +{ + asm volatile (" \ + r1 = 42; \ + *(u64 *)(r10 - 512) = r1; \ + call subprog_exception; \ +" ::: __clobber_common); + + return 0; +} + +int glob; +__noinline static void subprog2(int *val) +{ + glob += val[0] * 2; +} + +__noinline static void subprog1(int *val) +{ + int tmp[64] = {}; + + tmp[0] = *val; + subprog2(tmp); +} + +__noinline static int timer_cb1(void *map, int *key, struct bpf_timer *timer) +{ + subprog1(key); + return 0; +} + +__noinline static int timer_cb2(void *map, int *key, struct bpf_timer *timer) +{ + return 0; +} + +SEC("fentry/bpf_fentry_test9") +__description("Private stack, async callback, not nested") +__success __retval(0) +__arch_x86_64 +__jited(" movabsq $0x{{.*}}, %r9") +int private_stack_async_callback_1(void) +{ + struct bpf_timer *arr_timer; + int array_key = 0; + + arr_timer = bpf_map_lookup_elem(&array, &array_key); + if (!arr_timer) + return 0; + + bpf_timer_init(arr_timer, &array, 1); + bpf_timer_set_callback(arr_timer, timer_cb2); + bpf_timer_start(arr_timer, 0, 0); + subprog1(&array_key); + return 0; +} + +SEC("fentry/bpf_fentry_test9") +__description("Private stack, async callback, potential nesting") +__success __retval(0) +__arch_x86_64 +__jited(" subq $0x100, %rsp") +int private_stack_async_callback_2(void) +{ + struct bpf_timer *arr_timer; + int array_key = 0; + + arr_timer = bpf_map_lookup_elem(&array, &array_key); + if (!arr_timer) + return 0; + + bpf_timer_init(arr_timer, &array, 1); + bpf_timer_set_callback(arr_timer, timer_cb1); + bpf_timer_start(arr_timer, 0, 0); + subprog1(&array_key); + return 0; +} + +#else + +SEC("kprobe") +__description("private stack is not supported, use a dummy test") +__success +int dummy_test(void) +{ + return 0; +} + +#endif + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c index c4c6da21265e..683a882b3e6d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c +++ b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c @@ -791,7 +791,7 @@ l0_%=: r0 = *(u8*)skb[0]; \ SEC("tc") __description("reference tracking: forbid LD_ABS while holding reference") -__failure __msg("BPF_LD_[ABS|IND] cannot be mixed with socket references") +__failure __msg("BPF_LD_[ABS|IND] would lead to reference leak") __naked void ld_abs_while_holding_reference(void) { asm volatile (" \ @@ -836,7 +836,7 @@ l0_%=: r7 = 1; \ SEC("tc") __description("reference tracking: forbid LD_IND while holding reference") -__failure __msg("BPF_LD_[ABS|IND] cannot be mixed with socket references") +__failure __msg("BPF_LD_[ABS|IND] would lead to reference leak") __naked void ld_ind_while_holding_reference(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c index 2ecf77b623e0..7c5e5e6d10eb 100644 --- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c +++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c @@ -760,4 +760,71 @@ __naked void two_old_ids_one_cur_id(void) : __clobber_all); } +SEC("socket") +/* Note the flag, see verifier.c:opt_subreg_zext_lo32_rnd_hi32() */ +__flag(BPF_F_TEST_RND_HI32) +__success +/* This test was added because of a bug in verifier.c:sync_linked_regs(), + * upon range propagation it destroyed subreg_def marks for registers. + * The subreg_def mark is used to decide whether zero extension instructions + * are needed when register is read. When BPF_F_TEST_RND_HI32 is set it + * also causes generation of statements to randomize upper halves of + * read registers. + * + * The test is written in a way to return an upper half of a register + * that is affected by range propagation and must have it's subreg_def + * preserved. This gives a return value of 0 and leads to undefined + * return value if subreg_def mark is not preserved. + */ +__retval(0) +/* Check that verifier believes r1/r0 are zero at exit */ +__log_level(2) +__msg("4: (77) r1 >>= 32 ; R1_w=0") +__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0") +__msg("6: (95) exit") +__msg("from 3 to 4") +__msg("4: (77) r1 >>= 32 ; R1_w=0") +__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0") +__msg("6: (95) exit") +/* Verify that statements to randomize upper half of r1 had not been + * generated. + */ +__xlated("call unknown") +__xlated("r0 &= 2147483647") +__xlated("w1 = w0") +/* This is how disasm.c prints BPF_ZEXT_REG at the moment, x86 and arm + * are the only CI archs that do not need zero extension for subregs. + */ +#if !defined(__TARGET_ARCH_x86) && !defined(__TARGET_ARCH_arm64) +__xlated("w1 = w1") +#endif +__xlated("if w0 < 0xa goto pc+0") +__xlated("r1 >>= 32") +__xlated("r0 = r1") +__xlated("exit") +__naked void linked_regs_and_subreg_def(void) +{ + asm volatile ( + "call %[bpf_ktime_get_ns];" + /* make sure r0 is in 32-bit range, otherwise w1 = w0 won't + * assign same IDs to registers. + */ + "r0 &= 0x7fffffff;" + /* link w1 and w0 via ID */ + "w1 = w0;" + /* 'if' statement propagates range info from w0 to w1, + * but should not affect w1->subreg_def property. + */ + "if w0 < 10 goto +0;" + /* r1 is read here, on archs that require subreg zero + * extension this would cause zext patch generation. + */ + "r1 >>= 32;" + "r0 = r1;" + "exit;" + : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_search_pruning.c b/tools/testing/selftests/bpf/progs/verifier_search_pruning.c index 5a14498d352f..f40e57251e94 100644 --- a/tools/testing/selftests/bpf/progs/verifier_search_pruning.c +++ b/tools/testing/selftests/bpf/progs/verifier_search_pruning.c @@ -2,6 +2,7 @@ /* Converted from tools/testing/selftests/bpf/verifier/search_pruning.c */ #include <linux/bpf.h> +#include <../../../include/linux/filter.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" @@ -336,4 +337,26 @@ l0_%=: r1 = 42; \ : __clobber_all); } +/* Without checkpoint forcibly inserted at the back-edge a loop this + * test would take a very long time to verify. + */ +SEC("kprobe") +__failure __log_level(4) +__msg("BPF program is too large.") +__naked void short_loop1(void) +{ + asm volatile ( + " r7 = *(u16 *)(r1 +0);" + "1: r7 += 0x1ab064b9;" + " .8byte %[jset];" /* same as 'if r7 & 0x702000 goto 1b;' */ + " r7 &= 0x1ee60e;" + " r7 += r1;" + " if r7 s> 0x37d2 goto +0;" + " r0 = 0;" + " exit;" + : + : __imm_insn(jset, BPF_JMP_IMM(BPF_JSET, BPF_REG_7, 0x702000, -2)) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c index ee76b51005ab..d3e70e38e442 100644 --- a/tools/testing/selftests/bpf/progs/verifier_sock.c +++ b/tools/testing/selftests/bpf/progs/verifier_sock.c @@ -977,4 +977,64 @@ l1_%=: r0 = *(u8*)(r7 + 0); \ : __clobber_all); } +SEC("cgroup/post_bind4") +__description("sk->src_ip6[0] [load 1st byte]") +__failure __msg("invalid bpf_context access off=28 size=2") +__naked void post_bind4_read_src_ip6(void) +{ + asm volatile (" \ + r6 = r1; \ + r7 = *(u16*)(r6 + %[bpf_sock_src_ip6_0]); \ + r0 = 1; \ + exit; \ +" : + : __imm_const(bpf_sock_src_ip6_0, offsetof(struct bpf_sock, src_ip6[0])) + : __clobber_all); +} + +SEC("cgroup/post_bind4") +__description("sk->mark [load mark]") +__failure __msg("invalid bpf_context access off=16 size=2") +__naked void post_bind4_read_mark(void) +{ + asm volatile (" \ + r6 = r1; \ + r7 = *(u16*)(r6 + %[bpf_sock_mark]); \ + r0 = 1; \ + exit; \ +" : + : __imm_const(bpf_sock_mark, offsetof(struct bpf_sock, mark)) + : __clobber_all); +} + +SEC("cgroup/post_bind6") +__description("sk->src_ip4 [load src_ip4]") +__failure __msg("invalid bpf_context access off=24 size=2") +__naked void post_bind6_read_src_ip4(void) +{ + asm volatile (" \ + r6 = r1; \ + r7 = *(u16*)(r6 + %[bpf_sock_src_ip4]); \ + r0 = 1; \ + exit; \ +" : + : __imm_const(bpf_sock_src_ip4, offsetof(struct bpf_sock, src_ip4)) + : __clobber_all); +} + +SEC("cgroup/sock_create") +__description("sk->src_port [word load]") +__failure __msg("invalid bpf_context access off=44 size=2") +__naked void sock_create_read_src_port(void) +{ + asm volatile (" \ + r6 = r1; \ + r7 = *(u16*)(r6 + %[bpf_sock_src_port]); \ + r0 = 1; \ + exit; \ +" : + : __imm_const(bpf_sock_src_port, offsetof(struct bpf_sock, src_port)) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c index fb316c080c84..3f679de73229 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c @@ -187,7 +187,7 @@ l0_%=: r6 = r0; \ SEC("cgroup/skb") __description("spin_lock: test6 missing unlock") -__failure __msg("unlock is missing") +__failure __msg("BPF_EXIT instruction cannot be used inside bpf_spin_lock-ed region") __failure_unpriv __msg_unpriv("") __naked void spin_lock_test6_missing_unlock(void) { diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index f8f5dc9f72b8..62b8e29ced9f 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -21,7 +21,6 @@ #define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3]) -#define IP_DF 0x4000 #define IP_MF 0x2000 #define IP_OFFSET 0x1fff @@ -442,7 +441,7 @@ static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bo /* TCP doesn't normally use fragments, and XDP can't reassemble * them. */ - if ((hdr->ipv4->frag_off & bpf_htons(IP_DF | IP_MF | IP_OFFSET)) != bpf_htons(IP_DF)) + if ((hdr->ipv4->frag_off & bpf_htons(IP_MF | IP_OFFSET)) != 0) return XDP_DROP; tup.ipv4.saddr = hdr->ipv4->saddr; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 905d5981ace1..8b40e9496af1 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -26,10 +26,6 @@ #include "test_maps.h" #include "testing_helpers.h" -#ifndef ENOTSUPP -#define ENOTSUPP 524 -#endif - int skips; static struct bpf_map_create_opts map_opts = { .sz = sizeof(map_opts) }; diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index c7a70e1a1085..6088d8222d59 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -16,15 +16,18 @@ #include <sys/socket.h> #include <sys/un.h> #include <bpf/btf.h> +#include <time.h> #include "json_writer.h" #include "network_helpers.h" +/* backtrace() and backtrace_symbols_fd() are glibc specific, + * use header file when glibc is available and provide stub + * implementations when another libc implementation is used. + */ #ifdef __GLIBC__ #include <execinfo.h> /* backtrace */ -#endif - -/* Default backtrace funcs if missing at link */ +#else __weak int backtrace(void **buffer, int size) { return 0; @@ -34,6 +37,7 @@ __weak void backtrace_symbols_fd(void *const *buffer, int size, int fd) { dprintf(fd, "<backtrace not supported>\n"); } +#endif /*__GLIBC__ */ int env_verbosity = 0; @@ -176,6 +180,88 @@ int usleep(useconds_t usec) return syscall(__NR_nanosleep, &ts, NULL); } +/* Watchdog timer is started by watchdog_start() and stopped by watchdog_stop(). + * If timer is active for longer than env.secs_till_notify, + * it prints the name of the current test to the stderr. + * If timer is active for longer than env.secs_till_kill, + * it kills the thread executing the test by sending a SIGSEGV signal to it. + */ +static void watchdog_timer_func(union sigval sigval) +{ + struct itimerspec timeout = {}; + char test_name[256]; + int err; + + if (env.subtest_state) + snprintf(test_name, sizeof(test_name), "%s/%s", + env.test->test_name, env.subtest_state->name); + else + snprintf(test_name, sizeof(test_name), "%s", + env.test->test_name); + + switch (env.watchdog_state) { + case WD_NOTIFY: + fprintf(env.stderr_saved, "WATCHDOG: test case %s executes for %d seconds...\n", + test_name, env.secs_till_notify); + timeout.it_value.tv_sec = env.secs_till_kill - env.secs_till_notify; + env.watchdog_state = WD_KILL; + err = timer_settime(env.watchdog, 0, &timeout, NULL); + if (err) + fprintf(env.stderr_saved, "Failed to arm watchdog timer\n"); + break; + case WD_KILL: + fprintf(env.stderr_saved, + "WATCHDOG: test case %s executes for %d seconds, terminating with SIGSEGV\n", + test_name, env.secs_till_kill); + pthread_kill(env.main_thread, SIGSEGV); + break; + } +} + +static void watchdog_start(void) +{ + struct itimerspec timeout = {}; + int err; + + if (env.secs_till_kill == 0) + return; + if (env.secs_till_notify > 0) { + env.watchdog_state = WD_NOTIFY; + timeout.it_value.tv_sec = env.secs_till_notify; + } else { + env.watchdog_state = WD_KILL; + timeout.it_value.tv_sec = env.secs_till_kill; + } + err = timer_settime(env.watchdog, 0, &timeout, NULL); + if (err) + fprintf(env.stderr_saved, "Failed to start watchdog timer\n"); +} + +static void watchdog_stop(void) +{ + struct itimerspec timeout = {}; + int err; + + env.watchdog_state = WD_NOTIFY; + err = timer_settime(env.watchdog, 0, &timeout, NULL); + if (err) + fprintf(env.stderr_saved, "Failed to stop watchdog timer\n"); +} + +static void watchdog_init(void) +{ + struct sigevent watchdog_sev = { + .sigev_notify = SIGEV_THREAD, + .sigev_notify_function = watchdog_timer_func, + }; + int err; + + env.main_thread = pthread_self(); + err = timer_create(CLOCK_MONOTONIC, &watchdog_sev, &env.watchdog); + if (err) + fprintf(stderr, "Failed to initialize watchdog timer\n"); +} + static bool should_run(struct test_selector *sel, int num, const char *name) { int i; @@ -512,6 +598,7 @@ bool test__start_subtest(const char *subtest_name) env.subtest_state = subtest_state; stdio_hijack_init(&subtest_state->log_buf, &subtest_state->log_cnt); + watchdog_start(); return true; } @@ -777,6 +864,7 @@ enum ARG_KEYS { ARG_DEBUG = -1, ARG_JSON_SUMMARY = 'J', ARG_TRAFFIC_MONITOR = 'm', + ARG_WATCHDOG_TIMEOUT = 'w', }; static const struct argp_option opts[] = { @@ -807,6 +895,8 @@ static const struct argp_option opts[] = { { "traffic-monitor", ARG_TRAFFIC_MONITOR, "NAMES", 0, "Monitor network traffic of tests with name matching the pattern (supports '*' wildcard)." }, #endif + { "watchdog-timeout", ARG_WATCHDOG_TIMEOUT, "SECONDS", 0, + "Kill the process if tests are not making progress for specified number of seconds." }, {}, }; @@ -868,6 +958,7 @@ static int libbpf_print_fn(enum libbpf_print_level level, va_copy(args2, args); vfprintf(libbpf_capture_stream, format, args2); + va_end(args2); } if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG) @@ -1031,6 +1122,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) true); break; #endif + case ARG_WATCHDOG_TIMEOUT: + env->secs_till_kill = atoi(arg); + if (env->secs_till_kill < 0) { + fprintf(stderr, "Invalid watchdog timeout: %s.\n", arg); + return -EINVAL; + } + if (env->secs_till_kill < env->secs_till_notify) { + env->secs_till_notify = 0; + } + break; default: return ARGP_ERR_UNKNOWN; } @@ -1259,10 +1360,12 @@ static void run_one_test(int test_num) stdio_hijack(&state->log_buf, &state->log_cnt); + watchdog_start(); if (test->run_test) test->run_test(); else if (test->run_serial_test) test->run_serial_test(); + watchdog_stop(); /* ensure last sub-test is finalized properly */ if (env.subtest_state) @@ -1703,6 +1806,7 @@ out: static int worker_main(int sock) { save_netns(); + watchdog_init(); while (true) { /* receive command */ @@ -1812,6 +1916,8 @@ int main(int argc, char **argv) sigaction(SIGSEGV, &sigact, NULL); + env.secs_till_notify = 10; + env.secs_till_kill = 120; err = argp_parse(&argp, argc, argv, 0, NULL, &env); if (err) return err; @@ -1820,6 +1926,8 @@ int main(int argc, char **argv) if (err) return err; + watchdog_init(); + /* Use libbpf 1.0 API mode */ libbpf_set_strict_mode(LIBBPF_STRICT_ALL); libbpf_set_print(libbpf_print_fn); diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 7767d9a825ae..74de33ae37e5 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -131,6 +131,12 @@ struct test_env { pid_t *worker_pids; /* array of worker pids */ int *worker_socks; /* array of worker socks */ int *worker_current_test; /* array of current running test for each worker */ + + pthread_t main_thread; + int secs_till_notify; + int secs_till_kill; + timer_t watchdog; /* watch for stalled tests/subtests */ + enum { WD_NOTIFY, WD_KILL } watchdog_state; }; #define MAX_LOG_TRUNK_SIZE 8192 @@ -390,6 +396,14 @@ int test__join_cgroup(const char *path); ___ok; \ }) +#define ASSERT_ERR_FD(fd, name) ({ \ + static int duration = 0; \ + int ___fd = (fd); \ + bool ___ok = ___fd < 0; \ + CHECK(!___ok, (name), "unexpected fd: %d\n", ___fd); \ + ___ok; \ +}) + #define SYS(goto_label, fmt, ...) \ ({ \ char cmd[1024]; \ diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 3e02d7267de8..e5c7ecbe57e3 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -56,6 +56,8 @@ static void running_handler(int a); #define BPF_SOCKHASH_FILENAME "test_sockhash_kern.bpf.o" #define CG_PATH "/sockmap" +#define EDATAINTEGRITY 2001 + /* global sockets */ int s1, s2, c1, c2, p1, p2; int test_cnt; @@ -86,6 +88,10 @@ int ktls; int peek_flag; int skb_use_parser; int txmsg_omit_skb_parser; +int verify_push_start; +int verify_push_len; +int verify_pop_start; +int verify_pop_len; static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, @@ -418,16 +424,18 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt, { bool drop = opt->drop_expected; unsigned char k = 0; + int i, j, fp; FILE *file; - int i, fp; file = tmpfile(); if (!file) { perror("create file for sendpage"); return 1; } - for (i = 0; i < iov_length * cnt; i++, k++) - fwrite(&k, sizeof(char), 1, file); + for (i = 0; i < cnt; i++, k = 0) { + for (j = 0; j < iov_length; j++, k++) + fwrite(&k, sizeof(char), 1, file); + } fflush(file); fseek(file, 0, SEEK_SET); @@ -510,42 +518,111 @@ unwind_iov: return -ENOMEM; } -static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz) +/* In push or pop test, we need to do some calculations for msg_verify_data */ +static void msg_verify_date_prep(void) { - int i, j = 0, bytes_cnt = 0; - unsigned char k = 0; + int push_range_end = txmsg_start_push + txmsg_end_push - 1; + int pop_range_end = txmsg_start_pop + txmsg_pop - 1; + + if (txmsg_end_push && txmsg_pop && + txmsg_start_push <= pop_range_end && txmsg_start_pop <= push_range_end) { + /* The push range and the pop range overlap */ + int overlap_len; + + verify_push_start = txmsg_start_push; + verify_pop_start = txmsg_start_pop; + if (txmsg_start_push < txmsg_start_pop) + overlap_len = min(push_range_end - txmsg_start_pop + 1, txmsg_pop); + else + overlap_len = min(pop_range_end - txmsg_start_push + 1, txmsg_end_push); + verify_push_len = max(txmsg_end_push - overlap_len, 0); + verify_pop_len = max(txmsg_pop - overlap_len, 0); + } else { + /* Otherwise */ + verify_push_start = txmsg_start_push; + verify_pop_start = txmsg_start_pop; + verify_push_len = txmsg_end_push; + verify_pop_len = txmsg_pop; + } +} + +static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz, + unsigned char *k_p, int *bytes_cnt_p, + int *check_cnt_p, int *push_p) +{ + int bytes_cnt = *bytes_cnt_p, check_cnt = *check_cnt_p, push = *push_p; + unsigned char k = *k_p; + int i, j; - for (i = 0; i < msg->msg_iovlen; i++) { + for (i = 0, j = 0; i < msg->msg_iovlen && size; i++, j = 0) { unsigned char *d = msg->msg_iov[i].iov_base; /* Special case test for skb ingress + ktls */ if (i == 0 && txmsg_ktls_skb) { if (msg->msg_iov[i].iov_len < 4) - return -EIO; + return -EDATAINTEGRITY; if (memcmp(d, "PASS", 4) != 0) { fprintf(stderr, "detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]); - return -EIO; + return -EDATAINTEGRITY; } j = 4; /* advance index past PASS header */ } for (; j < msg->msg_iov[i].iov_len && size; j++) { + if (push > 0 && + check_cnt == verify_push_start + verify_push_len - push) { + int skipped; +revisit_push: + skipped = push; + if (j + push >= msg->msg_iov[i].iov_len) + skipped = msg->msg_iov[i].iov_len - j; + push -= skipped; + size -= skipped; + j += skipped - 1; + check_cnt += skipped; + continue; + } + + if (verify_pop_len > 0 && check_cnt == verify_pop_start) { + bytes_cnt += verify_pop_len; + check_cnt += verify_pop_len; + k += verify_pop_len; + + if (bytes_cnt == chunk_sz) { + k = 0; + bytes_cnt = 0; + check_cnt = 0; + push = verify_push_len; + } + + if (push > 0 && + check_cnt == verify_push_start + verify_push_len - push) + goto revisit_push; + } + if (d[j] != k++) { fprintf(stderr, "detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n", i, j, d[j], k - 1, d[j+1], k); - return -EIO; + return -EDATAINTEGRITY; } bytes_cnt++; + check_cnt++; if (bytes_cnt == chunk_sz) { k = 0; bytes_cnt = 0; + check_cnt = 0; + push = verify_push_len; } size--; } } + *k_p = k; + *bytes_cnt_p = bytes_cnt; + *check_cnt_p = check_cnt; + *push_p = push; return 0; } @@ -598,10 +675,14 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, } clock_gettime(CLOCK_MONOTONIC, &s->end); } else { + float total_bytes, txmsg_pop_total, txmsg_push_total; int slct, recvp = 0, recv, max_fd = fd; - float total_bytes, txmsg_pop_total; int fd_flags = O_NONBLOCK; struct timeval timeout; + unsigned char k = 0; + int bytes_cnt = 0; + int check_cnt = 0; + int push = 0; fd_set w; fcntl(fd, fd_flags); @@ -615,12 +696,22 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, * This is really only useful for testing edge cases in code * paths. */ - total_bytes = (float)iov_count * (float)iov_length * (float)cnt; - if (txmsg_apply) + total_bytes = (float)iov_length * (float)cnt; + if (!opt->sendpage) + total_bytes *= (float)iov_count; + if (txmsg_apply) { + txmsg_push_total = txmsg_end_push * (total_bytes / txmsg_apply); txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply); - else + } else { + txmsg_push_total = txmsg_end_push * cnt; txmsg_pop_total = txmsg_pop * cnt; + } + total_bytes += txmsg_push_total; total_bytes -= txmsg_pop_total; + if (data) { + msg_verify_date_prep(); + push = verify_push_len; + } err = clock_gettime(CLOCK_MONOTONIC, &s->start); if (err < 0) perror("recv start time"); @@ -693,10 +784,11 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, if (data) { int chunk_sz = opt->sendpage ? - iov_length * cnt : + iov_length : iov_length * iov_count; - errno = msg_verify_data(&msg, recv, chunk_sz); + errno = msg_verify_data(&msg, recv, chunk_sz, &k, &bytes_cnt, + &check_cnt, &push); if (errno) { perror("data verify msg failed"); goto out_errno; @@ -704,7 +796,11 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt, if (recvp) { errno = msg_verify_data(&msg_peek, recvp, - chunk_sz); + chunk_sz, + &k, + &bytes_cnt, + &check_cnt, + &push); if (errno) { perror("data verify msg_peek failed"); goto out_errno; @@ -786,8 +882,6 @@ static int sendmsg_test(struct sockmap_options *opt) rxpid = fork(); if (rxpid == 0) { - if (txmsg_pop || txmsg_start_pop) - iov_buf -= (txmsg_pop - txmsg_start_pop + 1); if (opt->drop_expected || txmsg_ktls_skb_drop) _exit(0); @@ -812,7 +906,7 @@ static int sendmsg_test(struct sockmap_options *opt) s.bytes_sent, sent_Bps, sent_Bps/giga, s.bytes_recvd, recvd_Bps, recvd_Bps/giga, peek_flag ? "(peek_msg)" : ""); - if (err && txmsg_cork) + if (err && err != -EDATAINTEGRITY && txmsg_cork) err = 0; exit(err ? 1 : 0); } else if (rxpid == -1) { @@ -1456,8 +1550,8 @@ static void test_send_many(struct sockmap_options *opt, int cgrp) static void test_send_large(struct sockmap_options *opt, int cgrp) { - opt->iov_length = 256; - opt->iov_count = 1024; + opt->iov_length = 8192; + opt->iov_count = 32; opt->rate = 2; test_exec(cgrp, opt); } @@ -1586,17 +1680,19 @@ static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt) static void test_txmsg_pull(int cgrp, struct sockmap_options *opt) { /* Test basic start/end */ + txmsg_pass = 1; txmsg_start = 1; txmsg_end = 2; test_send(opt, cgrp); /* Test >4k pull */ + txmsg_pass = 1; txmsg_start = 4096; txmsg_end = 9182; test_send_large(opt, cgrp); /* Test pull + redirect */ - txmsg_redir = 0; + txmsg_redir = 1; txmsg_start = 1; txmsg_end = 2; test_send(opt, cgrp); @@ -1618,12 +1714,16 @@ static void test_txmsg_pull(int cgrp, struct sockmap_options *opt) static void test_txmsg_pop(int cgrp, struct sockmap_options *opt) { + bool data = opt->data_test; + /* Test basic pop */ + txmsg_pass = 1; txmsg_start_pop = 1; txmsg_pop = 2; test_send_many(opt, cgrp); /* Test pop with >4k */ + txmsg_pass = 1; txmsg_start_pop = 4096; txmsg_pop = 4096; test_send_large(opt, cgrp); @@ -1634,6 +1734,12 @@ static void test_txmsg_pop(int cgrp, struct sockmap_options *opt) txmsg_pop = 2; test_send_many(opt, cgrp); + /* TODO: Test for pop + cork should be different, + * - It makes the layout of the received data difficult + * - It makes it hard to calculate the total_bytes in the recvmsg + * Temporarily skip the data integrity test for this case now. + */ + opt->data_test = false; /* Test pop + cork */ txmsg_redir = 0; txmsg_cork = 512; @@ -1647,16 +1753,21 @@ static void test_txmsg_pop(int cgrp, struct sockmap_options *opt) txmsg_start_pop = 1; txmsg_pop = 2; test_send_many(opt, cgrp); + opt->data_test = data; } static void test_txmsg_push(int cgrp, struct sockmap_options *opt) { + bool data = opt->data_test; + /* Test basic push */ + txmsg_pass = 1; txmsg_start_push = 1; txmsg_end_push = 1; test_send(opt, cgrp); /* Test push 4kB >4k */ + txmsg_pass = 1; txmsg_start_push = 4096; txmsg_end_push = 4096; test_send_large(opt, cgrp); @@ -1667,21 +1778,66 @@ static void test_txmsg_push(int cgrp, struct sockmap_options *opt) txmsg_end_push = 2; test_send_many(opt, cgrp); + /* TODO: Test for push + cork should be different, + * - It makes the layout of the received data difficult + * - It makes it hard to calculate the total_bytes in the recvmsg + * Temporarily skip the data integrity test for this case now. + */ + opt->data_test = false; /* Test push + cork */ txmsg_redir = 0; txmsg_cork = 512; txmsg_start_push = 1; txmsg_end_push = 2; test_send_many(opt, cgrp); + opt->data_test = data; } static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt) { + /* Test push/pop range overlapping */ + txmsg_pass = 1; txmsg_start_push = 1; txmsg_end_push = 10; txmsg_start_pop = 5; txmsg_pop = 4; test_send_large(opt, cgrp); + + txmsg_pass = 1; + txmsg_start_push = 1; + txmsg_end_push = 10; + txmsg_start_pop = 5; + txmsg_pop = 16; + test_send_large(opt, cgrp); + + txmsg_pass = 1; + txmsg_start_push = 5; + txmsg_end_push = 4; + txmsg_start_pop = 1; + txmsg_pop = 10; + test_send_large(opt, cgrp); + + txmsg_pass = 1; + txmsg_start_push = 5; + txmsg_end_push = 16; + txmsg_start_pop = 1; + txmsg_pop = 10; + test_send_large(opt, cgrp); + + /* Test push/pop range non-overlapping */ + txmsg_pass = 1; + txmsg_start_push = 1; + txmsg_end_push = 10; + txmsg_start_pop = 16; + txmsg_pop = 4; + test_send_large(opt, cgrp); + + txmsg_pass = 1; + txmsg_start_push = 16; + txmsg_end_push = 10; + txmsg_start_pop = 5; + txmsg_pop = 4; + test_send_large(opt, cgrp); } static void test_txmsg_apply(int cgrp, struct sockmap_options *opt) diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh deleted file mode 100755 index b42c24282c25..000000000000 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh +++ /dev/null @@ -1,85 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# Copyright (c) 2018 Facebook -# Copyright (c) 2019 Cloudflare - -set -eu -readonly NS1="ns1-$(mktemp -u XXXXXX)" - -wait_for_ip() -{ - local _i - printf "Wait for IP %s to become available " "$1" - for _i in $(seq ${MAX_PING_TRIES}); do - printf "." - if ns1_exec ping -c 1 -W 1 "$1" >/dev/null 2>&1; then - echo " OK" - return - fi - sleep 1 - done - echo 1>&2 "ERROR: Timeout waiting for test IP to become available." - exit 1 -} - -get_prog_id() -{ - awk '/ id / {sub(/.* id /, "", $0); print($1)}' -} - -ns1_exec() -{ - ip netns exec ${NS1} "$@" -} - -setup() -{ - ip netns add ${NS1} - ns1_exec ip link set lo up - - ns1_exec sysctl -w net.ipv4.tcp_syncookies=2 - ns1_exec sysctl -w net.ipv4.tcp_window_scaling=0 - ns1_exec sysctl -w net.ipv4.tcp_timestamps=0 - ns1_exec sysctl -w net.ipv4.tcp_sack=0 - - wait_for_ip 127.0.0.1 - wait_for_ip ::1 -} - -cleanup() -{ - ip netns del ns1 2>/dev/null || : -} - -main() -{ - trap cleanup EXIT 2 3 6 15 - setup - - printf "Testing clsact..." - ns1_exec tc qdisc add dev "${TEST_IF}" clsact - ns1_exec tc filter add dev "${TEST_IF}" ingress \ - bpf obj "${BPF_PROG_OBJ}" sec "${CLSACT_SECTION}" da - - BPF_PROG_ID=$(ns1_exec tc filter show dev "${TEST_IF}" ingress | \ - get_prog_id) - ns1_exec "${PROG}" "${BPF_PROG_ID}" - ns1_exec tc qdisc del dev "${TEST_IF}" clsact - - printf "Testing XDP..." - ns1_exec ip link set "${TEST_IF}" xdp \ - object "${BPF_PROG_OBJ}" section "${XDP_SECTION}" - BPF_PROG_ID=$(ns1_exec ip link show "${TEST_IF}" | get_prog_id) - ns1_exec "${PROG}" "${BPF_PROG_ID}" -} - -DIR=$(dirname $0) -TEST_IF=lo -MAX_PING_TRIES=5 -BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.bpf.o" -CLSACT_SECTION="tc" -XDP_SECTION="xdp" -BPF_PROG_ID=0 -PROG="${DIR}/test_tcp_check_syncookie_user" - -main diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c deleted file mode 100644 index 3844f9b8232a..000000000000 --- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c +++ /dev/null @@ -1,213 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2018 Facebook -// Copyright (c) 2019 Cloudflare - -#include <limits.h> -#include <string.h> -#include <stdlib.h> -#include <unistd.h> - -#include <arpa/inet.h> -#include <netinet/in.h> -#include <sys/types.h> -#include <sys/socket.h> - -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include "cgroup_helpers.h" -#include "network_helpers.h" - -static int get_map_fd_by_prog_id(int prog_id, bool *xdp) -{ - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - __u32 map_ids[1]; - int prog_fd = -1; - int map_fd = -1; - - prog_fd = bpf_prog_get_fd_by_id(prog_id); - if (prog_fd < 0) { - log_err("Failed to get fd by prog id %d", prog_id); - goto err; - } - - info.nr_map_ids = 1; - info.map_ids = (__u64)(unsigned long)map_ids; - - if (bpf_prog_get_info_by_fd(prog_fd, &info, &info_len)) { - log_err("Failed to get info by prog fd %d", prog_fd); - goto err; - } - - if (!info.nr_map_ids) { - log_err("No maps found for prog fd %d", prog_fd); - goto err; - } - - *xdp = info.type == BPF_PROG_TYPE_XDP; - - map_fd = bpf_map_get_fd_by_id(map_ids[0]); - if (map_fd < 0) - log_err("Failed to get fd by map id %d", map_ids[0]); -err: - if (prog_fd >= 0) - close(prog_fd); - return map_fd; -} - -static int run_test(int server_fd, int results_fd, bool xdp) -{ - int client = -1, srv_client = -1; - int ret = 0; - __u32 key = 0; - __u32 key_gen = 1; - __u32 key_mss = 2; - __u32 value = 0; - __u32 value_gen = 0; - __u32 value_mss = 0; - - if (bpf_map_update_elem(results_fd, &key, &value, 0) < 0) { - log_err("Can't clear results"); - goto err; - } - - if (bpf_map_update_elem(results_fd, &key_gen, &value_gen, 0) < 0) { - log_err("Can't clear results"); - goto err; - } - - if (bpf_map_update_elem(results_fd, &key_mss, &value_mss, 0) < 0) { - log_err("Can't clear results"); - goto err; - } - - client = connect_to_fd(server_fd, 0); - if (client == -1) - goto err; - - srv_client = accept(server_fd, NULL, 0); - if (srv_client == -1) { - log_err("Can't accept connection"); - goto err; - } - - if (bpf_map_lookup_elem(results_fd, &key, &value) < 0) { - log_err("Can't lookup result"); - goto err; - } - - if (value == 0) { - log_err("Didn't match syncookie: %u", value); - goto err; - } - - if (bpf_map_lookup_elem(results_fd, &key_gen, &value_gen) < 0) { - log_err("Can't lookup result"); - goto err; - } - - if (xdp && value_gen == 0) { - // SYN packets do not get passed through generic XDP, skip the - // rest of the test. - printf("Skipping XDP cookie check\n"); - goto out; - } - - if (bpf_map_lookup_elem(results_fd, &key_mss, &value_mss) < 0) { - log_err("Can't lookup result"); - goto err; - } - - if (value != value_gen) { - log_err("BPF generated cookie does not match kernel one"); - goto err; - } - - if (value_mss < 536 || value_mss > USHRT_MAX) { - log_err("Unexpected MSS retrieved"); - goto err; - } - - goto out; - -err: - ret = 1; -out: - close(client); - close(srv_client); - return ret; -} - -static int v6only_true(int fd, void *opts) -{ - int mode = true; - - return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode)); -} - -static int v6only_false(int fd, void *opts) -{ - int mode = false; - - return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode)); -} - -int main(int argc, char **argv) -{ - struct network_helper_opts opts = { 0 }; - int server = -1; - int server_v6 = -1; - int server_dual = -1; - int results = -1; - int err = 0; - bool xdp; - - if (argc < 2) { - fprintf(stderr, "Usage: %s prog_id\n", argv[0]); - exit(1); - } - - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - - results = get_map_fd_by_prog_id(atoi(argv[1]), &xdp); - if (results < 0) { - log_err("Can't get map"); - goto err; - } - - server = start_server_str(AF_INET, SOCK_STREAM, "127.0.0.1", 0, NULL); - if (server == -1) - goto err; - - opts.post_socket_cb = v6only_true; - server_v6 = start_server_str(AF_INET6, SOCK_STREAM, "::1", 0, &opts); - if (server_v6 == -1) - goto err; - - opts.post_socket_cb = v6only_false; - server_dual = start_server_str(AF_INET6, SOCK_STREAM, "::0", 0, &opts); - if (server_dual == -1) - goto err; - - if (run_test(server, results, xdp)) - goto err; - - if (run_test(server_v6, results, xdp)) - goto err; - - if (run_test(server_dual, results, xdp)) - goto err; - - printf("ok\n"); - goto out; -err: - err = 1; -out: - close(server); - close(server_v6); - close(server_dual); - close(results); - return err; -} diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 610392dfc4fb..447b68509d76 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -42,10 +42,6 @@ #include "../../../include/linux/filter.h" #include "testing_helpers.h" -#ifndef ENOTSUPP -#define ENOTSUPP 524 -#endif - #define MAX_INSNS BPF_MAXINSNS #define MAX_EXPECTED_INSNS 32 #define MAX_UNEXPECTED_INSNS 32 diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index d3c3c3a24150..5e9f16683be5 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -367,7 +367,7 @@ int delete_module(const char *name, int flags) return syscall(__NR_delete_module, name, flags); } -int unload_bpf_testmod(bool verbose) +int unload_module(const char *name, bool verbose) { int ret, cnt = 0; @@ -375,11 +375,11 @@ int unload_bpf_testmod(bool verbose) fprintf(stdout, "Failed to trigger kernel-side RCU sync!\n"); for (;;) { - ret = delete_module("bpf_testmod", 0); + ret = delete_module(name, 0); if (!ret || errno != EAGAIN) break; if (++cnt > 10000) { - fprintf(stdout, "Unload of bpf_testmod timed out\n"); + fprintf(stdout, "Unload of %s timed out\n", name); break; } usleep(100); @@ -388,41 +388,51 @@ int unload_bpf_testmod(bool verbose) if (ret) { if (errno == ENOENT) { if (verbose) - fprintf(stdout, "bpf_testmod.ko is already unloaded.\n"); + fprintf(stdout, "%s.ko is already unloaded.\n", name); return -1; } - fprintf(stdout, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno); + fprintf(stdout, "Failed to unload %s.ko from kernel: %d\n", name, -errno); return -1; } if (verbose) - fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n"); + fprintf(stdout, "Successfully unloaded %s.ko.\n", name); return 0; } -int load_bpf_testmod(bool verbose) +int load_module(const char *path, bool verbose) { int fd; if (verbose) - fprintf(stdout, "Loading bpf_testmod.ko...\n"); + fprintf(stdout, "Loading %s...\n", path); - fd = open("bpf_testmod.ko", O_RDONLY); + fd = open(path, O_RDONLY); if (fd < 0) { - fprintf(stdout, "Can't find bpf_testmod.ko kernel module: %d\n", -errno); + fprintf(stdout, "Can't find %s kernel module: %d\n", path, -errno); return -ENOENT; } if (finit_module(fd, "", 0)) { - fprintf(stdout, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno); + fprintf(stdout, "Failed to load %s into the kernel: %d\n", path, -errno); close(fd); return -EINVAL; } close(fd); if (verbose) - fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n"); + fprintf(stdout, "Successfully loaded %s.\n", path); return 0; } +int unload_bpf_testmod(bool verbose) +{ + return unload_module("bpf_testmod", verbose); +} + +int load_bpf_testmod(bool verbose) +{ + return load_module("bpf_testmod.ko", verbose); +} + /* * Trigger synchronize_rcu() in kernel. */ diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index d55f6ab12433..46d7f7089f63 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -38,6 +38,8 @@ int unload_bpf_testmod(bool verbose); int kern_sync_rcu(void); int finit_module(int fd, const char *param_values, int flags); int delete_module(const char *name, int flags); +int load_module(const char *path, bool verbose); +int unload_module(const char *name, bool verbose); static inline __u64 get_time_ns(void) { diff --git a/tools/testing/selftests/bpf/uprobe_multi.c b/tools/testing/selftests/bpf/uprobe_multi.c index c7828b13e5ff..dd38dc68f635 100644 --- a/tools/testing/selftests/bpf/uprobe_multi.c +++ b/tools/testing/selftests/bpf/uprobe_multi.c @@ -12,6 +12,10 @@ #define MADV_POPULATE_READ 22 #endif +#ifndef MADV_PAGEOUT +#define MADV_PAGEOUT 21 +#endif + int __attribute__((weak)) uprobe(void) { return 0; diff --git a/tools/testing/selftests/bpf/uptr_test_common.h b/tools/testing/selftests/bpf/uptr_test_common.h new file mode 100644 index 000000000000..f8a134ba12f9 --- /dev/null +++ b/tools/testing/selftests/bpf/uptr_test_common.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#ifndef _UPTR_TEST_COMMON_H +#define _UPTR_TEST_COMMON_H + +#define MAGIC_VALUE 0xabcd1234 +#define PAGE_SIZE 4096 + +#ifdef __BPF__ +/* Avoid fwd btf type being generated for the following struct */ +struct large_data *dummy_large_data; +struct empty_data *dummy_empty_data; +struct user_data *dummy_data; +struct cgroup *dummy_cgrp; +#else +#define __uptr +#define __kptr +#endif + +struct user_data { + int a; + int b; + int result; + int nested_result; +}; + +struct nested_udata { + struct user_data __uptr *udata; +}; + +struct value_type { + struct user_data __uptr *udata; + struct cgroup __kptr *cgrp; + struct nested_udata nested; +}; + +struct value_lock_type { + struct user_data __uptr *udata; + struct bpf_spin_lock lock; +}; + +struct large_data { + __u8 one_page[PAGE_SIZE]; + int a; +}; + +struct large_uptr { + struct large_data __uptr *udata; +}; + +struct empty_data { +}; + +struct empty_uptr { + struct empty_data __uptr *udata; +}; + +struct kstruct_uptr { + struct cgroup __uptr *cgrp; +}; + +#endif diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 1ec5c4c47235..e12ef953fba8 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -16,6 +16,7 @@ #include <sys/stat.h> #include <bpf/libbpf.h> #include <bpf/btf.h> +#include <bpf/bpf.h> #include <libelf.h> #include <gelf.h> #include <float.h> @@ -179,6 +180,7 @@ static struct env { int files_skipped; int progs_processed; int progs_skipped; + int top_src_lines; } env; static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) @@ -228,6 +230,7 @@ static const struct argp_option opts[] = { "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" }, { "test-reg-invariants", 'r', NULL, 0, "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" }, + { "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" }, {}, }; @@ -327,6 +330,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) return err; } break; + case 'S': + errno = 0; + env.top_src_lines = strtol(arg, NULL, 10); + if (errno) { + fprintf(stderr, "invalid top lines N specifier: %s\n", arg); + argp_usage(state); + } + break; case ARGP_KEY_ARG: tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames)); if (!tmp) @@ -854,6 +865,118 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats * return 0; } +struct line_cnt { + char *line; + int cnt; +}; + +static int str_cmp(const void *a, const void *b) +{ + const char **str1 = (const char **)a; + const char **str2 = (const char **)b; + + return strcmp(*str1, *str2); +} + +static int line_cnt_cmp(const void *a, const void *b) +{ + const struct line_cnt *a_cnt = (const struct line_cnt *)a; + const struct line_cnt *b_cnt = (const struct line_cnt *)b; + + if (a_cnt->cnt != b_cnt->cnt) + return a_cnt->cnt < b_cnt->cnt ? -1 : 1; + return strcmp(a_cnt->line, b_cnt->line); +} + +static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name) +{ + int lines_cap = 0; + int lines_size = 0; + char **lines = NULL; + char *line = NULL; + char *state; + struct line_cnt *freq = NULL; + struct line_cnt *cur; + int unique_lines; + int err = 0; + int i; + + while ((line = strtok_r(line ? NULL : buf, "\n", &state))) { + if (strncmp(line, "; ", 2) != 0) + continue; + line += 2; + + if (lines_size == lines_cap) { + char **tmp; + + lines_cap = max(16, lines_cap * 2); + tmp = realloc(lines, lines_cap * sizeof(*tmp)); + if (!tmp) { + err = -ENOMEM; + goto cleanup; + } + lines = tmp; + } + lines[lines_size] = line; + lines_size++; + } + + if (lines_size == 0) + goto cleanup; + + qsort(lines, lines_size, sizeof(*lines), str_cmp); + + freq = calloc(lines_size, sizeof(*freq)); + if (!freq) { + err = -ENOMEM; + goto cleanup; + } + + cur = freq; + cur->line = lines[0]; + cur->cnt = 1; + for (i = 1; i < lines_size; ++i) { + if (strcmp(lines[i], cur->line) != 0) { + cur++; + cur->line = lines[i]; + cur->cnt = 0; + } + cur->cnt++; + } + unique_lines = cur - freq + 1; + + qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp); + + printf("Top source lines (%s):\n", prog_name); + for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) { + const char *src_code = freq[i].line; + const char *src_line = NULL; + char *split = strrchr(freq[i].line, '@'); + + if (split) { + src_line = split + 1; + + while (*src_line && isspace(*src_line)) + src_line++; + + while (split > src_code && isspace(*split)) + split--; + *split = '\0'; + } + + if (src_line) + printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line, src_code); + else + printf("%5d: %s\n", freq[i].cnt, src_code); + } + printf("\n"); + +cleanup: + free(freq); + free(lines); + return err; +} + static int guess_prog_type_by_ctx_name(const char *ctx_name, enum bpf_prog_type *prog_type, enum bpf_attach_type *attach_type) @@ -987,6 +1110,35 @@ skip_freplace_fixup: return; } +static int max_verifier_log_size(void) +{ + const int SMALL_LOG_SIZE = UINT_MAX >> 8; + const int BIG_LOG_SIZE = UINT_MAX >> 2; + struct bpf_insn insns[] = { + { .code = BPF_ALU | BPF_MOV | BPF_X, .dst_reg = BPF_REG_0, }, + { .code = BPF_JMP | BPF_EXIT, }, + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .log_size = BIG_LOG_SIZE, + .log_buf = (void *)-1, + .log_level = 4 + ); + int ret, insn_cnt = ARRAY_SIZE(insns); + static int log_size; + + if (log_size != 0) + return log_size; + + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); + + if (ret == -EFAULT) + log_size = BIG_LOG_SIZE; + else /* ret == -EINVAL, big log size is not supported by the verifier */ + log_size = SMALL_LOG_SIZE; + + return log_size; +} + static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) { const char *base_filename = basename(strdupa(filename)); @@ -1009,13 +1161,16 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf stats = &env.prog_stats[env.prog_stat_cnt++]; memset(stats, 0, sizeof(*stats)); - if (env.verbose) { - buf_sz = env.log_size ? env.log_size : 16 * 1024 * 1024; + if (env.verbose || env.top_src_lines > 0) { + buf_sz = env.log_size ? env.log_size : max_verifier_log_size(); buf = malloc(buf_sz); if (!buf) return -ENOMEM; /* ensure we always request stats */ log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0); + /* --top-src-lines needs verifier log */ + if (env.top_src_lines > 0 && env.log_level == 0) + log_level |= 2; } else { buf = verif_log_buf; buf_sz = sizeof(verif_log_buf); @@ -1048,6 +1203,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf filename, prog_name, stats->stats[DURATION], err ? "failure" : "success", buf); } + if (env.top_src_lines > 0) + print_top_src_lines(buf, buf_sz, stats->prog_name); if (verif_log_buf != buf) free(buf); diff --git a/tools/testing/selftests/bpf/veristat.cfg b/tools/testing/selftests/bpf/veristat.cfg index 1a385061618d..e661ffdcaadf 100644 --- a/tools/testing/selftests/bpf/veristat.cfg +++ b/tools/testing/selftests/bpf/veristat.cfg @@ -15,3 +15,4 @@ test_usdt* test_verif_scale* test_xdp_noinline* xdp_synproxy* +verifier_search_pruning* diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c index dfec31fb9b30..8d275f03e977 100644 --- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c +++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c @@ -152,7 +152,10 @@ void suspend(void) if (err < 0) ksft_exit_fail_msg("timerfd_settime() failed\n"); - if (write(power_state_fd, "mem", strlen("mem")) != strlen("mem")) + system("(echo mem > /sys/power/state) 2> /dev/null"); + + timerfd_gettime(timerfd, &spec); + if (spec.it_value.tv_sec != 0 || spec.it_value.tv_nsec != 0) ksft_exit_fail_msg("Failed to enter Suspend state\n"); close(timerfd); diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c index dad2ed82f3ef..a2b50af8e9ee 100644 --- a/tools/testing/selftests/cgroup/test_cpu.c +++ b/tools/testing/selftests/cgroup/test_cpu.c @@ -8,6 +8,7 @@ #include <pthread.h> #include <stdio.h> #include <time.h> +#include <unistd.h> #include "../kselftest.h" #include "cgroup_util.h" @@ -229,6 +230,79 @@ cleanup: return ret; } +/* + * Creates a nice process that consumes CPU and checks that the elapsed + * usertime in the cgroup is close to the expected time. + */ +static int test_cpucg_nice(const char *root) +{ + int ret = KSFT_FAIL; + int status; + long user_usec, nice_usec; + long usage_seconds = 2; + long expected_nice_usec = usage_seconds * USEC_PER_SEC; + char *cpucg; + pid_t pid; + + cpucg = cg_name(root, "cpucg_test"); + if (!cpucg) + goto cleanup; + + if (cg_create(cpucg)) + goto cleanup; + + user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); + nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec"); + if (nice_usec == -1) + ret = KSFT_SKIP; + if (user_usec != 0 || nice_usec != 0) + goto cleanup; + + /* + * We fork here to create a new process that can be niced without + * polluting the nice value of other selftests + */ + pid = fork(); + if (pid < 0) { + goto cleanup; + } else if (pid == 0) { + struct cpu_hog_func_param param = { + .nprocs = 1, + .ts = { + .tv_sec = usage_seconds, + .tv_nsec = 0, + }, + .clock_type = CPU_HOG_CLOCK_PROCESS, + }; + char buf[64]; + snprintf(buf, sizeof(buf), "%d", getpid()); + if (cg_write(cpucg, "cgroup.procs", buf)) + goto cleanup; + + /* Try to keep niced CPU usage as constrained to hog_cpu as possible */ + nice(1); + hog_cpus_timed(cpucg, ¶m); + exit(0); + } else { + waitpid(pid, &status, 0); + if (!WIFEXITED(status)) + goto cleanup; + + user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec"); + nice_usec = cg_read_key_long(cpucg, "cpu.stat", "nice_usec"); + if (!values_close(nice_usec, expected_nice_usec, 1)) + goto cleanup; + + ret = KSFT_PASS; + } + +cleanup: + cg_destroy(cpucg); + free(cpucg); + + return ret; +} + static int run_cpucg_weight_test( const char *root, @@ -686,6 +760,7 @@ struct cpucg_test { } tests[] = { T(test_cpucg_subtree_control), T(test_cpucg_stats), + T(test_cpucg_nice), T(test_cpucg_weight_overprovisioned), T(test_cpucg_weight_underprovisioned), T(test_cpucg_nested_weight_overprovisioned), diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c index 31b56d625655..3c196fa86c99 100644 --- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c +++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c @@ -27,8 +27,6 @@ #include "../kselftest_harness.h" #include "clone3_selftests.h" -#define MAX_PID_NS_LEVEL 32 - static void child_exit(int ret) { fflush(stdout); diff --git a/tools/testing/selftests/core/.gitignore b/tools/testing/selftests/core/.gitignore index 6e6712ce5817..7999361992aa 100644 --- a/tools/testing/selftests/core/.gitignore +++ b/tools/testing/selftests/core/.gitignore @@ -1 +1,2 @@ close_range_test +unshare_test diff --git a/tools/testing/selftests/damon/_debugfs_common.sh b/tools/testing/selftests/damon/_debugfs_common.sh index aa995516870b..54d45791b0d9 100644 --- a/tools/testing/selftests/damon/_debugfs_common.sh +++ b/tools/testing/selftests/damon/_debugfs_common.sh @@ -8,7 +8,12 @@ test_write_result() { expect_reason=$4 expected=$5 - echo "$content" > "$file" + if [ "$expected" = "0" ] + then + echo "$content" > "$file" + else + echo "$content" > "$file" 2> /dev/null + fi if [ $? -ne "$expected" ] then echo "writing $content to $file doesn't return $expected" diff --git a/tools/testing/selftests/damon/access_memory_even.c b/tools/testing/selftests/damon/access_memory_even.c index 3be121487432..a9f4e9aaf3a9 100644 --- a/tools/testing/selftests/damon/access_memory_even.c +++ b/tools/testing/selftests/damon/access_memory_even.c @@ -14,10 +14,8 @@ int main(int argc, char *argv[]) { char **regions; - clock_t start_clock; int nr_regions; int sz_region; - int access_time_ms; int i; if (argc != 3) { diff --git a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh index 4a76e37ef16b..bd6c22d96ead 100755 --- a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh +++ b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh @@ -12,7 +12,7 @@ then exit 1 fi -if echo foo > "$DBGFS/mk_contexts" +if echo foo > "$DBGFS/mk_contexts" 2> /dev/null then echo "duplicate context creation success" exit 1 diff --git a/tools/testing/selftests/damon/huge_count_read_write.c b/tools/testing/selftests/damon/huge_count_read_write.c index a6fe0689f88d..53e69a669668 100644 --- a/tools/testing/selftests/damon/huge_count_read_write.c +++ b/tools/testing/selftests/damon/huge_count_read_write.c @@ -18,7 +18,7 @@ void write_read_with_huge_count(char *file) { int filedesc = open(file, O_RDWR); - char buf[25]; + char buf[256]; int ret; printf("%s %s\n", __func__, file); @@ -28,9 +28,7 @@ void write_read_with_huge_count(char *file) } write(filedesc, "", 0xfffffffful); - perror("after write: "); ret = read(filedesc, buf, 0xfffffffful); - perror("after read: "); close(filedesc); } diff --git a/tools/testing/selftests/devices/probe/test_discoverable_devices.py b/tools/testing/selftests/devices/probe/test_discoverable_devices.py index d94a74b8a054..d7a2bb91c807 100755 --- a/tools/testing/selftests/devices/probe/test_discoverable_devices.py +++ b/tools/testing/selftests/devices/probe/test_discoverable_devices.py @@ -45,7 +45,7 @@ def find_pci_controller_dirs(): def find_usb_controller_dirs(): - usb_controller_sysfs_dir = "usb[\d]+" + usb_controller_sysfs_dir = r"usb[\d]+" dir_regex = re.compile(usb_controller_sysfs_dir) for d in os.scandir(sysfs_usb_devices): @@ -91,7 +91,7 @@ def get_acpi_uid(sysfs_dev_dir): def get_usb_version(sysfs_dev_dir): - re_usb_version = re.compile("PRODUCT=.*/(\d)/.*") + re_usb_version = re.compile(r"PRODUCT=.*/(\d)/.*") with open(os.path.join(sysfs_dev_dir, "uevent")) as f: return int(re_usb_version.search(f.read()).group(1)) diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 39fb97a8c1df..0fec8f9801ad 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -9,6 +9,7 @@ TEST_PROGS := \ ping.py \ queues.py \ stats.py \ + shaper.py \ # end of TEST_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index 41d0859feb7d..edc56e2cc606 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -11,6 +11,8 @@ ALL_TESTS=" lib_dir=$(dirname "$0") source ${lib_dir}/bond_topo_3d1c.sh +c_maddr="33:33:00:00:00:10" +g_maddr="33:33:00:00:02:54" skip_prio() { @@ -240,6 +242,54 @@ arp_validate_test() done } +# Testing correct multicast groups are added to slaves for ns targets +arp_validate_mcast() +{ + RET=0 + local arp_valid=$(cmd_jq "ip -n ${s_ns} -j -d link show bond0" ".[].linkinfo.info_data.arp_validate") + local active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + + for i in $(seq 0 2); do + maddr_list=$(ip -n ${s_ns} maddr show dev eth${i}) + + # arp_valid == 0 or active_slave should not join any maddrs + if { [ "$arp_valid" == "null" ] || [ "eth${i}" == ${active_slave} ]; } && \ + echo "$maddr_list" | grep -qE "${c_maddr}|${g_maddr}"; then + RET=1 + check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group" + # arp_valid != 0 and backup_slave should join both maddrs + elif [ "$arp_valid" != "null" ] && [ "eth${i}" != ${active_slave} ] && \ + ( ! echo "$maddr_list" | grep -q "${c_maddr}" || \ + ! echo "$maddr_list" | grep -q "${m_maddr}"); then + RET=1 + check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group" + fi + done + + # Do failover + ip -n ${s_ns} link set ${active_slave} down + # wait for active link change + slowwait 2 active_slave_changed $active_slave + active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + + for i in $(seq 0 2); do + maddr_list=$(ip -n ${s_ns} maddr show dev eth${i}) + + # arp_valid == 0 or active_slave should not join any maddrs + if { [ "$arp_valid" == "null" ] || [ "eth${i}" == ${active_slave} ]; } && \ + echo "$maddr_list" | grep -qE "${c_maddr}|${g_maddr}"; then + RET=1 + check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group" + # arp_valid != 0 and backup_slave should join both maddrs + elif [ "$arp_valid" != "null" ] && [ "eth${i}" != ${active_slave} ] && \ + ( ! echo "$maddr_list" | grep -q "${c_maddr}" || \ + ! echo "$maddr_list" | grep -q "${m_maddr}"); then + RET=1 + check_err 1 "arp_valid $arp_valid active_slave $active_slave, eth$i has mcast group" + fi + done +} + arp_validate_arp() { local mode=$1 @@ -261,8 +311,10 @@ arp_validate_ns() fi for val in $(seq 0 6); do - arp_validate_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6} arp_validate $val" + arp_validate_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6},${c_ip6} arp_validate $val" log_test "arp_validate" "$mode ns_ip6_target arp_validate $val" + arp_validate_mcast + log_test "arp_validate" "join mcast group" done } diff --git a/tools/testing/selftests/drivers/net/hw/.gitignore b/tools/testing/selftests/drivers/net/hw/.gitignore new file mode 100644 index 000000000000..e9fe6ede681a --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/.gitignore @@ -0,0 +1 @@ +ncdevmem diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index c9f2f48fc30f..21ba64ce1e34 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -3,6 +3,7 @@ TEST_PROGS = \ csum.py \ devlink_port_split.py \ + devmem.py \ ethtool.sh \ ethtool_extended_state.sh \ ethtool_mm.sh \ @@ -10,6 +11,8 @@ TEST_PROGS = \ hw_stats_l3.sh \ hw_stats_l3_gre.sh \ loopback.sh \ + nic_link_layer.py \ + nic_performance.py \ pp_alloc_fail.py \ rss_ctx.py \ # @@ -26,4 +29,12 @@ TEST_INCLUDES := \ ../../../net/forwarding/tc_common.sh \ # +# YNL files, must be before "include ..lib.mk" +YNL_GEN_FILES := ncdevmem +TEST_GEN_FILES += $(YNL_GEN_FILES) + include ../../../lib.mk + +# YNL build +YNL_GENS := ethtool netdev +include ../../../net/ynl.mk diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py new file mode 100755 index 000000000000..1223f0f5c10c --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit +from lib.py import ksft_eq, KsftSkipEx +from lib.py import NetDrvEpEnv +from lib.py import bkg, cmd, rand_port, wait_port_listen +from lib.py import ksft_disruptive + + +def require_devmem(cfg): + if not hasattr(cfg, "_devmem_probed"): + port = rand_port() + probe_command = f"./ncdevmem -f {cfg.ifname}" + cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0 + cfg._devmem_probed = True + + if not cfg._devmem_supported: + raise KsftSkipEx("Test requires devmem support") + + +@ksft_disruptive +def check_rx(cfg) -> None: + cfg.require_v6() + require_devmem(cfg) + + port = rand_port() + listen_cmd = f"./ncdevmem -l -f {cfg.ifname} -s {cfg.v6} -p {port}" + + with bkg(listen_cmd) as socat: + wait_port_listen(port) + cmd(f"echo -e \"hello\\nworld\"| socat -u - TCP6:[{cfg.v6}]:{port}", host=cfg.remote, shell=True) + + ksft_eq(socat.stdout.strip(), "hello\nworld") + + +def main() -> None: + with NetDrvEpEnv(__file__) as cfg: + ksft_run([check_rx], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index b582885786f5..399789a9676a 100644 --- a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -9,6 +9,7 @@ try: sys.path.append(KSFT_DIR.as_posix()) from net.lib.py import * from drivers.net.lib.py import * + from .linkconfig import LinkConfig except ModuleNotFoundError as e: ksft_pr("Failed importing `net` library from kernel sources") ksft_pr(str(e)) diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py b/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py new file mode 100644 index 000000000000..db84000fc75b --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/lib/py/linkconfig.py @@ -0,0 +1,222 @@ +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import cmd, ethtool, ip +from lib.py import ksft_pr, ksft_eq, KsftSkipEx +from typing import Optional +import re +import time +import json + +#The LinkConfig class is implemented to handle the link layer configurations. +#Required minimum ethtool version is 6.10 + +class LinkConfig: + """Class for handling the link layer configurations""" + def __init__(self, cfg: object) -> None: + self.cfg = cfg + self.partner_netif = self.get_partner_netif_name() + + """Get the initial link configuration of local interface""" + self.common_link_modes = self.get_common_link_modes() + + def get_partner_netif_name(self) -> Optional[str]: + partner_netif = None + try: + if not self.verify_link_up(): + return None + """Get partner interface name""" + partner_json_output = ip("addr show", json=True, host=self.cfg.remote) + for interface in partner_json_output: + for addr in interface.get('addr_info', []): + if addr.get('local') == self.cfg.remote_addr: + partner_netif = interface['ifname'] + ksft_pr(f"Partner Interface name: {partner_netif}") + if partner_netif is None: + ksft_pr("Unable to get the partner interface name") + except Exception as e: + print(f"Unexpected error occurred while getting partner interface name: {e}") + self.partner_netif = partner_netif + return partner_netif + + def verify_link_up(self) -> bool: + """Verify whether the local interface link is up""" + with open(f"/sys/class/net/{self.cfg.ifname}/operstate", "r") as fp: + link_state = fp.read().strip() + + if link_state == "down": + ksft_pr(f"Link state of interface {self.cfg.ifname} is DOWN") + return False + else: + return True + + def reset_interface(self, local: bool = True, remote: bool = True) -> bool: + ksft_pr("Resetting interfaces in local and remote") + if remote: + if self.verify_link_up(): + if self.partner_netif is not None: + ifname = self.partner_netif + link_up_cmd = f"ip link set up {ifname}" + link_down_cmd = f"ip link set down {ifname}" + reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}" + try: + cmd(reset_cmd, host=self.cfg.remote) + except Exception as e: + ksft_pr(f"Unexpected error occurred while resetting remote: {e}") + else: + ksft_pr("Partner interface not available") + if local: + ifname = self.cfg.ifname + link_up_cmd = f"ip link set up {ifname}" + link_down_cmd = f"ip link set down {ifname}" + reset_cmd = f"{link_down_cmd} && sleep 5 && {link_up_cmd}" + try: + cmd(reset_cmd) + except Exception as e: + ksft_pr(f"Unexpected error occurred while resetting local: {e}") + time.sleep(10) + if self.verify_link_up() and self.get_ethtool_field("link-detected"): + ksft_pr("Local and remote interfaces reset to original state") + return True + else: + ksft_pr("Error occurred after resetting interfaces. Link is DOWN.") + return False + + def set_speed_and_duplex(self, speed: str, duplex: str, autoneg: bool = True) -> bool: + """Set the speed and duplex state for the interface""" + autoneg_state = "on" if autoneg is True else "off" + process = None + try: + process = ethtool(f"--change {self.cfg.ifname} speed {speed} duplex {duplex} autoneg {autoneg_state}") + except Exception as e: + ksft_pr(f"Unexpected error occurred while setting speed/duplex: {e}") + if process is None or process.ret != 0: + return False + else: + ksft_pr(f"Speed: {speed} Mbps, Duplex: {duplex} set for Interface: {self.cfg.ifname}") + return True + + def verify_speed_and_duplex(self, expected_speed: str, expected_duplex: str) -> bool: + if not self.verify_link_up(): + return False + """Verifying the speed and duplex state for the interface""" + with open(f"/sys/class/net/{self.cfg.ifname}/speed", "r") as fp: + actual_speed = fp.read().strip() + with open(f"/sys/class/net/{self.cfg.ifname}/duplex", "r") as fp: + actual_duplex = fp.read().strip() + + ksft_eq(actual_speed, expected_speed) + ksft_eq(actual_duplex, expected_duplex) + return True + + def set_autonegotiation_state(self, state: str, remote: bool = False) -> bool: + common_link_modes = self.common_link_modes + speeds, duplex_modes = self.get_speed_duplex_values(self.common_link_modes) + speed = speeds[0] + duplex = duplex_modes[0] + if not speed or not duplex: + ksft_pr("No speed or duplex modes found") + return False + + speed_duplex_cmd = f"speed {speed} duplex {duplex}" if state == "off" else "" + if remote: + if not self.verify_link_up(): + return False + """Set the autonegotiation state for the partner""" + command = f"-s {self.partner_netif} {speed_duplex_cmd} autoneg {state}" + partner_autoneg_change = None + """Set autonegotiation state for interface in remote pc""" + try: + partner_autoneg_change = ethtool(command, host=self.cfg.remote) + except Exception as e: + ksft_pr(f"Unexpected error occurred while changing auto-neg in remote: {e}") + if partner_autoneg_change is None or partner_autoneg_change.ret != 0: + ksft_pr(f"Not able to set autoneg parameter for interface {self.partner_netif}.") + return False + ksft_pr(f"Autoneg set as {state} for {self.partner_netif}") + else: + """Set the autonegotiation state for the interface""" + try: + process = ethtool(f"-s {self.cfg.ifname} {speed_duplex_cmd} autoneg {state}") + if process.ret != 0: + ksft_pr(f"Not able to set autoneg parameter for interface {self.cfg.ifname}") + return False + except Exception as e: + ksft_pr(f"Unexpected error occurred while changing auto-neg in local: {e}") + return False + ksft_pr(f"Autoneg set as {state} for {self.cfg.ifname}") + return True + + def check_autoneg_supported(self, remote: bool = False) -> bool: + if not remote: + local_autoneg = self.get_ethtool_field("supports-auto-negotiation") + if local_autoneg is None: + ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.cfg.ifname}") + """Return autoneg status of the local interface""" + return local_autoneg + else: + if not self.verify_link_up(): + raise KsftSkipEx("Link is DOWN") + """Check remote auto-negotiation support status""" + partner_autoneg = False + if self.partner_netif is not None: + partner_autoneg = self.get_ethtool_field("supports-auto-negotiation", remote=True) + if partner_autoneg is None: + ksft_pr(f"Unable to fetch auto-negotiation status for interface {self.partner_netif}") + return partner_autoneg + + def get_common_link_modes(self) -> set[str]: + common_link_modes = [] + """Populate common link modes""" + link_modes = self.get_ethtool_field("supported-link-modes") + partner_link_modes = self.get_ethtool_field("link-partner-advertised-link-modes") + if link_modes is None: + raise KsftSkipEx(f"Link modes not available for {self.cfg.ifname}") + if partner_link_modes is None: + raise KsftSkipEx(f"Partner link modes not available for {self.cfg.ifname}") + common_link_modes = set(link_modes) and set(partner_link_modes) + return common_link_modes + + def get_speed_duplex_values(self, link_modes: list[str]) -> tuple[list[str], list[str]]: + speed = [] + duplex = [] + """Check the link modes""" + for data in link_modes: + parts = data.split('/') + speed_value = re.match(r'\d+', parts[0]) + if speed_value: + speed.append(speed_value.group()) + else: + ksft_pr(f"No speed value found for interface {self.ifname}") + return None, None + duplex.append(parts[1].lower()) + return speed, duplex + + def get_ethtool_field(self, field: str, remote: bool = False) -> Optional[str]: + process = None + if not remote: + """Get the ethtool field value for the local interface""" + try: + process = ethtool(self.cfg.ifname, json=True) + except Exception as e: + ksft_pr("Required minimum ethtool version is 6.10") + ksft_pr(f"Unexpected error occurred while getting ethtool field in local: {e}") + return None + else: + if not self.verify_link_up(): + return None + """Get the ethtool field value for the remote interface""" + self.cfg.require_cmd("ethtool", remote=True) + if self.partner_netif is None: + ksft_pr(f"Partner interface name is unavailable.") + return None + try: + process = ethtool(self.partner_netif, json=True, host=self.cfg.remote) + except Exception as e: + ksft_pr("Required minimum ethtool version is 6.10") + ksft_pr(f"Unexpected error occurred while getting ethtool field in remote: {e}") + return None + json_data = process[0] + """Check if the field exist in the json data""" + if field not in json_data: + raise KsftSkipEx(f"Field {field} does not exist in the output of interface {json_data["ifname"]}") + return json_data[field] diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c new file mode 100644 index 000000000000..8e502a1f8f9b --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -0,0 +1,789 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * tcpdevmem netcat. Works similarly to netcat but does device memory TCP + * instead of regular TCP. Uses udmabuf to mock a dmabuf provider. + * + * Usage: + * + * On server: + * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 + * + * On client: + * echo -n "hello\nworld" | nc -s <server IP> 5201 -p 5201 + * + * Test data validation: + * + * On server: + * ncdevmem -s <server IP> [-c <client IP>] -f eth1 -l -p 5201 -v 7 + * + * On client: + * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \ + * tr \\n \\0 | \ + * head -c 5G | \ + * nc <server IP> 5201 -p 5201 + * + * + * Note this is compatible with regular netcat. i.e. the sender or receiver can + * be replaced with regular netcat to test the RX or TX path in isolation. + */ +#define _GNU_SOURCE +#define __EXPORTED_HEADERS__ + +#include <linux/uio.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#define __iovec_defined +#include <fcntl.h> +#include <malloc.h> +#include <error.h> + +#include <arpa/inet.h> +#include <sys/socket.h> +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> + +#include <linux/memfd.h> +#include <linux/dma-buf.h> +#include <linux/udmabuf.h> +#include <libmnl/libmnl.h> +#include <linux/types.h> +#include <linux/netlink.h> +#include <linux/genetlink.h> +#include <linux/netdev.h> +#include <linux/ethtool_netlink.h> +#include <time.h> +#include <net/if.h> + +#include "netdev-user.h" +#include "ethtool-user.h" +#include <ynl.h> + +#define PAGE_SHIFT 12 +#define TEST_PREFIX "ncdevmem" +#define NUM_PAGES 16000 + +#ifndef MSG_SOCK_DEVMEM +#define MSG_SOCK_DEVMEM 0x2000000 +#endif + +static char *server_ip; +static char *client_ip; +static char *port; +static size_t do_validation; +static int start_queue = -1; +static int num_queues = -1; +static char *ifname; +static unsigned int ifindex; +static unsigned int dmabuf_id; + +struct memory_buffer { + int fd; + size_t size; + + int devfd; + int memfd; + char *buf_mem; +}; + +struct memory_provider { + struct memory_buffer *(*alloc)(size_t size); + void (*free)(struct memory_buffer *ctx); + void (*memcpy_from_device)(void *dst, struct memory_buffer *src, + size_t off, int n); +}; + +static struct memory_buffer *udmabuf_alloc(size_t size) +{ + struct udmabuf_create create; + struct memory_buffer *ctx; + int ret; + + ctx = malloc(sizeof(*ctx)); + if (!ctx) + error(1, ENOMEM, "malloc failed"); + + ctx->size = size; + + ctx->devfd = open("/dev/udmabuf", O_RDWR); + if (ctx->devfd < 0) + error(1, errno, + "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", + TEST_PREFIX); + + ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); + if (ctx->memfd < 0) + error(1, errno, "%s: [skip,no-memfd]\n", TEST_PREFIX); + + ret = fcntl(ctx->memfd, F_ADD_SEALS, F_SEAL_SHRINK); + if (ret < 0) + error(1, errno, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); + + ret = ftruncate(ctx->memfd, size); + if (ret == -1) + error(1, errno, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); + + memset(&create, 0, sizeof(create)); + + create.memfd = ctx->memfd; + create.offset = 0; + create.size = size; + ctx->fd = ioctl(ctx->devfd, UDMABUF_CREATE, &create); + if (ctx->fd < 0) + error(1, errno, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX); + + ctx->buf_mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, + ctx->fd, 0); + if (ctx->buf_mem == MAP_FAILED) + error(1, errno, "%s: [FAIL, map udmabuf]\n", TEST_PREFIX); + + return ctx; +} + +static void udmabuf_free(struct memory_buffer *ctx) +{ + munmap(ctx->buf_mem, ctx->size); + close(ctx->fd); + close(ctx->memfd); + close(ctx->devfd); + free(ctx); +} + +static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src, + size_t off, int n) +{ + struct dma_buf_sync sync = {}; + + sync.flags = DMA_BUF_SYNC_START; + ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync); + + memcpy(dst, src->buf_mem + off, n); + + sync.flags = DMA_BUF_SYNC_END; + ioctl(src->fd, DMA_BUF_IOCTL_SYNC, &sync); +} + +static struct memory_provider udmabuf_memory_provider = { + .alloc = udmabuf_alloc, + .free = udmabuf_free, + .memcpy_from_device = udmabuf_memcpy_from_device, +}; + +static struct memory_provider *provider = &udmabuf_memory_provider; + +static void print_nonzero_bytes(void *ptr, size_t size) +{ + unsigned char *p = ptr; + unsigned int i; + + for (i = 0; i < size; i++) + putchar(p[i]); +} + +void validate_buffer(void *line, size_t size) +{ + static unsigned char seed = 1; + unsigned char *ptr = line; + int errors = 0; + size_t i; + + for (i = 0; i < size; i++) { + if (ptr[i] != seed) { + fprintf(stderr, + "Failed validation: expected=%u, actual=%u, index=%lu\n", + seed, ptr[i], i); + errors++; + if (errors > 20) + error(1, 0, "validation failed."); + } + seed++; + if (seed == do_validation) + seed = 0; + } + + fprintf(stdout, "Validated buffer\n"); +} + +static int rxq_num(int ifindex) +{ + struct ethtool_channels_get_req *req; + struct ethtool_channels_get_rsp *rsp; + struct ynl_error yerr; + struct ynl_sock *ys; + int num = -1; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = ethtool_channels_get_req_alloc(); + ethtool_channels_get_req_set_header_dev_index(req, ifindex); + rsp = ethtool_channels_get(ys, req); + if (rsp) + num = rsp->rx_count + rsp->combined_count; + ethtool_channels_get_req_free(req); + ethtool_channels_get_rsp_free(rsp); + + ynl_sock_destroy(ys); + + return num; +} + +#define run_command(cmd, ...) \ + ({ \ + char command[256]; \ + memset(command, 0, sizeof(command)); \ + snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \ + fprintf(stderr, "Running: %s\n", command); \ + system(command); \ + }) + +static int reset_flow_steering(void) +{ + /* Depending on the NIC, toggling ntuple off and on might not + * be allowed. Additionally, attempting to delete existing filters + * will fail if no filters are present. Therefore, do not enforce + * the exit status. + */ + + run_command("sudo ethtool -K %s ntuple off >&2", ifname); + run_command("sudo ethtool -K %s ntuple on >&2", ifname); + run_command( + "sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 ethtool -N %s delete >&2", + ifname, ifname); + return 0; +} + +static const char *tcp_data_split_str(int val) +{ + switch (val) { + case 0: + return "off"; + case 1: + return "auto"; + case 2: + return "on"; + default: + return "?"; + } +} + +static int configure_headersplit(bool on) +{ + struct ethtool_rings_get_req *get_req; + struct ethtool_rings_get_rsp *get_rsp; + struct ethtool_rings_set_req *req; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = ethtool_rings_set_req_alloc(); + ethtool_rings_set_req_set_header_dev_index(req, ifindex); + /* 0 - off, 1 - auto, 2 - on */ + ethtool_rings_set_req_set_tcp_data_split(req, on ? 2 : 0); + ret = ethtool_rings_set(ys, req); + if (ret < 0) + fprintf(stderr, "YNL failed: %s\n", ys->err.msg); + ethtool_rings_set_req_free(req); + + if (ret == 0) { + get_req = ethtool_rings_get_req_alloc(); + ethtool_rings_get_req_set_header_dev_index(get_req, ifindex); + get_rsp = ethtool_rings_get(ys, get_req); + ethtool_rings_get_req_free(get_req); + if (get_rsp) + fprintf(stderr, "TCP header split: %s\n", + tcp_data_split_str(get_rsp->tcp_data_split)); + ethtool_rings_get_rsp_free(get_rsp); + } + + ynl_sock_destroy(ys); + + return ret; +} + +static int configure_rss(void) +{ + return run_command("sudo ethtool -X %s equal %d >&2", ifname, start_queue); +} + +static int configure_channels(unsigned int rx, unsigned int tx) +{ + return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx); +} + +static int configure_flow_steering(struct sockaddr_in6 *server_sin) +{ + const char *type = "tcp6"; + const char *server_addr; + char buf[40]; + + inet_ntop(AF_INET6, &server_sin->sin6_addr, buf, sizeof(buf)); + server_addr = buf; + + if (IN6_IS_ADDR_V4MAPPED(&server_sin->sin6_addr)) { + type = "tcp4"; + server_addr = strrchr(server_addr, ':') + 1; + } + + return run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2", + ifname, + type, + client_ip ? "src-ip" : "", + client_ip ?: "", + server_addr, + client_ip ? "src-port" : "", + client_ip ? port : "", + port, start_queue); +} + +static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, + struct netdev_queue_id *queues, + unsigned int n_queue_index, struct ynl_sock **ys) +{ + struct netdev_bind_rx_req *req = NULL; + struct netdev_bind_rx_rsp *rsp = NULL; + struct ynl_error yerr; + + *ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!*ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + req = netdev_bind_rx_req_alloc(); + netdev_bind_rx_req_set_ifindex(req, ifindex); + netdev_bind_rx_req_set_fd(req, dmabuf_fd); + __netdev_bind_rx_req_set_queues(req, queues, n_queue_index); + + rsp = netdev_bind_rx(*ys, req); + if (!rsp) { + perror("netdev_bind_rx"); + goto err_close; + } + + if (!rsp->_present.id) { + perror("id not present"); + goto err_close; + } + + fprintf(stderr, "got dmabuf id=%d\n", rsp->id); + dmabuf_id = rsp->id; + + netdev_bind_rx_req_free(req); + netdev_bind_rx_rsp_free(rsp); + + return 0; + +err_close: + fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg); + netdev_bind_rx_req_free(req); + ynl_sock_destroy(*ys); + return -1; +} + +static void enable_reuseaddr(int fd) +{ + int opt = 1; + int ret; + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); + if (ret) + error(1, errno, "%s: [FAIL, SO_REUSEPORT]\n", TEST_PREFIX); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + if (ret) + error(1, errno, "%s: [FAIL, SO_REUSEADDR]\n", TEST_PREFIX); +} + +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } + + return 0; +} + +int do_server(struct memory_buffer *mem) +{ + char ctrl_data[sizeof(int) * 20000]; + struct netdev_queue_id *queues; + size_t non_page_aligned_frags = 0; + struct sockaddr_in6 client_addr; + struct sockaddr_in6 server_sin; + size_t page_aligned_frags = 0; + size_t total_received = 0; + socklen_t client_addr_len; + bool is_devmem = false; + char *tmp_mem = NULL; + struct ynl_sock *ys; + char iobuf[819200]; + char buffer[256]; + int socket_fd; + int client_fd; + size_t i = 0; + int ret; + + ret = parse_address(server_ip, atoi(port), &server_sin); + if (ret < 0) + error(1, 0, "parse server address"); + + if (reset_flow_steering()) + error(1, 0, "Failed to reset flow steering\n"); + + if (configure_headersplit(1)) + error(1, 0, "Failed to enable TCP header split\n"); + + /* Configure RSS to divert all traffic from our devmem queues */ + if (configure_rss()) + error(1, 0, "Failed to configure rss\n"); + + /* Flow steer our devmem flows to start_queue */ + if (configure_flow_steering(&server_sin)) + error(1, 0, "Failed to configure flow steering\n"); + + sleep(1); + + queues = malloc(sizeof(*queues) * num_queues); + + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + error(1, 0, "Failed to bind\n"); + + tmp_mem = malloc(mem->size); + if (!tmp_mem) + error(1, ENOMEM, "malloc failed"); + + socket_fd = socket(AF_INET6, SOCK_STREAM, 0); + if (socket_fd < 0) + error(1, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); + + enable_reuseaddr(socket_fd); + + fprintf(stderr, "binding to address %s:%d\n", server_ip, + ntohs(server_sin.sin6_port)); + + ret = bind(socket_fd, &server_sin, sizeof(server_sin)); + if (ret) + error(1, errno, "%s: [FAIL, bind]\n", TEST_PREFIX); + + ret = listen(socket_fd, 1); + if (ret) + error(1, errno, "%s: [FAIL, listen]\n", TEST_PREFIX); + + client_addr_len = sizeof(client_addr); + + inet_ntop(AF_INET6, &server_sin.sin6_addr, buffer, + sizeof(buffer)); + fprintf(stderr, "Waiting or connection on %s:%d\n", buffer, + ntohs(server_sin.sin6_port)); + client_fd = accept(socket_fd, &client_addr, &client_addr_len); + + inet_ntop(AF_INET6, &client_addr.sin6_addr, buffer, + sizeof(buffer)); + fprintf(stderr, "Got connection from %s:%d\n", buffer, + ntohs(client_addr.sin6_port)); + + while (1) { + struct iovec iov = { .iov_base = iobuf, + .iov_len = sizeof(iobuf) }; + struct dmabuf_cmsg *dmabuf_cmsg = NULL; + struct cmsghdr *cm = NULL; + struct msghdr msg = { 0 }; + struct dmabuf_token token; + ssize_t ret; + + is_devmem = false; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = ctrl_data; + msg.msg_controllen = sizeof(ctrl_data); + ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM); + fprintf(stderr, "recvmsg ret=%ld\n", ret); + if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) + continue; + if (ret < 0) { + perror("recvmsg"); + continue; + } + if (ret == 0) { + fprintf(stderr, "client exited\n"); + goto cleanup; + } + + i++; + for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { + if (cm->cmsg_level != SOL_SOCKET || + (cm->cmsg_type != SCM_DEVMEM_DMABUF && + cm->cmsg_type != SCM_DEVMEM_LINEAR)) { + fprintf(stderr, "skipping non-devmem cmsg\n"); + continue; + } + + dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm); + is_devmem = true; + + if (cm->cmsg_type == SCM_DEVMEM_LINEAR) { + /* TODO: process data copied from skb's linear + * buffer. + */ + fprintf(stderr, + "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n", + dmabuf_cmsg->frag_size); + + continue; + } + + token.token_start = dmabuf_cmsg->frag_token; + token.token_count = 1; + + total_received += dmabuf_cmsg->frag_size; + fprintf(stderr, + "received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n", + dmabuf_cmsg->frag_offset >> PAGE_SHIFT, + dmabuf_cmsg->frag_offset % getpagesize(), + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size, dmabuf_cmsg->frag_token, + total_received, dmabuf_cmsg->dmabuf_id); + + if (dmabuf_cmsg->dmabuf_id != dmabuf_id) + error(1, 0, + "received on wrong dmabuf_id: flow steering error\n"); + + if (dmabuf_cmsg->frag_size % getpagesize()) + non_page_aligned_frags++; + else + page_aligned_frags++; + + provider->memcpy_from_device(tmp_mem, mem, + dmabuf_cmsg->frag_offset, + dmabuf_cmsg->frag_size); + + if (do_validation) + validate_buffer(tmp_mem, + dmabuf_cmsg->frag_size); + else + print_nonzero_bytes(tmp_mem, + dmabuf_cmsg->frag_size); + + ret = setsockopt(client_fd, SOL_SOCKET, + SO_DEVMEM_DONTNEED, &token, + sizeof(token)); + if (ret != 1) + error(1, 0, + "SO_DEVMEM_DONTNEED not enough tokens"); + } + if (!is_devmem) + error(1, 0, "flow steering error\n"); + + fprintf(stderr, "total_received=%lu\n", total_received); + } + + fprintf(stderr, "%s: ok\n", TEST_PREFIX); + + fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", + page_aligned_frags, non_page_aligned_frags); + + fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", + page_aligned_frags, non_page_aligned_frags); + +cleanup: + + free(tmp_mem); + close(client_fd); + close(socket_fd); + ynl_sock_destroy(ys); + + return 0; +} + +void run_devmem_tests(void) +{ + struct netdev_queue_id *queues; + struct memory_buffer *mem; + struct ynl_sock *ys; + size_t i = 0; + + mem = provider->alloc(getpagesize() * NUM_PAGES); + + /* Configure RSS to divert all traffic from our devmem queues */ + if (configure_rss()) + error(1, 0, "rss error\n"); + + queues = calloc(num_queues, sizeof(*queues)); + + if (configure_headersplit(1)) + error(1, 0, "Failed to configure header split\n"); + + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + error(1, 0, "Binding empty queues array should have failed\n"); + + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + if (configure_headersplit(0)) + error(1, 0, "Failed to configure header split\n"); + + if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + error(1, 0, "Configure dmabuf with header split off should have failed\n"); + + if (configure_headersplit(1)) + error(1, 0, "Failed to configure header split\n"); + + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + error(1, 0, "Failed to bind\n"); + + /* Deactivating a bound queue should not be legal */ + if (!configure_channels(num_queues, num_queues - 1)) + error(1, 0, "Deactivating a bound queue should be illegal.\n"); + + /* Closing the netlink socket does an implicit unbind */ + ynl_sock_destroy(ys); + + provider->free(mem); +} + +int main(int argc, char *argv[]) +{ + struct memory_buffer *mem; + int is_server = 0, opt; + int ret; + + while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) { + switch (opt) { + case 'l': + is_server = 1; + break; + case 's': + server_ip = optarg; + break; + case 'c': + client_ip = optarg; + break; + case 'p': + port = optarg; + break; + case 'v': + do_validation = atoll(optarg); + break; + case 'q': + num_queues = atoi(optarg); + break; + case 't': + start_queue = atoi(optarg); + break; + case 'f': + ifname = optarg; + break; + case '?': + fprintf(stderr, "unknown option: %c\n", optopt); + break; + } + } + + if (!ifname) + error(1, 0, "Missing -f argument\n"); + + ifindex = if_nametoindex(ifname); + + if (!server_ip && !client_ip) { + if (start_queue < 0 && num_queues < 0) { + num_queues = rxq_num(ifindex); + if (num_queues < 0) + error(1, 0, "couldn't detect number of queues\n"); + if (num_queues < 2) + error(1, 0, + "number of device queues is too low\n"); + /* make sure can bind to multiple queues */ + start_queue = num_queues / 2; + num_queues /= 2; + } + + if (start_queue < 0 || num_queues < 0) + error(1, 0, "Both -t and -q are required\n"); + + run_devmem_tests(); + return 0; + } + + if (start_queue < 0 && num_queues < 0) { + num_queues = rxq_num(ifindex); + if (num_queues < 2) + error(1, 0, "number of device queues is too low\n"); + + num_queues = 1; + start_queue = rxq_num(ifindex) - num_queues; + + if (start_queue < 0) + error(1, 0, "couldn't detect number of queues\n"); + + fprintf(stderr, "using queues %d..%d\n", start_queue, start_queue + num_queues); + } + + for (; optind < argc; optind++) + fprintf(stderr, "extra arguments: %s\n", argv[optind]); + + if (start_queue < 0) + error(1, 0, "Missing -t argument\n"); + + if (num_queues < 0) + error(1, 0, "Missing -q argument\n"); + + if (!server_ip) + error(1, 0, "Missing -s argument\n"); + + if (!port) + error(1, 0, "Missing -p argument\n"); + + mem = provider->alloc(getpagesize() * NUM_PAGES); + ret = is_server ? do_server(mem) : 1; + provider->free(mem); + + return ret; +} diff --git a/tools/testing/selftests/drivers/net/hw/nic_link_layer.py b/tools/testing/selftests/drivers/net/hw/nic_link_layer.py new file mode 100644 index 000000000000..efd921180532 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nic_link_layer.py @@ -0,0 +1,113 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +#Introduction: +#This file has basic link layer tests for generic NIC drivers. +#The test comprises of auto-negotiation, speed and duplex checks. +# +#Setup: +#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1) +# +# DUT PC Partner PC +#┌───────────────────────┐ ┌──────────────────────────┐ +#│ │ │ │ +#│ │ │ │ +#│ ┌───────────┐ │ │ +#│ │DUT NIC │ Eth │ │ +#│ │Interface ─┼─────────────────────────┼─ any eth Interface │ +#│ └───────────┘ │ │ +#│ │ │ │ +#│ │ │ │ +#└───────────────────────┘ └──────────────────────────┘ +# +#Configurations: +#Required minimum ethtool version is 6.10 (supports json) +#Default values: +#time_delay = 8 #time taken to wait for transitions to happen, in seconds. + +import time +import argparse +from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_eq +from lib.py import KsftFailEx, KsftSkipEx +from lib.py import NetDrvEpEnv +from lib.py import LinkConfig + +def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None: + if link_config.partner_netif is None: + KsftSkipEx("Partner interface is not available") + if not link_config.check_autoneg_supported() or not link_config.check_autoneg_supported(remote=True): + KsftSkipEx(f"Auto-negotiation not supported for interface {cfg.ifname} or {link_config.partner_netif}") + if not link_config.verify_link_up(): + raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") + +def verify_autonegotiation(cfg: object, expected_state: str, link_config: LinkConfig) -> None: + if not link_config.verify_link_up(): + raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") + """Verifying the autonegotiation state in partner""" + partner_autoneg_output = link_config.get_ethtool_field("auto-negotiation", remote=True) + if partner_autoneg_output is None: + KsftSkipEx(f"Auto-negotiation state not available for interface {link_config.partner_netif}") + partner_autoneg_state = "on" if partner_autoneg_output is True else "off" + + ksft_eq(partner_autoneg_state, expected_state) + + """Verifying the autonegotiation state of local""" + autoneg_output = link_config.get_ethtool_field("auto-negotiation") + if autoneg_output is None: + KsftSkipEx(f"Auto-negotiation state not available for interface {cfg.ifname}") + actual_state = "on" if autoneg_output is True else "off" + + ksft_eq(actual_state, expected_state) + + """Verifying the link establishment""" + link_available = link_config.get_ethtool_field("link-detected") + if link_available is None: + KsftSkipEx(f"Link status not available for interface {cfg.ifname}") + if link_available != True: + raise KsftSkipEx("Link not established at interface {cfg.ifname} after changing auto-negotiation") + +def test_autonegotiation(cfg: object, link_config: LinkConfig, time_delay: int) -> None: + _pre_test_checks(cfg, link_config) + for state in ["off", "on"]: + if not link_config.set_autonegotiation_state(state, remote=True): + raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {link_config.partner_netif}") + if not link_config.set_autonegotiation_state(state): + raise KsftSkipEx(f"Unable to set auto-negotiation state for interface {cfg.ifname}") + time.sleep(time_delay) + verify_autonegotiation(cfg, state, link_config) + +def test_network_speed(cfg: object, link_config: LinkConfig, time_delay: int) -> None: + _pre_test_checks(cfg, link_config) + common_link_modes = link_config.common_link_modes + if not common_link_modes: + KsftSkipEx("No common link modes exist") + speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes) + + if speeds and duplex_modes and len(speeds) == len(duplex_modes): + for idx in range(len(speeds)): + speed = speeds[idx] + duplex = duplex_modes[idx] + if not link_config.set_speed_and_duplex(speed, duplex): + raise KsftFailEx(f"Unable to set speed and duplex parameters for {cfg.ifname}") + time.sleep(time_delay) + if not link_config.verify_speed_and_duplex(speed, duplex): + raise KsftSkipEx(f"Error occurred while verifying speed and duplex states for interface {cfg.ifname}") + else: + if not speeds or not duplex_modes: + KsftSkipEx(f"No supported speeds or duplex modes found for interface {cfg.ifname}") + else: + KsftSkipEx("Mismatch in the number of speeds and duplex modes") + +def main() -> None: + parser = argparse.ArgumentParser(description="Run basic link layer tests for NIC driver") + parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.') + args = parser.parse_args() + time_delay = args.time_delay + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + link_config = LinkConfig(cfg) + ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, time_delay,)) + link_config.reset_interface() + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/nic_performance.py b/tools/testing/selftests/drivers/net/hw/nic_performance.py new file mode 100644 index 000000000000..201403b76ea3 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nic_performance.py @@ -0,0 +1,137 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +#Introduction: +#This file has basic performance test for generic NIC drivers. +#The test comprises of throughput check for TCP and UDP streams. +# +#Setup: +#Connect the DUT PC with NIC card to partner pc back via ethernet medium of your choice(RJ45, T1) +# +# DUT PC Partner PC +#┌───────────────────────┐ ┌──────────────────────────┐ +#│ │ │ │ +#│ │ │ │ +#│ ┌───────────┐ │ │ +#│ │DUT NIC │ Eth │ │ +#│ │Interface ─┼─────────────────────────┼─ any eth Interface │ +#│ └───────────┘ │ │ +#│ │ │ │ +#│ │ │ │ +#└───────────────────────┘ └──────────────────────────┘ +# +#Configurations: +#To prevent interruptions, Add ethtool, ip to the sudoers list in remote PC and get the ssh key from remote. +#Required minimum ethtool version is 6.10 +#Change the below configuration based on your hw needs. +# """Default values""" +#time_delay = 8 #time taken to wait for transitions to happen, in seconds. +#test_duration = 10 #performance test duration for the throughput check, in seconds. +#send_throughput_threshold = 80 #percentage of send throughput required to pass the check +#receive_throughput_threshold = 50 #percentage of receive throughput required to pass the check + +import time +import json +import argparse +from lib.py import ksft_run, ksft_exit, ksft_pr, ksft_true +from lib.py import KsftFailEx, KsftSkipEx, GenerateTraffic +from lib.py import NetDrvEpEnv, bkg, wait_port_listen +from lib.py import cmd +from lib.py import LinkConfig + +class TestConfig: + def __init__(self, time_delay: int, test_duration: int, send_throughput_threshold: int, receive_throughput_threshold: int) -> None: + self.time_delay = time_delay + self.test_duration = test_duration + self.send_throughput_threshold = send_throughput_threshold + self.receive_throughput_threshold = receive_throughput_threshold + +def _pre_test_checks(cfg: object, link_config: LinkConfig) -> None: + if not link_config.verify_link_up(): + KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") + common_link_modes = link_config.common_link_modes + if common_link_modes is None: + KsftSkipEx("No common link modes found") + if link_config.partner_netif == None: + KsftSkipEx("Partner interface is not available") + if link_config.check_autoneg_supported(): + KsftSkipEx("Auto-negotiation not supported by local") + if link_config.check_autoneg_supported(remote=True): + KsftSkipEx("Auto-negotiation not supported by remote") + cfg.require_cmd("iperf3", remote=True) + +def check_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, protocol: str, traffic: GenerateTraffic) -> None: + common_link_modes = link_config.common_link_modes + speeds, duplex_modes = link_config.get_speed_duplex_values(common_link_modes) + """Test duration in seconds""" + duration = test_config.test_duration + + ksft_pr(f"{protocol} test") + test_type = "-u" if protocol == "UDP" else "" + + send_throughput = [] + receive_throughput = [] + for idx in range(0, len(speeds)): + if link_config.set_speed_and_duplex(speeds[idx], duplex_modes[idx]) == False: + raise KsftFailEx(f"Not able to set speed and duplex parameters for {cfg.ifname}") + time.sleep(test_config.time_delay) + if not link_config.verify_link_up(): + raise KsftSkipEx(f"Link state of interface {cfg.ifname} is DOWN") + + send_command=f"{test_type} -b 0 -t {duration} --json" + receive_command=f"{test_type} -b 0 -t {duration} --reverse --json" + + send_result = traffic.run_remote_test(cfg, command=send_command) + if send_result.ret != 0: + raise KsftSkipEx("Error occurred during data transmit: {send_result.stdout}") + + send_output = send_result.stdout + send_data = json.loads(send_output) + + """Convert throughput to Mbps""" + send_throughput.append(round(send_data['end']['sum_sent']['bits_per_second'] / 1e6, 2)) + ksft_pr(f"{protocol}: Send throughput: {send_throughput[idx]} Mbps") + + receive_result = traffic.run_remote_test(cfg, command=receive_command) + if receive_result.ret != 0: + raise KsftSkipEx("Error occurred during data receive: {receive_result.stdout}") + + receive_output = receive_result.stdout + receive_data = json.loads(receive_output) + + """Convert throughput to Mbps""" + receive_throughput.append(round(receive_data['end']['sum_received']['bits_per_second'] / 1e6, 2)) + ksft_pr(f"{protocol}: Receive throughput: {receive_throughput[idx]} Mbps") + + """Check whether throughput is not below the threshold (default values set at start)""" + for idx in range(0, len(speeds)): + send_threshold = float(speeds[idx]) * float(test_config.send_throughput_threshold / 100) + receive_threshold = float(speeds[idx]) * float(test_config.receive_throughput_threshold / 100) + ksft_true(send_throughput[idx] >= send_threshold, f"{protocol}: Send throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex") + ksft_true(receive_throughput[idx] >= receive_threshold, f"{protocol}: Receive throughput is below threshold for {speeds[idx]} Mbps in {duplex_modes[idx]} duplex") + +def test_tcp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None: + _pre_test_checks(cfg, link_config) + check_throughput(cfg, link_config, test_config, 'TCP', traffic) + +def test_udp_throughput(cfg: object, link_config: LinkConfig, test_config: TestConfig, traffic: GenerateTraffic) -> None: + _pre_test_checks(cfg, link_config) + check_throughput(cfg, link_config, test_config, 'UDP', traffic) + +def main() -> None: + parser = argparse.ArgumentParser(description="Run basic performance test for NIC driver") + parser.add_argument('--time-delay', type=int, default=8, help='Time taken to wait for transitions to happen(in seconds). Default is 8 seconds.') + parser.add_argument('--test-duration', type=int, default=10, help='Performance test duration for the throughput check, in seconds. Default is 10 seconds.') + parser.add_argument('--stt', type=int, default=80, help='Send throughput Threshold: Percentage of send throughput upon actual throughput required to pass the throughput check (in percentage). Default is 80.') + parser.add_argument('--rtt', type=int, default=50, help='Receive throughput Threshold: Percentage of receive throughput upon actual throughput required to pass the throughput check (in percentage). Default is 50.') + args=parser.parse_args() + test_config = TestConfig(args.time_delay, args.test_duration, args.stt, args.rtt) + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + traffic = GenerateTraffic(cfg) + link_config = LinkConfig(cfg) + ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, link_config, test_config, traffic, )) + link_config.reset_interface() + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 9d7adb3cf33b..0b49ce7ae678 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -6,7 +6,7 @@ import random from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ne, ksft_ge, ksft_lt from lib.py import NetDrvEpEnv from lib.py import EthtoolFamily, NetdevFamily -from lib.py import KsftSkipEx +from lib.py import KsftSkipEx, KsftFailEx from lib.py import rand_port from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure @@ -215,7 +215,7 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True): defer(ethtool, f"-X {cfg.ifname} default") else: other_key = 'noise' - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}" + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}" ntuple = ethtool_create(cfg, "-N", flow) defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") @@ -238,6 +238,32 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True): else: raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})") + if not main_ctx: + ethtool(f"-L {cfg.ifname} combined 4") + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id} action 1" + try: + # this targets queue 4, which doesn't exist + ntuple2 = ethtool_create(cfg, "-N", flow) + except CmdExitFailure: + pass + else: + raise Exception(f"Driver didn't prevent us from targeting a nonexistent queue (context {ctx_id})") + # change the table to target queues 0 and 2 + ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0") + # ntuple rule therefore targets queues 1 and 3 + ntuple2 = ethtool_create(cfg, "-N", flow) + # should replace existing filter + ksft_eq(ntuple, ntuple2) + _send_traffic_check(cfg, port, ctx_ref, { 'target': (1, 3), + 'noise' : (0, 2) }) + # Setting queue count to 3 should fail, queue 3 is used + try: + ethtool(f"-L {cfg.ifname} combined 3") + except CmdExitFailure: + pass + else: + raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})") + def test_rss_resize(cfg): """Test resizing of the RSS table. @@ -429,7 +455,7 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data)) ports.append(rand_port()) - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}" + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}" ntuple = ethtool_create(cfg, "-N", flow) defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") @@ -516,7 +542,7 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")) ports.append(rand_port()) - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}" + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {ports[i]} context {ctx_id}" ntuple_id = ethtool_create(cfg, "-N", flow) ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")) @@ -569,7 +595,7 @@ def test_rss_context_overlap(cfg, other_ctx=0): port = rand_port() if other_ctx: - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {other_ctx}" + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {other_ctx}" ntuple_id = ethtool_create(cfg, "-N", flow) ntuple = defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") @@ -587,7 +613,7 @@ def test_rss_context_overlap(cfg, other_ctx=0): # Now create a rule for context 1 and make sure traffic goes to a subset if other_ctx: ntuple.exec() - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}" + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}" ntuple_id = ethtool_create(cfg, "-N", flow) defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") @@ -606,6 +632,72 @@ def test_rss_context_overlap2(cfg): test_rss_context_overlap(cfg, True) +def test_delete_rss_context_busy(cfg): + """ + Test that deletion returns -EBUSY when an rss context is being used + by an ntuple filter. + """ + + require_ntuple(cfg) + + # create additional rss context + ctx_id = ethtool_create(cfg, "-X", "context new") + ctx_deleter = defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + # utilize context from ntuple filter + port = rand_port() + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + # attempt to delete in-use context + try: + ctx_deleter.exec_only() + ctx_deleter.cancel() + raise KsftFailEx(f"deleted context {ctx_id} used by rule {ntuple_id}") + except CmdExitFailure: + pass + + +def test_rss_ntuple_addition(cfg): + """ + Test that the queue offset (ring_cookie) of an ntuple rule is added + to the queue number read from the indirection table. + """ + + require_ntuple(cfg) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 4: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 4") + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except: + raise KsftSkipEx("Not enough queues for the test") + + # Use queue 0 for normal traffic + ethtool(f"-X {cfg.ifname} equal 1") + defer(ethtool, f"-X {cfg.ifname} default") + + # create additional rss context + ctx_id = ethtool_create(cfg, "-X", "context new equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + # utilize context from ntuple filter + port = rand_port() + flow = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port} context {ctx_id} action 2" + try: + ntuple_id = ethtool_create(cfg, "-N", flow) + except CmdExitFailure: + raise KsftSkipEx("Ntuple filter with RSS and nonzero action not supported") + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") + + _send_traffic_check(cfg, port, f"context {ctx_id}", { 'target': (2, 3), + 'empty' : (1,), + 'noise' : (0,) }) + + def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: cfg.ethnl = EthtoolFamily() @@ -616,7 +708,8 @@ def main() -> None: test_rss_context, test_rss_context4, test_rss_context32, test_rss_context_dump, test_rss_context_queue_reconfigure, test_rss_context_overlap, test_rss_context_overlap2, - test_rss_context_out_of_order, test_rss_context4_create_with_cfg], + test_rss_context_out_of_order, test_rss_context4_create_with_cfg, + test_delete_rss_context_busy, test_rss_ntuple_addition], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index d9c10613ae67..da5af2c680fa 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -2,7 +2,7 @@ import time -from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen +from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen, bkg class GenerateTraffic: def __init__(self, env, port=None): @@ -23,6 +23,24 @@ class GenerateTraffic: self.stop(verbose=True) raise Exception("iperf3 traffic did not ramp up") + def run_remote_test(self, env: object, port=None, command=None): + if port is None: + port = rand_port() + try: + server_cmd = f"iperf3 -s 1 -p {port} --one-off" + with bkg(server_cmd, host=env.remote): + #iperf3 opens TCP connection as default in server + #-u to be specified in client command for UDP + wait_port_listen(port, host=env.remote) + except Exception as e: + raise Exception(f"Unexpected error occurred while running server command: {e}") + try: + client_cmd = f"iperf3 -c {env.remote_addr} -p {port} {command}" + proc = cmd(client_cmd) + return proc + except Exception as e: + raise Exception(f"Unexpected error occurred while running client command: {e}") + def _wait_pkts(self, pkt_cnt=None, pps=None): """ Wait until we've seen pkt_cnt or until traffic ramps up to pps. diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh index 89b55e946eed..36055279ba92 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap.sh @@ -116,7 +116,7 @@ dev_del_test() log_test "Device delete" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid } trap cleanup EXIT diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh index 160891dcb4bc..db5806d189bb 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh @@ -595,7 +595,7 @@ irif_disabled_test() log_test "Ingress RIF disabled" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid ip link set dev $rp1 nomaster __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 ip link del dev br0 type bridge @@ -645,7 +645,7 @@ erif_disabled_test() log_test "Egress RIF disabled" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64 ip link del dev br0 type bridge devlink_trap_action_set $trap_name "drop" diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh index 190c1b6b5365..5d6d88b600f0 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh @@ -202,7 +202,7 @@ mtu_value_is_too_small_test() mtu_restore $rp2 - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower } @@ -235,7 +235,7 @@ __ttl_value_is_too_small_test() log_test "TTL value is too small: TTL=$ttl_val" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower } @@ -299,7 +299,7 @@ __mc_reverse_path_forwarding_test() log_test "Multicast reverse path forwarding: $desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $rp2 egress protocol $proto pref 1 handle 101 flower } @@ -347,7 +347,7 @@ __reject_route_test() log_test "Reject route: $desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid ip route del unreachable $unreachable tc filter del dev $h1 ingress protocol $proto pref 1 handle 101 flower } @@ -542,7 +542,7 @@ ipv4_lpm_miss_test() log_test "LPM miss: IPv4" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid vrf_without_routes_destroy } @@ -569,7 +569,7 @@ ipv6_lpm_miss_test() log_test "LPM miss: IPv6" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid vrf_without_routes_destroy } diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh index 0bd5ffc218ac..29a672c2270f 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh @@ -45,63 +45,52 @@ source $lib_dir/devlink_lib.sh h1_create() { simple_if_init $h1 192.0.2.1/24 + defer simple_if_fini $h1 192.0.2.1/24 + mtu_set $h1 10000 + defer mtu_restore $h1 ip -4 route add default vrf v$h1 nexthop via 192.0.2.2 -} - -h1_destroy() -{ - ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 - - mtu_restore $h1 - simple_if_fini $h1 192.0.2.1/24 + defer ip -4 route del default vrf v$h1 nexthop via 192.0.2.2 } h2_create() { simple_if_init $h2 198.51.100.1/24 + defer simple_if_fini $h2 198.51.100.1/24 + mtu_set $h2 10000 + defer mtu_restore $h2 ip -4 route add default vrf v$h2 nexthop via 198.51.100.2 -} - -h2_destroy() -{ - ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 - - mtu_restore $h2 - simple_if_fini $h2 198.51.100.1/24 + defer ip -4 route del default vrf v$h2 nexthop via 198.51.100.2 } router_create() { ip link set dev $rp1 up + defer ip link set dev $rp1 down + ip link set dev $rp2 up + defer ip link set dev $rp2 down __addr_add_del $rp1 add 192.0.2.2/24 + defer __addr_add_del $rp1 del 192.0.2.2/24 + __addr_add_del $rp2 add 198.51.100.2/24 + defer __addr_add_del $rp2 del 198.51.100.2/24 + mtu_set $rp1 10000 + defer mtu_restore $rp1 + mtu_set $rp2 10000 + defer mtu_restore $rp2 ip -4 route add blackhole 198.51.100.100 + defer ip -4 route del blackhole 198.51.100.100 devlink trap set $DEVLINK_DEV trap blackhole_route action trap -} - -router_destroy() -{ - devlink trap set $DEVLINK_DEV trap blackhole_route action drop - - ip -4 route del blackhole 198.51.100.100 - - mtu_restore $rp2 - mtu_restore $rp1 - __addr_add_del $rp2 del 198.51.100.2/24 - __addr_add_del $rp1 del 192.0.2.2/24 - - ip link set dev $rp2 down - ip link set dev $rp1 down + defer devlink trap set $DEVLINK_DEV trap blackhole_route action drop } setup_prepare() @@ -114,7 +103,11 @@ setup_prepare() rp1_mac=$(mac_get $rp1) + # Reload to ensure devlink-trap settings are back to default. + defer devlink_reload + vrf_prepare + defer vrf_cleanup h1_create h2_create @@ -122,21 +115,6 @@ setup_prepare() router_create } -cleanup() -{ - pre_cleanup - - router_destroy - - h2_destroy - h1_destroy - - vrf_cleanup - - # Reload to ensure devlink-trap settings are back to default. - devlink_reload -} - rate_limits_test() { RET=0 @@ -214,7 +192,10 @@ __rate_test() # by the policer. Make sure measured received rate is about 1000 pps log_info "=== Tx rate: Highest, Policer rate: 1000 pps ===" + defer_scope_push + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac + defer stop_traffic $! sleep 5 # Take measurements when rate is stable @@ -229,13 +210,16 @@ __rate_test() check_err $? "Expected non-zero policer drop rate, got 0" log_info "Measured policer drop rate of $drop_rate pps" - stop_traffic + defer_scope_pop # Send packets at a rate of 1000 pps and make sure they are not dropped # by the policer log_info "=== Tx rate: 1000 pps, Policer rate: 1000 pps ===" + defer_scope_push + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -d 1msec + defer stop_traffic $! sleep 5 # Take measurements when rate is stable @@ -244,7 +228,7 @@ __rate_test() check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps" log_info "Measured policer drop rate of $drop_rate pps" - stop_traffic + defer_scope_pop # Unbind the policer and send packets at highest possible rate. Make # sure they are not dropped by the policer and that the measured @@ -253,7 +237,10 @@ __rate_test() devlink trap group set $DEVLINK_DEV group l3_drops nopolicer + defer_scope_push + start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac + defer stop_traffic $! rate=$(trap_rate_get) (( rate > 1000 )) @@ -265,7 +252,7 @@ __rate_test() check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps" log_info "Measured policer drop rate of $drop_rate pps" - stop_traffic + defer_scope_pop log_test "Trap policer rate" } diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh index e9a82cae8c9a..4ac1dae92d0f 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh @@ -176,7 +176,7 @@ ecn_decap_test() log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower } @@ -207,7 +207,7 @@ no_matching_tunnel_test() log_test "$desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower } diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh index 878125041fc3..fce885184404 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh @@ -176,7 +176,7 @@ ecn_decap_test() log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower } @@ -207,7 +207,7 @@ no_matching_tunnel_test() log_test "$desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower } diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh index 5f6eb965cfd1..7aca8e5922cf 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh @@ -183,7 +183,7 @@ ecn_decap_test() log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower } @@ -253,7 +253,7 @@ corrupted_packet_test() log_test "$desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower } diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh index f6c16cbb6cf7..4599c331240b 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh @@ -188,7 +188,7 @@ ecn_decap_test() log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower } @@ -262,7 +262,7 @@ corrupted_packet_test() log_test "$desc" - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower } diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh index fee74f215cec..d5b6f2cc9a29 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh @@ -58,65 +58,62 @@ source qos_lib.sh h1_create() { simple_if_init $h1 + defer simple_if_fini $h1 + mtu_set $h1 10000 + defer mtu_restore $h1 vlan_create $h1 111 v$h1 192.0.2.33/28 + defer vlan_destroy $h1 111 ip link set dev $h1.111 type vlan egress-qos-map 0:1 } -h1_destroy() -{ - vlan_destroy $h1 111 - - mtu_restore $h1 - simple_if_fini $h1 -} - h2_create() { simple_if_init $h2 + defer simple_if_fini $h2 + mtu_set $h2 10000 + defer mtu_restore $h2 vlan_create $h2 222 v$h2 192.0.2.65/28 + defer vlan_destroy $h2 222 ip link set dev $h2.222 type vlan egress-qos-map 0:2 } -h2_destroy() -{ - vlan_destroy $h2 222 - - mtu_restore $h2 - simple_if_fini $h2 -} - h3_create() { simple_if_init $h3 + defer simple_if_fini $h3 + mtu_set $h3 10000 + defer mtu_restore $h3 vlan_create $h3 111 v$h3 192.0.2.34/28 - vlan_create $h3 222 v$h3 192.0.2.66/28 -} - -h3_destroy() -{ - vlan_destroy $h3 222 - vlan_destroy $h3 111 + defer vlan_destroy $h3 111 - mtu_restore $h3 - simple_if_fini $h3 + vlan_create $h3 222 v$h3 192.0.2.66/28 + defer vlan_destroy $h3 222 } switch_create() { ip link set dev $swp1 up + defer ip link set dev $swp1 down + mtu_set $swp1 10000 + defer mtu_restore $swp1 ip link set dev $swp2 up + defer ip link set dev $swp2 down + mtu_set $swp2 10000 + defer mtu_restore $swp2 # prio n -> TC n, strict scheduling lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:1,2:2,3:3,4:4,5:5,6:6,7:7 + defer lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0 + lldptool -T -i $swp3 -V ETS-CFG tsa=$( )"0:strict,"$( )"1:strict,"$( @@ -129,85 +126,90 @@ switch_create() sleep 1 ip link set dev $swp3 up + defer ip link set dev $swp3 down + mtu_set $swp3 10000 + defer mtu_restore $swp3 + tc qdisc replace dev $swp3 root handle 101: tbf rate 1gbit \ burst 128K limit 1G + defer tc qdisc del dev $swp3 root handle 101: vlan_create $swp1 111 + defer vlan_destroy $swp1 111 + vlan_create $swp2 222 + defer vlan_destroy $swp2 222 + vlan_create $swp3 111 + defer vlan_destroy $swp3 111 + vlan_create $swp3 222 + defer vlan_destroy $swp3 222 ip link add name br111 type bridge vlan_filtering 0 + defer ip link del dev br111 ip link set dev br111 addrgenmode none + ip link set dev br111 up + defer ip link set dev br111 down + ip link set dev $swp1.111 master br111 + defer ip link set dev $swp1.111 nomaster + ip link set dev $swp3.111 master br111 + defer ip link set dev $swp3.111 nomaster ip link add name br222 type bridge vlan_filtering 0 + defer ip link del dev br222 ip link set dev br222 addrgenmode none + ip link set dev br222 up + defer ip link set dev br222 down + ip link set dev $swp2.222 master br222 + defer ip link set dev $swp2.222 nomaster + ip link set dev $swp3.222 master br222 + defer ip link set dev $swp3.222 nomaster # Make sure that ingress quotas are smaller than egress so that there is # room for both streams of traffic to be admitted to shared buffer. devlink_pool_size_thtype_save 0 devlink_pool_size_thtype_set 0 dynamic 10000000 + defer devlink_pool_size_thtype_restore 0 + devlink_pool_size_thtype_save 4 devlink_pool_size_thtype_set 4 dynamic 10000000 + defer devlink_pool_size_thtype_restore 4 devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 6 + defer devlink_port_pool_th_restore $swp1 0 + devlink_tc_bind_pool_th_save $swp1 1 ingress devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6 + defer devlink_tc_bind_pool_th_restore $swp1 1 ingress devlink_port_pool_th_save $swp2 0 devlink_port_pool_th_set $swp2 0 6 + defer devlink_port_pool_th_restore $swp2 0 + devlink_tc_bind_pool_th_save $swp2 2 ingress devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6 + defer devlink_tc_bind_pool_th_restore $swp2 2 ingress devlink_tc_bind_pool_th_save $swp3 1 egress devlink_tc_bind_pool_th_set $swp3 1 egress 4 7 + defer devlink_tc_bind_pool_th_restore $swp3 1 egress + devlink_tc_bind_pool_th_save $swp3 2 egress devlink_tc_bind_pool_th_set $swp3 2 egress 4 7 + defer devlink_tc_bind_pool_th_restore $swp3 2 egress + devlink_port_pool_th_save $swp3 4 devlink_port_pool_th_set $swp3 4 7 -} - -switch_destroy() -{ - devlink_port_pool_th_restore $swp3 4 - devlink_tc_bind_pool_th_restore $swp3 2 egress - devlink_tc_bind_pool_th_restore $swp3 1 egress - - devlink_tc_bind_pool_th_restore $swp2 2 ingress - devlink_port_pool_th_restore $swp2 0 - - devlink_tc_bind_pool_th_restore $swp1 1 ingress - devlink_port_pool_th_restore $swp1 0 - - devlink_pool_size_thtype_restore 4 - devlink_pool_size_thtype_restore 0 - - ip link del dev br222 - ip link del dev br111 - - vlan_destroy $swp3 222 - vlan_destroy $swp3 111 - vlan_destroy $swp2 222 - vlan_destroy $swp1 111 - - tc qdisc del dev $swp3 root handle 101: - mtu_restore $swp3 - ip link set dev $swp3 down - lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0 - - mtu_restore $swp2 - ip link set dev $swp2 down - - mtu_restore $swp1 - ip link set dev $swp1 down + defer devlink_port_pool_th_restore $swp3 4 } setup_prepare() @@ -224,6 +226,7 @@ setup_prepare() h3mac=$(mac_get $h3) vrf_prepare + defer vrf_cleanup h1_create h2_create @@ -231,18 +234,6 @@ setup_prepare() switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h3_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1 192.0.2.34 " from H1" @@ -261,21 +252,38 @@ rel() " } +__run_hi_measure_rate() +{ + local what=$1; shift + local -a uc_rate + + start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac + defer stop_traffic $! + + uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_2 "$what")) + check_err $? "Could not get high enough $what ingress rate" + + echo ${uc_rate[@]} +} + +run_hi_measure_rate() +{ + in_defer_scope __run_hi_measure_rate "$@" +} + test_ets_strict() { RET=0 # Run high-prio traffic on its own. - start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac local -a rate_2 - rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_2 "prio 2")) - check_err $? "Could not get high enough prio-2 ingress rate" + rate_2=($(run_hi_measure_rate "prio 2")) local rate_2_in=${rate_2[0]} local rate_2_eg=${rate_2[1]} - stop_traffic # $h2.222 # Start low-prio stream. start_traffic $h1.111 192.0.2.33 192.0.2.34 $h3mac + defer stop_traffic $! local -a rate_1 rate_1=($(measure_rate $swp1 $h3 rx_octets_prio_1 "prio 1")) @@ -290,14 +298,9 @@ test_ets_strict() check_err $(bc <<< "$rel21 > 105") # Start the high-prio stream--now both streams run. - start_traffic $h2.222 192.0.2.65 192.0.2.66 $h3mac - rate_3=($(measure_rate $swp2 $h3 rx_octets_prio_2 "prio 2 w/ 1")) - check_err $? "Could not get high enough prio-2 ingress rate with prio-1" + rate_3=($(run_hi_measure_rate "prio 2+1")) local rate_3_in=${rate_3[0]} local rate_3_eg=${rate_3[1]} - stop_traffic # $h2.222 - - stop_traffic # $h1.111 # High-prio should have about the same throughput whether or not # low-prio is in the system. diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh index 5ac4f795e333..2b5d2c2751d5 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh @@ -69,127 +69,103 @@ mlxsw_only_on_spectrum 2+ || exit h1_create() { simple_if_init $h1 + defer simple_if_fini $h1 vlan_create $h1 111 v$h1 192.0.2.33/28 + defer vlan_destroy $h1 111 ip link set dev $h1.111 type vlan egress-qos-map 0:1 } -h1_destroy() -{ - vlan_destroy $h1 111 - - simple_if_fini $h1 -} - h2_create() { simple_if_init $h2 + defer simple_if_fini $h2 vlan_create $h2 111 v$h2 192.0.2.34/28 -} - -h2_destroy() -{ - vlan_destroy $h2 111 - - simple_if_fini $h2 + defer vlan_destroy $h2 111 } switch_create() { # pools # ----- + # devlink_pool_size_thtype_restore needs to be done first so that we can + # reset the various limits to values that are only valid for the + # original static / dynamic setting. devlink_pool_size_thtype_save 1 - devlink_pool_size_thtype_save 6 - - devlink_port_pool_th_save $swp1 1 - devlink_port_pool_th_save $swp2 6 - - devlink_tc_bind_pool_th_save $swp1 1 ingress - devlink_tc_bind_pool_th_save $swp2 1 egress - devlink_pool_size_thtype_set 1 dynamic $MAX_POOL_SIZE + defer_prio devlink_pool_size_thtype_restore 1 + + devlink_pool_size_thtype_save 6 devlink_pool_size_thtype_set 6 static $MAX_POOL_SIZE + defer_prio devlink_pool_size_thtype_restore 6 # $swp1 # ----- ip link set dev $swp1 up + defer ip link set dev $swp1 down + vlan_create $swp1 111 + defer vlan_destroy $swp1 111 ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1 + devlink_port_pool_th_save $swp1 1 devlink_port_pool_th_set $swp1 1 16 + defer devlink_tc_bind_pool_th_restore $swp1 1 ingress + + devlink_tc_bind_pool_th_save $swp1 1 ingress devlink_tc_bind_pool_th_set $swp1 1 ingress 1 16 + defer devlink_port_pool_th_restore $swp1 1 tc qdisc replace dev $swp1 root handle 1: \ ets bands 8 strict 8 priomap 7 6 + defer tc qdisc del dev $swp1 root + dcb buffer set dev $swp1 prio-buffer all:0 1:1 + defer dcb buffer set dev $swp1 prio-buffer all:0 # $swp2 # ----- ip link set dev $swp2 up + defer ip link set dev $swp2 down + vlan_create $swp2 111 + defer vlan_destroy $swp2 111 ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1 + devlink_port_pool_th_save $swp2 6 devlink_port_pool_th_set $swp2 6 $MAX_POOL_SIZE + defer devlink_tc_bind_pool_th_restore $swp2 1 egress + + devlink_tc_bind_pool_th_save $swp2 1 egress devlink_tc_bind_pool_th_set $swp2 1 egress 6 $MAX_POOL_SIZE + defer devlink_port_pool_th_restore $swp2 6 tc qdisc replace dev $swp2 root handle 1: tbf rate $SHAPER_RATE \ burst 128K limit 500M + defer tc qdisc del dev $swp2 root + tc qdisc replace dev $swp2 parent 1:1 handle 11: \ ets bands 8 strict 8 priomap 7 6 + defer tc qdisc del dev $swp2 parent 1:1 handle 11: # bridge # ------ ip link add name br1 type bridge vlan_filtering 0 + defer ip link del dev br1 + ip link set dev $swp1.111 master br1 + defer ip link set dev $swp1.111 nomaster + ip link set dev br1 up + defer ip link set dev br1 down ip link set dev $swp2.111 master br1 -} - -switch_destroy() -{ - # Do this first so that we can reset the limits to values that are only - # valid for the original static / dynamic setting. - devlink_pool_size_thtype_restore 6 - devlink_pool_size_thtype_restore 1 - - # bridge - # ------ - - ip link set dev $swp2.111 nomaster - - ip link set dev br1 down - ip link set dev $swp1.111 nomaster - ip link del dev br1 - - # $swp2 - # ----- - - tc qdisc del dev $swp2 parent 1:1 handle 11: - tc qdisc del dev $swp2 root - - devlink_tc_bind_pool_th_restore $swp2 1 egress - devlink_port_pool_th_restore $swp2 6 - - vlan_destroy $swp2 111 - ip link set dev $swp2 down - - # $swp1 - # ----- - - dcb buffer set dev $swp1 prio-buffer all:0 - tc qdisc del dev $swp1 root - - devlink_tc_bind_pool_th_restore $swp1 1 ingress - devlink_port_pool_th_restore $swp1 1 - - vlan_destroy $swp1 111 - ip link set dev $swp1 down + defer ip link set dev $swp2.111 nomaster } setup_prepare() @@ -203,23 +179,13 @@ setup_prepare() h2mac=$(mac_get $h2) vrf_prepare + defer vrf_cleanup h1_create h2_create switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1 192.0.2.34 " h1->h2" @@ -251,6 +217,7 @@ max_descriptors() log_info "Send many small packets, packet size = $pktsize bytes" start_traffic_pktsize $pktsize $h1.111 192.0.2.33 192.0.2.34 $h2mac + defer stop_traffic $! # Sleep to wait for congestion. sleep 5 @@ -268,9 +235,6 @@ max_descriptors() check_err $(bc <<< "$perc_used < $exp_perc_used") \ "Expected > $exp_perc_used% of descriptors, handle $perc_used%" - stop_traffic - sleep 1 - log_test "Maximum descriptors usage. The percentage used is $perc_used%" } diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index 6d892de43fa8..cd4a5c21360c 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -73,122 +73,114 @@ source qos_lib.sh h1_create() { simple_if_init $h1 192.0.2.65/28 - mtu_set $h1 10000 -} + defer simple_if_fini $h1 192.0.2.65/28 -h1_destroy() -{ - mtu_restore $h1 - simple_if_fini $h1 192.0.2.65/28 + mtu_set $h1 10000 + defer mtu_restore $h1 } h2_create() { simple_if_init $h2 + defer simple_if_fini $h2 + mtu_set $h2 10000 + defer mtu_restore $h2 vlan_create $h2 111 v$h2 192.0.2.129/28 + defer vlan_destroy $h2 111 ip link set dev $h2.111 type vlan egress-qos-map 0:1 } -h2_destroy() -{ - vlan_destroy $h2 111 - - mtu_restore $h2 - simple_if_fini $h2 -} - h3_create() { simple_if_init $h3 192.0.2.66/28 + defer simple_if_fini $h3 192.0.2.66/28 + mtu_set $h3 10000 + defer mtu_restore $h3 vlan_create $h3 111 v$h3 192.0.2.130/28 -} - -h3_destroy() -{ - vlan_destroy $h3 111 - - mtu_restore $h3 - simple_if_fini $h3 192.0.2.66/28 + defer vlan_destroy $h3 111 } switch_create() { ip link set dev $swp1 up + defer ip link set dev $swp1 down + mtu_set $swp1 10000 + defer mtu_restore $swp1 ip link set dev $swp2 up + defer ip link set dev $swp2 down + mtu_set $swp2 10000 + defer mtu_restore $swp2 ip link set dev $swp3 up + defer ip link set dev $swp3 down + mtu_set $swp3 10000 + defer mtu_restore $swp3 vlan_create $swp2 111 + defer vlan_destroy $swp2 111 + vlan_create $swp3 111 + defer vlan_destroy $swp3 111 tc qdisc replace dev $swp3 root handle 3: tbf rate 1gbit \ burst 128K limit 1G + defer tc qdisc del dev $swp3 root handle 3: + tc qdisc replace dev $swp3 parent 3:3 handle 33: \ prio bands 8 priomap 7 7 7 7 7 7 7 7 + defer tc qdisc del dev $swp3 parent 3:3 handle 33: ip link add name br1 type bridge vlan_filtering 0 + defer ip link del dev br1 ip link set dev br1 addrgenmode none ip link set dev br1 up + ip link set dev $swp1 master br1 + defer ip link set dev $swp1 nomaster + ip link set dev $swp3 master br1 + defer ip link set dev $swp3 nomaster ip link add name br111 type bridge vlan_filtering 0 + defer ip link del dev br111 ip link set dev br111 addrgenmode none ip link set dev br111 up + ip link set dev $swp2.111 master br111 + defer ip link set dev $swp2.111 nomaster + ip link set dev $swp3.111 master br111 + defer ip link set dev $swp3.111 nomaster # Make sure that ingress quotas are smaller than egress so that there is # room for both streams of traffic to be admitted to shared buffer. devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 5 + defer devlink_port_pool_th_restore $swp1 0 + devlink_tc_bind_pool_th_save $swp1 0 ingress devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5 + defer devlink_tc_bind_pool_th_restore $swp1 0 ingress devlink_port_pool_th_save $swp2 0 devlink_port_pool_th_set $swp2 0 5 + defer devlink_port_pool_th_restore $swp2 0 + devlink_tc_bind_pool_th_save $swp2 1 ingress devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5 + defer devlink_tc_bind_pool_th_restore $swp2 1 ingress devlink_port_pool_th_save $swp3 4 devlink_port_pool_th_set $swp3 4 12 -} - -switch_destroy() -{ - devlink_port_pool_th_restore $swp3 4 - - devlink_tc_bind_pool_th_restore $swp2 1 ingress - devlink_port_pool_th_restore $swp2 0 - - devlink_tc_bind_pool_th_restore $swp1 0 ingress - devlink_port_pool_th_restore $swp1 0 - - ip link del dev br111 - ip link del dev br1 - - tc qdisc del dev $swp3 parent 3:3 handle 33: - tc qdisc del dev $swp3 root handle 3: - - vlan_destroy $swp3 111 - vlan_destroy $swp2 111 - - mtu_restore $swp3 - ip link set dev $swp3 down - - mtu_restore $swp2 - ip link set dev $swp2 down - - mtu_restore $swp1 - ip link set dev $swp1 down + defer devlink_port_pool_th_restore $swp3 4 } setup_prepare() @@ -205,6 +197,7 @@ setup_prepare() h3mac=$(mac_get $h3) vrf_prepare + defer vrf_cleanup h1_create h2_create @@ -212,45 +205,45 @@ setup_prepare() switch_create } -cleanup() +ping_ipv4() { - pre_cleanup + ping_test $h2 192.0.2.130 +} - switch_destroy - h3_destroy - h2_destroy - h1_destroy +__run_uc_measure_rate() +{ + local what=$1; shift + local -a uc_rate + + start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac + defer stop_traffic $! + + uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "$what")) + check_err $? "Could not get high enough $what ingress rate" - vrf_cleanup + echo ${uc_rate[@]} } -ping_ipv4() +run_uc_measure_rate() { - ping_test $h2 192.0.2.130 + in_defer_scope __run_uc_measure_rate "$@" } test_mc_aware() { RET=0 - local -a uc_rate - start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac - uc_rate=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC-only")) - check_err $? "Could not get high enough UC-only ingress rate" - stop_traffic + local -a uc_rate=($(run_uc_measure_rate "UC-only")) local ucth1=${uc_rate[1]} start_traffic $h1 192.0.2.65 bc bc + defer stop_traffic $! local d0=$(date +%s) local t0=$(ethtool_stats_get $h3 rx_octets_prio_0) local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0) - local -a uc_rate_2 - start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac - uc_rate_2=($(measure_rate $swp2 $h3 rx_octets_prio_1 "UC+MC")) - check_err $? "Could not get high enough UC+MC ingress rate" - stop_traffic + local -a uc_rate_2=($(run_uc_measure_rate "UC+MC")) local ucth2=${uc_rate_2[1]} local d1=$(date +%s) @@ -272,8 +265,6 @@ test_mc_aware() local mc_ir=$(rate $u0 $u1 $interval) local mc_er=$(rate $t0 $t1 $interval) - stop_traffic - log_test "UC performance under MC overload" echo "UC-only throughput $(humanize $ucth1)" @@ -297,6 +288,7 @@ test_uc_aware() RET=0 start_traffic $h2.111 192.0.2.129 192.0.2.130 $h3mac + defer stop_traffic $! local d0=$(date +%s) local t0=$(ethtool_stats_get $h3 rx_octets_prio_1) @@ -326,8 +318,6 @@ test_uc_aware() ((attempts == passes)) check_err $? - stop_traffic - log_test "MC performance under UC overload" echo " ingress UC throughput $(humanize ${uc_ir})" echo " egress UC throughput $(humanize ${uc_er})" diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh index 893a693ad805..45a569618424 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh @@ -186,10 +186,7 @@ bridge_vlan_flags_test() # If we did not handle references correctly, then this should produce a # trace - devlink dev reload "$DEVLINK_DEV" - - # Allow netdevices to be re-created following the reload - sleep 20 + devlink_reload log_test "bridge vlan flags" } @@ -923,12 +920,9 @@ devlink_reload_test() # devlink reload can be performed without errors RET=0 - devlink dev reload "$DEVLINK_DEV" - check_err $? "devlink reload failed" + devlink_reload log_test "devlink reload - last test" - - sleep 20 } trap cleanup EXIT diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh index 139175fd03e7..4aaceb6b2b60 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh @@ -21,6 +21,7 @@ switch_create() # Create a bottleneck so that the DWRR process can kick in. tc qdisc replace dev $swp2 root handle 3: tbf rate 1gbit \ burst 128K limit 1G + defer tc qdisc del dev $swp2 root handle 3: ets_switch_create @@ -30,16 +31,27 @@ switch_create() # for the DWRR process. devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 12 + defer devlink_port_pool_th_restore $swp1 0 + devlink_tc_bind_pool_th_save $swp1 0 ingress devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12 + defer devlink_tc_bind_pool_th_restore $swp1 0 ingress + devlink_port_pool_th_save $swp2 4 devlink_port_pool_th_set $swp2 4 12 + defer devlink_port_pool_th_restore $swp2 4 + devlink_tc_bind_pool_th_save $swp2 7 egress devlink_tc_bind_pool_th_set $swp2 7 egress 4 5 + defer devlink_tc_bind_pool_th_restore $swp2 7 egress + devlink_tc_bind_pool_th_save $swp2 6 egress devlink_tc_bind_pool_th_set $swp2 6 egress 4 5 + defer devlink_tc_bind_pool_th_restore $swp2 6 egress + devlink_tc_bind_pool_th_save $swp2 5 egress devlink_tc_bind_pool_th_set $swp2 5 egress 4 5 + defer devlink_tc_bind_pool_th_restore $swp2 5 egress # Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet # priorities at $swp1 based on their 802.1p headers. ingress-qos-map is @@ -47,20 +59,6 @@ switch_create() # 1:1, which is the mapping currently hard-coded by the driver. } -switch_destroy() -{ - devlink_tc_bind_pool_th_restore $swp2 5 egress - devlink_tc_bind_pool_th_restore $swp2 6 egress - devlink_tc_bind_pool_th_restore $swp2 7 egress - devlink_port_pool_th_restore $swp2 4 - devlink_tc_bind_pool_th_restore $swp1 0 ingress - devlink_port_pool_th_restore $swp1 0 - - ets_switch_destroy - - tc qdisc del dev $swp2 root handle 3: -} - # Callback from sch_ets_tests.sh collect_stats() { diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 299e06a5808c..537d6baa77b7 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -75,6 +75,18 @@ source $lib_dir/lib.sh source $lib_dir/devlink_lib.sh source mlxsw_lib.sh +stop_traffic_sleep() +{ + local pid=$1; shift + + # Issuing a kill still leaves a bunch of packets lingering in the + # buffers. This traffic then arrives at the point where a follow-up test + # is already running, and can confuse the test. Therefore sleep after + # stopping traffic to flush any leftover packets. + stop_traffic "$pid" + sleep 1 +} + ipaddr() { local host=$1; shift @@ -89,39 +101,31 @@ host_create() local host=$1; shift simple_if_init $dev + defer simple_if_fini $dev + mtu_set $dev 10000 + defer mtu_restore $dev vlan_create $dev 10 v$dev $(ipaddr $host 10)/28 + defer vlan_destroy $dev 10 ip link set dev $dev.10 type vlan egress 0:0 vlan_create $dev 11 v$dev $(ipaddr $host 11)/28 + defer vlan_destroy $dev 11 ip link set dev $dev.11 type vlan egress 0:1 } -host_destroy() -{ - local dev=$1; shift - - vlan_destroy $dev 11 - vlan_destroy $dev 10 - mtu_restore $dev - simple_if_fini $dev -} - h1_create() { host_create $h1 1 } -h1_destroy() -{ - host_destroy $h1 -} - h2_create() { host_create $h2 2 + tc qdisc add dev $h2 clsact + defer tc qdisc del dev $h2 clsact # Some of the tests in this suite use multicast traffic. As this traffic # enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus @@ -137,15 +141,9 @@ h2_create() # Prevent this by adding a shaper which limits the traffic in $h2 to # 1Gbps. - tc qdisc replace dev $h2 root handle 10: tbf rate 1gbit \ + tc qdisc replace dev $h2 root handle 10: tbf rate 200mbit \ burst 128K limit 1G -} - -h2_destroy() -{ - tc qdisc del dev $h2 root handle 10: - tc qdisc del dev $h2 clsact - host_destroy $h2 + defer tc qdisc del dev $h2 root handle 10: } h3_create() @@ -153,40 +151,54 @@ h3_create() host_create $h3 3 } -h3_destroy() -{ - host_destroy $h3 -} - switch_create() { local intf local vlan ip link add dev br1_10 type bridge + defer ip link del dev br1_10 + ip link add dev br1_11 type bridge + defer ip link del dev br1_11 ip link add dev br2_10 type bridge + defer ip link del dev br2_10 + ip link add dev br2_11 type bridge + defer ip link del dev br2_11 for intf in $swp1 $swp2 $swp3 $swp4 $swp5; do ip link set dev $intf up + defer ip link set dev $intf down + mtu_set $intf 10000 + defer mtu_restore $intf done for intf in $swp1 $swp4; do for vlan in 10 11; do vlan_create $intf $vlan + defer vlan_destroy $intf $vlan + ip link set dev $intf.$vlan master br1_$vlan + defer ip link set dev $intf.$vlan nomaster + ip link set dev $intf.$vlan up + defer ip link set dev $intf.$vlan up done done for intf in $swp2 $swp3 $swp5; do for vlan in 10 11; do vlan_create $intf $vlan + defer vlan_destroy $intf $vlan + ip link set dev $intf.$vlan master br2_$vlan + defer ip link set dev $intf.$vlan nomaster + ip link set dev $intf.$vlan up + defer ip link set dev $intf.$vlan up done done @@ -199,51 +211,27 @@ switch_create() done for intf in $swp3 $swp4; do - tc qdisc replace dev $intf root handle 1: tbf rate 1gbit \ + tc qdisc replace dev $intf root handle 1: tbf rate 200mbit \ burst 128K limit 1G + defer tc qdisc del dev $intf root handle 1: done ip link set dev br1_10 up + defer ip link set dev br1_10 down + ip link set dev br1_11 up + defer ip link set dev br1_11 down + ip link set dev br2_10 up + defer ip link set dev br2_10 down + ip link set dev br2_11 up + defer ip link set dev br2_11 down local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1) devlink_port_pool_th_save $swp3 8 devlink_port_pool_th_set $swp3 8 $size -} - -switch_destroy() -{ - local intf - local vlan - - devlink_port_pool_th_restore $swp3 8 - - ip link set dev br2_11 down - ip link set dev br2_10 down - ip link set dev br1_11 down - ip link set dev br1_10 down - - for intf in $swp4 $swp3; do - tc qdisc del dev $intf root handle 1: - done - - for intf in $swp5 $swp3 $swp2 $swp4 $swp1; do - for vlan in 11 10; do - ip link set dev $intf.$vlan down - ip link set dev $intf.$vlan nomaster - vlan_destroy $intf $vlan - done - - mtu_restore $intf - ip link set dev $intf down - done - - ip link del dev br2_11 - ip link del dev br2_10 - ip link del dev br1_11 - ip link del dev br1_10 + defer devlink_port_pool_th_restore $swp3 8 } setup_prepare() @@ -263,6 +251,7 @@ setup_prepare() h3_mac=$(mac_get $h3) vrf_prepare + defer vrf_cleanup h1_create h2_create @@ -270,18 +259,6 @@ setup_prepare() switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h3_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1.10 $(ipaddr 3 10) " from host 1, vlan 10" @@ -372,6 +349,7 @@ build_backlog() local i=0 while :; do + sleep 1 local cur=$(busywait 1100 until_counter_is "> $cur" \ get_qdisc_backlog $vlan) local diff=$((size - cur)) @@ -449,6 +427,7 @@ __do_ecn_test() start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ $h3_mac tos=0x01 + defer stop_traffic_sleep $! sleep 1 ecn_test_common "$name" "$get_nmarked" $vlan $limit @@ -460,9 +439,6 @@ __do_ecn_test() build_backlog $vlan $((2 * limit)) udp >/dev/null check_fail $? "UDP traffic went into backlog instead of being early-dropped" log_test "TC $((vlan - 10)): $name backlog > limit: UDP early-dropped" - - stop_traffic - sleep 1 } do_ecn_test() @@ -470,7 +446,8 @@ do_ecn_test() local vlan=$1; shift local limit=$1; shift - __do_ecn_test get_nmarked "$vlan" "$limit" + in_defer_scope \ + __do_ecn_test get_nmarked "$vlan" "$limit" } do_ecn_test_perband() @@ -479,10 +456,11 @@ do_ecn_test_perband() local limit=$1; shift mlxsw_only_on_spectrum 3+ || return - __do_ecn_test get_qdisc_nmarked "$vlan" "$limit" "per-band ECN" + in_defer_scope \ + __do_ecn_test get_qdisc_nmarked "$vlan" "$limit" "per-band ECN" } -do_ecn_nodrop_test() +__do_ecn_nodrop_test() { local vlan=$1; shift local limit=$1; shift @@ -490,6 +468,7 @@ do_ecn_nodrop_test() start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ $h3_mac tos=0x01 + defer stop_traffic_sleep $! sleep 1 ecn_test_common "$name" get_nmarked $vlan $limit @@ -501,12 +480,15 @@ do_ecn_nodrop_test() build_backlog $vlan $((2 * limit)) udp >/dev/null check_err $? "UDP traffic was early-dropped instead of getting into backlog" log_test "TC $((vlan - 10)): $name backlog > limit: UDP not dropped" +} - stop_traffic - sleep 1 +do_ecn_nodrop_test() +{ + in_defer_scope \ + __do_ecn_nodrop_test "$@" } -do_red_test() +__do_red_test() { local vlan=$1; shift local limit=$1; shift @@ -517,6 +499,7 @@ do_red_test() # is above limit. start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ $h3_mac tos=0x01 + defer stop_traffic_sleep $! # Pushing below the queue limit should work. RET=0 @@ -532,17 +515,21 @@ do_red_test() check_fail $? "Traffic went into backlog instead of being early-dropped" pct=$(check_marking get_nmarked $vlan "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + backlog=$(get_qdisc_backlog $vlan) local diff=$((limit - backlog)) pct=$((100 * diff / limit)) - ((-10 <= pct && pct <= 10)) - check_err $? "backlog $backlog / $limit expected <= 10% distance" + ((-15 <= pct && pct <= 15)) + check_err $? "backlog $backlog / $limit expected <= 15% distance" log_test "TC $((vlan - 10)): RED backlog > limit" +} - stop_traffic - sleep 1 +do_red_test() +{ + in_defer_scope \ + __do_red_test "$@" } -do_mc_backlog_test() +__do_mc_backlog_test() { local vlan=$1; shift local limit=$1; shift @@ -552,7 +539,10 @@ do_mc_backlog_test() RET=0 start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc + defer stop_traffic_sleep $! + start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc + defer stop_traffic_sleep $! qbl=$(busywait 5000 until_counter_is ">= 500000" \ get_qdisc_backlog $vlan) @@ -565,13 +555,16 @@ do_mc_backlog_test() get_mc_transmit_queue $vlan) check_err $? "MC backlog reported by qdisc not visible in ethtool" - stop_traffic - stop_traffic - log_test "TC $((vlan - 10)): Qdisc reports MC backlog" } -do_mark_test() +do_mc_backlog_test() +{ + in_defer_scope \ + __do_mc_backlog_test "$@" +} + +__do_mark_test() { local vlan=$1; shift local limit=$1; shift @@ -586,6 +579,7 @@ do_mark_test() start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \ $h3_mac tos=0x01 + defer stop_traffic_sleep $! # Create a bit of a backlog and observe no mirroring due to marks. qevent_rule_install_$subtest @@ -600,7 +594,7 @@ do_mark_test() # Above limit, everything should be mirrored, we should see lots of # packets. build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01 >/dev/null - busywait_for_counter 1100 +10000 \ + busywait_for_counter 1100 +2500 \ $fetch_counter > /dev/null check_err_fail "$should_fail" $? "ECN-marked packets $subtest'd" @@ -615,12 +609,15 @@ do_mark_test() else log_test "TC $((vlan - 10)): marked packets $subtest'd" fi +} - stop_traffic - sleep 1 +do_mark_test() +{ + in_defer_scope \ + __do_mark_test "$@" } -do_drop_test() +__do_drop_test() { local vlan=$1; shift local limit=$1; shift @@ -635,6 +632,7 @@ do_drop_test() RET=0 start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) $h3_mac + defer stop_traffic_sleep $! # Create a bit of a backlog and observe no mirroring due to drops. qevent_rule_install_$subtest @@ -651,25 +649,30 @@ do_drop_test() build_backlog $vlan $((3 * limit / 2)) udp >/dev/null base=$($fetch_counter) - send_packets $vlan udp 11 + send_packets $vlan udp 100 - now=$(busywait 1100 until_counter_is ">= $((base + 10))" $fetch_counter) - check_err $? "Dropped packets not observed: 11 expected, $((now - base)) seen" + now=$(busywait 1100 until_counter_is ">= $((base + 95))" $fetch_counter) + check_err $? "${trigger}ped packets not observed: 100 expected, $((now - base)) seen" # When no extra traffic is injected, there should be no mirroring. - busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null + busywait 1100 until_counter_is ">= $((base + 110))" \ + $fetch_counter >/dev/null check_fail $? "Spurious packets observed" # When the rule is uninstalled, there should be no mirroring. qevent_rule_uninstall_$subtest - send_packets $vlan udp 11 - busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null - check_fail $? "Spurious packets observed after uninstall" + send_packets $vlan udp 100 + now=$(busywait 1100 until_counter_is ">= $((base + 110))" \ + $fetch_counter) + check_fail $? "$((now - base)) spurious packets observed after uninstall" log_test "TC $((vlan - 10)): ${trigger}ped packets $subtest'd" +} - stop_traffic - sleep 1 +do_drop_test() +{ + in_defer_scope \ + __do_drop_test "$@" } qevent_rule_install_mirror() diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh index 8ecddafa79b3..8902a115d9cd 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh @@ -20,8 +20,8 @@ source sch_red_core.sh # $BACKLOG2 are far enough not to overlap, so that we can assume that if we do # see (do not see) marking, it is actually due to the configuration of that one # TC, and not due to configuration of the other TC leaking over. -BACKLOG1=200000 -BACKLOG2=500000 +BACKLOG1=400000 +BACKLOG2=1000000 install_root_qdisc() { @@ -35,7 +35,7 @@ install_qdisc_tc0() tc qdisc add dev $swp3 parent 10:8 handle 108: red \ limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \ - probability 1.0 avpkt 8000 burst 38 "${args[@]}" + probability 1.0 avpkt 8000 burst 51 "${args[@]}" } install_qdisc_tc1() @@ -44,7 +44,7 @@ install_qdisc_tc1() tc qdisc add dev $swp3 parent 10:7 handle 107: red \ limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \ - probability 1.0 avpkt 8000 burst 63 "${args[@]}" + probability 1.0 avpkt 8000 burst 126 "${args[@]}" } install_qdisc() @@ -80,36 +80,34 @@ uninstall_qdisc() ecn_test() { install_qdisc ecn + defer uninstall_qdisc do_ecn_test 10 $BACKLOG1 do_ecn_test 11 $BACKLOG2 - - uninstall_qdisc } ecn_test_perband() { install_qdisc ecn + defer uninstall_qdisc do_ecn_test_perband 10 $BACKLOG1 do_ecn_test_perband 11 $BACKLOG2 - - uninstall_qdisc } ecn_nodrop_test() { install_qdisc ecn nodrop + defer uninstall_qdisc do_ecn_nodrop_test 10 $BACKLOG1 do_ecn_nodrop_test 11 $BACKLOG2 - - uninstall_qdisc } red_test() { install_qdisc + defer uninstall_qdisc # Make sure that we get the non-zero value if there is any. local cur=$(busywait 1100 until_counter_is "> 0" \ @@ -120,50 +118,44 @@ red_test() do_red_test 10 $BACKLOG1 do_red_test 11 $BACKLOG2 - - uninstall_qdisc } mc_backlog_test() { install_qdisc + defer uninstall_qdisc # Note that the backlog numbers here do not correspond to RED # configuration, but are arbitrary. do_mc_backlog_test 10 $BACKLOG1 do_mc_backlog_test 11 $BACKLOG2 - - uninstall_qdisc } red_mirror_test() { install_qdisc qevent early_drop block 10 + defer uninstall_qdisc do_drop_mirror_test 10 $BACKLOG1 early_drop do_drop_mirror_test 11 $BACKLOG2 early_drop - - uninstall_qdisc } red_trap_test() { install_qdisc qevent early_drop block 10 + defer uninstall_qdisc do_drop_trap_test 10 $BACKLOG1 early_drop do_drop_trap_test 11 $BACKLOG2 early_drop - - uninstall_qdisc } ecn_mirror_test() { install_qdisc ecn qevent mark block 10 + defer uninstall_qdisc do_mark_mirror_test 10 $BACKLOG1 do_mark_mirror_test 11 $BACKLOG2 - - uninstall_qdisc } bail_on_lldpad "configure DCB" "configure Qdiscs" diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh index 159108d02895..e9043771787b 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh @@ -32,45 +32,51 @@ uninstall_qdisc() ecn_test() { install_qdisc ecn + defer uninstall_qdisc + do_ecn_test 10 $BACKLOG - uninstall_qdisc } ecn_test_perband() { install_qdisc ecn + defer uninstall_qdisc + do_ecn_test_perband 10 $BACKLOG - uninstall_qdisc } ecn_nodrop_test() { install_qdisc ecn nodrop + defer uninstall_qdisc + do_ecn_nodrop_test 10 $BACKLOG - uninstall_qdisc } red_test() { install_qdisc + defer uninstall_qdisc + do_red_test 10 $BACKLOG - uninstall_qdisc } mc_backlog_test() { install_qdisc + defer uninstall_qdisc + # Note that the backlog value here does not correspond to RED # configuration, but is arbitrary. do_mc_backlog_test 10 $BACKLOG - uninstall_qdisc } red_mirror_test() { install_qdisc qevent early_drop block 10 + defer uninstall_qdisc + do_drop_mirror_test 10 $BACKLOG - uninstall_qdisc } bail_on_lldpad "configure DCB" "configure Qdiscs" diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh index 83a0210e7544..bc7ea2df49fb 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh @@ -218,7 +218,7 @@ psample_capture_start() psample_capture_stop() { - { kill %% && wait %%; } 2>/dev/null + kill_process %% } __tc_sample_rate_test() @@ -499,7 +499,7 @@ tc_sample_md_out_tc_occ_test() backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]') # Kill mausezahn. - { kill %% && wait %%; } 2>/dev/null + kill_process %% psample_capture_stop diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index 06021b2059b7..b175f4d966e5 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -20,22 +20,26 @@ SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") # Simple script to test dynamic targets in netconsole SRCIF="" # to be populated later -SRCIP=192.168.1.1 +SRCIP=192.0.2.1 DSTIF="" # to be populated later -DSTIP=192.168.1.2 +DSTIP=192.0.2.2 PORT="6666" MSG="netconsole selftest" +USERDATA_KEY="key" +USERDATA_VALUE="value" TARGET=$(mktemp -u netcons_XXXXX) DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk) NETCONS_CONFIGFS="/sys/kernel/config/netconsole" NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" +KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}" # NAMESPACE will be populated by setup_ns with a random value NAMESPACE="" # IDs for netdevsim NSIM_DEV_1_ID=$((256 + RANDOM % 256)) NSIM_DEV_2_ID=$((512 + RANDOM % 256)) +NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device" # Used to create and delete namespaces source "${SCRIPTDIR}"/../../net/lib.sh @@ -43,7 +47,6 @@ source "${SCRIPTDIR}"/../../net/net_helper.sh # Create netdevsim interfaces create_ifaces() { - local NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW" echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW" @@ -122,6 +125,8 @@ function cleanup() { # delete netconsole dynamic reconfiguration echo 0 > "${NETCONS_PATH}"/enabled + # Remove key + rmdir "${KEY_PATH}" # Remove the configfs entry rmdir "${NETCONS_PATH}" @@ -136,6 +141,18 @@ function cleanup() { echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk } +function set_user_data() { + if [[ ! -d "${NETCONS_PATH}""/userdata" ]] + then + echo "Userdata path not available in ${NETCONS_PATH}/userdata" + exit "${ksft_skip}" + fi + + mkdir -p "${KEY_PATH}" + VALUE_PATH="${KEY_PATH}""/value" + echo "${USERDATA_VALUE}" > "${VALUE_PATH}" +} + function listen_port_and_save_to() { local OUTPUT=${1} # Just wait for 2 seconds @@ -146,6 +163,10 @@ function listen_port_and_save_to() { function validate_result() { local TMPFILENAME="$1" + # TMPFILENAME will contain something like: + # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM + # key=value + # Check if the file exists if [ ! -f "$TMPFILENAME" ]; then echo "FAIL: File was not generated." >&2 @@ -158,6 +179,12 @@ function validate_result() { exit "${ksft_fail}" fi + if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then + echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi + # Delete the file once it is validated, otherwise keep it # for debugging purposes rm "${TMPFILENAME}" @@ -185,6 +212,11 @@ function check_for_dependencies() { exit "${ksft_skip}" fi + if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then + echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2 + exit "${ksft_skip}" + fi + if [ ! -d "${NETCONS_CONFIGFS}" ]; then echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2 exit "${ksft_skip}" @@ -220,6 +252,8 @@ trap cleanup EXIT set_network # Create a dynamic target for netconsole create_dynamic_target +# Set userdata "key" with the "value" value +set_user_data # Listed for netconsole port inside the namespace and destination interface listen_port_and_save_to "${OUTPUT_FILE}" & # Wait for socat to start and listen to the port. diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index 5bace0b7fb57..07b7c46d3311 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -4,11 +4,14 @@ TEST_PROGS = devlink.sh \ devlink_in_netns.sh \ devlink_trap.sh \ ethtool-coalesce.sh \ + ethtool-features.sh \ ethtool-fec.sh \ ethtool-pause.sh \ ethtool-ring.sh \ fib.sh \ + fib_notifications.sh \ hw_stats_l3.sh \ + macsec-offload.sh \ nexthop.sh \ peer.sh \ psample.sh \ diff --git a/tools/testing/selftests/drivers/net/netdevsim/config b/tools/testing/selftests/drivers/net/netdevsim/config index adf45a3a78b4..5117c78ddf0a 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/config +++ b/tools/testing/selftests/drivers/net/netdevsim/config @@ -1,6 +1,7 @@ CONFIG_DUMMY=y CONFIG_GENEVE=m CONFIG_IPV6=y +CONFIG_MACSEC=m CONFIG_NETDEVSIM=m CONFIG_NET_SCH_MQPRIO=y CONFIG_NET_SCH_MULTIQ=y diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh new file mode 100644 index 000000000000..bc210dc6ad2d --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-features.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +NSIM_NETDEV=$(make_netdev) + +set -o pipefail + +FEATS=" + tx-checksum-ip-generic + tx-scatter-gather + tx-tcp-segmentation + generic-segmentation-offload + generic-receive-offload" + +for feat in $FEATS ; do + s=$(ethtool --json -k $NSIM_NETDEV | jq ".[].\"$feat\".active" 2>/dev/null) + check $? "$s" true + + s=$(ethtool --json -k $NSIM_NETDEV | jq ".[].\"$feat\".fixed" 2>/dev/null) + check $? "$s" false +done + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh index 8d91191a098c..9896580c3d85 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh @@ -94,7 +94,7 @@ route_addition_check() sleep 1 $IP route add $route dev dummy1 sleep 1 - kill %% && wait %% &> /dev/null + kill_process %% route_notify_check $outfile $expected_num_notifications $offload_failed rm -f $outfile @@ -148,7 +148,7 @@ route_deletion_check() sleep 1 $IP route del $route dev dummy1 sleep 1 - kill %% && wait %% &> /dev/null + kill_process %% route_notify_check $outfile $expected_num_notifications rm -f $outfile @@ -191,7 +191,7 @@ route_replacement_check() sleep 1 $IP route replace $route dev dummy2 sleep 1 - kill %% && wait %% &> /dev/null + kill_process %% route_notify_check $outfile $expected_num_notifications rm -f $outfile diff --git a/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh b/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh new file mode 100755 index 000000000000..98033e6667d2 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/macsec-offload.sh @@ -0,0 +1,117 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +source ethtool-common.sh + +NSIM_NETDEV=$(make_netdev) +MACSEC_NETDEV=macsec_nsim + +set -o pipefail + +if ! ethtool -k $NSIM_NETDEV | grep -q 'macsec-hw-offload: on'; then + echo "SKIP: netdevsim doesn't support MACsec offload" + exit 4 +fi + +if ! ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec offload mac 2>/dev/null; then + echo "SKIP: couldn't create macsec device" + exit 4 +fi +ip link del $MACSEC_NETDEV + +# +# test macsec offload API +# + +ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}" type macsec port 4 offload mac +check $? + +ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}2" type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac +check $? + +ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}3" type macsec sci abbacdde01020304 offload mac +check $? + +ip link add link $NSIM_NETDEV "${MACSEC_NETDEV}4" type macsec port 8 offload mac 2> /dev/null +check $? '' '' 1 + +ip macsec add "${MACSEC_NETDEV}" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 +check $? + +ip macsec add "${MACSEC_NETDEV}" rx port 1234 address "1c:ed:de:ad:be:ef" +check $? + +ip macsec add "${MACSEC_NETDEV}" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \ + key 00 0123456789abcdef0123456789abcdef +check $? + +ip macsec add "${MACSEC_NETDEV}" rx port 1235 address "1c:ed:de:ad:be:ef" 2> /dev/null +check $? '' '' 1 + +# can't disable macsec offload when SAs are configured +ip link set "${MACSEC_NETDEV}" type macsec offload off 2> /dev/null +check $? '' '' 1 + +ip macsec offload "${MACSEC_NETDEV}" off 2> /dev/null +check $? '' '' 1 + +# toggle macsec offload via rtnetlink +ip link set "${MACSEC_NETDEV}2" type macsec offload off +check $? + +ip link set "${MACSEC_NETDEV}2" type macsec offload mac +check $? + +# toggle macsec offload via genetlink +ip macsec offload "${MACSEC_NETDEV}2" off +check $? + +ip macsec offload "${MACSEC_NETDEV}2" mac +check $? + +for dev in ${MACSEC_NETDEV}{,2,3} ; do + ip link del $dev + check $? +done + + +# +# test ethtool features when toggling offload +# + +ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec offload mac +TMP_FEATS_ON_1="$(ethtool -k $MACSEC_NETDEV)" + +ip link set $MACSEC_NETDEV type macsec offload off +TMP_FEATS_OFF_1="$(ethtool -k $MACSEC_NETDEV)" + +ip link set $MACSEC_NETDEV type macsec offload mac +TMP_FEATS_ON_2="$(ethtool -k $MACSEC_NETDEV)" + +[ "$TMP_FEATS_ON_1" = "$TMP_FEATS_ON_2" ] +check $? + +ip link del $MACSEC_NETDEV + +ip link add link $NSIM_NETDEV $MACSEC_NETDEV type macsec +check $? + +TMP_FEATS_OFF_2="$(ethtool -k $MACSEC_NETDEV)" +[ "$TMP_FEATS_OFF_1" = "$TMP_FEATS_OFF_2" ] +check $? + +ip link set $MACSEC_NETDEV type macsec offload mac +check $? + +TMP_FEATS_ON_3="$(ethtool -k $MACSEC_NETDEV)" +[ "$TMP_FEATS_ON_1" = "$TMP_FEATS_ON_3" ] +check $? + + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/shaper.py b/tools/testing/selftests/drivers/net/shaper.py new file mode 100755 index 000000000000..11310f19bfa0 --- /dev/null +++ b/tools/testing/selftests/drivers/net/shaper.py @@ -0,0 +1,461 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_true, KsftSkipEx +from lib.py import EthtoolFamily, NetshaperFamily +from lib.py import NetDrvEnv +from lib.py import NlError +from lib.py import cmd + +def get_shapers(cfg, nl_shaper) -> None: + try: + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + + # Default configuration: no shapers configured. + ksft_eq(len(shapers), 0) + +def get_caps(cfg, nl_shaper) -> None: + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex}, dump=True) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + + # Each device implementing shaper support must support some + # features in at least a scope. + ksft_true(len(caps)> 0) + +def set_qshapers(cfg, nl_shaper) -> None: + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'queue'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-bw-max' in caps or not 'support-metric-bps' in caps: + raise KsftSkipEx("device does not support queue scope shapers with bw_max and metric bps") + + cfg.queues = True; + netnl = EthtoolFamily() + channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + if channels['combined-count'] == 0: + cfg.rx_type = 'rx' + cfg.nr_queues = channels['rx-count'] + else: + cfg.rx_type = 'combined' + cfg.nr_queues = channels['combined-count'] + if cfg.nr_queues < 3: + raise KsftSkipEx(f"device does not support enough queues min 3 found {cfg.nr_queues}") + + nl_shaper.set({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 10000}) + nl_shaper.set({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 2}, + 'metric': 'bps', + 'bw-max': 20000}) + + # Querying a specific shaper not yet configured must fail. + raised = False + try: + shaper_q0 = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 0}}) + except (NlError): + raised = True + ksft_eq(raised, True) + + shaper_q1 = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + ksft_eq(shaper_q1, {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 10000}) + + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 10000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 2}, + 'metric': 'bps', + 'bw-max': 20000}]) + +def del_qshapers(cfg, nl_shaper) -> None: + if not cfg.queues: + raise KsftSkipEx("queue shapers not supported by device, skipping delete") + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 2}}) + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 0) + +def set_nshapers(cfg, nl_shaper) -> None: + # Check required features. + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'netdev'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-bw-max' in caps or not 'support-metric-bps' in caps: + raise KsftSkipEx("device does not support nested netdev scope shapers with weight") + + cfg.netdev = True; + nl_shaper.set({'ifindex': cfg.ifindex, + 'handle': {'scope': 'netdev', 'id': 0}, + 'bw-max': 100000}) + + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'handle': {'scope': 'netdev'}, + 'metric': 'bps', + 'bw-max': 100000}]) + +def del_nshapers(cfg, nl_shaper) -> None: + if not cfg.netdev: + raise KsftSkipEx("netdev shaper not supported by device, skipping delete") + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'netdev'}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 0) + +def basic_groups(cfg, nl_shaper) -> None: + if not cfg.netdev: + raise KsftSkipEx("netdev shaper not supported by the device") + if cfg.nr_queues < 3: + raise KsftSkipEx(f"netdev does not have enough queues min 3 reported {cfg.nr_queues}") + + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'queue'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-weight' in caps: + raise KsftSkipEx("device does not support queue scope shapers with weight") + + node_handle = nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 1}, + 'weight': 1}, + {'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}], + 'handle': {'scope':'netdev'}, + 'metric': 'bps', + 'bw-max': 10000}) + ksft_eq(node_handle, {'ifindex': cfg.ifindex, + 'handle': {'scope': 'netdev'}}) + + shaper = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + ksft_eq(shaper, {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'weight': 1 }) + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 2}}) + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + + # Deleting all the leaves shaper does not affect the node one + # when the latter has 'netdev' scope. + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 1) + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'netdev'}}) + +def qgroups(cfg, nl_shaper) -> None: + if cfg.nr_queues < 4: + raise KsftSkipEx(f"netdev does not have enough queues min 4 reported {cfg.nr_queues}") + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'node'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-bw-max' in caps or not 'support-metric-bps' in caps: + raise KsftSkipEx("device does not support node scope shapers with bw_max and metric bps") + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'queue'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("shapers not supported by the device") + raise + if not 'support-nesting' in caps or not 'support-weight' in caps or not 'support-metric-bps' in caps: + raise KsftSkipEx("device does not support nested queue scope shapers with weight") + + cfg.groups = True; + node_handle = nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}, + {'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}], + 'handle': {'scope':'node'}, + 'metric': 'bps', + 'bw-max': 10000}) + node_id = node_handle['handle']['id'] + + shaper = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + ksft_eq(shaper, {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}) + shaper = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'node', 'id': node_id}}) + ksft_eq(shaper, {'ifindex': cfg.ifindex, + 'handle': {'scope': 'node', 'id': node_id}, + 'parent': {'scope': 'netdev'}, + 'metric': 'bps', + 'bw-max': 10000}) + + # Grouping to a specified, not existing node scope shaper must fail + raised = False + try: + nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 3}, + 'weight': 3}], + 'handle': {'scope':'node', 'id': node_id + 1}, + 'metric': 'bps', + 'bw-max': 10000}) + + except (NlError): + raised = True + ksft_eq(raised, True) + + # Add to an existing node + node_handle = nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 3}, + 'weight': 4}], + 'handle': {'scope':'node', 'id': node_id}}) + ksft_eq(node_handle, {'ifindex': cfg.ifindex, + 'handle': {'scope': 'node', 'id': node_id}}) + + shaper = nl_shaper.get({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 3}}) + ksft_eq(shaper, {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 3}, + 'weight': 4}) + + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 2}}) + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 1}}) + + # Deleting a non empty node will move the leaves downstream. + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'node', 'id': node_id}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 3}, + 'weight': 4}]) + + # Finish and verify the complete cleanup. + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': 3}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 0) + +def delegation(cfg, nl_shaper) -> None: + if not cfg.groups: + raise KsftSkipEx("device does not support node scope") + try: + caps = nl_shaper.cap_get({'ifindex': cfg.ifindex, + 'scope':'node'}) + except NlError as e: + if e.error == 95: + raise KsftSkipEx("node scope shapers not supported by the device") + raise + if not 'support-nesting' in caps: + raise KsftSkipEx("device does not support node scope shapers nesting") + + node_handle = nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}, + {'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}, + {'handle': {'scope': 'queue', 'id': 3}, + 'weight': 1}], + 'handle': {'scope':'node'}, + 'metric': 'bps', + 'bw-max': 10000}) + node_id = node_handle['handle']['id'] + + # Create the nested node and validate the hierarchy + nested_node_handle = nl_shaper.group({ + 'ifindex': cfg.ifindex, + 'leaves':[{'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}, + {'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}], + 'handle': {'scope':'node'}, + 'metric': 'bps', + 'bw-max': 5000}) + nested_node_id = nested_node_handle['handle']['id'] + ksft_true(nested_node_id != node_id) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': nested_node_id}, + 'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': nested_node_id}, + 'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 3}, + 'weight': 1}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'node', 'id': node_id}, + 'metric': 'bps', + 'bw-max': 10000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'node', 'id': nested_node_id}, + 'metric': 'bps', + 'bw-max': 5000}]) + + # Deleting a non empty node will move the leaves downstream. + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'node', 'id': nested_node_id}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 1}, + 'weight': 3}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 2}, + 'weight': 2}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'node', 'id': node_id}, + 'handle': {'scope': 'queue', 'id': 3}, + 'weight': 1}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'node', 'id': node_id}, + 'metric': 'bps', + 'bw-max': 10000}]) + + # Final cleanup. + for i in range(1, 4): + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': i}}) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(len(shapers), 0) + +def queue_update(cfg, nl_shaper) -> None: + if cfg.nr_queues < 4: + raise KsftSkipEx(f"netdev does not have enough queues min 4 reported {cfg.nr_queues}") + if not cfg.queues: + raise KsftSkipEx("device does not support queue scope") + + for i in range(3): + nl_shaper.set({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': i}, + 'metric': 'bps', + 'bw-max': (i + 1) * 1000}) + # Delete a channel, with no shapers configured on top of the related + # queue: no changes expected + cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} 3", timeout=10) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 0}, + 'metric': 'bps', + 'bw-max': 1000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 2000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 2}, + 'metric': 'bps', + 'bw-max': 3000}]) + + # Delete a channel, with a shaper configured on top of the related + # queue: the shaper must be deleted, too + cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} 2", timeout=10) + + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 0}, + 'metric': 'bps', + 'bw-max': 1000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 2000}]) + + # Restore the original channels number, no expected changes + cmd(f"ethtool -L {cfg.dev['ifname']} {cfg.rx_type} {cfg.nr_queues}", timeout=10) + shapers = nl_shaper.get({'ifindex': cfg.ifindex}, dump=True) + ksft_eq(shapers, [{'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 0}, + 'metric': 'bps', + 'bw-max': 1000}, + {'ifindex': cfg.ifindex, + 'parent': {'scope': 'netdev'}, + 'handle': {'scope': 'queue', 'id': 1}, + 'metric': 'bps', + 'bw-max': 2000}]) + + # Final cleanup. + for i in range(0, 2): + nl_shaper.delete({'ifindex': cfg.ifindex, + 'handle': {'scope': 'queue', 'id': i}}) + +def main() -> None: + with NetDrvEnv(__file__, queue_count=4) as cfg: + cfg.queues = False + cfg.netdev = False + cfg.groups = False + cfg.nr_queues = 0 + ksft_run([get_shapers, + get_caps, + set_qshapers, + del_qshapers, + set_nshapers, + del_nshapers, + basic_groups, + qgroups, + delegation, + queue_update], args=(cfg, NetshaperFamily())) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore index 90c238ba6a4b..a0dc5d4bf733 100644 --- a/tools/testing/selftests/exec/.gitignore +++ b/tools/testing/selftests/exec/.gitignore @@ -9,7 +9,8 @@ execveat.ephemeral execveat.denatured non-regular null-argv -/load_address_* +/load_address.* +!load_address.c /recursion-depth xxxxxxxx* pipe diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore index f0c0ff20d6cf..828b66a10c63 100644 --- a/tools/testing/selftests/filesystems/.gitignore +++ b/tools/testing/selftests/filesystems/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only dnotify_test devpts_pts +file_stressor diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index c647fd6a0446..66305fc34c60 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := devpts_pts +TEST_GEN_PROGS := devpts_pts file_stressor TEST_GEN_PROGS_EXTENDED := dnotify_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/file_stressor.c b/tools/testing/selftests/filesystems/file_stressor.c new file mode 100644 index 000000000000..1136f93a9977 --- /dev/null +++ b/tools/testing/selftests/filesystems/file_stressor.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ + +#include <fcntl.h> +#include <limits.h> +#include <pthread.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> + +#include "../kselftest_harness.h" + +#include <linux/types.h> +#include <linux/mount.h> +#include <sys/syscall.h> + +static inline int sys_fsopen(const char *fsname, unsigned int flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} + +static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key, + const char *value, int aux) +{ + return syscall(__NR_fsconfig, fd, cmd, key, value, aux); +} + +static inline int sys_fsmount(int fd, unsigned int flags, + unsigned int attr_flags) +{ + return syscall(__NR_fsmount, fd, flags, attr_flags); +} + +#ifndef MOVE_MOUNT_F_EMPTY_PATH +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#endif + +static inline int sys_move_mount(int from_dfd, const char *from_pathname, + int to_dfd, const char *to_pathname, + unsigned int flags) +{ + return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, + to_pathname, flags); +} + +FIXTURE(file_stressor) { + int fd_tmpfs; + int nr_procs; + int max_fds; + pid_t *pids_openers; + pid_t *pids_getdents; + int *fd_proc_pid; +}; + +FIXTURE_SETUP(file_stressor) +{ + int fd_context; + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + ASSERT_EQ(mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0); + ASSERT_EQ(mkdir("/slab_typesafe_by_rcu", 0755), 0); + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); + self->fd_tmpfs = sys_fsmount(fd_context, 0, 0); + ASSERT_GE(self->fd_tmpfs, 0); + ASSERT_EQ(close(fd_context), 0); + + ASSERT_EQ(sys_move_mount(self->fd_tmpfs, "", -EBADF, "/slab_typesafe_by_rcu", MOVE_MOUNT_F_EMPTY_PATH), 0); + + self->nr_procs = sysconf(_SC_NPROCESSORS_ONLN); + self->pids_openers = malloc(sizeof(pid_t) * self->nr_procs); + ASSERT_NE(self->pids_openers, NULL); + self->pids_getdents = malloc(sizeof(pid_t) * self->nr_procs); + ASSERT_NE(self->pids_getdents, NULL); + self->fd_proc_pid = malloc(sizeof(int) * self->nr_procs); + ASSERT_NE(self->fd_proc_pid, NULL); + self->max_fds = 500; +} + +FIXTURE_TEARDOWN(file_stressor) +{ + for (int i = 0; i < self->nr_procs; i++) { + int wstatus; + pid_t pid; + + pid = waitpid(self->pids_openers[i], &wstatus, 0); + ASSERT_EQ(pid, self->pids_openers[i]); + ASSERT_TRUE(!WIFEXITED(wstatus) || !WIFSIGNALED(wstatus)); + + pid = waitpid(self->pids_getdents[i], &wstatus, 0); + ASSERT_EQ(pid, self->pids_getdents[i]); + ASSERT_TRUE(!WIFEXITED(wstatus) || !WIFSIGNALED(wstatus)); + } + free(self->pids_openers); + free(self->pids_getdents); + ASSERT_EQ(close(self->fd_tmpfs), 0); + + umount2("/slab_typesafe_by_rcu", 0); + ASSERT_EQ(rmdir("/slab_typesafe_by_rcu"), 0); +} + +TEST_F_TIMEOUT(file_stressor, slab_typesafe_by_rcu, 900 * 2) +{ + for (int i = 0; i < self->nr_procs; i++) { + pid_t pid_self; + + self->pids_openers[i] = fork(); + ASSERT_GE(self->pids_openers[i], 0); + + if (self->pids_openers[i] != 0) + continue; + + self->pids_openers[i] = getpid(); + for (;;) { + for (int i = 0; i < self->max_fds; i++) { + char path[PATH_MAX]; + int fd; + + sprintf(path, "/slab_typesafe_by_rcu/file-%d-%d", self->pids_openers[i], i); + fd = open(path, O_CREAT | O_RDONLY | O_CLOEXEC, 0644); + if (fd < 0) + continue; + } + + close_range(3, ~0U, 0); + } + + exit(0); + } + + for (int i = 0; i < self->nr_procs; i++) { + char path[PATH_MAX]; + + sprintf(path, "/proc/%d/fd/", self->pids_openers[i]); + self->fd_proc_pid[i] = open(path, O_DIRECTORY | O_RDONLY | O_CLOEXEC); + ASSERT_GE(self->fd_proc_pid[i], 0); + } + + for (int i = 0; i < self->nr_procs; i++) { + self->pids_getdents[i] = fork(); + ASSERT_GE(self->pids_getdents[i], 0); + + if (self->pids_getdents[i] != 0) + continue; + + self->pids_getdents[i] = getpid(); + for (;;) { + char ents[1024]; + ssize_t nr_read; + + /* + * Concurrently read /proc/<pid>/fd/ which rougly does: + * + * f = fget_task_next(p, &fd); + * if (!f) + * break; + * data.mode = f->f_mode; + * fput(f); + * + * Which means that it'll try to get a reference to a + * file in another task's file descriptor table. + * + * Under heavy file load it is increasingly likely that + * the other task will manage to close @file and @file + * is being recycled due to SLAB_TYPEAFE_BY_RCU + * concurrently. This will trigger various warnings in + * the file reference counting code. + */ + do { + nr_read = syscall(SYS_getdents64, self->fd_proc_pid[i], ents, sizeof(ents)); + } while (nr_read >= 0); + + lseek(self->fd_proc_pid[i], 0, SEEK_SET); + } + + exit(0); + } + + ASSERT_EQ(clock_nanosleep(CLOCK_MONOTONIC, 0, &(struct timespec){ .tv_sec = 900 /* 15 min */ }, NULL), 0); + + for (int i = 0; i < self->nr_procs; i++) { + kill(self->pids_openers[i], SIGKILL); + kill(self->pids_getdents[i], SIGKILL); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/overlayfs/.gitignore b/tools/testing/selftests/filesystems/overlayfs/.gitignore index 52ae618fdd98..e23a18c8b37f 100644 --- a/tools/testing/selftests/filesystems/overlayfs/.gitignore +++ b/tools/testing/selftests/filesystems/overlayfs/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only dev_in_maps +set_layers_via_fds diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile index 56b2b48a765b..e8d1adb021af 100644 --- a/tools/testing/selftests/filesystems/overlayfs/Makefile +++ b/tools/testing/selftests/filesystems/overlayfs/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -TEST_GEN_PROGS := dev_in_maps +TEST_GEN_PROGS := dev_in_maps set_layers_via_fds CFLAGS := -Wall -Werror diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c index 2862aae58b79..3b796264223f 100644 --- a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c +++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c @@ -17,32 +17,7 @@ #include "../../kselftest.h" #include "log.h" - -static int sys_fsopen(const char *fsname, unsigned int flags) -{ - return syscall(__NR_fsopen, fsname, flags); -} - -static int sys_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux) -{ - return syscall(__NR_fsconfig, fd, cmd, key, value, aux); -} - -static int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags) -{ - return syscall(__NR_fsmount, fd, flags, attr_flags); -} -static int sys_mount(const char *src, const char *tgt, const char *fst, - unsigned long flags, const void *data) -{ - return syscall(__NR_mount, src, tgt, fst, flags, data); -} -static int sys_move_mount(int from_dfd, const char *from_pathname, - int to_dfd, const char *to_pathname, - unsigned int flags) -{ - return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags); -} +#include "wrappers.h" static long get_file_dev_and_inode(void *addr, struct statx *stx) { diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c new file mode 100644 index 000000000000..1d0ae785a667 --- /dev/null +++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ // Use ll64 + +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> + +#include "../../kselftest_harness.h" +#include "log.h" +#include "wrappers.h" + +FIXTURE(set_layers_via_fds) { +}; + +FIXTURE_SETUP(set_layers_via_fds) +{ + ASSERT_EQ(mkdir("/set_layers_via_fds", 0755), 0); +} + +FIXTURE_TEARDOWN(set_layers_via_fds) +{ + umount2("/set_layers_via_fds", 0); + ASSERT_EQ(rmdir("/set_layers_via_fds"), 0); +} + +TEST_F(set_layers_via_fds, set_layers_via_fds) +{ + int fd_context, fd_tmpfs, fd_overlay; + int layer_fds[] = { [0 ... 8] = -EBADF }; + bool layers_found[] = { [0 ... 8] = false }; + size_t len = 0; + char *line = NULL; + FILE *f_mountinfo; + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0); + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); + fd_tmpfs = sys_fsmount(fd_context, 0, 0); + ASSERT_GE(fd_tmpfs, 0); + ASSERT_EQ(close(fd_context), 0); + + ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "l3", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "l4", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "d1", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "d2", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "d3", 0755), 0); + + layer_fds[0] = openat(fd_tmpfs, "w", O_DIRECTORY); + ASSERT_GE(layer_fds[0], 0); + + layer_fds[1] = openat(fd_tmpfs, "u", O_DIRECTORY); + ASSERT_GE(layer_fds[1], 0); + + layer_fds[2] = openat(fd_tmpfs, "l1", O_DIRECTORY); + ASSERT_GE(layer_fds[2], 0); + + layer_fds[3] = openat(fd_tmpfs, "l2", O_DIRECTORY); + ASSERT_GE(layer_fds[3], 0); + + layer_fds[4] = openat(fd_tmpfs, "l3", O_DIRECTORY); + ASSERT_GE(layer_fds[4], 0); + + layer_fds[5] = openat(fd_tmpfs, "l4", O_DIRECTORY); + ASSERT_GE(layer_fds[5], 0); + + layer_fds[6] = openat(fd_tmpfs, "d1", O_DIRECTORY); + ASSERT_GE(layer_fds[6], 0); + + layer_fds[7] = openat(fd_tmpfs, "d2", O_DIRECTORY); + ASSERT_GE(layer_fds[7], 0); + + layer_fds[8] = openat(fd_tmpfs, "d3", O_DIRECTORY); + ASSERT_GE(layer_fds[8], 0); + + ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0); + ASSERT_EQ(close(fd_tmpfs), 0); + + fd_context = sys_fsopen("overlay", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[0]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[1]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[4]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[5]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[6]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[7]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[8]), 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); + + fd_overlay = sys_fsmount(fd_context, 0, 0); + ASSERT_GE(fd_overlay, 0); + + ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0); + + f_mountinfo = fopen("/proc/self/mountinfo", "r"); + ASSERT_NE(f_mountinfo, NULL); + + while (getline(&line, &len, f_mountinfo) != -1) { + char *haystack = line; + + if (strstr(haystack, "workdir=/tmp/w")) + layers_found[0] = true; + if (strstr(haystack, "upperdir=/tmp/u")) + layers_found[1] = true; + if (strstr(haystack, "lowerdir+=/tmp/l1")) + layers_found[2] = true; + if (strstr(haystack, "lowerdir+=/tmp/l2")) + layers_found[3] = true; + if (strstr(haystack, "lowerdir+=/tmp/l3")) + layers_found[4] = true; + if (strstr(haystack, "lowerdir+=/tmp/l4")) + layers_found[5] = true; + if (strstr(haystack, "datadir+=/tmp/d1")) + layers_found[6] = true; + if (strstr(haystack, "datadir+=/tmp/d2")) + layers_found[7] = true; + if (strstr(haystack, "datadir+=/tmp/d3")) + layers_found[8] = true; + } + free(line); + + for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) { + ASSERT_EQ(layers_found[i], true); + ASSERT_EQ(close(layer_fds[i]), 0); + } + + ASSERT_EQ(close(fd_context), 0); + ASSERT_EQ(close(fd_overlay), 0); + ASSERT_EQ(fclose(f_mountinfo), 0); +} + +TEST_F(set_layers_via_fds, set_500_layers_via_fds) +{ + int fd_context, fd_tmpfs, fd_overlay, fd_work, fd_upper, fd_lower; + int layer_fds[500] = { [0 ... 499] = -EBADF }; + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0); + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); + fd_tmpfs = sys_fsmount(fd_context, 0, 0); + ASSERT_GE(fd_tmpfs, 0); + ASSERT_EQ(close(fd_context), 0); + + for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) { + char path[100]; + + sprintf(path, "l%d", i); + ASSERT_EQ(mkdirat(fd_tmpfs, path, 0755), 0); + layer_fds[i] = openat(fd_tmpfs, path, O_DIRECTORY); + ASSERT_GE(layer_fds[i], 0); + } + + ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0); + fd_work = openat(fd_tmpfs, "w", O_DIRECTORY); + ASSERT_GE(fd_work, 0); + + ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0); + fd_upper = openat(fd_tmpfs, "u", O_DIRECTORY); + ASSERT_GE(fd_upper, 0); + + ASSERT_EQ(mkdirat(fd_tmpfs, "l501", 0755), 0); + fd_lower = openat(fd_tmpfs, "l501", O_DIRECTORY); + ASSERT_GE(fd_lower, 0); + + ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0); + ASSERT_EQ(close(fd_tmpfs), 0); + + fd_context = sys_fsopen("overlay", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, fd_work), 0); + ASSERT_EQ(close(fd_work), 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, fd_upper), 0); + ASSERT_EQ(close(fd_upper), 0); + + for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) { + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[i]), 0); + ASSERT_EQ(close(layer_fds[i]), 0); + } + + ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, fd_lower), 0); + ASSERT_EQ(close(fd_lower), 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); + + fd_overlay = sys_fsmount(fd_context, 0, 0); + ASSERT_GE(fd_overlay, 0); + ASSERT_EQ(close(fd_context), 0); + ASSERT_EQ(close(fd_overlay), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/overlayfs/wrappers.h b/tools/testing/selftests/filesystems/overlayfs/wrappers.h new file mode 100644 index 000000000000..071b95fd2ac0 --- /dev/null +++ b/tools/testing/selftests/filesystems/overlayfs/wrappers.h @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-2.0 +// +#ifndef __SELFTEST_OVERLAYFS_WRAPPERS_H__ +#define __SELFTEST_OVERLAYFS_WRAPPERS_H__ + +#define _GNU_SOURCE + +#include <linux/types.h> +#include <linux/mount.h> +#include <sys/syscall.h> + +static inline int sys_fsopen(const char *fsname, unsigned int flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} + +static inline int sys_fsconfig(int fd, unsigned int cmd, const char *key, + const char *value, int aux) +{ + return syscall(__NR_fsconfig, fd, cmd, key, value, aux); +} + +static inline int sys_fsmount(int fd, unsigned int flags, + unsigned int attr_flags) +{ + return syscall(__NR_fsmount, fd, flags, attr_flags); +} + +static inline int sys_mount(const char *src, const char *tgt, const char *fst, + unsigned long flags, const void *data) +{ + return syscall(__NR_mount, src, tgt, fst, flags, data); +} + +#ifndef MOVE_MOUNT_F_EMPTY_PATH +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#endif + +static inline int sys_move_mount(int from_dfd, const char *from_pathname, + int to_dfd, const char *to_pathname, + unsigned int flags) +{ + return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, + to_pathname, flags); +} + +#endif diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c index c773334bbcc9..8eb6aa606a0d 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c @@ -27,7 +27,7 @@ static const char *const known_fs[] = { "ipathfs", "iso9660", "jffs2", "jfs", "minix", "mqueue", "msdos", "nfs", "nfs4", "nfsd", "nilfs2", "nsfs", "ntfs", "ntfs3", "ocfs2", "ocfs2_dlmfs", "ocxlflash", "omfs", "openpromfs", "overlay", "pipefs", - "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "reiserfs", + "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "resctrl", "romfs", "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem", "securityfs", "selinuxfs", "smackfs", "smb3", "sockfs", "spufs", "squashfs", "sysfs", "sysv", "tmpfs", "tracefs", "ubifs", "udf", diff --git a/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc new file mode 100644 index 000000000000..35e8d47d6072 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/00basic/mount_options.tc @@ -0,0 +1,101 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Test tracefs GID mount option +# requires: "[gid=<gid>]":README + +fail() { + local msg="$1" + + echo "FAILED: $msg" + exit_fail +} + +find_alternate_gid() { + local original_gid="$1" + tac /etc/group | grep -v ":$original_gid:" | head -1 | cut -d: -f3 +} + +mount_tracefs_with_options() { + local mount_point="$1" + local options="$2" + + mount -t tracefs -o "$options" nodev "$mount_point" + + setup +} + +unmount_tracefs() { + local mount_point="$1" + + # Need to make sure the mount isn't busy so that we can umount it + (cd $mount_point; finish_ftrace;) + + cleanup +} + +create_instance() { + local mount_point="$1" + local instance="$mount_point/instances/$(mktemp -u test-XXXXXX)" + + mkdir "$instance" + echo "$instance" +} + +remove_instance() { + local instance="$1" + + rmdir "$instance" +} + +check_gid() { + local mount_point="$1" + local expected_gid="$2" + + echo "Checking permission group ..." + + cd "$mount_point" + + for file in "." "events" "events/sched" "events/sched/sched_switch" "events/sched/sched_switch/enable"; do + local gid=`stat -c "%g" $file` + if [ "$gid" -ne "$expected_gid" ]; then + cd - # Return to the previous working directory (tracefs root) + fail "$(realpath $file): Expected group $expected_gid; Got group $gid" + fi + done + + cd - # Return to the previous working directory (tracefs root) +} + +test_gid_mount_option() { + local mount_point=$(get_mount_point) + local mount_options=$(get_mnt_options "$mount_point") + local original_group=$(stat -c "%g" .) + local other_group=$(find_alternate_gid "$original_group") + + # Set up mount options with new GID for testing + local new_options=`echo "$mount_options" | sed -e "s/gid=[0-9]*/gid=$other_group/"` + if [ "$new_options" = "$mount_options" ]; then + new_options="$mount_options,gid=$other_group" + mount_options="$mount_options,gid=$original_group" + fi + + # Unmount existing tracefs instance and mount with new GID + unmount_tracefs "$mount_point" + mount_tracefs_with_options "$mount_point" "$new_options" + + check_gid "$mount_point" "$other_group" + + # Check that files created after the mount inherit the GID + local instance=$(create_instance "$mount_point") + check_gid "$instance" "$other_group" + remove_instance "$instance" + + # Unmount and remount with the original GID + unmount_tracefs "$mount_point" + mount_tracefs_with_options "$mount_point" "$mount_options" + check_gid "$mount_point" "$original_group" +} + +test_gid_mount_option + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc index 094419e190c2..e71cc3ad0bdf 100644 --- a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc +++ b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc @@ -1,24 +1,14 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 # description: Test file and directory ownership changes for eventfs +# requires: "[gid=<gid>]":README original_group=`stat -c "%g" .` original_owner=`stat -c "%u" .` -mount_point=`stat -c '%m' .` +local mount_point=$(get_mount_point) -# If stat -c '%m' does not work (e.g. busybox) or failed, try to use the -# current working directory (which should be a tracefs) as the mount point. -if [ ! -d "$mount_point" ]; then - if mount | grep -qw $PWD ; then - mount_point=$PWD - else - # If PWD doesn't work, that is an environmental problem. - exit_unresolved - fi -fi - -mount_options=`mount | grep "$mount_point" | sed -e 's/.*(\(.*\)).*/\1/'` +mount_options=$(get_mnt_options "$mount_point") # find another owner and group that is not the original other_group=`tac /etc/group | grep -v ":$original_group:" | head -1 | cut -d: -f3` diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc new file mode 100644 index 000000000000..ffff8646733c --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function profiler with function graph tracing +# requires: function_profile_enabled set_ftrace_filter function_graph:tracer + +# The function graph tracer can now be run along side of the function +# profiler. But there was a bug that caused the combination of the two +# to crash. It also required the function graph tracer to be started +# first. +# +# This test triggers that bug +# +# We need both function_graph and profiling to run this test + +fail() { # mesg + echo $1 + exit_fail +} + +echo "Enabling function graph tracer:" +echo function_graph > current_tracer +echo "enable profiler" + +# Older kernels do not allow function_profile to be enabled with +# function graph tracer. If the below fails, mark it as unsupported +echo 1 > function_profile_enabled || exit_unsupported + +# Let it run for a bit to make sure nothing explodes +sleep 1 + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc index e34c0bdef3ed..4307d4eef417 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc @@ -29,7 +29,7 @@ set -e : "Test printing the error code in signed decimal format" echo 0 > options/funcgraph-retval-hex -count=`cat trace | grep 'proc_reg_write' | grep '= -5' | wc -l` +count=`cat trace | grep 'proc_reg_write' | grep -e '=-5 ' -e '= -5 ' | wc -l` if [ $count -eq 0 ]; then fail "Return value can not be printed in signed decimal format" fi diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions index 779f3e62ec90..84d6a9c7ad67 100644 --- a/tools/testing/selftests/ftrace/test.d/functions +++ b/tools/testing/selftests/ftrace/test.d/functions @@ -193,3 +193,28 @@ ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file # " Command: " and "^\n" => 13 test $(expr 13 + $pos) -eq $N } + +# Helper to get the tracefs mount point +get_mount_point() { + local mount_point=`stat -c '%m' .` + + # If stat -c '%m' does not work (e.g. busybox) or failed, try to use the + # current working directory (which should be a tracefs) as the mount point. + if [ ! -d "$mount_point" ]; then + if mount | grep -qw "$PWD"; then + mount_point=$PWD + else + # If PWD doesn't work, that is an environmental problem. + exit_unresolved + fi + fi + echo "$mount_point" +} + +# Helper function to retrieve mount options for a given mount point +get_mnt_options() { + local mnt_point="$1" + local opts=$(mount | grep -m1 "$mnt_point" | sed -e 's/.*(\(.*\)).*/\1/') + + echo "$opts" +}
\ No newline at end of file diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile index 72be55ac4bdf..0336353bd15f 100644 --- a/tools/testing/selftests/hid/Makefile +++ b/tools/testing/selftests/hid/Makefile @@ -17,6 +17,9 @@ TEST_PROGS += hid-tablet.sh TEST_PROGS += hid-usb_crash.sh TEST_PROGS += hid-wacom.sh +TEST_FILES := run-hid-tools-tests.sh +TEST_FILES += tests + CXX ?= $(CROSS_COMPILE)g++ HOSTPKG_CONFIG := pkg-config @@ -229,7 +232,7 @@ $(BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(OUTPUT) $(Q)$(BPFTOOL) gen object $(<:.o=.linked1.o) $< $(Q)$(BPFTOOL) gen skeleton $(<:.o=.linked1.o) name $(notdir $(<:.bpf.o=)) > $@ -$(OUTPUT)/%.o: %.c $(BPF_SKELS) +$(OUTPUT)/%.o: %.c $(BPF_SKELS) hid_common.h $(call msg,CC,,$@) $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@ diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c index 86f4d66379f7..1e979fb3542b 100644 --- a/tools/testing/selftests/hid/hid_bpf.c +++ b/tools/testing/selftests/hid/hid_bpf.c @@ -4,13 +4,6 @@ #include "hid_common.h" #include <bpf/bpf.h> -struct attach_prog_args { - int prog_fd; - unsigned int hid; - int retval; - int insert_head; -}; - struct hid_hw_request_syscall_args { __u8 data[10]; unsigned int hid; @@ -21,11 +14,8 @@ struct hid_hw_request_syscall_args { }; FIXTURE(hid_bpf) { - int dev_id; - int uhid_fd; + struct uhid_device hid; int hidraw_fd; - int hid_id; - pthread_t tid; struct hid *skel; struct bpf_link *hid_links[3]; /* max number of programs loaded in a single test */ }; @@ -54,10 +44,10 @@ static void detach_bpf(FIXTURE_DATA(hid_bpf) * self) FIXTURE_TEARDOWN(hid_bpf) { void *uhid_err; - uhid_destroy(_metadata, self->uhid_fd); + uhid_destroy(_metadata, &self->hid); detach_bpf(self); - pthread_join(self->tid, &uhid_err); + pthread_join(self->hid.tid, &uhid_err); } #define TEARDOWN_LOG(fmt, ...) do { \ TH_LOG(fmt, ##__VA_ARGS__); \ @@ -66,23 +56,10 @@ FIXTURE_TEARDOWN(hid_bpf) { FIXTURE_SETUP(hid_bpf) { - time_t t; int err; - /* initialize random number generator */ - srand((unsigned int)time(&t)); - - self->dev_id = rand() % 1024; - - self->uhid_fd = setup_uhid(_metadata, self->dev_id); - - /* locate the uev, self, variant);ent file of the created device */ - self->hid_id = get_hid_id(self->dev_id); - ASSERT_GT(self->hid_id, 0) - TEARDOWN_LOG("Could not locate uhid device id: %d", self->hid_id); - - err = uhid_start_listener(_metadata, &self->tid, self->uhid_fd); - ASSERT_EQ(0, err) TEARDOWN_LOG("could not start udev listener: %d", err); + err = setup_uhid(_metadata, &self->hid, BUS_USB, 0x0001, 0x0a36, rdesc, sizeof(rdesc)); + ASSERT_OK(err); } struct test_program { @@ -129,7 +106,7 @@ static void load_programs(const struct test_program programs[], ops_hid_id = bpf_map__initial_value(map, NULL); ASSERT_OK_PTR(ops_hid_id) TH_LOG("unable to retrieve struct_ops data"); - *ops_hid_id = self->hid_id; + *ops_hid_id = self->hid.hid_id; } /* we disable the auto-attach feature of all maps because we @@ -157,7 +134,7 @@ static void load_programs(const struct test_program programs[], hid__attach(self->skel); - self->hidraw_fd = open_hidraw(self->dev_id); + self->hidraw_fd = open_hidraw(&self->hid); ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); } @@ -192,7 +169,7 @@ TEST_F(hid_bpf, raw_event) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* check that hid_first_event() was executed */ ASSERT_EQ(self->skel->data->callback_check, 42) TH_LOG("callback_check1"); @@ -208,7 +185,7 @@ TEST_F(hid_bpf, raw_event) memset(buf, 0, sizeof(buf)); buf[0] = 1; buf[1] = 47; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* check that hid_first_event() was executed */ ASSERT_EQ(self->skel->data->callback_check, 47) TH_LOG("callback_check1"); @@ -239,7 +216,7 @@ TEST_F(hid_bpf, subprog_raw_event) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -252,7 +229,7 @@ TEST_F(hid_bpf, subprog_raw_event) memset(buf, 0, sizeof(buf)); buf[0] = 1; buf[1] = 47; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -303,7 +280,7 @@ TEST_F(hid_bpf, test_attach_detach) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -326,14 +303,14 @@ TEST_F(hid_bpf, test_attach_detach) /* detach the program */ detach_bpf(self); - self->hidraw_fd = open_hidraw(self->dev_id); + self->hidraw_fd = open_hidraw(&self->hid); ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); /* inject another event */ memset(buf, 0, sizeof(buf)); buf[0] = 1; buf[1] = 47; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -352,7 +329,7 @@ TEST_F(hid_bpf, test_attach_detach) memset(buf, 0, sizeof(buf)); buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -382,7 +359,7 @@ TEST_F(hid_bpf, test_hid_change_report) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -412,7 +389,7 @@ TEST_F(hid_bpf, test_hid_user_input_report_call) LOAD_BPF; - args.hid = self->hid_id; + args.hid = self->hid.hid_id; args.data[0] = 1; /* report ID */ args.data[1] = 2; /* report ID */ args.data[2] = 42; /* report ID */ @@ -458,7 +435,7 @@ TEST_F(hid_bpf, test_hid_user_output_report_call) LOAD_BPF; - args.hid = self->hid_id; + args.hid = self->hid.hid_id; args.data[0] = 1; /* report ID */ args.data[1] = 2; /* report ID */ args.data[2] = 42; /* report ID */ @@ -506,7 +483,7 @@ TEST_F(hid_bpf, test_hid_user_raw_request_call) LOAD_BPF; - args.hid = self->hid_id; + args.hid = self->hid.hid_id; args.data[0] = 1; /* report ID */ prog_fd = bpf_program__fd(self->skel->progs.hid_user_raw_request); @@ -539,7 +516,7 @@ TEST_F(hid_bpf, test_hid_filter_raw_request_call) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -565,7 +542,7 @@ TEST_F(hid_bpf, test_hid_filter_raw_request_call) /* detach the program */ detach_bpf(self); - self->hidraw_fd = open_hidraw(self->dev_id); + self->hidraw_fd = open_hidraw(&self->hid); ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf); @@ -641,7 +618,7 @@ TEST_F(hid_bpf, test_hid_filter_output_report_call) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -667,7 +644,7 @@ TEST_F(hid_bpf, test_hid_filter_output_report_call) /* detach the program */ detach_bpf(self); - self->hidraw_fd = open_hidraw(self->dev_id); + self->hidraw_fd = open_hidraw(&self->hid); ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); err = write(self->hidraw_fd, buf, 3); @@ -742,7 +719,7 @@ TEST_F(hid_bpf, test_multiply_events_wq) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -780,7 +757,7 @@ TEST_F(hid_bpf, test_multiply_events) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -816,7 +793,7 @@ TEST_F(hid_bpf, test_hid_infinite_loop_input_report_call) buf[1] = 2; buf[2] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -867,7 +844,7 @@ TEST_F(hid_bpf, test_hid_attach_flags) /* inject one event */ buf[0] = 1; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); diff --git a/tools/testing/selftests/hid/hid_common.h b/tools/testing/selftests/hid/hid_common.h index f151f151a1ed..f77f69c6657d 100644 --- a/tools/testing/selftests/hid/hid_common.h +++ b/tools/testing/selftests/hid/hid_common.h @@ -19,6 +19,16 @@ __typeof__(b) _b = (b); \ _a < _b ? _a : _b; }) +struct uhid_device { + int dev_id; /* uniq (random) number to identify the device */ + int uhid_fd; + int hid_id; /* HID device id in the system */ + __u16 bus; + __u32 vid; + __u32 pid; + pthread_t tid; /* thread for reading uhid events */ +}; + static unsigned char rdesc[] = { 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */ 0x09, 0x21, /* Usage (Vendor Usage 0x21) */ @@ -122,7 +132,9 @@ static int uhid_write(struct __test_metadata *_metadata, int fd, const struct uh } } -static int uhid_create(struct __test_metadata *_metadata, int fd, int rand_nb) +static int uhid_create(struct __test_metadata *_metadata, int fd, int rand_nb, + __u16 bus, __u32 vid, __u32 pid, __u8 *rdesc, + size_t rdesc_size) { struct uhid_event ev; char buf[25]; @@ -133,10 +145,10 @@ static int uhid_create(struct __test_metadata *_metadata, int fd, int rand_nb) ev.type = UHID_CREATE; strcpy((char *)ev.u.create.name, buf); ev.u.create.rd_data = rdesc; - ev.u.create.rd_size = sizeof(rdesc); - ev.u.create.bus = BUS_USB; - ev.u.create.vendor = 0x0001; - ev.u.create.product = 0x0a37; + ev.u.create.rd_size = rdesc_size; + ev.u.create.bus = bus; + ev.u.create.vendor = vid; + ev.u.create.product = pid; ev.u.create.version = 0; ev.u.create.country = 0; @@ -146,14 +158,14 @@ static int uhid_create(struct __test_metadata *_metadata, int fd, int rand_nb) return uhid_write(_metadata, fd, &ev); } -static void uhid_destroy(struct __test_metadata *_metadata, int fd) +static void uhid_destroy(struct __test_metadata *_metadata, struct uhid_device *hid) { struct uhid_event ev; memset(&ev, 0, sizeof(ev)); ev.type = UHID_DESTROY; - uhid_write(_metadata, fd, &ev); + uhid_write(_metadata, hid->uhid_fd, &ev); } static int uhid_event(struct __test_metadata *_metadata, int fd) @@ -281,7 +293,8 @@ static int uhid_start_listener(struct __test_metadata *_metadata, pthread_t *tid return 0; } -static int uhid_send_event(struct __test_metadata *_metadata, int fd, __u8 *buf, size_t size) +static int uhid_send_event(struct __test_metadata *_metadata, struct uhid_device *hid, + __u8 *buf, size_t size) { struct uhid_event ev; @@ -294,36 +307,20 @@ static int uhid_send_event(struct __test_metadata *_metadata, int fd, __u8 *buf, memcpy(ev.u.input2.data, buf, size); - return uhid_write(_metadata, fd, &ev); + return uhid_write(_metadata, hid->uhid_fd, &ev); } -static int setup_uhid(struct __test_metadata *_metadata, int rand_nb) +static bool match_sysfs_device(struct uhid_device *hid, const char *workdir, struct dirent *dir) { - int fd; - const char *path = "/dev/uhid"; - int ret; - - fd = open(path, O_RDWR | O_CLOEXEC); - ASSERT_GE(fd, 0) TH_LOG("open uhid-cdev failed; %d", fd); - - ret = uhid_create(_metadata, fd, rand_nb); - ASSERT_EQ(0, ret) { - TH_LOG("create uhid device failed: %d", ret); - close(fd); - } - - return fd; -} - -static bool match_sysfs_device(int dev_id, const char *workdir, struct dirent *dir) -{ - const char *target = "0003:0001:0A37.*"; + char target[20] = ""; char phys[512]; char uevent[1024]; char temp[512]; int fd, nread; bool found = false; + snprintf(target, sizeof(target), "%04X:%04X:%04X.*", hid->bus, hid->vid, hid->pid); + if (fnmatch(target, dir->d_name, 0)) return false; @@ -334,7 +331,7 @@ static bool match_sysfs_device(int dev_id, const char *workdir, struct dirent *d if (fd < 0) return false; - sprintf(phys, "PHYS=%d", dev_id); + sprintf(phys, "PHYS=%d", hid->dev_id); nread = read(fd, temp, ARRAY_SIZE(temp)); if (nread > 0 && (strstr(temp, phys)) != NULL) @@ -345,7 +342,7 @@ static bool match_sysfs_device(int dev_id, const char *workdir, struct dirent *d return found; } -static int get_hid_id(int dev_id) +static int get_hid_id(struct uhid_device *hid) { const char *workdir = "/sys/devices/virtual/misc/uhid"; const char *str_id; @@ -360,10 +357,10 @@ static int get_hid_id(int dev_id) d = opendir(workdir); if (d) { while ((dir = readdir(d)) != NULL) { - if (!match_sysfs_device(dev_id, workdir, dir)) + if (!match_sysfs_device(hid, workdir, dir)) continue; - str_id = dir->d_name + sizeof("0003:0001:0A37."); + str_id = dir->d_name + sizeof("0000:0000:0000."); found = (int)strtol(str_id, NULL, 16); break; @@ -377,7 +374,7 @@ static int get_hid_id(int dev_id) return found; } -static int get_hidraw(int dev_id) +static int get_hidraw(struct uhid_device *hid) { const char *workdir = "/sys/devices/virtual/misc/uhid"; char sysfs[1024]; @@ -394,7 +391,7 @@ static int get_hidraw(int dev_id) continue; while ((dir = readdir(d)) != NULL) { - if (!match_sysfs_device(dev_id, workdir, dir)) + if (!match_sysfs_device(hid, workdir, dir)) continue; sprintf(sysfs, "%s/%s/hidraw", workdir, dir->d_name); @@ -421,12 +418,12 @@ static int get_hidraw(int dev_id) return found; } -static int open_hidraw(int dev_id) +static int open_hidraw(struct uhid_device *hid) { int hidraw_number; char hidraw_path[64] = { 0 }; - hidraw_number = get_hidraw(dev_id); + hidraw_number = get_hidraw(hid); if (hidraw_number < 0) return hidraw_number; @@ -434,3 +431,44 @@ static int open_hidraw(int dev_id) sprintf(hidraw_path, "/dev/hidraw%d", hidraw_number); return open(hidraw_path, O_RDWR | O_NONBLOCK); } + +static int setup_uhid(struct __test_metadata *_metadata, struct uhid_device *hid, + __u16 bus, __u32 vid, __u32 pid, const __u8 *rdesc, size_t rdesc_size) +{ + const char *path = "/dev/uhid"; + time_t t; + int ret; + + /* initialize random number generator */ + srand((unsigned int)time(&t)); + + hid->dev_id = rand() % 1024; + hid->bus = bus; + hid->vid = vid; + hid->pid = pid; + + hid->uhid_fd = open(path, O_RDWR | O_CLOEXEC); + ASSERT_GE(hid->uhid_fd, 0) TH_LOG("open uhid-cdev failed; %d", hid->uhid_fd); + + ret = uhid_create(_metadata, hid->uhid_fd, hid->dev_id, bus, vid, pid, + (__u8 *)rdesc, rdesc_size); + ASSERT_EQ(0, ret) { + TH_LOG("create uhid device failed: %d", ret); + close(hid->uhid_fd); + return ret; + } + + /* locate the uevent file of the created device */ + hid->hid_id = get_hid_id(hid); + ASSERT_GT(hid->hid_id, 0) + TH_LOG("Could not locate uhid device id: %d", hid->hid_id); + + ret = uhid_start_listener(_metadata, &hid->tid, hid->uhid_fd); + ASSERT_EQ(0, ret) { + TH_LOG("could not start udev listener: %d", ret); + close(hid->uhid_fd); + return ret; + } + + return 0; +} diff --git a/tools/testing/selftests/hid/hidraw.c b/tools/testing/selftests/hid/hidraw.c index f8b4f7ff292c..821db37ba4bb 100644 --- a/tools/testing/selftests/hid/hidraw.c +++ b/tools/testing/selftests/hid/hidraw.c @@ -9,11 +9,8 @@ #endif /* HIDIOCREVOKE */ FIXTURE(hidraw) { - int dev_id; - int uhid_fd; + struct uhid_device hid; int hidraw_fd; - int hid_id; - pthread_t tid; }; static void close_hidraw(FIXTURE_DATA(hidraw) * self) { @@ -25,10 +22,10 @@ static void close_hidraw(FIXTURE_DATA(hidraw) * self) FIXTURE_TEARDOWN(hidraw) { void *uhid_err; - uhid_destroy(_metadata, self->uhid_fd); + uhid_destroy(_metadata, &self->hid); close_hidraw(self); - pthread_join(self->tid, &uhid_err); + pthread_join(self->hid.tid, &uhid_err); } #define TEARDOWN_LOG(fmt, ...) do { \ TH_LOG(fmt, ##__VA_ARGS__); \ @@ -37,25 +34,12 @@ FIXTURE_TEARDOWN(hidraw) { FIXTURE_SETUP(hidraw) { - time_t t; int err; - /* initialize random number generator */ - srand((unsigned int)time(&t)); + err = setup_uhid(_metadata, &self->hid, BUS_USB, 0x0001, 0x0a37, rdesc, sizeof(rdesc)); + ASSERT_OK(err); - self->dev_id = rand() % 1024; - - self->uhid_fd = setup_uhid(_metadata, self->dev_id); - - /* locate the uev, self, variant);ent file of the created device */ - self->hid_id = get_hid_id(self->dev_id); - ASSERT_GT(self->hid_id, 0) - TEARDOWN_LOG("Could not locate uhid device id: %d", self->hid_id); - - err = uhid_start_listener(_metadata, &self->tid, self->uhid_fd); - ASSERT_EQ(0, err) TEARDOWN_LOG("could not start udev listener: %d", err); - - self->hidraw_fd = open_hidraw(self->dev_id); + self->hidraw_fd = open_hidraw(&self->hid); ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw"); } @@ -79,7 +63,7 @@ TEST_F(hidraw, raw_event) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -101,7 +85,7 @@ TEST_F(hidraw, raw_event_revoked) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -117,7 +101,7 @@ TEST_F(hidraw, raw_event_revoked) /* inject one other event */ buf[0] = 1; buf[1] = 43; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); /* read the data from hidraw */ memset(buf, 0, sizeof(buf)); @@ -161,7 +145,7 @@ TEST_F(hidraw, poll_revoked) /* inject one event */ buf[0] = 1; buf[1] = 42; - uhid_send_event(_metadata, self->uhid_fd, buf, 6); + uhid_send_event(_metadata, &self->hid, buf, 6); while (true) { ready = poll(pfds, 1, 5000); diff --git a/tools/testing/selftests/intel_pstate/run.sh b/tools/testing/selftests/intel_pstate/run.sh index e7008f614ad7..6a3b8503264e 100755 --- a/tools/testing/selftests/intel_pstate/run.sh +++ b/tools/testing/selftests/intel_pstate/run.sh @@ -44,6 +44,11 @@ if [ $UID != 0 ] && [ $EVALUATE_ONLY == 0 ]; then exit $ksft_skip fi +if ! command -v cpupower &> /dev/null; then + echo $msg cpupower could not be found, please install it >&2 + exit $ksft_skip +fi + max_cpus=$(($(nproc)-1)) function run_test () { @@ -87,9 +92,9 @@ mkt_freq=${_mkt_freq}0 # Get the ranges from cpupower _min_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $1 } ') -min_freq=$(($_min_freq / 1000)) +min_freq=$((_min_freq / 1000)) _max_freq=$(cpupower frequency-info -l | tail -1 | awk ' { print $2 } ') -max_freq=$(($_max_freq / 1000)) +max_freq=$((_max_freq / 1000)) [ $EVALUATE_ONLY -eq 0 ] && for freq in `seq $max_freq -100 $min_freq` diff --git a/tools/testing/selftests/iommu/Makefile b/tools/testing/selftests/iommu/Makefile index fd6477911f24..84abeb2f0949 100644 --- a/tools/testing/selftests/iommu/Makefile +++ b/tools/testing/selftests/iommu/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS += -Wall -O2 -Wno-unused-function CFLAGS += $(KHDR_INCLUDES) +LDLIBS += -lcap TEST_GEN_PROGS := TEST_GEN_PROGS += iommufd diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 4927b9add5ad..a1b2b657999d 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */ +#include <asm/unistd.h> #include <stdlib.h> +#include <sys/capability.h> #include <sys/mman.h> #include <sys/eventfd.h> @@ -49,6 +51,9 @@ static __attribute__((constructor)) void setup_sizes(void) vrc = mmap(buffer, BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0); assert(vrc == buffer); + + mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, + &mfd); } FIXTURE(iommufd) @@ -128,6 +133,11 @@ TEST_F(iommufd, cmd_length) TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP, length); TEST_LENGTH(iommu_option, IOMMU_OPTION, val64); TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS, __reserved); + TEST_LENGTH(iommu_ioas_map_file, IOMMU_IOAS_MAP_FILE, iova); + TEST_LENGTH(iommu_viommu_alloc, IOMMU_VIOMMU_ALLOC, out_viommu_id); + TEST_LENGTH(iommu_vdevice_alloc, IOMMU_VDEVICE_ALLOC, virt_id); + TEST_LENGTH(iommu_ioas_change_process, IOMMU_IOAS_CHANGE_PROCESS, + __reserved); #undef TEST_LENGTH } @@ -186,6 +196,144 @@ TEST_F(iommufd, global_options) EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_OPTION, &cmd)); } +static void drop_cap_ipc_lock(struct __test_metadata *_metadata) +{ + cap_t caps; + cap_value_t cap_list[1] = { CAP_IPC_LOCK }; + + caps = cap_get_proc(); + ASSERT_NE(caps, NULL); + ASSERT_NE(-1, + cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR)); + ASSERT_NE(-1, cap_set_proc(caps)); + cap_free(caps); +} + +static long get_proc_status_value(pid_t pid, const char *var) +{ + FILE *fp; + char buf[80], tag[80]; + long val = -1; + + snprintf(buf, sizeof(buf), "/proc/%d/status", pid); + fp = fopen(buf, "r"); + if (!fp) + return val; + + while (fgets(buf, sizeof(buf), fp)) + if (fscanf(fp, "%s %ld\n", tag, &val) == 2 && !strcmp(tag, var)) + break; + + fclose(fp); + return val; +} + +static long get_vm_pinned(pid_t pid) +{ + return get_proc_status_value(pid, "VmPin:"); +} + +static long get_vm_locked(pid_t pid) +{ + return get_proc_status_value(pid, "VmLck:"); +} + +FIXTURE(change_process) +{ + int fd; + uint32_t ioas_id; +}; + +FIXTURE_VARIANT(change_process) +{ + int accounting; +}; + +FIXTURE_SETUP(change_process) +{ + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + + drop_cap_ipc_lock(_metadata); + if (variant->accounting != IOPT_PAGES_ACCOUNT_NONE) { + struct iommu_option set_limit_cmd = { + .size = sizeof(set_limit_cmd), + .option_id = IOMMU_OPTION_RLIMIT_MODE, + .op = IOMMU_OPTION_OP_SET, + .val64 = (variant->accounting == IOPT_PAGES_ACCOUNT_MM), + }; + ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &set_limit_cmd)); + } + + test_ioctl_ioas_alloc(&self->ioas_id); + test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL); +} + +FIXTURE_TEARDOWN(change_process) +{ + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(change_process, account_none) +{ + .accounting = IOPT_PAGES_ACCOUNT_NONE, +}; + +FIXTURE_VARIANT_ADD(change_process, account_user) +{ + .accounting = IOPT_PAGES_ACCOUNT_USER, +}; + +FIXTURE_VARIANT_ADD(change_process, account_mm) +{ + .accounting = IOPT_PAGES_ACCOUNT_MM, +}; + +TEST_F(change_process, basic) +{ + pid_t parent = getpid(); + pid_t child; + __u64 iova; + struct iommu_ioas_change_process cmd = { + .size = sizeof(cmd), + }; + + /* Expect failure if non-file maps exist */ + test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova); + EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_IOAS_CHANGE_PROCESS, &cmd)); + test_ioctl_ioas_unmap(iova, PAGE_SIZE); + + /* Change process works in current process. */ + test_ioctl_ioas_map_file(mfd, 0, PAGE_SIZE, &iova); + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_CHANGE_PROCESS, &cmd)); + + /* Change process works in another process */ + child = fork(); + if (!child) { + int nlock = PAGE_SIZE / 1024; + + /* Parent accounts for locked memory before */ + ASSERT_EQ(nlock, get_vm_pinned(parent)); + if (variant->accounting == IOPT_PAGES_ACCOUNT_MM) + ASSERT_EQ(nlock, get_vm_locked(parent)); + ASSERT_EQ(0, get_vm_pinned(getpid())); + ASSERT_EQ(0, get_vm_locked(getpid())); + + ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_CHANGE_PROCESS, &cmd)); + + /* Child accounts for locked memory after */ + ASSERT_EQ(0, get_vm_pinned(parent)); + ASSERT_EQ(0, get_vm_locked(parent)); + ASSERT_EQ(nlock, get_vm_pinned(getpid())); + if (variant->accounting == IOPT_PAGES_ACCOUNT_MM) + ASSERT_EQ(nlock, get_vm_locked(getpid())); + + exit(0); + } + ASSERT_NE(-1, child); + ASSERT_EQ(child, waitpid(child, NULL, 0)); +} + FIXTURE(iommufd_ioas) { int fd; @@ -220,6 +368,8 @@ FIXTURE_SETUP(iommufd_ioas) for (i = 0; i != variant->mock_domains; i++) { test_cmd_mock_domain(self->ioas_id, &self->stdev_id, &self->hwpt_id, &self->device_id); + test_cmd_dev_check_cache_all(self->device_id, + IOMMU_TEST_DEV_CACHE_DEFAULT); self->base_iova = MOCK_APERTURE_START; } } @@ -360,9 +510,9 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested) EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, parent_hwpt_id)); - /* hwpt_invalidate only supports a user-managed hwpt (nested) */ + /* hwpt_invalidate does not support a parent hwpt */ num_inv = 1; - test_err_hwpt_invalidate(ENOENT, parent_hwpt_id, inv_reqs, + test_err_hwpt_invalidate(EINVAL, parent_hwpt_id, inv_reqs, IOMMU_HWPT_INVALIDATE_DATA_SELFTEST, sizeof(*inv_reqs), &num_inv); assert(!num_inv); @@ -1372,6 +1522,7 @@ FIXTURE_VARIANT(iommufd_mock_domain) { unsigned int mock_domains; bool hugepages; + bool file; }; FIXTURE_SETUP(iommufd_mock_domain) @@ -1384,9 +1535,12 @@ FIXTURE_SETUP(iommufd_mock_domain) ASSERT_GE(ARRAY_SIZE(self->hwpt_ids), variant->mock_domains); - for (i = 0; i != variant->mock_domains; i++) + for (i = 0; i != variant->mock_domains; i++) { test_cmd_mock_domain(self->ioas_id, &self->stdev_ids[i], &self->hwpt_ids[i], &self->idev_ids[i]); + test_cmd_dev_check_cache_all(self->idev_ids[0], + IOMMU_TEST_DEV_CACHE_DEFAULT); + } self->hwpt_id = self->hwpt_ids[0]; self->mmap_flags = MAP_SHARED | MAP_ANONYMOUS; @@ -1410,26 +1564,45 @@ FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain) { .mock_domains = 1, .hugepages = false, + .file = false, }; FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains) { .mock_domains = 2, .hugepages = false, + .file = false, }; FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_hugepage) { .mock_domains = 1, .hugepages = true, + .file = false, }; FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains_hugepage) { .mock_domains = 2, .hugepages = true, + .file = false, }; +FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_file) +{ + .mock_domains = 1, + .hugepages = false, + .file = true, +}; + +FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_file_hugepage) +{ + .mock_domains = 1, + .hugepages = true, + .file = true, +}; + + /* Have the kernel check that the user pages made it to the iommu_domain */ #define check_mock_iova(_ptr, _iova, _length) \ ({ \ @@ -1455,7 +1628,10 @@ FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains_hugepage) } \ }) -TEST_F(iommufd_mock_domain, basic) +static void +test_basic_mmap(struct __test_metadata *_metadata, + struct _test_data_iommufd_mock_domain *self, + const struct _fixture_variant_iommufd_mock_domain *variant) { size_t buf_size = self->mmap_buf_size; uint8_t *buf; @@ -1478,6 +1654,40 @@ TEST_F(iommufd_mock_domain, basic) test_err_ioctl_ioas_map(EFAULT, buf, buf_size, &iova); } +static void +test_basic_file(struct __test_metadata *_metadata, + struct _test_data_iommufd_mock_domain *self, + const struct _fixture_variant_iommufd_mock_domain *variant) +{ + size_t buf_size = self->mmap_buf_size; + uint8_t *buf; + __u64 iova; + int mfd_tmp; + int prot = PROT_READ | PROT_WRITE; + + /* Simple one page map */ + test_ioctl_ioas_map_file(mfd, 0, PAGE_SIZE, &iova); + check_mock_iova(mfd_buffer, iova, PAGE_SIZE); + + buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd_tmp); + ASSERT_NE(MAP_FAILED, buf); + + test_err_ioctl_ioas_map_file(EINVAL, mfd_tmp, 0, buf_size + 1, &iova); + + ASSERT_EQ(0, ftruncate(mfd_tmp, 0)); + test_err_ioctl_ioas_map_file(EINVAL, mfd_tmp, 0, buf_size, &iova); + + close(mfd_tmp); +} + +TEST_F(iommufd_mock_domain, basic) +{ + if (variant->file) + test_basic_file(_metadata, self, variant); + else + test_basic_mmap(_metadata, self, variant); +} + TEST_F(iommufd_mock_domain, ro_unshare) { uint8_t *buf; @@ -1513,9 +1723,13 @@ TEST_F(iommufd_mock_domain, all_aligns) unsigned int start; unsigned int end; uint8_t *buf; + int prot = PROT_READ | PROT_WRITE; + int mfd; - buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1, - 0); + if (variant->file) + buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); + else + buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); check_refs(buf, buf_size, 0); @@ -1532,7 +1746,12 @@ TEST_F(iommufd_mock_domain, all_aligns) size_t length = end - start; __u64 iova; - test_ioctl_ioas_map(buf + start, length, &iova); + if (variant->file) { + test_ioctl_ioas_map_file(mfd, start, length, + &iova); + } else { + test_ioctl_ioas_map(buf + start, length, &iova); + } check_mock_iova(buf + start, iova, length); check_refs(buf + start / PAGE_SIZE * PAGE_SIZE, end / PAGE_SIZE * PAGE_SIZE - @@ -1544,6 +1763,8 @@ TEST_F(iommufd_mock_domain, all_aligns) } check_refs(buf, buf_size, 0); ASSERT_EQ(0, munmap(buf, buf_size)); + if (variant->file) + close(mfd); } TEST_F(iommufd_mock_domain, all_aligns_copy) @@ -1554,9 +1775,13 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) unsigned int start; unsigned int end; uint8_t *buf; + int prot = PROT_READ | PROT_WRITE; + int mfd; - buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1, - 0); + if (variant->file) + buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); + else + buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); check_refs(buf, buf_size, 0); @@ -1575,7 +1800,12 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) uint32_t mock_stdev_id; __u64 iova; - test_ioctl_ioas_map(buf + start, length, &iova); + if (variant->file) { + test_ioctl_ioas_map_file(mfd, start, length, + &iova); + } else { + test_ioctl_ioas_map(buf + start, length, &iova); + } /* Add and destroy a domain while the area exists */ old_id = self->hwpt_ids[1]; @@ -1596,15 +1826,18 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) } check_refs(buf, buf_size, 0); ASSERT_EQ(0, munmap(buf, buf_size)); + if (variant->file) + close(mfd); } TEST_F(iommufd_mock_domain, user_copy) { + void *buf = variant->file ? mfd_buffer : buffer; struct iommu_test_cmd access_cmd = { .size = sizeof(access_cmd), .op = IOMMU_TEST_OP_ACCESS_PAGES, .access_pages = { .length = BUFFER_SIZE, - .uptr = (uintptr_t)buffer }, + .uptr = (uintptr_t)buf }, }; struct iommu_ioas_copy copy_cmd = { .size = sizeof(copy_cmd), @@ -1623,9 +1856,13 @@ TEST_F(iommufd_mock_domain, user_copy) /* Pin the pages in an IOAS with no domains then copy to an IOAS with domains */ test_ioctl_ioas_alloc(&ioas_id); - test_ioctl_ioas_map_id(ioas_id, buffer, BUFFER_SIZE, - ©_cmd.src_iova); - + if (variant->file) { + test_ioctl_ioas_map_id_file(ioas_id, mfd, 0, BUFFER_SIZE, + ©_cmd.src_iova); + } else { + test_ioctl_ioas_map_id(ioas_id, buf, BUFFER_SIZE, + ©_cmd.src_iova); + } test_cmd_create_access(ioas_id, &access_cmd.id, MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES); @@ -1635,12 +1872,17 @@ TEST_F(iommufd_mock_domain, user_copy) &access_cmd)); copy_cmd.src_ioas_id = ioas_id; ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); - check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE); + check_mock_iova(buf, MOCK_APERTURE_START, BUFFER_SIZE); /* Now replace the ioas with a new one */ test_ioctl_ioas_alloc(&new_ioas_id); - test_ioctl_ioas_map_id(new_ioas_id, buffer, BUFFER_SIZE, - ©_cmd.src_iova); + if (variant->file) { + test_ioctl_ioas_map_id_file(new_ioas_id, mfd, 0, BUFFER_SIZE, + ©_cmd.src_iova); + } else { + test_ioctl_ioas_map_id(new_ioas_id, buf, BUFFER_SIZE, + ©_cmd.src_iova); + } test_cmd_access_replace_ioas(access_cmd.id, new_ioas_id); /* Destroy the old ioas and cleanup copied mapping */ @@ -1654,7 +1896,7 @@ TEST_F(iommufd_mock_domain, user_copy) &access_cmd)); copy_cmd.src_ioas_id = new_ioas_id; ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, ©_cmd)); - check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE); + check_mock_iova(buf, MOCK_APERTURE_START, BUFFER_SIZE); test_cmd_destroy_access_pages( access_cmd.id, access_cmd.access_pages.out_access_pages_id); @@ -2386,4 +2628,332 @@ TEST_F(vfio_compat_mock_domain, huge_map) } } +FIXTURE(iommufd_viommu) +{ + int fd; + uint32_t ioas_id; + uint32_t stdev_id; + uint32_t hwpt_id; + uint32_t nested_hwpt_id; + uint32_t device_id; + uint32_t viommu_id; +}; + +FIXTURE_VARIANT(iommufd_viommu) +{ + unsigned int viommu; +}; + +FIXTURE_SETUP(iommufd_viommu) +{ + self->fd = open("/dev/iommu", O_RDWR); + ASSERT_NE(-1, self->fd); + test_ioctl_ioas_alloc(&self->ioas_id); + test_ioctl_set_default_memory_limit(); + + if (variant->viommu) { + struct iommu_hwpt_selftest data = { + .iotlb = IOMMU_TEST_IOTLB_DEFAULT, + }; + + test_cmd_mock_domain(self->ioas_id, &self->stdev_id, NULL, + &self->device_id); + + /* Allocate a nesting parent hwpt */ + test_cmd_hwpt_alloc(self->device_id, self->ioas_id, + IOMMU_HWPT_ALLOC_NEST_PARENT, + &self->hwpt_id); + + /* Allocate a vIOMMU taking refcount of the parent hwpt */ + test_cmd_viommu_alloc(self->device_id, self->hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, + &self->viommu_id); + + /* Allocate a regular nested hwpt */ + test_cmd_hwpt_alloc_nested(self->device_id, self->viommu_id, 0, + &self->nested_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + } +} + +FIXTURE_TEARDOWN(iommufd_viommu) +{ + teardown_iommufd(self->fd, _metadata); +} + +FIXTURE_VARIANT_ADD(iommufd_viommu, no_viommu) +{ + .viommu = 0, +}; + +FIXTURE_VARIANT_ADD(iommufd_viommu, mock_viommu) +{ + .viommu = 1, +}; + +TEST_F(iommufd_viommu, viommu_auto_destroy) +{ +} + +TEST_F(iommufd_viommu, viommu_negative_tests) +{ + uint32_t device_id = self->device_id; + uint32_t ioas_id = self->ioas_id; + uint32_t hwpt_id; + + if (self->device_id) { + /* Negative test -- invalid hwpt (hwpt_id=0) */ + test_err_viommu_alloc(ENOENT, device_id, 0, + IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + + /* Negative test -- not a nesting parent hwpt */ + test_cmd_hwpt_alloc(device_id, ioas_id, 0, &hwpt_id); + test_err_viommu_alloc(EINVAL, device_id, hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + test_ioctl_destroy(hwpt_id); + + /* Negative test -- unsupported viommu type */ + test_err_viommu_alloc(EOPNOTSUPP, device_id, self->hwpt_id, + 0xdead, NULL); + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, self->hwpt_id)); + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, self->viommu_id)); + } else { + test_err_viommu_alloc(ENOENT, self->device_id, self->hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, NULL); + } +} + +TEST_F(iommufd_viommu, viommu_alloc_nested_iopf) +{ + struct iommu_hwpt_selftest data = { + .iotlb = IOMMU_TEST_IOTLB_DEFAULT, + }; + uint32_t viommu_id = self->viommu_id; + uint32_t dev_id = self->device_id; + uint32_t iopf_hwpt_id; + uint32_t fault_id; + uint32_t fault_fd; + + if (self->device_id) { + test_ioctl_fault_alloc(&fault_id, &fault_fd); + test_err_hwpt_alloc_iopf( + ENOENT, dev_id, viommu_id, UINT32_MAX, + IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + test_err_hwpt_alloc_iopf( + EOPNOTSUPP, dev_id, viommu_id, fault_id, + IOMMU_HWPT_FAULT_ID_VALID | (1 << 31), &iopf_hwpt_id, + IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data)); + test_cmd_hwpt_alloc_iopf( + dev_id, viommu_id, fault_id, IOMMU_HWPT_FAULT_ID_VALID, + &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST, &data, + sizeof(data)); + + test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id); + EXPECT_ERRNO(EBUSY, + _test_ioctl_destroy(self->fd, iopf_hwpt_id)); + test_cmd_trigger_iopf(dev_id, fault_fd); + + test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id); + test_ioctl_destroy(iopf_hwpt_id); + close(fault_fd); + test_ioctl_destroy(fault_id); + } +} + +TEST_F(iommufd_viommu, vdevice_alloc) +{ + uint32_t viommu_id = self->viommu_id; + uint32_t dev_id = self->device_id; + uint32_t vdev_id = 0; + + if (dev_id) { + /* Set vdev_id to 0x99, unset it, and set to 0x88 */ + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + test_err_vdevice_alloc(EEXIST, viommu_id, dev_id, 0x99, + &vdev_id); + test_ioctl_destroy(vdev_id); + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x88, &vdev_id); + test_ioctl_destroy(vdev_id); + } else { + test_err_vdevice_alloc(ENOENT, viommu_id, dev_id, 0x99, NULL); + } +} + +TEST_F(iommufd_viommu, vdevice_cache) +{ + struct iommu_viommu_invalidate_selftest inv_reqs[2] = {}; + uint32_t viommu_id = self->viommu_id; + uint32_t dev_id = self->device_id; + uint32_t vdev_id = 0; + uint32_t num_inv; + + if (dev_id) { + test_cmd_vdevice_alloc(viommu_id, dev_id, 0x99, &vdev_id); + + test_cmd_dev_check_cache_all(dev_id, + IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Check data_type by passing zero-length array */ + num_inv = 0; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: Invalid data_type */ + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST_INVALID, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: structure size sanity */ + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs) + 1, &num_inv); + assert(!num_inv); + + num_inv = 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + 1, &num_inv); + assert(!num_inv); + + /* Negative test: invalid flag is passed */ + num_inv = 1; + inv_reqs[0].flags = 0xffffffff; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid data_uptr when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EINVAL, viommu_id, NULL, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid entry_len when array is not empty */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + 0, &num_inv); + assert(!num_inv); + + /* Negative test: invalid cache_id */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* Negative test: invalid vdev_id */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x9; + inv_reqs[0].cache_id = 0; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(!num_inv); + + /* + * Invalidate the 1st cache entry but fail the 2nd request + * due to invalid flags configuration in the 2nd request. + */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 0; + inv_reqs[1].flags = 0xffffffff; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = 1; + test_err_viommu_invalidate(EOPNOTSUPP, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, + IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 2, + IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, + IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* + * Invalidate the 1st cache entry but fail the 2nd request + * due to invalid cache_id configuration in the 2nd request. + */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 0; + inv_reqs[1].flags = 0; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = MOCK_DEV_CACHE_ID_MAX + 1; + test_err_viommu_invalidate(EINVAL, viommu_id, inv_reqs, + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, + IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 2, + IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, + IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Invalidate the 2nd cache entry and verify */ + num_inv = 1; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 1; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache(dev_id, 0, 0); + test_cmd_dev_check_cache(dev_id, 1, 0); + test_cmd_dev_check_cache(dev_id, 2, + IOMMU_TEST_DEV_CACHE_DEFAULT); + test_cmd_dev_check_cache(dev_id, 3, + IOMMU_TEST_DEV_CACHE_DEFAULT); + + /* Invalidate the 3rd and 4th cache entries and verify */ + num_inv = 2; + inv_reqs[0].flags = 0; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].cache_id = 2; + inv_reqs[1].flags = 0; + inv_reqs[1].vdev_id = 0x99; + inv_reqs[1].cache_id = 3; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 2); + test_cmd_dev_check_cache_all(dev_id, 0); + + /* Invalidate all cache entries for nested_dev_id[1] and verify */ + num_inv = 1; + inv_reqs[0].vdev_id = 0x99; + inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL; + test_cmd_viommu_invalidate(viommu_id, inv_reqs, + sizeof(*inv_reqs), &num_inv); + assert(num_inv == 1); + test_cmd_dev_check_cache_all(dev_id, 0); + test_ioctl_destroy(vdev_id); + } +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c index c5d5e69452b0..22f6fd5f0f74 100644 --- a/tools/testing/selftests/iommu/iommufd_fail_nth.c +++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c @@ -47,6 +47,9 @@ static __attribute__((constructor)) void setup_buffer(void) buffer = mmap(0, BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); + + mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, + &mfd); } /* @@ -331,6 +334,42 @@ TEST_FAIL_NTH(basic_fail_nth, map_domain) return 0; } +/* iopt_area_fill_domains() and iopt_area_fill_domain() */ +TEST_FAIL_NTH(basic_fail_nth, map_file_domain) +{ + uint32_t ioas_id; + __u32 stdev_id; + __u32 hwpt_id; + __u64 iova; + + self->fd = open("/dev/iommu", O_RDWR); + if (self->fd == -1) + return -1; + + if (_test_ioctl_ioas_alloc(self->fd, &ioas_id)) + return -1; + + if (_test_ioctl_set_temp_memory_limit(self->fd, 32)) + return -1; + + fail_nth_enable(); + + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) + return -1; + + if (_test_ioctl_ioas_map_file(self->fd, ioas_id, mfd, 0, 262144, &iova, + IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) + return -1; + + if (_test_ioctl_destroy(self->fd, stdev_id)) + return -1; + + if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL)) + return -1; + return 0; +} + TEST_FAIL_NTH(basic_fail_nth, map_two_domains) { uint32_t ioas_id; @@ -582,6 +621,8 @@ TEST_FAIL_NTH(basic_fail_nth, device) uint32_t stdev_id; uint32_t idev_id; uint32_t hwpt_id; + uint32_t viommu_id; + uint32_t vdev_id; __u64 iova; self->fd = open("/dev/iommu", O_RDWR); @@ -624,6 +665,19 @@ TEST_FAIL_NTH(basic_fail_nth, device) if (_test_cmd_mock_domain_replace(self->fd, stdev_id, hwpt_id, NULL)) return -1; + + if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, + IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id, + IOMMU_HWPT_DATA_NONE, 0, 0)) + return -1; + + if (_test_cmd_viommu_alloc(self->fd, idev_id, hwpt_id, + IOMMU_VIOMMU_TYPE_SELFTEST, 0, &viommu_id)) + return -1; + + if (_test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, 0, &vdev_id)) + return -1; + return 0; } diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 40f6f14ce136..d979f5b0efe8 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -22,6 +22,12 @@ #define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG)) #define BIT_WORD(nr) ((nr) / __BITS_PER_LONG) +enum { + IOPT_PAGES_ACCOUNT_NONE = 0, + IOPT_PAGES_ACCOUNT_USER = 1, + IOPT_PAGES_ACCOUNT_MM = 2, +}; + #define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d)) static inline void set_bit(unsigned int nr, unsigned long *addr) @@ -40,12 +46,28 @@ static inline bool test_bit(unsigned int nr, unsigned long *addr) static void *buffer; static unsigned long BUFFER_SIZE; +static void *mfd_buffer; +static int mfd; + static unsigned long PAGE_SIZE; #define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) +static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p) +{ + int mfd_flags = (flags & MAP_HUGETLB) ? MFD_HUGETLB : 0; + int mfd = memfd_create("buffer", mfd_flags); + + if (mfd <= 0) + return MAP_FAILED; + if (ftruncate(mfd, length)) + return MAP_FAILED; + *mfd_p = mfd; + return mmap(0, length, prot, flags, mfd, 0); +} + /* * Have the kernel check the refcount on pages. I don't know why a freshly * mmap'd anon non-compound page starts out with a ref of 3 @@ -234,6 +256,30 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, __u32 ft_i test_cmd_hwpt_check_iotlb(hwpt_id, i, expected); \ }) +#define test_cmd_dev_check_cache(device_id, cache_id, expected) \ + ({ \ + struct iommu_test_cmd test_cmd = { \ + .size = sizeof(test_cmd), \ + .op = IOMMU_TEST_OP_DEV_CHECK_CACHE, \ + .id = device_id, \ + .check_dev_cache = { \ + .id = cache_id, \ + .cache = expected, \ + }, \ + }; \ + ASSERT_EQ(0, ioctl(self->fd, \ + _IOMMU_TEST_CMD( \ + IOMMU_TEST_OP_DEV_CHECK_CACHE), \ + &test_cmd)); \ + }) + +#define test_cmd_dev_check_cache_all(device_id, expected) \ + ({ \ + int c; \ + for (c = 0; c < MOCK_DEV_CACHE_NUM; c++) \ + test_cmd_dev_check_cache(device_id, c, expected); \ + }) + static int _test_cmd_hwpt_invalidate(int fd, __u32 hwpt_id, void *reqs, uint32_t data_type, uint32_t lreq, uint32_t *nreqs) @@ -265,6 +311,38 @@ static int _test_cmd_hwpt_invalidate(int fd, __u32 hwpt_id, void *reqs, data_type, lreq, nreqs)); \ }) +static int _test_cmd_viommu_invalidate(int fd, __u32 viommu_id, void *reqs, + uint32_t data_type, uint32_t lreq, + uint32_t *nreqs) +{ + struct iommu_hwpt_invalidate cmd = { + .size = sizeof(cmd), + .hwpt_id = viommu_id, + .data_type = data_type, + .data_uptr = (uint64_t)reqs, + .entry_len = lreq, + .entry_num = *nreqs, + }; + int rc = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cmd); + *nreqs = cmd.entry_num; + return rc; +} + +#define test_cmd_viommu_invalidate(viommu, reqs, lreq, nreqs) \ + ({ \ + ASSERT_EQ(0, \ + _test_cmd_viommu_invalidate(self->fd, viommu, reqs, \ + IOMMU_VIOMMU_INVALIDATE_DATA_SELFTEST, \ + lreq, nreqs)); \ + }) +#define test_err_viommu_invalidate(_errno, viommu_id, reqs, data_type, lreq, \ + nreqs) \ + ({ \ + EXPECT_ERRNO(_errno, _test_cmd_viommu_invalidate( \ + self->fd, viommu_id, reqs, \ + data_type, lreq, nreqs)); \ + }) + static int _test_cmd_access_replace_ioas(int fd, __u32 access_id, unsigned int ioas_id) { @@ -589,6 +667,47 @@ static int _test_ioctl_ioas_unmap(int fd, unsigned int ioas_id, uint64_t iova, EXPECT_ERRNO(_errno, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, \ iova, length, NULL)) +static int _test_ioctl_ioas_map_file(int fd, unsigned int ioas_id, int mfd, + size_t start, size_t length, __u64 *iova, + unsigned int flags) +{ + struct iommu_ioas_map_file cmd = { + .size = sizeof(cmd), + .flags = flags, + .ioas_id = ioas_id, + .fd = mfd, + .start = start, + .length = length, + }; + int ret; + + if (flags & IOMMU_IOAS_MAP_FIXED_IOVA) + cmd.iova = *iova; + + ret = ioctl(fd, IOMMU_IOAS_MAP_FILE, &cmd); + *iova = cmd.iova; + return ret; +} + +#define test_ioctl_ioas_map_file(mfd, start, length, iova_p) \ + ASSERT_EQ(0, \ + _test_ioctl_ioas_map_file( \ + self->fd, self->ioas_id, mfd, start, length, iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE)) + +#define test_err_ioctl_ioas_map_file(_errno, mfd, start, length, iova_p) \ + EXPECT_ERRNO( \ + _errno, \ + _test_ioctl_ioas_map_file( \ + self->fd, self->ioas_id, mfd, start, length, iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE)) + +#define test_ioctl_ioas_map_id_file(ioas_id, mfd, start, length, iova_p) \ + ASSERT_EQ(0, \ + _test_ioctl_ioas_map_file( \ + self->fd, ioas_id, mfd, start, length, iova_p, \ + IOMMU_IOAS_MAP_WRITEABLE | IOMMU_IOAS_MAP_READABLE)) + static int _test_ioctl_set_temp_memory_limit(int fd, unsigned int limit) { struct iommu_test_cmd memlimit_cmd = { @@ -762,3 +881,58 @@ static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd) #define test_cmd_trigger_iopf(device_id, fault_fd) \ ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, fault_fd)) + +static int _test_cmd_viommu_alloc(int fd, __u32 device_id, __u32 hwpt_id, + __u32 type, __u32 flags, __u32 *viommu_id) +{ + struct iommu_viommu_alloc cmd = { + .size = sizeof(cmd), + .flags = flags, + .type = type, + .dev_id = device_id, + .hwpt_id = hwpt_id, + }; + int ret; + + ret = ioctl(fd, IOMMU_VIOMMU_ALLOC, &cmd); + if (ret) + return ret; + if (viommu_id) + *viommu_id = cmd.out_viommu_id; + return 0; +} + +#define test_cmd_viommu_alloc(device_id, hwpt_id, type, viommu_id) \ + ASSERT_EQ(0, _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \ + type, 0, viommu_id)) +#define test_err_viommu_alloc(_errno, device_id, hwpt_id, type, viommu_id) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_viommu_alloc(self->fd, device_id, hwpt_id, \ + type, 0, viommu_id)) + +static int _test_cmd_vdevice_alloc(int fd, __u32 viommu_id, __u32 idev_id, + __u64 virt_id, __u32 *vdev_id) +{ + struct iommu_vdevice_alloc cmd = { + .size = sizeof(cmd), + .dev_id = idev_id, + .viommu_id = viommu_id, + .virt_id = virt_id, + }; + int ret; + + ret = ioctl(fd, IOMMU_VDEVICE_ALLOC, &cmd); + if (ret) + return ret; + if (vdev_id) + *vdev_id = cmd.out_vdevice_id; + return 0; +} + +#define test_cmd_vdevice_alloc(viommu_id, idev_id, virt_id, vdev_id) \ + ASSERT_EQ(0, _test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \ + virt_id, vdev_id)) +#define test_err_vdevice_alloc(_errno, viommu_id, idev_id, virt_id, vdev_id) \ + EXPECT_ERRNO(_errno, \ + _test_cmd_vdevice_alloc(self->fd, viommu_id, idev_id, \ + virt_id, vdev_id)) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 960cf6a77198..41593d2e7de9 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -55,6 +55,7 @@ LIBKVM_aarch64 += lib/aarch64/vgic.c LIBKVM_s390x += lib/s390x/diag318_test_handler.c LIBKVM_s390x += lib/s390x/processor.c LIBKVM_s390x += lib/s390x/ucall.c +LIBKVM_s390x += lib/s390x/facility.c LIBKVM_riscv += lib/riscv/handlers.S LIBKVM_riscv += lib/riscv/processor.c @@ -67,7 +68,7 @@ TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test -TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features +TEST_GEN_PROGS_x86_64 += x86_64/feature_msrs_test TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test @@ -156,6 +157,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs TEST_GEN_PROGS_aarch64 += aarch64/arch_timer_edge_cases TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions TEST_GEN_PROGS_aarch64 += aarch64/hypercalls +TEST_GEN_PROGS_aarch64 += aarch64/mmio_abort TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test TEST_GEN_PROGS_aarch64 += aarch64/psci_test TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs @@ -189,6 +191,7 @@ TEST_GEN_PROGS_s390x += s390x/sync_regs_test TEST_GEN_PROGS_s390x += s390x/tprot TEST_GEN_PROGS_s390x += s390x/cmma_test TEST_GEN_PROGS_s390x += s390x/debug_test +TEST_GEN_PROGS_s390x += s390x/cpumodel_subfuncs_test TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test TEST_GEN_PROGS_s390x += s390x/ucontrol_test TEST_GEN_PROGS_s390x += demand_paging_test @@ -241,13 +244,18 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \ -fno-builtin-memcmp -fno-builtin-memcpy \ -fno-builtin-memset -fno-builtin-strnlen \ - -fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \ - -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ - -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \ - $(KHDR_INCLUDES) + -fno-stack-protector -fno-PIE -fno-strict-aliasing \ + -I$(LINUX_TOOL_INCLUDE) -I$(LINUX_TOOL_ARCH_INCLUDE) \ + -I$(LINUX_HDR_PATH) -Iinclude -I$(<D) -Iinclude/$(ARCH_DIR) \ + -I ../rseq -I.. $(EXTRA_CFLAGS) $(KHDR_INCLUDES) ifeq ($(ARCH),s390) CFLAGS += -march=z10 endif +ifeq ($(ARCH),x86) +ifeq ($(shell echo "void foo(void) { }" | $(CC) -march=x86-64-v2 -x c - -c -o /dev/null 2>/dev/null; echo "$$?"),0) + CFLAGS += -march=x86-64-v2 +endif +endif ifeq ($(ARCH),arm64) tools_dir := $(top_srcdir)/tools arm64_tools_dir := $(tools_dir)/arch/arm64/tools/ diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c index 2582c49e525a..ff7a949fc96a 100644 --- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c @@ -433,15 +433,15 @@ static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bp vcpu_init_descriptor_tables(vcpu); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_BRK_INS, guest_sw_bp_handler); + ESR_ELx_EC_BRK64, guest_sw_bp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_HW_BP_CURRENT, guest_hw_bp_handler); + ESR_ELx_EC_BREAKPT_CUR, guest_hw_bp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_WP_CURRENT, guest_wp_handler); + ESR_ELx_EC_WATCHPT_CUR, guest_wp_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_SSTEP_CURRENT, guest_ss_handler); + ESR_ELx_EC_SOFTSTP_CUR, guest_ss_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_SVC64, guest_svc_handler); + ESR_ELx_EC_SVC64, guest_svc_handler); /* Specify bpn/wpn/ctx_bpn to be tested */ vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn); diff --git a/tools/testing/selftests/kvm/aarch64/mmio_abort.c b/tools/testing/selftests/kvm/aarch64/mmio_abort.c new file mode 100644 index 000000000000..8b7a80a51b1c --- /dev/null +++ b/tools/testing/selftests/kvm/aarch64/mmio_abort.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * mmio_abort - Tests for userspace MMIO abort injection + * + * Copyright (c) 2024 Google LLC + */ +#include "processor.h" +#include "test_util.h" + +#define MMIO_ADDR 0x8000000ULL + +static u64 expected_abort_pc; + +static void expect_sea_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + + GUEST_DONE(); +} + +static void unexpected_dabt_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); +} + +static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, + handler_fn dabt_handler) +{ + struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(*vcpu); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); + + virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); + + return vm; +} + +static void vcpu_inject_extabt(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.ext_dabt_pending = true; + vcpu_events_set(vcpu, &events); +} + +static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) +{ + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} + +extern char test_mmio_abort_insn; + +static void test_mmio_abort_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); + + asm volatile("test_mmio_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_extabt(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +extern char test_mmio_nisv_insn; + +static void test_mmio_nisv_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); + + asm volatile("test_mmio_nisv_insn:\n\t" + "ldr x0, [%0], #8\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace + * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. + */ +static void test_mmio_nisv(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + unexpected_dabt_handler); + + TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); + TEST_ASSERT_EQ(errno, ENOSYS); + + kvm_vm_free(vm); +} + +/* + * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA + * reaches the guest. + */ +static void test_mmio_nisv_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); + TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); + + vcpu_inject_extabt(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +int main(void) +{ + test_mmio_abort(); + test_mmio_nisv(); + test_mmio_nisv_abort(); +} diff --git a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c index 943d65fc6b0b..58304bbc2036 100644 --- a/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c +++ b/tools/testing/selftests/kvm/aarch64/no-vgic-v3.c @@ -150,7 +150,7 @@ static void test_guest_no_gicv3(void) vcpu_init_descriptor_tables(vcpu); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_UNKNOWN, guest_undef_handler); + ESR_ELx_EC_UNKNOWN, guest_undef_handler); test_run_vcpu(vcpu); diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c index d29b08198b42..ec33a8f9c908 100644 --- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c +++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c @@ -544,9 +544,9 @@ static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu, vcpu_init_descriptor_tables(vcpu); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_DABT, no_dabt_handler); + ESR_ELx_EC_DABT_CUR, no_dabt_handler); vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, - ESR_EC_IABT, no_iabt_handler); + ESR_ELx_EC_IABT_CUR, no_iabt_handler); } static void setup_gva_maps(struct kvm_vm *vm) diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c index 61731a950def..eaa7655fefc1 100644 --- a/tools/testing/selftests/kvm/aarch64/psci_test.c +++ b/tools/testing/selftests/kvm/aarch64/psci_test.c @@ -54,6 +54,15 @@ static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id) return res.a0; } +static uint64_t psci_system_off2(uint64_t type, uint64_t cookie) +{ + struct arm_smccc_res res; + + smccc_hvc(PSCI_1_3_FN64_SYSTEM_OFF2, type, cookie, 0, 0, 0, 0, 0, &res); + + return res.a0; +} + static uint64_t psci_features(uint32_t func_id) { struct arm_smccc_res res; @@ -188,11 +197,94 @@ static void host_test_system_suspend(void) kvm_vm_free(vm); } +static void guest_test_system_off2(void) +{ + uint64_t ret; + + /* assert that SYSTEM_OFF2 is discoverable */ + GUEST_ASSERT(psci_features(PSCI_1_3_FN_SYSTEM_OFF2) & + PSCI_1_3_OFF_TYPE_HIBERNATE_OFF); + GUEST_ASSERT(psci_features(PSCI_1_3_FN64_SYSTEM_OFF2) & + PSCI_1_3_OFF_TYPE_HIBERNATE_OFF); + + /* With non-zero 'cookie' field, it should fail */ + ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 1); + GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS); + + /* + * This would normally never return, so KVM sets the return value + * to PSCI_RET_INTERNAL_FAILURE. The test case *does* return, so + * that it can test both values for HIBERNATE_OFF. + */ + ret = psci_system_off2(PSCI_1_3_OFF_TYPE_HIBERNATE_OFF, 0); + GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE); + + /* + * Revision F.b of the PSCI v1.3 specification documents zero as an + * alias for HIBERNATE_OFF, since that's the value used in earlier + * revisions of the spec and some implementations in the field. + */ + ret = psci_system_off2(0, 1); + GUEST_ASSERT(ret == PSCI_RET_INVALID_PARAMS); + + ret = psci_system_off2(0, 0); + GUEST_ASSERT(ret == PSCI_RET_INTERNAL_FAILURE); + + GUEST_DONE(); +} + +static void host_test_system_off2(void) +{ + struct kvm_vcpu *source, *target; + struct kvm_mp_state mps; + uint64_t psci_version = 0; + int nr_shutdowns = 0; + struct kvm_run *run; + struct ucall uc; + + setup_vm(guest_test_system_off2, &source, &target); + + vcpu_get_reg(target, KVM_REG_ARM_PSCI_VERSION, &psci_version); + + TEST_ASSERT(psci_version >= PSCI_VERSION(1, 3), + "Unexpected PSCI version %lu.%lu", + PSCI_VERSION_MAJOR(psci_version), + PSCI_VERSION_MINOR(psci_version)); + + vcpu_power_off(target); + run = source->run; + + enter_guest(source); + while (run->exit_reason == KVM_EXIT_SYSTEM_EVENT) { + TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SHUTDOWN, + "Unhandled system event: %u (expected: %u)", + run->system_event.type, KVM_SYSTEM_EVENT_SHUTDOWN); + TEST_ASSERT(run->system_event.ndata >= 1, + "Unexpected amount of system event data: %u (expected, >= 1)", + run->system_event.ndata); + TEST_ASSERT(run->system_event.data[0] & KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2, + "PSCI_OFF2 flag not set. Flags %llu (expected %llu)", + run->system_event.data[0], KVM_SYSTEM_EVENT_SHUTDOWN_FLAG_PSCI_OFF2); + + nr_shutdowns++; + + /* Restart the vCPU */ + mps.mp_state = KVM_MP_STATE_RUNNABLE; + vcpu_mp_state_set(source, &mps); + + enter_guest(source); + } + + TEST_ASSERT(get_ucall(source, &uc) == UCALL_DONE, "Guest did not exit cleanly"); + TEST_ASSERT(nr_shutdowns == 2, "Two shutdown events were expected, but saw %d", nr_shutdowns); +} + int main(void) { TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND)); host_test_cpu_on(); host_test_system_suspend(); + host_test_system_off2(); return 0; } diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c index 2a3fe7914b72..a79b7f18452d 100644 --- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c +++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c @@ -68,6 +68,8 @@ struct test_feature_reg { } static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = { + S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DoubleLock, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, WRPs, 0), S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP), REG_FTR_END, @@ -134,6 +136,13 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { REG_FTR_END, }; +static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0), + REG_FTR_END, +}; + static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0), @@ -200,6 +209,7 @@ static struct test_feature_reg test_regs[] = { TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1), TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1), TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1), + TEST_REG(SYS_ID_AA64PFR1_EL1, ftr_id_aa64pfr1_el1), TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1), TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1), @@ -433,6 +443,101 @@ static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only) } } +#define MPAM_IDREG_TEST 6 +static void test_user_set_mpam_reg(struct kvm_vcpu *vcpu) +{ + uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE]; + struct reg_mask_range range = { + .addr = (__u64)masks, + }; + uint64_t val; + int idx, err; + + /* + * If ID_AA64PFR0.MPAM is _not_ officially modifiable and is zero, + * check that if it can be set to 1, (i.e. it is supported by the + * hardware), that it can't be set to other values. + */ + + /* Get writable masks for feature ID registers */ + memset(range.reserved, 0, sizeof(range.reserved)); + vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range); + + /* Writeable? Nothing to test! */ + idx = encoding_to_range_idx(SYS_ID_AA64PFR0_EL1); + if ((masks[idx] & ID_AA64PFR0_EL1_MPAM_MASK) == ID_AA64PFR0_EL1_MPAM_MASK) { + ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is officially writable, nothing to test\n"); + return; + } + + /* Get the id register value */ + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val); + + /* Try to set MPAM=0. This should always be possible. */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM=0 was not accepted\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=0 worked\n"); + + /* Try to set MPAM=1 */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 1); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_skip("ID_AA64PFR0_EL1.MPAM is not writable, nothing to test\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM=1 was writable\n"); + + /* Try to set MPAM=2 */ + val &= ~ID_AA64PFR0_EL1_MPAM_MASK; + val |= FIELD_PREP(ID_AA64PFR0_EL1_MPAM_MASK, 2); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), val); + if (err) + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM not arbitrarily modifiable\n"); + else + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM value should not be ignored\n"); + + /* And again for ID_AA64PFR1_EL1.MPAM_frac */ + idx = encoding_to_range_idx(SYS_ID_AA64PFR1_EL1); + if ((masks[idx] & ID_AA64PFR1_EL1_MPAM_frac_MASK) == ID_AA64PFR1_EL1_MPAM_frac_MASK) { + ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is officially writable, nothing to test\n"); + return; + } + + /* Get the id register value */ + vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), &val); + + /* Try to set MPAM_frac=0. This should always be possible. */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 0); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_fail("ID_AA64PFR0_EL1.MPAM_frac=0 was not accepted\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=0 worked\n"); + + /* Try to set MPAM_frac=1 */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 1); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_skip("ID_AA64PFR1_EL1.MPAM_frac is not writable, nothing to test\n"); + else + ksft_test_result_pass("ID_AA64PFR0_EL1.MPAM_frac=1 was writable\n"); + + /* Try to set MPAM_frac=2 */ + val &= ~ID_AA64PFR1_EL1_MPAM_frac_MASK; + val |= FIELD_PREP(ID_AA64PFR1_EL1_MPAM_frac_MASK, 2); + err = __vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1), val); + if (err) + ksft_test_result_pass("ID_AA64PFR1_EL1.MPAM_frac not arbitrarily modifiable\n"); + else + ksft_test_result_fail("ID_AA64PFR1_EL1.MPAM_frac value should not be ignored\n"); +} + static void test_guest_reg_read(struct kvm_vcpu *vcpu) { bool done = false; @@ -569,14 +674,16 @@ int main(void) test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) + ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) + ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + - ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - - ARRAY_SIZE(test_regs) + 2; + ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 2 + + MPAM_IDREG_TEST; ksft_set_plan(test_cnt); test_vm_ftr_id_regs(vcpu, aarch64_only); test_vcpu_ftr_id_regs(vcpu); + test_user_set_mpam_reg(vcpu); test_guest_reg_read(vcpu); diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c index d31b9f64ba14..f9c0c86d7e85 100644 --- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c +++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c @@ -300,7 +300,7 @@ static void guest_sync_handler(struct ex_regs *regs) uint64_t esr, ec; esr = read_sysreg(esr_el1); - ec = (esr >> ESR_EC_SHIFT) & ESR_EC_MASK; + ec = ESR_ELx_EC(esr); __GUEST_ASSERT(expected_ec == ec, "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx", @@ -338,10 +338,10 @@ static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx) * Reading/writing the event count/type registers should cause * an UNDEFINED exception. */ - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_cntr(pmc_idx)); - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0)); - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_typer(pmc_idx)); - TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_typer(pmc_idx, 0)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_cntr(pmc_idx)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->read_typer(pmc_idx)); + TEST_EXCEPTION(ESR_ELx_EC_UNKNOWN, acc->write_typer(pmc_idx, 0)); /* * The bit corresponding to the (unimplemented) counter in * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ. @@ -425,7 +425,7 @@ static void create_vpmu_vm(void *guest_code) vpmu_vm.vm = vm_create(1); vm_init_descriptor_tables(vpmu_vm.vm); - for (ec = 0; ec < ESR_EC_NUM; ec++) { + for (ec = 0; ec < ESR_ELx_EC_MAX + 1; ec++) { vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec, guest_sync_handler); } diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c index ba0c8e996035..ce687f8d248f 100644 --- a/tools/testing/selftests/kvm/guest_memfd_test.c +++ b/tools/testing/selftests/kvm/guest_memfd_test.c @@ -134,7 +134,7 @@ static void test_create_guest_memfd_invalid(struct kvm_vm *vm) size); } - for (flag = 0; flag; flag <<= 1) { + for (flag = BIT(0); flag; flag <<= 1) { fd = __vm_create_guest_memfd(vm, page_size, flag); TEST_ASSERT(fd == -1 && errno == EINVAL, "guest_memfd() with flag '0x%lx' should fail with EINVAL", diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c index bce73bcb973c..94bd6ed24cf3 100644 --- a/tools/testing/selftests/kvm/hardware_disable_test.c +++ b/tools/testing/selftests/kvm/hardware_disable_test.c @@ -20,7 +20,6 @@ #define SLEEPING_THREAD_NUM (1 << 4) #define FORK_NUM (1ULL << 9) #define DELAY_US_MAX 2000 -#define GUEST_CODE_PIO_PORT 4 sem_t *sem; diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h index de977d131082..1e8d0d531fbd 100644 --- a/tools/testing/selftests/kvm/include/aarch64/processor.h +++ b/tools/testing/selftests/kvm/include/aarch64/processor.h @@ -12,6 +12,8 @@ #include <linux/stringify.h> #include <linux/types.h> +#include <asm/brk-imm.h> +#include <asm/esr.h> #include <asm/sysreg.h> @@ -100,19 +102,6 @@ enum { (v) == VECTOR_SYNC_LOWER_64 || \ (v) == VECTOR_SYNC_LOWER_32) -#define ESR_EC_NUM 64 -#define ESR_EC_SHIFT 26 -#define ESR_EC_MASK (ESR_EC_NUM - 1) - -#define ESR_EC_UNKNOWN 0x0 -#define ESR_EC_SVC64 0x15 -#define ESR_EC_IABT 0x21 -#define ESR_EC_DABT 0x25 -#define ESR_EC_HW_BP_CURRENT 0x31 -#define ESR_EC_SSTEP_CURRENT 0x33 -#define ESR_EC_WP_CURRENT 0x35 -#define ESR_EC_BRK_INS 0x3c - /* Access flag */ #define PTE_AF (1ULL << 10) diff --git a/tools/testing/selftests/kvm/include/s390x/facility.h b/tools/testing/selftests/kvm/include/s390x/facility.h new file mode 100644 index 000000000000..00a1ced6538b --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390x/facility.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright IBM Corp. 2024 + * + * Authors: + * Hariharan Mari <hari55@linux.ibm.com> + * + * Get the facility bits with the STFLE instruction + */ + +#ifndef SELFTEST_KVM_FACILITY_H +#define SELFTEST_KVM_FACILITY_H + +#include <linux/bitops.h> + +/* alt_stfle_fac_list[16] + stfle_fac_list[16] */ +#define NB_STFL_DOUBLEWORDS 32 + +extern uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +extern bool stfle_flag; + +static inline bool test_bit_inv(unsigned long nr, const unsigned long *ptr) +{ + return test_bit(nr ^ (BITS_PER_LONG - 1), ptr); +} + +static inline void stfle(uint64_t *fac, unsigned int nb_doublewords) +{ + register unsigned long r0 asm("0") = nb_doublewords - 1; + + asm volatile(" .insn s,0xb2b00000,0(%1)\n" + : "+d" (r0) + : "a" (fac) + : "memory", "cc"); +} + +static inline void setup_facilities(void) +{ + stfle(stfl_doublewords, NB_STFL_DOUBLEWORDS); + stfle_flag = true; +} + +static inline bool test_facility(int nr) +{ + if (!stfle_flag) + setup_facilities(); + return test_bit_inv(nr, stfl_doublewords); +} + +#endif diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h index 481bd2fd6a32..33fef6fd9617 100644 --- a/tools/testing/selftests/kvm/include/s390x/processor.h +++ b/tools/testing/selftests/kvm/include/s390x/processor.h @@ -32,4 +32,10 @@ static inline void cpu_relax(void) barrier(); } +/* Get the instruction length */ +static inline int insn_length(unsigned char code) +{ + return ((((int)code + 64) >> 7) + 1) << 1; +} + #endif diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index e247f99e0473..645200e95f89 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -1049,6 +1049,11 @@ static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu) vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); } +static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu) +{ + vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid); +} + void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_property property, uint32_t value); diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c index fe4dc3693112..698e34f39241 100644 --- a/tools/testing/selftests/kvm/lib/aarch64/processor.c +++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c @@ -450,7 +450,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) } struct handlers { - handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM]; + handler_fn exception_handlers[VECTOR_NUM][ESR_ELx_EC_MAX + 1]; }; void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu) @@ -469,7 +469,7 @@ void route_exception(struct ex_regs *regs, int vector) switch (vector) { case VECTOR_SYNC_CURRENT: case VECTOR_SYNC_LOWER_64: - ec = (read_sysreg(esr_el1) >> ESR_EC_SHIFT) & ESR_EC_MASK; + ec = ESR_ELx_EC(read_sysreg(esr_el1)); valid_ec = true; break; case VECTOR_IRQ_CURRENT: @@ -508,7 +508,7 @@ void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec, assert(VECTOR_IS_SYNC(vector)); assert(vector < VECTOR_NUM); - assert(ec < ESR_EC_NUM); + assert(ec <= ESR_ELx_EC_MAX); handlers->exception_handlers[vector][ec] = handler; } diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index a2b7df5f1d39..480e3a40d197 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -720,9 +720,6 @@ static void __vm_mem_region_delete(struct kvm_vm *vm, rb_erase(®ion->hva_node, &vm->regions.hva_tree); hash_del(®ion->slot_node); - region->region.memory_size = 0; - vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); - sparsebit_free(®ion->unused_phy_pages); sparsebit_free(®ion->protected_phy_pages); ret = munmap(region->mmap_start, region->mmap_size); @@ -1197,7 +1194,12 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) */ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) { - __vm_mem_region_delete(vm, memslot2region(vm, slot)); + struct userspace_mem_region *region = memslot2region(vm, slot); + + region->region.memory_size = 0; + vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); + + __vm_mem_region_delete(vm, region); } void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, diff --git a/tools/testing/selftests/kvm/lib/s390x/facility.c b/tools/testing/selftests/kvm/lib/s390x/facility.c new file mode 100644 index 000000000000..d540812d911a --- /dev/null +++ b/tools/testing/selftests/kvm/lib/s390x/facility.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright IBM Corp. 2024 + * + * Authors: + * Hariharan Mari <hari55@linux.ibm.com> + * + * Contains the definition for the global variables to have the test facitlity feature. + */ + +#include "facility.h" + +uint64_t stfl_doublewords[NB_STFL_DOUBLEWORDS]; +bool stfle_flag; diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 974bcd2df6d7..636b29ba8985 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -506,6 +506,8 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG; sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR; + if (kvm_cpu_has(X86_FEATURE_XSAVE)) + sregs.cr4 |= X86_CR4_OSXSAVE; sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX); kvm_seg_set_unusable(&sregs.ldt); @@ -519,6 +521,20 @@ static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) vcpu_sregs_set(vcpu, &sregs); } +static void vcpu_init_xcrs(struct kvm_vm *vm, struct kvm_vcpu *vcpu) +{ + struct kvm_xcrs xcrs = { + .nr_xcrs = 1, + .xcrs[0].xcr = 0, + .xcrs[0].value = kvm_cpu_supported_xcr0(), + }; + + if (!kvm_cpu_has(X86_FEATURE_XSAVE)) + return; + + vcpu_xcrs_set(vcpu, &xcrs); +} + static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr, int dpl, unsigned short selector) { @@ -675,6 +691,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) vcpu = __vm_vcpu_add(vm, vcpu_id); vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid()); vcpu_init_sregs(vm, vcpu); + vcpu_init_xcrs(vm, vcpu); /* Setup guest general purpose registers */ vcpu_regs_get(vcpu, ®s); @@ -686,6 +703,13 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) mp_state.mp_state = 0; vcpu_mp_state_set(vcpu, &mp_state); + /* + * Refresh CPUID after setting SREGS and XCR0, so that KVM's "runtime" + * updates to guest CPUID, e.g. for OSXSAVE and XSAVE state size, are + * reflected into selftests' vCPU CPUID cache, i.e. so that the cache + * is consistent with vCPU state. + */ + vcpu_get_cpuid(vcpu); return vcpu; } diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c index 089b8925b6b2..d7ac122820bf 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c +++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c @@ -200,7 +200,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx) if (vmx->eptp_gpa) { uint64_t ept_paddr; struct eptPageTablePointer eptp = { - .memory_type = VMX_BASIC_MEM_TYPE_WB, + .memory_type = X86_MEMTYPE_WB, .page_walk_length = 3, /* + 1 */ .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS), .address = vmx->eptp_gpa >> PAGE_SHIFT_4K, diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index e3343f0df9e1..c81a84990eab 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -169,12 +169,14 @@ int main(int argc, char *argv[]) case 'i': p.nr_iterations = atoi_positive("Number of iterations", optarg); break; +#ifdef __x86_64__ case 'q': p.disable_slot_zap_quirk = true; TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_SLOT_ZAP_ALL); break; +#endif case 'h': default: help(argv[0]); diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 893366982f77..e3711beff7f3 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -113,7 +113,9 @@ static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless"); static sem_t vcpu_ready; static bool map_unmap_verify; +#ifdef __x86_64__ static bool disable_slot_zap_quirk; +#endif static bool verbose; #define pr_info_v(...) \ @@ -415,7 +417,7 @@ static bool _guest_should_exit(void) */ static noinline void host_perform_sync(struct sync_area *sync) { - alarm(2); + alarm(10); atomic_store_explicit(&sync->sync_flag, true, memory_order_release); while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire)) @@ -579,8 +581,10 @@ static bool test_memslot_move_prepare(struct vm_data *data, uint32_t guest_page_size = data->vm->page_size; uint64_t movesrcgpa, movetestgpa; +#ifdef __x86_64__ if (disable_slot_zap_quirk) vm_enable_cap(data->vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); +#endif movesrcgpa = vm_slot2gpa(data, data->nslots - 1); @@ -971,11 +975,13 @@ static bool parse_args(int argc, char *argv[], case 'd': map_unmap_verify = true; break; +#ifdef __x86_64__ case 'q': disable_slot_zap_quirk = true; TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_SLOT_ZAP_ALL); break; +#endif case 's': targs->nslots = atoi_paranoid(optarg); if (targs->nslots <= 1 && targs->nslots != -1) { diff --git a/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c new file mode 100644 index 000000000000..27255880dabd --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/cpumodel_subfuncs_test.c @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright IBM Corp. 2024 + * + * Authors: + * Hariharan Mari <hari55@linux.ibm.com> + * + * The tests compare the result of the KVM ioctl for obtaining CPU subfunction data with those + * from an ASM block performing the same CPU subfunction. Currently KVM doesn't mask instruction + * query data reported via the CPU Model, allowing us to directly compare it with the data + * acquired through executing the queries in the test. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include "facility.h" + +#include "kvm_util.h" + +#define PLO_FUNCTION_MAX 256 + +/* Query available CPU subfunctions */ +struct kvm_s390_vm_cpu_subfunc cpu_subfunc; + +static void get_cpu_machine_subfuntions(struct kvm_vm *vm, + struct kvm_s390_vm_cpu_subfunc *cpu_subfunc) +{ + int r; + + r = __kvm_device_attr_get(vm->fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_MACHINE_SUBFUNC, cpu_subfunc); + + TEST_ASSERT(!r, "Get cpu subfunctions failed r=%d errno=%d", r, errno); +} + +static inline int plo_test_bit(unsigned char nr) +{ + unsigned long function = nr | 0x100; + int cc; + + asm volatile(" lgr 0,%[function]\n" + /* Parameter registers are ignored for "test bit" */ + " plo 0,0,0,0(0)\n" + " ipm %0\n" + " srl %0,28\n" + : "=d" (cc) + : [function] "d" (function) + : "cc", "0"); + return cc == 0; +} + +/* Testing Perform Locked Operation (PLO) CPU subfunction's ASM block */ +static void test_plo_asm_block(u8 (*query)[32]) +{ + for (int i = 0; i < PLO_FUNCTION_MAX; ++i) { + if (plo_test_bit(i)) + (*query)[i >> 3] |= 0x80 >> (i & 7); + } +} + +/* Testing Crypto Compute Message Authentication Code (KMAC) CPU subfunction's ASM block */ +static void test_kmac_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb91e0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Chaining (KMC) CPU subfunction's ASM block */ +static void test_kmc_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92f0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message (KM) CPU subfunction's ASM block */ +static void test_km_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92e0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Intermediate Message Digest (KIMD) CPU subfunction's ASM block */ +static void test_kimd_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93e0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Last Message Digest (KLMD) CPU subfunction's ASM block */ +static void test_klmd_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93f0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Counter (KMCTR) CPU subfunction's ASM block */ +static void test_kmctr_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rrf,0xb92d0000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Cipher Feedback (KMF) CPU subfunction's ASM block */ +static void test_kmf_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92a0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Output Feedback (KMO) CPU subfunction's ASM block */ +static void test_kmo_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92b0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Perform Cryptographic Computation (PCC) CPU subfunction's ASM block */ +static void test_pcc_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb92c0000,0,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Perform Random Number Operation (PRNO) CPU subfunction's ASM block */ +static void test_prno_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93c0000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Cipher Message with Authentication (KMA) CPU subfunction's ASM block */ +static void test_kma_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rrf,0xb9290000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Crypto Compute Digital Signature Authentication (KDSA) CPU subfunction's ASM block */ +static void test_kdsa_asm_block(u8 (*query)[16]) +{ + asm volatile(" la %%r1,%[query]\n" + " xgr %%r0,%%r0\n" + " .insn rre,0xb93a0000,0,2\n" + : [query] "=R" (*query) + : + : "cc", "r0", "r1"); +} + +/* Testing Sort Lists (SORTL) CPU subfunction's ASM block */ +static void test_sortl_asm_block(u8 (*query)[32]) +{ + asm volatile(" lghi 0,0\n" + " la 1,%[query]\n" + " .insn rre,0xb9380000,2,4\n" + : [query] "=R" (*query) + : + : "cc", "0", "1"); +} + +/* Testing Deflate Conversion Call (DFLTCC) CPU subfunction's ASM block */ +static void test_dfltcc_asm_block(u8 (*query)[32]) +{ + asm volatile(" lghi 0,0\n" + " la 1,%[query]\n" + " .insn rrf,0xb9390000,2,4,6,0\n" + : [query] "=R" (*query) + : + : "cc", "0", "1"); +} + +/* + * Testing Perform Function with Concurrent Results (PFCR) + * CPU subfunctions's ASM block + */ +static void test_pfcr_asm_block(u8 (*query)[16]) +{ + asm volatile(" lghi 0,0\n" + " .insn rsy,0xeb0000000016,0,0,%[query]\n" + : [query] "=QS" (*query) + : + : "cc", "0"); +} + +typedef void (*testfunc_t)(u8 (*array)[]); + +struct testdef { + const char *subfunc_name; + u8 *subfunc_array; + size_t array_size; + testfunc_t test; + int facility_bit; +} testlist[] = { + /* + * PLO was introduced in the very first 64-bit machine generation. + * Hence it is assumed PLO is always installed in Z Arch. + */ + { "PLO", cpu_subfunc.plo, sizeof(cpu_subfunc.plo), test_plo_asm_block, 1 }, + /* MSA - Facility bit 17 */ + { "KMAC", cpu_subfunc.kmac, sizeof(cpu_subfunc.kmac), test_kmac_asm_block, 17 }, + { "KMC", cpu_subfunc.kmc, sizeof(cpu_subfunc.kmc), test_kmc_asm_block, 17 }, + { "KM", cpu_subfunc.km, sizeof(cpu_subfunc.km), test_km_asm_block, 17 }, + { "KIMD", cpu_subfunc.kimd, sizeof(cpu_subfunc.kimd), test_kimd_asm_block, 17 }, + { "KLMD", cpu_subfunc.klmd, sizeof(cpu_subfunc.klmd), test_klmd_asm_block, 17 }, + /* MSA - Facility bit 77 */ + { "KMCTR", cpu_subfunc.kmctr, sizeof(cpu_subfunc.kmctr), test_kmctr_asm_block, 77 }, + { "KMF", cpu_subfunc.kmf, sizeof(cpu_subfunc.kmf), test_kmf_asm_block, 77 }, + { "KMO", cpu_subfunc.kmo, sizeof(cpu_subfunc.kmo), test_kmo_asm_block, 77 }, + { "PCC", cpu_subfunc.pcc, sizeof(cpu_subfunc.pcc), test_pcc_asm_block, 77 }, + /* MSA5 - Facility bit 57 */ + { "PPNO", cpu_subfunc.ppno, sizeof(cpu_subfunc.ppno), test_prno_asm_block, 57 }, + /* MSA8 - Facility bit 146 */ + { "KMA", cpu_subfunc.kma, sizeof(cpu_subfunc.kma), test_kma_asm_block, 146 }, + /* MSA9 - Facility bit 155 */ + { "KDSA", cpu_subfunc.kdsa, sizeof(cpu_subfunc.kdsa), test_kdsa_asm_block, 155 }, + /* SORTL - Facility bit 150 */ + { "SORTL", cpu_subfunc.sortl, sizeof(cpu_subfunc.sortl), test_sortl_asm_block, 150 }, + /* DFLTCC - Facility bit 151 */ + { "DFLTCC", cpu_subfunc.dfltcc, sizeof(cpu_subfunc.dfltcc), test_dfltcc_asm_block, 151 }, + /* Concurrent-function facility - Facility bit 201 */ + { "PFCR", cpu_subfunc.pfcr, sizeof(cpu_subfunc.pfcr), test_pfcr_asm_block, 201 }, +}; + +int main(int argc, char *argv[]) +{ + struct kvm_vm *vm; + int idx; + + ksft_print_header(); + + vm = vm_create(1); + + memset(&cpu_subfunc, 0, sizeof(cpu_subfunc)); + get_cpu_machine_subfuntions(vm, &cpu_subfunc); + + ksft_set_plan(ARRAY_SIZE(testlist)); + for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { + if (test_facility(testlist[idx].facility_bit)) { + u8 *array = malloc(testlist[idx].array_size); + + testlist[idx].test((u8 (*)[testlist[idx].array_size])array); + + TEST_ASSERT_EQ(memcmp(testlist[idx].subfunc_array, + array, testlist[idx].array_size), 0); + + ksft_test_result_pass("%s\n", testlist[idx].subfunc_name); + free(array); + } else { + ksft_test_result_skip("%s feature is not avaialable\n", + testlist[idx].subfunc_name); + } + } + + kvm_vm_free(vm); + ksft_finished(); +} diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390x/ucontrol_test.c index f257beec1430..0c112319dab1 100644 --- a/tools/testing/selftests/kvm/s390x/ucontrol_test.c +++ b/tools/testing/selftests/kvm/s390x/ucontrol_test.c @@ -16,7 +16,11 @@ #include <linux/capability.h> #include <linux/sizes.h> +#define PGM_SEGMENT_TRANSLATION 0x10 + #define VM_MEM_SIZE (4 * SZ_1M) +#define VM_MEM_EXT_SIZE (2 * SZ_1M) +#define VM_MEM_MAX_M ((VM_MEM_SIZE + VM_MEM_EXT_SIZE) / SZ_1M) /* so directly declare capget to check caps without libcap */ int capget(cap_user_header_t header, cap_user_data_t data); @@ -58,6 +62,50 @@ asm("test_gprs_asm:\n" " j 0b\n" ); +/* Test program manipulating memory */ +extern char test_mem_asm[]; +asm("test_mem_asm:\n" + "xgr %r0, %r0\n" + + "0:\n" + " ahi %r0,1\n" + " st %r1,0(%r5,%r6)\n" + + " xgr %r1,%r1\n" + " l %r1,0(%r5,%r6)\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " j 0b\n" +); + +/* Test program manipulating storage keys */ +extern char test_skey_asm[]; +asm("test_skey_asm:\n" + "xgr %r0, %r0\n" + + "0:\n" + " ahi %r0,1\n" + " st %r1,0(%r5,%r6)\n" + + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " sske %r1,%r6\n" + " xgr %r1,%r1\n" + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " rrbe %r1,%r6\n" + " iske %r1,%r6\n" + " ahi %r0,1\n" + " diag 0,0,0x44\n" + + " j 0b\n" +); + FIXTURE(uc_kvm) { struct kvm_s390_sie_block *sie_block; @@ -67,6 +115,7 @@ FIXTURE(uc_kvm) uintptr_t base_hva; uintptr_t code_hva; int kvm_run_size; + vm_paddr_t pgd; void *vm_mem; int vcpu_fd; int kvm_fd; @@ -116,7 +165,7 @@ FIXTURE_SETUP(uc_kvm) self->base_gpa = 0; self->code_gpa = self->base_gpa + (3 * SZ_1M); - self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_SIZE); + self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_MAX_M * SZ_1M); ASSERT_NE(NULL, self->vm_mem) TH_LOG("malloc failed %u", errno); self->base_hva = (uintptr_t)self->vm_mem; self->code_hva = self->base_hva - self->base_gpa + self->code_gpa; @@ -222,16 +271,112 @@ TEST(uc_cap_hpage) close(kvm_fd); } -/* verify SIEIC exit +/* calculate host virtual addr from guest physical addr */ +static void *gpa2hva(FIXTURE_DATA(uc_kvm) *self, u64 gpa) +{ + return (void *)(self->base_hva - self->base_gpa + gpa); +} + +/* map / make additional memory available */ +static int uc_map_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length) +{ + struct kvm_s390_ucas_mapping map = { + .user_addr = (u64)gpa2hva(self, vcpu_addr), + .vcpu_addr = vcpu_addr, + .length = length, + }; + pr_info("ucas map %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + return ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map); +} + +/* unmap previously mapped memory */ +static int uc_unmap_ext(FIXTURE_DATA(uc_kvm) *self, u64 vcpu_addr, u64 length) +{ + struct kvm_s390_ucas_mapping map = { + .user_addr = (u64)gpa2hva(self, vcpu_addr), + .vcpu_addr = vcpu_addr, + .length = length, + }; + pr_info("ucas unmap %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + return ioctl(self->vcpu_fd, KVM_S390_UCAS_UNMAP, &map); +} + +/* handle ucontrol exit by mapping the accessed segment */ +static void uc_handle_exit_ucontrol(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_run *run = self->run; + u64 seg_addr; + int rc; + + TEST_ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + switch (run->s390_ucontrol.pgm_code) { + case PGM_SEGMENT_TRANSLATION: + seg_addr = run->s390_ucontrol.trans_exc_code & ~(SZ_1M - 1); + pr_info("ucontrol pic segment translation 0x%llx, mapping segment 0x%lx\n", + run->s390_ucontrol.trans_exc_code, seg_addr); + /* map / make additional memory available */ + rc = uc_map_ext(self, seg_addr, SZ_1M); + TEST_ASSERT_EQ(0, rc); + break; + default: + TEST_FAIL("UNEXPECTED PGM CODE %d", run->s390_ucontrol.pgm_code); + } +} + +/* + * Handle the SIEIC exit + * * fail on codes not expected in the test cases + * Returns if interception is handled / execution can be continued + */ +static void uc_skey_enable(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + + /* disable KSS */ + sie_block->cpuflags &= ~CPUSTAT_KSS; + /* disable skey inst interception */ + sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); +} + +/* + * Handle the instruction intercept + * Returns if interception is handled / execution can be continued + */ +static bool uc_handle_insn_ic(FIXTURE_DATA(uc_kvm) *self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + int ilen = insn_length(sie_block->ipa >> 8); + struct kvm_run *run = self->run; + + switch (run->s390_sieic.ipa) { + case 0xB229: /* ISKE */ + case 0xB22b: /* SSKE */ + case 0xB22a: /* RRBE */ + uc_skey_enable(self); + + /* rewind to reexecute intercepted instruction */ + run->psw_addr = run->psw_addr - ilen; + pr_info("rewind guest addr to 0x%.16llx\n", run->psw_addr); + return true; + default: + return false; + } +} + +/* + * Handle the SIEIC exit * * fail on codes not expected in the test cases + * Returns if interception is handled / execution can be continued */ -static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) * self) +static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) *self) { struct kvm_s390_sie_block *sie_block = self->sie_block; struct kvm_run *run = self->run; /* check SIE interception code */ - pr_info("sieic: 0x%.2x 0x%.4x 0x%.4x\n", + pr_info("sieic: 0x%.2x 0x%.4x 0x%.8x\n", run->s390_sieic.icptcode, run->s390_sieic.ipa, run->s390_sieic.ipb); @@ -239,7 +384,10 @@ static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) * self) case ICPT_INST: /* end execution in caller on intercepted instruction */ pr_info("sie instruction interception\n"); - return false; + return uc_handle_insn_ic(self); + case ICPT_KSS: + uc_skey_enable(self); + return true; case ICPT_OPEREXC: /* operation exception */ TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb); @@ -250,11 +398,17 @@ static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) * self) } /* verify VM state on exit */ -static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) * self) +static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) *self) { struct kvm_run *run = self->run; switch (run->exit_reason) { + case KVM_EXIT_S390_UCONTROL: + /** check program interruption code + * handle page fault --> ucas map + */ + uc_handle_exit_ucontrol(self); + break; case KVM_EXIT_S390_SIEIC: return uc_handle_sieic(self); default: @@ -264,7 +418,7 @@ static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) * self) } /* run the VM until interrupted */ -static int uc_run_once(FIXTURE_DATA(uc_kvm) * self) +static int uc_run_once(FIXTURE_DATA(uc_kvm) *self) { int rc; @@ -275,7 +429,7 @@ static int uc_run_once(FIXTURE_DATA(uc_kvm) * self) return rc; } -static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) * self) +static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) *self) { struct kvm_s390_sie_block *sie_block = self->sie_block; @@ -286,6 +440,89 @@ static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) * self) TEST_ASSERT_EQ(0x440000, sie_block->ipb); } +TEST_F(uc_kvm, uc_no_user_region) +{ + struct kvm_userspace_memory_region region = { + .slot = 1, + .guest_phys_addr = self->code_gpa, + .memory_size = VM_MEM_EXT_SIZE, + .userspace_addr = (uintptr_t)self->code_hva, + }; + struct kvm_userspace_memory_region2 region2 = { + .slot = 1, + .guest_phys_addr = self->code_gpa, + .memory_size = VM_MEM_EXT_SIZE, + .userspace_addr = (uintptr_t)self->code_hva, + }; + + ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION, ®ion)); + ASSERT_EQ(EINVAL, errno); + + ASSERT_EQ(-1, ioctl(self->vm_fd, KVM_SET_USER_MEMORY_REGION2, ®ion2)); + ASSERT_EQ(EINVAL, errno); +} + +TEST_F(uc_kvm, uc_map_unmap) +{ + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + struct kvm_run *run = self->run; + const u64 disp = 1; + int rc; + + /* copy test_mem_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_mem_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_mem_asm, PAGE_SIZE); + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + /* set register content for test_mem_asm to access not mapped memory*/ + sync_regs->gprs[1] = 0x55; + sync_regs->gprs[5] = self->base_gpa; + sync_regs->gprs[6] = VM_MEM_SIZE + disp; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* run and expect to fail with ucontrol pic segment translation */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(1, sync_regs->gprs[0]); + ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + + ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code); + ASSERT_EQ(self->base_gpa + VM_MEM_SIZE, run->s390_ucontrol.trans_exc_code); + + /* fail to map memory with not segment aligned address */ + rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE + disp, VM_MEM_EXT_SIZE); + ASSERT_GT(0, rc) + TH_LOG("ucas map for non segment address should fail but didn't; " + "result %d not expected, %s", rc, strerror(errno)); + + /* map / make additional memory available */ + rc = uc_map_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE); + ASSERT_EQ(0, rc) + TH_LOG("ucas map result %d not expected, %s", rc, strerror(errno)); + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* assert registers and memory are in expected state */ + ASSERT_EQ(2, sync_regs->gprs[0]); + ASSERT_EQ(0x55, sync_regs->gprs[1]); + ASSERT_EQ(0x55, *(u32 *)gpa2hva(self, self->base_gpa + VM_MEM_SIZE + disp)); + + /* unmap and run loop again */ + rc = uc_unmap_ext(self, self->base_gpa + VM_MEM_SIZE, VM_MEM_EXT_SIZE); + ASSERT_EQ(0, rc) + TH_LOG("ucas unmap result %d not expected, %s", rc, strerror(errno)); + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(3, sync_regs->gprs[0]); + ASSERT_EQ(KVM_EXIT_S390_UCONTROL, run->exit_reason); + ASSERT_EQ(PGM_SEGMENT_TRANSLATION, run->s390_ucontrol.pgm_code); + /* handle ucontrol exit and remap memory after previous map and unmap */ + ASSERT_EQ(true, uc_handle_exit(self)); +} + TEST_F(uc_kvm, uc_gprs) { struct kvm_sync_regs *sync_regs = &self->run->s.regs; @@ -329,4 +566,73 @@ TEST_F(uc_kvm, uc_gprs) ASSERT_EQ(1, sync_regs->gprs[0]); } +TEST_F(uc_kvm, uc_skey) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + u64 test_vaddr = VM_MEM_SIZE - (SZ_1M / 2); + struct kvm_run *run = self->run; + const u8 skeyvalue = 0x34; + + /* copy test_skey_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_skey_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_skey_asm, PAGE_SIZE); + + /* set register content for test_skey_asm to access not mapped memory */ + sync_regs->gprs[1] = skeyvalue; + sync_regs->gprs[5] = self->base_gpa; + sync_regs->gprs[6] = test_vaddr; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(true, uc_handle_exit(self)); + ASSERT_EQ(1, sync_regs->gprs[0]); + + /* ISKE */ + ASSERT_EQ(0, uc_run_once(self)); + + /* + * Bail out and skip the test after uc_skey_enable was executed but iske + * is still intercepted. Instructions are not handled by the kernel. + * Thus there is no need to test this here. + */ + TEST_ASSERT_EQ(0, sie_block->cpuflags & CPUSTAT_KSS); + TEST_ASSERT_EQ(0, sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)); + TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); + TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); + TEST_REQUIRE(sie_block->ipa != 0xb229); + + /* ISKE contd. */ + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(2, sync_regs->gprs[0]); + /* assert initial skey (ACC = 0, R & C = 1) */ + ASSERT_EQ(0x06, sync_regs->gprs[1]); + uc_assert_diag44(self); + + /* SSKE + ISKE */ + sync_regs->gprs[1] = skeyvalue; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(3, sync_regs->gprs[0]); + ASSERT_EQ(skeyvalue, sync_regs->gprs[1]); + uc_assert_diag44(self); + + /* RRBE + ISKE */ + sync_regs->gprs[1] = skeyvalue; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + ASSERT_EQ(4, sync_regs->gprs[0]); + /* assert R reset but rest of skey unchanged */ + ASSERT_EQ(skeyvalue & 0xfa, sync_regs->gprs[1]); + ASSERT_EQ(0, sync_regs->gprs[1] & 0x04); + uc_assert_diag44(self); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c index 903940c54d2d..f4ce5a185a7d 100644 --- a/tools/testing/selftests/kvm/x86_64/amx_test.c +++ b/tools/testing/selftests/kvm/x86_64/amx_test.c @@ -86,6 +86,8 @@ static inline void __xsavec(struct xstate *xstate, uint64_t rfbm) static void check_xtile_info(void) { + GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE); + GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0)); GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE); @@ -122,29 +124,12 @@ static void set_tilecfg(struct tile_config *cfg) } } -static void init_regs(void) -{ - uint64_t cr4, xcr0; - - GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE)); - - /* turn on CR4.OSXSAVE */ - cr4 = get_cr4(); - cr4 |= X86_CR4_OSXSAVE; - set_cr4(cr4); - GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); - - xcr0 = xgetbv(0); - xcr0 |= XFEATURE_MASK_XTILE; - xsetbv(0x0, xcr0); - GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE); -} - static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg, struct tile_data *tiledata, struct xstate *xstate) { - init_regs(); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE) && + this_cpu_has(X86_FEATURE_OSXSAVE)); check_xtile_info(); GUEST_SYNC(1); diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c index 8c579ce714e9..7b3fda6842bc 100644 --- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c @@ -12,17 +12,16 @@ #include "kvm_util.h" #include "processor.h" -/* CPUIDs known to differ */ -struct { - u32 function; - u32 index; -} mangled_cpuids[] = { - /* - * These entries depend on the vCPU's XCR0 register and IA32_XSS MSR, - * which are not controlled for by this test. - */ - {.function = 0xd, .index = 0}, - {.function = 0xd, .index = 1}, +struct cpuid_mask { + union { + struct { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + }; + u32 regs[4]; + }; }; static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid) @@ -56,17 +55,29 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid) GUEST_DONE(); } -static bool is_cpuid_mangled(const struct kvm_cpuid_entry2 *entrie) +static struct cpuid_mask get_const_cpuid_mask(const struct kvm_cpuid_entry2 *entry) { - int i; - - for (i = 0; i < sizeof(mangled_cpuids); i++) { - if (mangled_cpuids[i].function == entrie->function && - mangled_cpuids[i].index == entrie->index) - return true; + struct cpuid_mask mask; + + memset(&mask, 0xff, sizeof(mask)); + + switch (entry->function) { + case 0x1: + mask.regs[X86_FEATURE_OSXSAVE.reg] &= ~BIT(X86_FEATURE_OSXSAVE.bit); + break; + case 0x7: + mask.regs[X86_FEATURE_OSPKE.reg] &= ~BIT(X86_FEATURE_OSPKE.bit); + break; + case 0xd: + /* + * CPUID.0xD.{0,1}.EBX enumerate XSAVE size based on the current + * XCR0 and IA32_XSS MSR values. + */ + if (entry->index < 2) + mask.ebx = 0; + break; } - - return false; + return mask; } static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, @@ -79,6 +90,8 @@ static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent); for (i = 0; i < cpuid1->nent; i++) { + struct cpuid_mask mask; + e1 = &cpuid1->entries[i]; e2 = &cpuid2->entries[i]; @@ -88,15 +101,19 @@ static void compare_cpuids(const struct kvm_cpuid2 *cpuid1, i, e1->function, e1->index, e1->flags, e2->function, e2->index, e2->flags); - if (is_cpuid_mangled(e1)) - continue; + /* Mask off dynamic bits, e.g. OSXSAVE, when comparing entries. */ + mask = get_const_cpuid_mask(e1); - TEST_ASSERT(e1->eax == e2->eax && e1->ebx == e2->ebx && - e1->ecx == e2->ecx && e1->edx == e2->edx, + TEST_ASSERT((e1->eax & mask.eax) == (e2->eax & mask.eax) && + (e1->ebx & mask.ebx) == (e2->ebx & mask.ebx) && + (e1->ecx & mask.ecx) == (e2->ecx & mask.ecx) && + (e1->edx & mask.edx) == (e2->edx & mask.edx), "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x", e1->function, e1->index, - e1->eax, e1->ebx, e1->ecx, e1->edx, - e2->eax, e2->ebx, e2->ecx, e2->edx); + e1->eax & mask.eax, e1->ebx & mask.ebx, + e1->ecx & mask.ecx, e1->edx & mask.edx, + e2->eax & mask.eax, e2->ebx & mask.ebx, + e2->ecx & mask.ecx, e2->edx & mask.edx); } } diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c index 624dc725e14d..28cc66454601 100644 --- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c +++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c @@ -19,30 +19,42 @@ #include "kvm_util.h" #include "processor.h" -static inline bool cr4_cpuid_is_sync(void) -{ - uint64_t cr4 = get_cr4(); - - return (this_cpu_has(X86_FEATURE_OSXSAVE) == !!(cr4 & X86_CR4_OSXSAVE)); -} +#define MAGIC_HYPERCALL_PORT 0x80 static void guest_code(void) { - uint64_t cr4; + u32 regs[4] = { + [KVM_CPUID_EAX] = X86_FEATURE_OSXSAVE.function, + [KVM_CPUID_ECX] = X86_FEATURE_OSXSAVE.index, + }; - /* turn on CR4.OSXSAVE */ - cr4 = get_cr4(); - cr4 |= X86_CR4_OSXSAVE; - set_cr4(cr4); + /* CR4.OSXSAVE should be enabled by default (for selftests vCPUs). */ + GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE); /* verify CR4.OSXSAVE == CPUID.OSXSAVE */ - GUEST_ASSERT(cr4_cpuid_is_sync()); - - /* notify hypervisor to change CR4 */ - GUEST_SYNC(0); - - /* check again */ - GUEST_ASSERT(cr4_cpuid_is_sync()); + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); + + /* + * Notify hypervisor to clear CR4.0SXSAVE, do CPUID and save output, + * and then restore CR4. Do this all in assembly to ensure no AVX + * instructions are executed while OSXSAVE=0. + */ + asm volatile ( + "out %%al, $" __stringify(MAGIC_HYPERCALL_PORT) "\n\t" + "cpuid\n\t" + "mov %%rdi, %%cr4\n\t" + : "+a" (regs[KVM_CPUID_EAX]), + "=b" (regs[KVM_CPUID_EBX]), + "+c" (regs[KVM_CPUID_ECX]), + "=d" (regs[KVM_CPUID_EDX]) + : "D" (get_cr4()) + ); + + /* Verify KVM cleared OSXSAVE in CPUID when it was cleared in CR4. */ + GUEST_ASSERT(!(regs[X86_FEATURE_OSXSAVE.reg] & BIT(X86_FEATURE_OSXSAVE.bit))); + + /* Verify restoring CR4 also restored OSXSAVE in CPUID. */ + GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); GUEST_DONE(); } @@ -62,13 +74,16 @@ int main(int argc, char *argv[]) vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); - switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: + if (vcpu->run->io.port == MAGIC_HYPERCALL_PORT && + vcpu->run->io.direction == KVM_EXIT_IO_OUT) { /* emulate hypervisor clearing CR4.OSXSAVE */ vcpu_sregs_get(vcpu, &sregs); sregs.cr4 &= ~X86_CR4_OSXSAVE; vcpu_sregs_set(vcpu, &sregs); - break; + continue; + } + + switch (get_ucall(vcpu, &uc)) { case UCALL_ABORT: REPORT_GUEST_ASSERT(uc); break; diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c index 76cc2df9238a..2d814c1d1dc4 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c @@ -166,7 +166,7 @@ int main(void) /* Test single step */ target_rip = CAST_TO_RIP(ss_start); target_dr6 = 0xffff4ff0ULL; - for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) { + for (i = 0; i < ARRAY_SIZE(ss_size); i++) { target_rip += ss_size[i]; memset(&debug, 0, sizeof(debug)); debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP | diff --git a/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c b/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c new file mode 100644 index 000000000000..a72f13ae2edb --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/feature_msrs_test.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020, Red Hat, Inc. + */ +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" + +static bool is_kvm_controlled_msr(uint32_t msr) +{ + return msr == MSR_IA32_VMX_CR0_FIXED1 || msr == MSR_IA32_VMX_CR4_FIXED1; +} + +/* + * For VMX MSRs with a "true" variant, KVM requires userspace to set the "true" + * MSR, and doesn't allow setting the hidden version. + */ +static bool is_hidden_vmx_msr(uint32_t msr) +{ + switch (msr) { + case MSR_IA32_VMX_PINBASED_CTLS: + case MSR_IA32_VMX_PROCBASED_CTLS: + case MSR_IA32_VMX_EXIT_CTLS: + case MSR_IA32_VMX_ENTRY_CTLS: + return true; + default: + return false; + } +} + +static bool is_quirked_msr(uint32_t msr) +{ + return msr != MSR_AMD64_DE_CFG; +} + +static void test_feature_msr(uint32_t msr) +{ + const uint64_t supported_mask = kvm_get_feature_msr(msr); + uint64_t reset_value = is_quirked_msr(msr) ? supported_mask : 0; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + /* + * Don't bother testing KVM-controlled MSRs beyond verifying that the + * MSR can be read from userspace. Any value is effectively legal, as + * KVM is bound by x86 architecture, not by ABI. + */ + if (is_kvm_controlled_msr(msr)) + return; + + /* + * More goofy behavior. KVM reports the host CPU's actual revision ID, + * but initializes the vCPU's revision ID to an arbitrary value. + */ + if (msr == MSR_IA32_UCODE_REV) + reset_value = host_cpu_is_intel ? 0x100000000ULL : 0x01000065; + + /* + * For quirked MSRs, KVM's ABI is to initialize the vCPU's value to the + * full set of features supported by KVM. For non-quirked MSRs, and + * when the quirk is disabled, KVM must zero-initialize the MSR and let + * userspace do the configuration. + */ + vm = vm_create_with_one_vcpu(&vcpu, NULL); + TEST_ASSERT(vcpu_get_msr(vcpu, msr) == reset_value, + "Wanted 0x%lx for %squirked MSR 0x%x, got 0x%lx", + reset_value, is_quirked_msr(msr) ? "" : "non-", msr, + vcpu_get_msr(vcpu, msr)); + if (!is_hidden_vmx_msr(msr)) + vcpu_set_msr(vcpu, msr, supported_mask); + kvm_vm_free(vm); + + if (is_hidden_vmx_msr(msr)) + return; + + if (!kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2) || + !(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) + return; + + vm = vm_create(1); + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_STUFF_FEATURE_MSRS); + + vcpu = vm_vcpu_add(vm, 0, NULL); + TEST_ASSERT(!vcpu_get_msr(vcpu, msr), + "Quirk disabled, wanted '0' for MSR 0x%x, got 0x%lx", + msr, vcpu_get_msr(vcpu, msr)); + kvm_vm_free(vm); +} + +int main(int argc, char *argv[]) +{ + const struct kvm_msr_list *feature_list; + int i; + + /* + * Skip the entire test if MSR_FEATURES isn't supported, other tests + * will cover the "regular" list of MSRs, the coverage here is purely + * opportunistic and not interesting on its own. + */ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES)); + + (void)kvm_get_msr_index_list(); + + feature_list = kvm_get_feature_msr_index_list(); + for (i = 0; i < feature_list->nmsrs; i++) + test_feature_msr(feature_list->indices[i]); +} diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c deleted file mode 100644 index d09b3cbcadc6..000000000000 --- a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Test that KVM_GET_MSR_INDEX_LIST and - * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended - * - * Copyright (C) 2020, Red Hat, Inc. - */ -#include <fcntl.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/ioctl.h> - -#include "test_util.h" -#include "kvm_util.h" -#include "processor.h" - -int main(int argc, char *argv[]) -{ - const struct kvm_msr_list *feature_list; - int i; - - /* - * Skip the entire test if MSR_FEATURES isn't supported, other tests - * will cover the "regular" list of MSRs, the coverage here is purely - * opportunistic and not interesting on its own. - */ - TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES)); - - (void)kvm_get_msr_index_list(); - - feature_list = kvm_get_feature_msr_index_list(); - for (i = 0; i < feature_list->nmsrs; i++) - kvm_get_feature_msr(feature_list->indices[i]); -} diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c index eda88080c186..9cbf283ebc55 100644 --- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c +++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c @@ -72,8 +72,6 @@ int main(int argc, char *argv[]) } done: - vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info); - kvm_vm_free(vm); return 0; diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c index 2e9197eb1652..ae77698e6e97 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c @@ -41,8 +41,8 @@ static void guest_sev_code(void) /* Stash state passed via VMSA before any compiled code runs. */ extern void guest_code_xsave(void); asm("guest_code_xsave:\n" - "mov $-1, %eax\n" - "mov $-1, %edx\n" + "mov $" __stringify(XFEATURE_MASK_X87_AVX) ", %eax\n" + "xor %edx, %edx\n" "xsave (%rdi)\n" "jmp guest_sev_es_code"); @@ -70,12 +70,6 @@ static void test_sync_vmsa(uint32_t policy) double x87val = M_PI; struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 }; - struct kvm_sregs sregs; - struct kvm_xcrs xcrs = { - .nr_xcrs = 1, - .xcrs[0].xcr = 0, - .xcrs[0].value = XFEATURE_MASK_X87_AVX, - }; vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu); gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR, @@ -84,11 +78,6 @@ static void test_sync_vmsa(uint32_t policy) vcpu_args_set(vcpu, 1, gva); - vcpu_sregs_get(vcpu, &sregs); - sregs.cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXSAVE; - vcpu_sregs_set(vcpu, &sregs); - - vcpu_xcrs_set(vcpu, &xcrs); asm("fninit\n" "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n" "fldl %3\n" @@ -192,6 +181,8 @@ static void test_sev_es_shutdown(void) int main(int argc, char *argv[]) { + const u64 xf_mask = XFEATURE_MASK_X87_AVX; + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); test_sev(guest_sev_code, SEV_POLICY_NO_DBG); @@ -204,7 +195,7 @@ int main(int argc, char *argv[]) test_sev_es_shutdown(); if (kvm_has_cap(KVM_CAP_XCRS) && - (xgetbv(0) & XFEATURE_MASK_X87_AVX) == XFEATURE_MASK_X87_AVX) { + (xgetbv(0) & kvm_cpu_supported_xcr0() & xf_mask) == xf_mask) { test_sync_vmsa(0); test_sync_vmsa(SEV_POLICY_NO_DBG); } diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c index 1c756db329e5..141b7fc0c965 100644 --- a/tools/testing/selftests/kvm/x86_64/state_test.c +++ b/tools/testing/selftests/kvm/x86_64/state_test.c @@ -145,11 +145,6 @@ static void __attribute__((__flatten__)) guest_code(void *arg) memset(buffer, 0xcc, sizeof(buffer)); - set_cr4(get_cr4() | X86_CR4_OSXSAVE); - GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE)); - - xsetbv(0, xgetbv(0) | supported_xcr0); - /* * Modify state for all supported xfeatures to take them out of * their "init" state, i.e. to make them show up in XSTATE_BV. diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index 7c92536551cc..a1f5ff45d518 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -207,6 +207,29 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code) TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU"); } +KVM_ONE_VCPU_TEST(vmx_pmu_caps, perf_capabilities_unsupported, guest_code) +{ + uint64_t val; + int i, r; + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); + val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); + TEST_ASSERT_EQ(val, host_cap.capabilities); + + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_PDCM); + + val = vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES); + TEST_ASSERT_EQ(val, 0); + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); + + for (i = 0; i < 64; i++) { + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(i)); + TEST_ASSERT(!r, "Setting PERF_CAPABILITIES bit %d (= 0x%llx) should fail without PDCM", + i, BIT_ULL(i)); + } +} + int main(int argc, char *argv[]) { TEST_REQUIRE(kvm_is_pmu_enabled()); diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c index 95ce192d0753..c8a5c5e51661 100644 --- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c +++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c @@ -48,16 +48,16 @@ do { \ static void guest_code(void) { - uint64_t xcr0_reset; + uint64_t initial_xcr0; uint64_t supported_xcr0; int i, vector; set_cr4(get_cr4() | X86_CR4_OSXSAVE); - xcr0_reset = xgetbv(0); + initial_xcr0 = xgetbv(0); supported_xcr0 = this_cpu_supported_xcr0(); - GUEST_ASSERT(xcr0_reset == XFEATURE_MASK_FP); + GUEST_ASSERT(initial_xcr0 == supported_xcr0); /* Check AVX */ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, @@ -79,6 +79,11 @@ static void guest_code(void) ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, XFEATURE_MASK_XTILE); + vector = xsetbv_safe(0, XFEATURE_MASK_FP); + __GUEST_ASSERT(!vector, + "Expected success on XSETBV(FP), got vector '0x%x'", + vector); + vector = xsetbv_safe(0, supported_xcr0); __GUEST_ASSERT(!vector, "Expected success on XSETBV(0x%lx), got vector '0x%x'", diff --git a/tools/testing/selftests/livepatch/Makefile b/tools/testing/selftests/livepatch/Makefile index 35418a4790be..a080eb54a215 100644 --- a/tools/testing/selftests/livepatch/Makefile +++ b/tools/testing/selftests/livepatch/Makefile @@ -10,7 +10,8 @@ TEST_PROGS := \ test-state.sh \ test-ftrace.sh \ test-sysfs.sh \ - test-syscall.sh + test-syscall.sh \ + test-kprobe.sh TEST_FILES := settings diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index fc4c6a016d38..e5d06fb40233 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -6,7 +6,10 @@ MAX_RETRIES=600 RETRY_INTERVAL=".1" # seconds -KLP_SYSFS_DIR="/sys/kernel/livepatch" +SYSFS_KERNEL_DIR="/sys/kernel" +SYSFS_KLP_DIR="$SYSFS_KERNEL_DIR/livepatch" +SYSFS_DEBUG_DIR="$SYSFS_KERNEL_DIR/debug" +SYSFS_KPROBES_DIR="$SYSFS_DEBUG_DIR/kprobes" # Kselftest framework requirement - SKIP code is 4 ksft_skip=4 @@ -55,22 +58,26 @@ function die() { } function push_config() { - DYNAMIC_DEBUG=$(grep '^kernel/livepatch' /sys/kernel/debug/dynamic_debug/control | \ + DYNAMIC_DEBUG=$(grep '^kernel/livepatch' "$SYSFS_DEBUG_DIR/dynamic_debug/control" | \ awk -F'[: ]' '{print "file " $1 " line " $2 " " $4}') FTRACE_ENABLED=$(sysctl --values kernel.ftrace_enabled) + KPROBE_ENABLED=$(cat "$SYSFS_KPROBES_DIR/enabled") } function pop_config() { if [[ -n "$DYNAMIC_DEBUG" ]]; then - echo -n "$DYNAMIC_DEBUG" > /sys/kernel/debug/dynamic_debug/control + echo -n "$DYNAMIC_DEBUG" > "$SYSFS_DEBUG_DIR/dynamic_debug/control" fi if [[ -n "$FTRACE_ENABLED" ]]; then sysctl kernel.ftrace_enabled="$FTRACE_ENABLED" &> /dev/null fi + if [[ -n "$KPROBE_ENABLED" ]]; then + echo "$KPROBE_ENABLED" > "$SYSFS_KPROBES_DIR/enabled" + fi } function set_dynamic_debug() { - cat <<-EOF > /sys/kernel/debug/dynamic_debug/control + cat <<-EOF > "$SYSFS_DEBUG_DIR/dynamic_debug/control" file kernel/livepatch/* +p func klp_try_switch_task -p EOF @@ -183,7 +190,7 @@ function load_lp_nowait() { __load_mod "$mod" "$@" # Wait for livepatch in sysfs ... - loop_until '[[ -e "/sys/kernel/livepatch/$mod" ]]' || + loop_until '[[ -e "$SYSFS_KLP_DIR/$mod" ]]' || die "failed to load module $mod (sysfs)" } @@ -196,7 +203,7 @@ function load_lp() { load_lp_nowait "$mod" "$@" # Wait until the transition finishes ... - loop_until 'grep -q '^0$' /sys/kernel/livepatch/$mod/transition' || + loop_until 'grep -q '^0$' $SYSFS_KLP_DIR/$mod/transition' || die "failed to complete transition" } @@ -246,12 +253,12 @@ function unload_lp() { function disable_lp() { local mod="$1" - log "% echo 0 > /sys/kernel/livepatch/$mod/enabled" - echo 0 > /sys/kernel/livepatch/"$mod"/enabled + log "% echo 0 > $SYSFS_KLP_DIR/$mod/enabled" + echo 0 > "$SYSFS_KLP_DIR/$mod/enabled" # Wait until the transition finishes and the livepatch gets # removed from sysfs... - loop_until '[[ ! -e "/sys/kernel/livepatch/$mod" ]]' || + loop_until '[[ ! -e "$SYSFS_KLP_DIR/$mod" ]]' || die "failed to disable livepatch $mod" } @@ -322,7 +329,7 @@ function check_sysfs_rights() { local rel_path="$1"; shift local expected_rights="$1"; shift - local path="$KLP_SYSFS_DIR/$mod/$rel_path" + local path="$SYSFS_KLP_DIR/$mod/$rel_path" local rights=$(/bin/stat --format '%A' "$path") if test "$rights" != "$expected_rights" ; then die "Unexpected access rights of $path: $expected_rights vs. $rights" @@ -338,7 +345,7 @@ function check_sysfs_value() { local rel_path="$1"; shift local expected_value="$1"; shift - local path="$KLP_SYSFS_DIR/$mod/$rel_path" + local path="$SYSFS_KLP_DIR/$mod/$rel_path" local value=`cat $path` if test "$value" != "$expected_value" ; then die "Unexpected value in $path: $expected_value vs. $value" diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh index 32b150e25b10..37bbc3fb2780 100755 --- a/tools/testing/selftests/livepatch/test-callbacks.sh +++ b/tools/testing/selftests/livepatch/test-callbacks.sh @@ -46,7 +46,7 @@ livepatch: '$MOD_LIVEPATCH': completing patching transition $MOD_LIVEPATCH: post_patch_callback: vmlinux $MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state livepatch: '$MOD_LIVEPATCH': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux $MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state @@ -94,7 +94,7 @@ livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET' $MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init $MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init $MOD_TARGET: ${MOD_TARGET}_init -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux $MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_LIVE] Normal state @@ -146,7 +146,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit $MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET' $MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH': starting unpatching transition @@ -195,7 +195,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit $MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET' $MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH': starting unpatching transition @@ -227,7 +227,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition livepatch: '$MOD_LIVEPATCH': completing patching transition $MOD_LIVEPATCH: post_patch_callback: vmlinux livepatch: '$MOD_LIVEPATCH': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH': starting unpatching transition @@ -310,7 +310,7 @@ $MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full fo livepatch: pre-patch callback failed for object '$MOD_TARGET' livepatch: patch '$MOD_LIVEPATCH' failed for module '$MOD_TARGET', refusing to load module '$MOD_TARGET' insmod: ERROR: could not insert module test_modules/$MOD_TARGET.ko: No such device -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH': starting unpatching transition @@ -364,7 +364,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit $MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET' $MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux $MOD_LIVEPATCH: pre_unpatch_callback: $MOD_TARGET_BUSY -> [MODULE_STATE_LIVE] Normal state @@ -412,7 +412,7 @@ load_lp_nowait $MOD_LIVEPATCH # Wait until the livepatch reports in-transition state, i.e. that it's # stalled on $MOD_TARGET_BUSY::busymod_work_func() -loop_until 'grep -q '^1$' /sys/kernel/livepatch/$MOD_LIVEPATCH/transition' || +loop_until 'grep -q '^1$' $SYSFS_KLP_DIR/$MOD_LIVEPATCH/transition' || die "failed to stall transition" load_mod $MOD_TARGET @@ -438,7 +438,7 @@ $MOD_TARGET: ${MOD_TARGET}_init $MOD_TARGET: ${MOD_TARGET}_exit livepatch: reverting patch '$MOD_LIVEPATCH' on unloading module '$MOD_TARGET' $MOD_LIVEPATCH: post_unpatch_callback: $MOD_TARGET -> [MODULE_STATE_GOING] Going away -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': reversing transition from patching to unpatching livepatch: '$MOD_LIVEPATCH': starting unpatching transition livepatch: '$MOD_LIVEPATCH': completing unpatching transition @@ -483,14 +483,14 @@ livepatch: '$MOD_LIVEPATCH2': starting patching transition livepatch: '$MOD_LIVEPATCH2': completing patching transition $MOD_LIVEPATCH2: post_patch_callback: vmlinux livepatch: '$MOD_LIVEPATCH2': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition $MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH2': starting unpatching transition livepatch: '$MOD_LIVEPATCH2': completing unpatching transition $MOD_LIVEPATCH2: post_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH2': unpatching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH': starting unpatching transition @@ -539,7 +539,7 @@ livepatch: '$MOD_LIVEPATCH2': starting patching transition livepatch: '$MOD_LIVEPATCH2': completing patching transition $MOD_LIVEPATCH2: post_patch_callback: vmlinux livepatch: '$MOD_LIVEPATCH2': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition $MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux livepatch: '$MOD_LIVEPATCH2': starting unpatching transition diff --git a/tools/testing/selftests/livepatch/test-ftrace.sh b/tools/testing/selftests/livepatch/test-ftrace.sh index 730218bce99c..fe14f248913a 100755 --- a/tools/testing/selftests/livepatch/test-ftrace.sh +++ b/tools/testing/selftests/livepatch/test-ftrace.sh @@ -53,7 +53,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition livepatch: '$MOD_LIVEPATCH': completing patching transition livepatch: '$MOD_LIVEPATCH': patching complete livepatch: sysctl: setting key \"kernel.ftrace_enabled\": Device or resource busy -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition livepatch: '$MOD_LIVEPATCH': starting unpatching transition livepatch: '$MOD_LIVEPATCH': completing unpatching transition diff --git a/tools/testing/selftests/livepatch/test-kprobe.sh b/tools/testing/selftests/livepatch/test-kprobe.sh new file mode 100755 index 000000000000..115065156016 --- /dev/null +++ b/tools/testing/selftests/livepatch/test-kprobe.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2024 SUSE +# Author: Michael Vetter <mvetter@suse.com> + +. $(dirname $0)/functions.sh + +MOD_LIVEPATCH=test_klp_livepatch +MOD_KPROBE=test_klp_kprobe + +setup_config + +# Kprobe a function and verify that we can't livepatch that same function +# when it uses a post_handler since only one IPMODIFY maybe be registered +# to any given function at a time. + +start_test "livepatch interaction with kprobed function with post_handler" + +echo 1 > "$SYSFS_KPROBES_DIR/enabled" + +load_mod $MOD_KPROBE has_post_handler=true +load_failing_mod $MOD_LIVEPATCH +unload_mod $MOD_KPROBE + +check_result "% insmod test_modules/test_klp_kprobe.ko has_post_handler=true +% insmod test_modules/$MOD_LIVEPATCH.ko +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: failed to register ftrace handler for function 'cmdline_proc_show' (-16) +livepatch: failed to patch object 'vmlinux' +livepatch: failed to enable patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +insmod: ERROR: could not insert module test_modules/$MOD_LIVEPATCH.ko: Device or resource busy +% rmmod test_klp_kprobe" + +start_test "livepatch interaction with kprobed function without post_handler" + +load_mod $MOD_KPROBE has_post_handler=false +load_lp $MOD_LIVEPATCH + +unload_mod $MOD_KPROBE +disable_lp $MOD_LIVEPATCH +unload_lp $MOD_LIVEPATCH + +check_result "% insmod test_modules/test_klp_kprobe.ko has_post_handler=false +% insmod test_modules/$MOD_LIVEPATCH.ko +livepatch: enabling patch '$MOD_LIVEPATCH' +livepatch: '$MOD_LIVEPATCH': initializing patching transition +livepatch: '$MOD_LIVEPATCH': starting patching transition +livepatch: '$MOD_LIVEPATCH': completing patching transition +livepatch: '$MOD_LIVEPATCH': patching complete +% rmmod test_klp_kprobe +% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +livepatch: '$MOD_LIVEPATCH': initializing unpatching transition +livepatch: '$MOD_LIVEPATCH': starting unpatching transition +livepatch: '$MOD_LIVEPATCH': completing unpatching transition +livepatch: '$MOD_LIVEPATCH': unpatching complete +% rmmod $MOD_LIVEPATCH" + +exit 0 diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh index bd13257bfdfe..6673023d2b66 100755 --- a/tools/testing/selftests/livepatch/test-livepatch.sh +++ b/tools/testing/selftests/livepatch/test-livepatch.sh @@ -39,7 +39,7 @@ livepatch: '$MOD_LIVEPATCH1': initializing patching transition livepatch: '$MOD_LIVEPATCH1': starting patching transition livepatch: '$MOD_LIVEPATCH1': completing patching transition livepatch: '$MOD_LIVEPATCH1': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH1/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH1/enabled livepatch: '$MOD_LIVEPATCH1': initializing unpatching transition livepatch: '$MOD_LIVEPATCH1': starting unpatching transition livepatch: '$MOD_LIVEPATCH1': completing unpatching transition @@ -92,14 +92,14 @@ livepatch: '$MOD_REPLACE': completing patching transition livepatch: '$MOD_REPLACE': patching complete $MOD_LIVEPATCH1: this has been live patched $MOD_REPLACE: this has been live patched -% echo 0 > /sys/kernel/livepatch/$MOD_REPLACE/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_REPLACE/enabled livepatch: '$MOD_REPLACE': initializing unpatching transition livepatch: '$MOD_REPLACE': starting unpatching transition livepatch: '$MOD_REPLACE': completing unpatching transition livepatch: '$MOD_REPLACE': unpatching complete % rmmod $MOD_REPLACE $MOD_LIVEPATCH1: this has been live patched -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH1/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH1/enabled livepatch: '$MOD_LIVEPATCH1': initializing unpatching transition livepatch: '$MOD_LIVEPATCH1': starting unpatching transition livepatch: '$MOD_LIVEPATCH1': completing unpatching transition @@ -128,7 +128,7 @@ for mod in $MOD_LIVEPATCH2 $MOD_LIVEPATCH3; do load_lp "$mod" done -mods=(/sys/kernel/livepatch/*) +mods=($SYSFS_KLP_DIR/*) nmods=${#mods[@]} if [ "$nmods" -ne 3 ]; then die "Expecting three modules listed, found $nmods" @@ -139,7 +139,7 @@ load_lp $MOD_REPLACE replace=1 grep 'live patched' /proc/cmdline > /dev/kmsg grep 'live patched' /proc/meminfo > /dev/kmsg -loop_until 'mods=(/sys/kernel/livepatch/*); nmods=${#mods[@]}; [[ "$nmods" -eq 1 ]]' || +loop_until 'mods=($SYSFS_KLP_DIR/*); nmods=${#mods[@]}; [[ "$nmods" -eq 1 ]]' || die "Expecting only one moduled listed, found $nmods" # These modules were disabled by the atomic replace @@ -188,7 +188,7 @@ $MOD_REPLACE: this has been live patched % rmmod $MOD_LIVEPATCH2 % rmmod $MOD_LIVEPATCH1 $MOD_REPLACE: this has been live patched -% echo 0 > /sys/kernel/livepatch/$MOD_REPLACE/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_REPLACE/enabled livepatch: '$MOD_REPLACE': initializing unpatching transition livepatch: '$MOD_REPLACE': starting unpatching transition livepatch: '$MOD_REPLACE': completing unpatching transition diff --git a/tools/testing/selftests/livepatch/test-state.sh b/tools/testing/selftests/livepatch/test-state.sh index 10a52ac06185..04b66380f8a0 100755 --- a/tools/testing/selftests/livepatch/test-state.sh +++ b/tools/testing/selftests/livepatch/test-state.sh @@ -29,7 +29,7 @@ livepatch: '$MOD_LIVEPATCH': completing patching transition $MOD_LIVEPATCH: post_patch_callback: vmlinux $MOD_LIVEPATCH: fix_console_loglevel: fixing console_loglevel livepatch: '$MOD_LIVEPATCH': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition $MOD_LIVEPATCH: pre_unpatch_callback: vmlinux $MOD_LIVEPATCH: restore_console_loglevel: restoring console_loglevel @@ -72,7 +72,7 @@ $MOD_LIVEPATCH2: post_patch_callback: vmlinux $MOD_LIVEPATCH2: fix_console_loglevel: taking over the console_loglevel change livepatch: '$MOD_LIVEPATCH2': patching complete % rmmod $MOD_LIVEPATCH -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition $MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux $MOD_LIVEPATCH2: restore_console_loglevel: restoring console_loglevel @@ -127,7 +127,7 @@ livepatch: '$MOD_LIVEPATCH2': completing patching transition $MOD_LIVEPATCH2: post_patch_callback: vmlinux $MOD_LIVEPATCH2: fix_console_loglevel: taking over the console_loglevel change livepatch: '$MOD_LIVEPATCH2': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition $MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux $MOD_LIVEPATCH2: restore_console_loglevel: restoring console_loglevel @@ -162,7 +162,7 @@ livepatch: '$MOD_LIVEPATCH2': patching complete % insmod test_modules/$MOD_LIVEPATCH.ko livepatch: Livepatch patch ($MOD_LIVEPATCH) is not compatible with the already installed livepatches. insmod: ERROR: could not insert module test_modules/$MOD_LIVEPATCH.ko: Invalid parameters -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition $MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux $MOD_LIVEPATCH2: restore_console_loglevel: restoring console_loglevel diff --git a/tools/testing/selftests/livepatch/test-syscall.sh b/tools/testing/selftests/livepatch/test-syscall.sh index 289eb7d4c4b3..5f9344277b62 100755 --- a/tools/testing/selftests/livepatch/test-syscall.sh +++ b/tools/testing/selftests/livepatch/test-syscall.sh @@ -27,9 +27,9 @@ pid_list=$(echo ${pids[@]} | tr ' ' ',') load_lp $MOD_SYSCALL klp_pids=$pid_list # wait for all tasks to transition to patched state -loop_until 'grep -q '^0$' /sys/kernel/test_klp_syscall/npids' +loop_until 'grep -q '^0$' $SYSFS_KERNEL_DIR/$MOD_SYSCALL/npids' -pending_pids=$(cat /sys/kernel/test_klp_syscall/npids) +pending_pids=$(cat $SYSFS_KERNEL_DIR/$MOD_SYSCALL/npids) log "$MOD_SYSCALL: Remaining not livepatched processes: $pending_pids" for pid in ${pids[@]}; do @@ -46,7 +46,7 @@ livepatch: '$MOD_SYSCALL': starting patching transition livepatch: '$MOD_SYSCALL': completing patching transition livepatch: '$MOD_SYSCALL': patching complete $MOD_SYSCALL: Remaining not livepatched processes: 0 -% echo 0 > /sys/kernel/livepatch/$MOD_SYSCALL/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_SYSCALL/enabled livepatch: '$MOD_SYSCALL': initializing unpatching transition livepatch: '$MOD_SYSCALL': starting unpatching transition livepatch: '$MOD_SYSCALL': completing unpatching transition diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh b/tools/testing/selftests/livepatch/test-sysfs.sh index 05a14f5a7bfb..2c91428d2997 100755 --- a/tools/testing/selftests/livepatch/test-sysfs.sh +++ b/tools/testing/selftests/livepatch/test-sysfs.sh @@ -34,7 +34,7 @@ livepatch: '$MOD_LIVEPATCH': initializing patching transition livepatch: '$MOD_LIVEPATCH': starting patching transition livepatch: '$MOD_LIVEPATCH': completing patching transition livepatch: '$MOD_LIVEPATCH': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition livepatch: '$MOD_LIVEPATCH': starting unpatching transition livepatch: '$MOD_LIVEPATCH': completing unpatching transition @@ -75,7 +75,7 @@ test_klp_callbacks_mod: test_klp_callbacks_mod_exit test_klp_callbacks_demo: pre_unpatch_callback: test_klp_callbacks_mod -> [MODULE_STATE_GOING] Going away livepatch: reverting patch 'test_klp_callbacks_demo' on unloading module 'test_klp_callbacks_mod' test_klp_callbacks_demo: post_unpatch_callback: test_klp_callbacks_mod -> [MODULE_STATE_GOING] Going away -% echo 0 > /sys/kernel/livepatch/test_klp_callbacks_demo/enabled +% echo 0 > $SYSFS_KLP_DIR/test_klp_callbacks_demo/enabled livepatch: 'test_klp_callbacks_demo': initializing unpatching transition test_klp_callbacks_demo: pre_unpatch_callback: vmlinux livepatch: 'test_klp_callbacks_demo': starting unpatching transition @@ -101,7 +101,7 @@ livepatch: '$MOD_LIVEPATCH': initializing patching transition livepatch: '$MOD_LIVEPATCH': starting patching transition livepatch: '$MOD_LIVEPATCH': completing patching transition livepatch: '$MOD_LIVEPATCH': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition livepatch: '$MOD_LIVEPATCH': starting unpatching transition livepatch: '$MOD_LIVEPATCH': completing unpatching transition @@ -124,7 +124,7 @@ livepatch: '$MOD_LIVEPATCH': initializing patching transition livepatch: '$MOD_LIVEPATCH': starting patching transition livepatch: '$MOD_LIVEPATCH': completing patching transition livepatch: '$MOD_LIVEPATCH': patching complete -% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled +% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled livepatch: '$MOD_LIVEPATCH': initializing unpatching transition livepatch: '$MOD_LIVEPATCH': starting unpatching transition livepatch: '$MOD_LIVEPATCH': completing unpatching transition diff --git a/tools/testing/selftests/livepatch/test_modules/Makefile b/tools/testing/selftests/livepatch/test_modules/Makefile index e6e638c4bcba..939230e571f5 100644 --- a/tools/testing/selftests/livepatch/test_modules/Makefile +++ b/tools/testing/selftests/livepatch/test_modules/Makefile @@ -6,11 +6,12 @@ obj-m += test_klp_atomic_replace.o \ test_klp_callbacks_demo.o \ test_klp_callbacks_demo2.o \ test_klp_callbacks_mod.o \ + test_klp_kprobe.o \ test_klp_livepatch.o \ + test_klp_shadow_vars.o \ test_klp_state.o \ test_klp_state2.o \ test_klp_state3.o \ - test_klp_shadow_vars.o \ test_klp_syscall.o # Ensure that KDIR exists, otherwise skip the compilation diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_kprobe.c b/tools/testing/selftests/livepatch/test_modules/test_klp_kprobe.c new file mode 100644 index 000000000000..67a8d29012f6 --- /dev/null +++ b/tools/testing/selftests/livepatch/test_modules/test_klp_kprobe.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2024 Marcos Paulo de Souza <mpdesouza@suse.com> +// Copyright (C) 2024 Michael Vetter <mvetter@suse.com> + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/kprobes.h> + +static bool has_post_handler = true; +module_param(has_post_handler, bool, 0444); + +static void __kprobes post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ +} + +static struct kprobe kp = { + .symbol_name = "cmdline_proc_show", +}; + +static int __init kprobe_init(void) +{ + if (has_post_handler) + kp.post_handler = post_handler; + + return register_kprobe(&kp); +} + +static void __exit kprobe_exit(void) +{ + unregister_kprobe(&kp); +} + +module_init(kprobe_init) +module_exit(kprobe_exit) +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Michael Vetter <mvetter@suse.com>"); +MODULE_DESCRIPTION("Livepatch test: kprobe function"); diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore index da030b43e43b..8f01f4da1c0d 100644 --- a/tools/testing/selftests/mm/.gitignore +++ b/tools/testing/selftests/mm/.gitignore @@ -51,3 +51,7 @@ hugetlb_madv_vs_map mseal_test seal_elf droppable +hugetlb_dio +pkey_sighandler_tests_32 +pkey_sighandler_tests_64 +guard-pages diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 02e1204971b0..3de23ea4663f 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -36,6 +36,17 @@ MAKEFLAGS += --no-builtin-rules CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES) LDLIBS = -lrt -lpthread -lm +KDIR ?= /lib/modules/$(shell uname -r)/build +ifneq (,$(wildcard $(KDIR)/Module.symvers)) +ifneq (,$(wildcard $(KDIR)/include/linux/page_frag_cache.h)) +TEST_GEN_MODS_DIR := page_frag +else +PAGE_FRAG_WARNING = "missing page_frag_cache.h, please use a newer kernel" +endif +else +PAGE_FRAG_WARNING = "missing Module.symvers, please have the kernel built first" +endif + TEST_GEN_FILES = cow TEST_GEN_FILES += compaction_test TEST_GEN_FILES += gup_longterm @@ -79,6 +90,7 @@ TEST_GEN_FILES += hugetlb_fault_after_madv TEST_GEN_FILES += hugetlb_madv_vs_map TEST_GEN_FILES += hugetlb_dio TEST_GEN_FILES += droppable +TEST_GEN_FILES += guard-pages ifneq ($(ARCH),arm64) TEST_GEN_FILES += soft-dirty @@ -105,17 +117,19 @@ endif ifeq ($(CAN_BUILD_X86_64),1) TEST_GEN_FILES += $(BINARIES_64) endif -else -ifneq (,$(filter $(ARCH),arm64 powerpc)) +else ifeq ($(ARCH),arm64) +TEST_GEN_FILES += protection_keys +TEST_GEN_FILES += pkey_sighandler_tests +else ifeq ($(ARCH),powerpc) TEST_GEN_FILES += protection_keys -endif - endif ifneq (,$(filter $(ARCH),arm64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390)) TEST_GEN_FILES += va_high_addr_switch +ifneq ($(ARCH),riscv64) TEST_GEN_FILES += virtual_address_range +endif TEST_GEN_FILES += write_to_hugetlbfs endif @@ -126,6 +140,7 @@ TEST_FILES += test_hmm.sh TEST_FILES += va_high_addr_switch.sh TEST_FILES += charge_reserved_hugetlb.sh TEST_FILES += hugetlb_reparenting_test.sh +TEST_FILES += test_page_frag.sh # required by charge_reserved_hugetlb.sh TEST_FILES += write_hugetlb_memory.sh @@ -211,3 +226,12 @@ warn_missing_liburing: echo "Warning: missing liburing support. Some tests will be skipped." ; \ echo endif + +ifneq ($(PAGE_FRAG_WARNING),) +all: warn_missing_page_frag + +warn_missing_page_frag: + @echo ; \ + echo "Warning: $(PAGE_FRAG_WARNING). page_frag test will be skipped." ; \ + echo +endif diff --git a/tools/testing/selftests/mm/guard-pages.c b/tools/testing/selftests/mm/guard-pages.c new file mode 100644 index 000000000000..7cdf815d0d63 --- /dev/null +++ b/tools/testing/selftests/mm/guard-pages.c @@ -0,0 +1,1243 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +#define _GNU_SOURCE +#include "../kselftest_harness.h" +#include <asm-generic/mman.h> /* Force the import of the tools version. */ +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/userfaultfd.h> +#include <setjmp.h> +#include <signal.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/syscall.h> +#include <sys/uio.h> +#include <unistd.h> + +/* + * Ignore the checkpatch warning, as per the C99 standard, section 7.14.1.1: + * + * "If the signal occurs other than as the result of calling the abort or raise + * function, the behavior is undefined if the signal handler refers to any + * object with static storage duration other than by assigning a value to an + * object declared as volatile sig_atomic_t" + */ +static volatile sig_atomic_t signal_jump_set; +static sigjmp_buf signal_jmp_buf; + +/* + * Ignore the checkpatch warning, we must read from x but don't want to do + * anything with it in order to trigger a read page fault. We therefore must use + * volatile to stop the compiler from optimising this away. + */ +#define FORCE_READ(x) (*(volatile typeof(x) *)x) + +static int userfaultfd(int flags) +{ + return syscall(SYS_userfaultfd, flags); +} + +static void handle_fatal(int c) +{ + if (!signal_jump_set) + return; + + siglongjmp(signal_jmp_buf, c); +} + +static int pidfd_open(pid_t pid, unsigned int flags) +{ + return syscall(SYS_pidfd_open, pid, flags); +} + +/* + * Enable our signal catcher and try to read/write the specified buffer. The + * return value indicates whether the read/write succeeds without a fatal + * signal. + */ +static bool try_access_buf(char *ptr, bool write) +{ + bool failed; + + /* Tell signal handler to jump back here on fatal signal. */ + signal_jump_set = true; + /* If a fatal signal arose, we will jump back here and failed is set. */ + failed = sigsetjmp(signal_jmp_buf, 0) != 0; + + if (!failed) { + if (write) + *ptr = 'x'; + else + FORCE_READ(ptr); + } + + signal_jump_set = false; + return !failed; +} + +/* Try and read from a buffer, return true if no fatal signal. */ +static bool try_read_buf(char *ptr) +{ + return try_access_buf(ptr, false); +} + +/* Try and write to a buffer, return true if no fatal signal. */ +static bool try_write_buf(char *ptr) +{ + return try_access_buf(ptr, true); +} + +/* + * Try and BOTH read from AND write to a buffer, return true if BOTH operations + * succeed. + */ +static bool try_read_write_buf(char *ptr) +{ + return try_read_buf(ptr) && try_write_buf(ptr); +} + +FIXTURE(guard_pages) +{ + unsigned long page_size; +}; + +FIXTURE_SETUP(guard_pages) +{ + struct sigaction act = { + .sa_handler = &handle_fatal, + .sa_flags = SA_NODEFER, + }; + + sigemptyset(&act.sa_mask); + if (sigaction(SIGSEGV, &act, NULL)) + ksft_exit_fail_perror("sigaction"); + + self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); +}; + +FIXTURE_TEARDOWN(guard_pages) +{ + struct sigaction act = { + .sa_handler = SIG_DFL, + .sa_flags = SA_NODEFER, + }; + + sigemptyset(&act.sa_mask); + sigaction(SIGSEGV, &act, NULL); +} + +TEST_F(guard_pages, basic) +{ + const unsigned long NUM_PAGES = 10; + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap(NULL, NUM_PAGES * page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Trivially assert we can touch the first page. */ + ASSERT_TRUE(try_read_write_buf(ptr)); + + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + + /* Establish that 1st page SIGSEGV's. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + + /* Ensure we can touch everything else.*/ + for (i = 1; i < NUM_PAGES; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Establish a guard page at the end of the mapping. */ + ASSERT_EQ(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size, + MADV_GUARD_INSTALL), 0); + + /* Check that both guard pages result in SIGSEGV. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size])); + + /* Remove the first guard page. */ + ASSERT_FALSE(madvise(ptr, page_size, MADV_GUARD_REMOVE)); + + /* Make sure we can touch it. */ + ASSERT_TRUE(try_read_write_buf(ptr)); + + /* Remove the last guard page. */ + ASSERT_FALSE(madvise(&ptr[(NUM_PAGES - 1) * page_size], page_size, + MADV_GUARD_REMOVE)); + + /* Make sure we can touch it. */ + ASSERT_TRUE(try_read_write_buf(&ptr[(NUM_PAGES - 1) * page_size])); + + /* + * Test setting a _range_ of pages, namely the first 3. The first of + * these be faulted in, so this also tests that we can install guard + * pages over backed pages. + */ + ASSERT_EQ(madvise(ptr, 3 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure they are all guard pages. */ + for (i = 0; i < 3; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Make sure the rest are not. */ + for (i = 3; i < NUM_PAGES; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Remove guard pages. */ + ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0); + + /* Now make sure we can touch everything. */ + for (i = 0; i < NUM_PAGES; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* + * Now remove all guard pages, make sure we don't remove existing + * entries. + */ + ASSERT_EQ(madvise(ptr, NUM_PAGES * page_size, MADV_GUARD_REMOVE), 0); + + for (i = 0; i < NUM_PAGES * page_size; i += page_size) { + char chr = ptr[i]; + + ASSERT_EQ(chr, 'x'); + } + + ASSERT_EQ(munmap(ptr, NUM_PAGES * page_size), 0); +} + +/* Assert that operations applied across multiple VMAs work as expected. */ +TEST_F(guard_pages, multi_vma) +{ + const unsigned long page_size = self->page_size; + char *ptr_region, *ptr, *ptr1, *ptr2, *ptr3; + int i; + + /* Reserve a 100 page region over which we can install VMAs. */ + ptr_region = mmap(NULL, 100 * page_size, PROT_NONE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_region, MAP_FAILED); + + /* Place a VMA of 10 pages size at the start of the region. */ + ptr1 = mmap(ptr_region, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr1, MAP_FAILED); + + /* Place a VMA of 5 pages size 50 pages into the region. */ + ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + + /* Place a VMA of 20 pages size at the end of the region. */ + ptr3 = mmap(&ptr_region[80 * page_size], 20 * page_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr3, MAP_FAILED); + + /* Unmap gaps. */ + ASSERT_EQ(munmap(&ptr_region[10 * page_size], 40 * page_size), 0); + ASSERT_EQ(munmap(&ptr_region[55 * page_size], 25 * page_size), 0); + + /* + * We end up with VMAs like this: + * + * 0 10 .. 50 55 .. 80 100 + * [---] [---] [---] + */ + + /* + * Now mark the whole range as guard pages and make sure all VMAs are as + * such. + */ + + /* + * madvise() is certifiable and lets you perform operations over gaps, + * everything works, but it indicates an error and errno is set to + * -ENOMEM. Also if anything runs out of memory it is set to + * -ENOMEM. You are meant to guess which is which. + */ + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), -1); + ASSERT_EQ(errno, ENOMEM); + + for (i = 0; i < 10; i++) { + char *curr = &ptr1[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + for (i = 0; i < 5; i++) { + char *curr = &ptr2[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + for (i = 0; i < 20; i++) { + char *curr = &ptr3[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now remove guar pages over range and assert the opposite. */ + + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), -1); + ASSERT_EQ(errno, ENOMEM); + + for (i = 0; i < 10; i++) { + char *curr = &ptr1[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + for (i = 0; i < 5; i++) { + char *curr = &ptr2[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + for (i = 0; i < 20; i++) { + char *curr = &ptr3[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Now map incompatible VMAs in the gaps. */ + ptr = mmap(&ptr_region[10 * page_size], 40 * page_size, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + ptr = mmap(&ptr_region[55 * page_size], 25 * page_size, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * We end up with VMAs like this: + * + * 0 10 .. 50 55 .. 80 100 + * [---][xxxx][---][xxxx][---] + * + * Where 'x' signifies VMAs that cannot be merged with those adjacent to + * them. + */ + + /* Multiple VMAs adjacent to one another should result in no error. */ + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_INSTALL), 0); + for (i = 0; i < 100; i++) { + char *curr = &ptr_region[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + ASSERT_EQ(madvise(ptr_region, 100 * page_size, MADV_GUARD_REMOVE), 0); + for (i = 0; i < 100; i++) { + char *curr = &ptr_region[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr_region, 100 * page_size), 0); +} + +/* + * Assert that batched operations performed using process_madvise() work as + * expected. + */ +TEST_F(guard_pages, process_madvise) +{ + const unsigned long page_size = self->page_size; + pid_t pid = getpid(); + int pidfd = pidfd_open(pid, 0); + char *ptr_region, *ptr1, *ptr2, *ptr3; + ssize_t count; + struct iovec vec[6]; + + ASSERT_NE(pidfd, -1); + + /* Reserve region to map over. */ + ptr_region = mmap(NULL, 100 * page_size, PROT_NONE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_region, MAP_FAILED); + + /* + * 10 pages offset 1 page into reserve region. We MAP_POPULATE so we + * overwrite existing entries and test this code path against + * overwriting existing entries. + */ + ptr1 = mmap(&ptr_region[page_size], 10 * page_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE | MAP_POPULATE, -1, 0); + ASSERT_NE(ptr1, MAP_FAILED); + /* We want guard markers at start/end of each VMA. */ + vec[0].iov_base = ptr1; + vec[0].iov_len = page_size; + vec[1].iov_base = &ptr1[9 * page_size]; + vec[1].iov_len = page_size; + + /* 5 pages offset 50 pages into reserve region. */ + ptr2 = mmap(&ptr_region[50 * page_size], 5 * page_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr2, MAP_FAILED); + vec[2].iov_base = ptr2; + vec[2].iov_len = page_size; + vec[3].iov_base = &ptr2[4 * page_size]; + vec[3].iov_len = page_size; + + /* 20 pages offset 79 pages into reserve region. */ + ptr3 = mmap(&ptr_region[79 * page_size], 20 * page_size, + PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr3, MAP_FAILED); + vec[4].iov_base = ptr3; + vec[4].iov_len = page_size; + vec[5].iov_base = &ptr3[19 * page_size]; + vec[5].iov_len = page_size; + + /* Free surrounding VMAs. */ + ASSERT_EQ(munmap(ptr_region, page_size), 0); + ASSERT_EQ(munmap(&ptr_region[11 * page_size], 39 * page_size), 0); + ASSERT_EQ(munmap(&ptr_region[55 * page_size], 24 * page_size), 0); + ASSERT_EQ(munmap(&ptr_region[99 * page_size], page_size), 0); + + /* Now guard in one step. */ + count = process_madvise(pidfd, vec, 6, MADV_GUARD_INSTALL, 0); + + /* OK we don't have permission to do this, skip. */ + if (count == -1 && errno == EPERM) + ksft_exit_skip("No process_madvise() permissions, try running as root.\n"); + + /* Returns the number of bytes advised. */ + ASSERT_EQ(count, 6 * page_size); + + /* Now make sure the guarding was applied. */ + + ASSERT_FALSE(try_read_write_buf(ptr1)); + ASSERT_FALSE(try_read_write_buf(&ptr1[9 * page_size])); + + ASSERT_FALSE(try_read_write_buf(ptr2)); + ASSERT_FALSE(try_read_write_buf(&ptr2[4 * page_size])); + + ASSERT_FALSE(try_read_write_buf(ptr3)); + ASSERT_FALSE(try_read_write_buf(&ptr3[19 * page_size])); + + /* Now do the same with unguard... */ + count = process_madvise(pidfd, vec, 6, MADV_GUARD_REMOVE, 0); + + /* ...and everything should now succeed. */ + + ASSERT_TRUE(try_read_write_buf(ptr1)); + ASSERT_TRUE(try_read_write_buf(&ptr1[9 * page_size])); + + ASSERT_TRUE(try_read_write_buf(ptr2)); + ASSERT_TRUE(try_read_write_buf(&ptr2[4 * page_size])); + + ASSERT_TRUE(try_read_write_buf(ptr3)); + ASSERT_TRUE(try_read_write_buf(&ptr3[19 * page_size])); + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr1, 10 * page_size), 0); + ASSERT_EQ(munmap(ptr2, 5 * page_size), 0); + ASSERT_EQ(munmap(ptr3, 20 * page_size), 0); + close(pidfd); +} + +/* Assert that unmapping ranges does not leave guard markers behind. */ +TEST_F(guard_pages, munmap) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new1, *ptr_new2; + + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard first and last pages. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[9 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Assert that they are guarded. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[9 * page_size])); + + /* Unmap them. */ + ASSERT_EQ(munmap(ptr, page_size), 0); + ASSERT_EQ(munmap(&ptr[9 * page_size], page_size), 0); + + /* Map over them.*/ + ptr_new1 = mmap(ptr, page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new1, MAP_FAILED); + ptr_new2 = mmap(&ptr[9 * page_size], page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new2, MAP_FAILED); + + /* Assert that they are now not guarded. */ + ASSERT_TRUE(try_read_write_buf(ptr_new1)); + ASSERT_TRUE(try_read_write_buf(ptr_new2)); + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Assert that mprotect() operations have no bearing on guard markers. */ +TEST_F(guard_pages, mprotect) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard the middle of the range. */ + ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size, + MADV_GUARD_INSTALL), 0); + + /* Assert that it is indeed guarded. */ + ASSERT_FALSE(try_read_write_buf(&ptr[5 * page_size])); + ASSERT_FALSE(try_read_write_buf(&ptr[6 * page_size])); + + /* Now make these pages read-only. */ + ASSERT_EQ(mprotect(&ptr[5 * page_size], 2 * page_size, PROT_READ), 0); + + /* Make sure the range is still guarded. */ + ASSERT_FALSE(try_read_buf(&ptr[5 * page_size])); + ASSERT_FALSE(try_read_buf(&ptr[6 * page_size])); + + /* Make sure we can guard again without issue.*/ + ASSERT_EQ(madvise(&ptr[5 * page_size], 2 * page_size, + MADV_GUARD_INSTALL), 0); + + /* Make sure the range is, yet again, still guarded. */ + ASSERT_FALSE(try_read_buf(&ptr[5 * page_size])); + ASSERT_FALSE(try_read_buf(&ptr[6 * page_size])); + + /* Now unguard the whole range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Make sure the whole range is readable. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Split and merge VMAs and make sure guard pages still behave. */ +TEST_F(guard_pages, split_merge) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new; + int i; + + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard the whole range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the whole range is guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now unmap some pages in the range so we split. */ + ASSERT_EQ(munmap(&ptr[2 * page_size], page_size), 0); + ASSERT_EQ(munmap(&ptr[5 * page_size], page_size), 0); + ASSERT_EQ(munmap(&ptr[8 * page_size], page_size), 0); + + /* Make sure the remaining ranges are guarded post-split. */ + for (i = 0; i < 2; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + for (i = 2; i < 5; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + for (i = 6; i < 8; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + for (i = 9; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now map them again - the unmap will have cleared the guards. */ + ptr_new = mmap(&ptr[2 * page_size], page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + ptr_new = mmap(&ptr[5 * page_size], page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + ptr_new = mmap(&ptr[8 * page_size], page_size, PROT_READ | PROT_WRITE, + MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + + /* Now make sure guard pages are established. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + bool expect_true = i == 2 || i == 5 || i == 8; + + ASSERT_TRUE(expect_true ? result : !result); + } + + /* Now guard everything again. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the whole range is guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now split the range into three. */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0); + + /* Make sure the whole range is guarded for read. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_buf(curr)); + } + + /* Now reset protection bits so we merge the whole thing. */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, + PROT_READ | PROT_WRITE), 0); + + /* Make sure the whole range is still guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Split range into 3 again... */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, PROT_READ), 0); + + /* ...and unguard the whole range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Make sure the whole range is remedied for read. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_buf(curr)); + } + + /* Merge them again. */ + ASSERT_EQ(mprotect(ptr, 3 * page_size, PROT_READ | PROT_WRITE), 0); + ASSERT_EQ(mprotect(&ptr[7 * page_size], 3 * page_size, + PROT_READ | PROT_WRITE), 0); + + /* Now ensure the merged range is remedied for read/write. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Assert that MADV_DONTNEED does not remove guard markers. */ +TEST_F(guard_pages, dontneed) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Back the whole range. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + *curr = 'y'; + } + + /* Guard every other page. */ + for (i = 0; i < 10; i += 2) { + char *curr = &ptr[i * page_size]; + int res = madvise(curr, page_size, MADV_GUARD_INSTALL); + + ASSERT_EQ(res, 0); + } + + /* Indicate that we don't need any of the range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_DONTNEED), 0); + + /* Check to ensure guard markers are still in place. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_buf(curr); + + if (i % 2 == 0) { + ASSERT_FALSE(result); + } else { + ASSERT_TRUE(result); + /* Make sure we really did get reset to zero page. */ + ASSERT_EQ(*curr, '\0'); + } + + /* Now write... */ + result = try_write_buf(&ptr[i * page_size]); + + /* ...and make sure same result. */ + ASSERT_TRUE(i % 2 != 0 ? result : !result); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Assert that mlock()'ed pages work correctly with guard markers. */ +TEST_F(guard_pages, mlock) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Populate. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + *curr = 'y'; + } + + /* Lock. */ + ASSERT_EQ(mlock(ptr, 10 * page_size), 0); + + /* Now try to guard, should fail with EINVAL. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), -1); + ASSERT_EQ(errno, EINVAL); + + /* OK unlock. */ + ASSERT_EQ(munlock(ptr, 10 * page_size), 0); + + /* Guard first half of range, should now succeed. */ + ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure guard works. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + if (i < 5) { + ASSERT_FALSE(result); + } else { + ASSERT_TRUE(result); + ASSERT_EQ(*curr, 'x'); + } + } + + /* + * Now lock the latter part of the range. We can't lock the guard pages, + * as this would result in the pages being populated and the guarding + * would cause this to error out. + */ + ASSERT_EQ(mlock(&ptr[5 * page_size], 5 * page_size), 0); + + /* + * Now remove guard pages, we permit mlock()'d ranges to have guard + * pages removed as it is a non-destructive operation. + */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + /* Now check that no guard pages remain. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Assert that moving, extending and shrinking memory via mremap() retains + * guard markers where possible. + * + * - Moving a mapping alone should retain markers as they are. + */ +TEST_F(guard_pages, mremap_move) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new; + + /* Map 5 pages. */ + ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Place guard markers at both ends of the 5 page span. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the guard pages are in effect. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Map a new region we will move this range into. Doing this ensures + * that we have reserved a range to map into. + */ + ptr_new = mmap(NULL, 5 * page_size, PROT_NONE, MAP_ANON | MAP_PRIVATE, + -1, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + + ASSERT_EQ(mremap(ptr, 5 * page_size, 5 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new), ptr_new); + + /* Make sure the guard markers are retained. */ + ASSERT_FALSE(try_read_write_buf(ptr_new)); + ASSERT_FALSE(try_read_write_buf(&ptr_new[4 * page_size])); + + /* + * Clean up - we only need reference the new pointer as we overwrote the + * PROT_NONE range and moved the existing one. + */ + munmap(ptr_new, 5 * page_size); +} + +/* + * Assert that moving, extending and shrinking memory via mremap() retains + * guard markers where possible. + * + * Expanding should retain guard pages, only now in different position. The user + * will have to remove guard pages manually to fix up (they'd have to do the + * same if it were a PROT_NONE mapping). + */ +TEST_F(guard_pages, mremap_expand) +{ + const unsigned long page_size = self->page_size; + char *ptr, *ptr_new; + + /* Map 10 pages... */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + /* ...But unmap the last 5 so we can ensure we can expand into them. */ + ASSERT_EQ(munmap(&ptr[5 * page_size], 5 * page_size), 0); + + /* Place guard markers at both ends of the 5 page span. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the guarding is in effect. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Now expand to 10 pages. */ + ptr = mremap(ptr, 5 * page_size, 10 * page_size, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* + * Make sure the guard markers are retained in their original positions. + */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Reserve a region which we can move to and expand into. */ + ptr_new = mmap(NULL, 20 * page_size, PROT_NONE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr_new, MAP_FAILED); + + /* Now move and expand into it. */ + ptr = mremap(ptr, 10 * page_size, 20 * page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, ptr_new); + ASSERT_EQ(ptr, ptr_new); + + /* + * Again, make sure the guard markers are retained in their original positions. + */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* + * A real user would have to remove guard markers, but would reasonably + * expect all characteristics of the mapping to be retained, including + * guard markers. + */ + + /* Cleanup. */ + munmap(ptr, 20 * page_size); +} +/* + * Assert that moving, extending and shrinking memory via mremap() retains + * guard markers where possible. + * + * Shrinking will result in markers that are shrunk over being removed. Again, + * if the user were using a PROT_NONE mapping they'd have to manually fix this + * up also so this is OK. + */ +TEST_F(guard_pages, mremap_shrink) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + /* Map 5 pages. */ + ptr = mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Place guard markers at both ends of the 5 page span. */ + ASSERT_EQ(madvise(ptr, page_size, MADV_GUARD_INSTALL), 0); + ASSERT_EQ(madvise(&ptr[4 * page_size], page_size, MADV_GUARD_INSTALL), 0); + + /* Make sure the guarding is in effect. */ + ASSERT_FALSE(try_read_write_buf(ptr)); + ASSERT_FALSE(try_read_write_buf(&ptr[4 * page_size])); + + /* Now shrink to 3 pages. */ + ptr = mremap(ptr, 5 * page_size, 3 * page_size, MREMAP_MAYMOVE); + ASSERT_NE(ptr, MAP_FAILED); + + /* We expect the guard marker at the start to be retained... */ + ASSERT_FALSE(try_read_write_buf(ptr)); + + /* ...But remaining pages will not have guard markers. */ + for (i = 1; i < 3; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* + * As with expansion, a real user would have to remove guard pages and + * fixup. But you'd have to do similar manual things with PROT_NONE + * mappings too. + */ + + /* + * If we expand back to the original size, the end marker will, of + * course, no longer be present. + */ + ptr = mremap(ptr, 3 * page_size, 5 * page_size, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Again, we expect the guard marker at the start to be retained... */ + ASSERT_FALSE(try_read_write_buf(ptr)); + + /* ...But remaining pages will not have guard markers. */ + for (i = 1; i < 5; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + munmap(ptr, 5 * page_size); +} + +/* + * Assert that forking a process with VMAs that do not have VM_WIPEONFORK set + * retain guard pages. + */ +TEST_F(guard_pages, fork) +{ + const unsigned long page_size = self->page_size; + char *ptr; + pid_t pid; + int i; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Establish guard apges in the first 5 pages. */ + ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0); + + pid = fork(); + ASSERT_NE(pid, -1); + if (!pid) { + /* This is the child process now. */ + + /* Assert that the guarding is in effect. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + ASSERT_TRUE(i >= 5 ? result : !result); + } + + /* Now unguard the range.*/ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_REMOVE), 0); + + exit(0); + } + + /* Parent process. */ + + /* Parent simply waits on child. */ + waitpid(pid, NULL, 0); + + /* Child unguard does not impact parent page table state. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + ASSERT_TRUE(i >= 5 ? result : !result); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* + * Assert that forking a process with VMAs that do have VM_WIPEONFORK set + * behave as expected. + */ +TEST_F(guard_pages, fork_wipeonfork) +{ + const unsigned long page_size = self->page_size; + char *ptr; + pid_t pid; + int i; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Mark wipe on fork. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_WIPEONFORK), 0); + + /* Guard the first 5 pages. */ + ASSERT_EQ(madvise(ptr, 5 * page_size, MADV_GUARD_INSTALL), 0); + + pid = fork(); + ASSERT_NE(pid, -1); + if (!pid) { + /* This is the child process now. */ + + /* Guard will have been wiped. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_TRUE(try_read_write_buf(curr)); + } + + exit(0); + } + + /* Parent process. */ + + waitpid(pid, NULL, 0); + + /* Guard markers should be in effect.*/ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + bool result = try_read_write_buf(curr); + + ASSERT_TRUE(i >= 5 ? result : !result); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that MADV_FREE retains guard entries as expected. */ +TEST_F(guard_pages, lazyfree) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Ensure guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Lazyfree range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_FREE), 0); + + /* This should leave the guard markers in place. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that MADV_POPULATE_READ, MADV_POPULATE_WRITE behave as expected. */ +TEST_F(guard_pages, populate) +{ + const unsigned long page_size = self->page_size; + char *ptr; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Populate read should error out... */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_READ), -1); + ASSERT_EQ(errno, EFAULT); + + /* ...as should populate write. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_POPULATE_WRITE), -1); + ASSERT_EQ(errno, EFAULT); + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that MADV_COLD, MADV_PAGEOUT do not remove guard markers. */ +TEST_F(guard_pages, cold_pageout) +{ + const unsigned long page_size = self->page_size; + char *ptr; + int i; + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Guard range. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* Ensured guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Now mark cold. This should have no impact on guard markers. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_COLD), 0); + + /* Should remain guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* OK, now page out. This should equally, have no effect on markers. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_PAGEOUT), 0); + + /* Should remain guarded. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +/* Ensure that guard pages do not break userfaultd. */ +TEST_F(guard_pages, uffd) +{ + const unsigned long page_size = self->page_size; + int uffd; + char *ptr; + int i; + struct uffdio_api api = { + .api = UFFD_API, + .features = 0, + }; + struct uffdio_register reg; + struct uffdio_range range; + + /* Set up uffd. */ + uffd = userfaultfd(0); + if (uffd == -1 && errno == EPERM) + ksft_exit_skip("No userfaultfd permissions, try running as root.\n"); + ASSERT_NE(uffd, -1); + + ASSERT_EQ(ioctl(uffd, UFFDIO_API, &api), 0); + + /* Map 10 pages. */ + ptr = mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE, + MAP_ANON | MAP_PRIVATE, -1, 0); + ASSERT_NE(ptr, MAP_FAILED); + + /* Register the range with uffd. */ + range.start = (unsigned long)ptr; + range.len = 10 * page_size; + reg.range = range; + reg.mode = UFFDIO_REGISTER_MODE_MISSING; + ASSERT_EQ(ioctl(uffd, UFFDIO_REGISTER, ®), 0); + + /* Guard the range. This should not trigger the uffd. */ + ASSERT_EQ(madvise(ptr, 10 * page_size, MADV_GUARD_INSTALL), 0); + + /* The guarding should behave as usual with no uffd intervention. */ + for (i = 0; i < 10; i++) { + char *curr = &ptr[i * page_size]; + + ASSERT_FALSE(try_read_write_buf(curr)); + } + + /* Cleanup. */ + ASSERT_EQ(ioctl(uffd, UFFDIO_UNREGISTER, &range), 0); + close(uffd); + ASSERT_EQ(munmap(ptr, 10 * page_size), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index d2cfc9b494a0..141bf63cbe05 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -1657,7 +1657,7 @@ TEST_F(hmm2, double_map) buffer->fd = -1; buffer->size = size; - buffer->mirror = malloc(npages); + buffer->mirror = malloc(size); ASSERT_NE(buffer->mirror, NULL); /* Reserve a range of addresses. */ diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c index f9ac20c657ec..432d5af15e66 100644 --- a/tools/testing/selftests/mm/hugetlb_dio.c +++ b/tools/testing/selftests/mm/hugetlb_dio.c @@ -94,8 +94,20 @@ void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off) int main(void) { size_t pagesize = 0; + int fd; ksft_print_header(); + + /* Open the file to DIO */ + fd = open("/tmp", O_TMPFILE | O_RDWR | O_DIRECT, 0664); + if (fd < 0) + ksft_exit_skip("Unable to allocate file: %s\n", strerror(errno)); + close(fd); + + /* Check if huge pages are free */ + if (!get_free_hugepages()) + ksft_exit_skip("No free hugepage, exiting\n"); + ksft_set_plan(4); /* Get base page size */ diff --git a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c index 73b81c632366..e2640529dbb2 100644 --- a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c +++ b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c @@ -5,20 +5,36 @@ #include <sys/mman.h> #include <sys/types.h> #include <unistd.h> +#include <setjmp.h> +#include <signal.h> #include "vm_util.h" #include "../kselftest.h" -#define MMAP_SIZE (1 << 21) #define INLOOP_ITER 100 -char *huge_ptr; +static char *huge_ptr; +static size_t huge_page_size; + +static sigjmp_buf sigbuf; +static bool sigbus_triggered; + +static void signal_handler(int signal) +{ + if (signal == SIGBUS) { + sigbus_triggered = true; + siglongjmp(sigbuf, 1); + } +} /* Touch the memory while it is being madvised() */ void *touch(void *unused) { char *ptr = (char *)huge_ptr; + if (sigsetjmp(sigbuf, 1)) + return NULL; + for (int i = 0; i < INLOOP_ITER; i++) ptr[0] = '.'; @@ -30,7 +46,7 @@ void *madv(void *unused) usleep(rand() % 10); for (int i = 0; i < INLOOP_ITER; i++) - madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED); + madvise(huge_ptr, huge_page_size, MADV_DONTNEED); return NULL; } @@ -44,9 +60,23 @@ int main(void) * interactions */ int max = 10000; + int err; + + ksft_print_header(); + ksft_set_plan(1); srand(getpid()); + if (signal(SIGBUS, signal_handler) == SIG_ERR) + ksft_exit_skip("Could not register signal handler."); + + huge_page_size = default_huge_page_size(); + if (!huge_page_size) + ksft_exit_skip("Could not detect default hugetlb page size."); + + ksft_print_msg("[INFO] detected default hugetlb page size: %zu KiB\n", + huge_page_size / 1024); + free_hugepages = get_free_hugepages(); if (free_hugepages != 1) { ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n", @@ -54,7 +84,7 @@ int main(void) } while (max--) { - huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE, + huge_ptr = mmap(NULL, huge_page_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); @@ -66,8 +96,14 @@ int main(void) pthread_join(thread1, NULL); pthread_join(thread2, NULL); - munmap(huge_ptr, MMAP_SIZE); + munmap(huge_ptr, huge_page_size); } - return KSFT_PASS; + ksft_test_result(!sigbus_triggered, "SIGBUS behavior\n"); + + err = ksft_get_fail_cnt(); + if (err) + ksft_exit_fail_msg("%d out of %d tests failed\n", + err, ksft_test_num()); + ksft_exit_pass(); } diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c index 56d4480e8d3c..8a4d34cce36b 100644 --- a/tools/testing/selftests/mm/khugepaged.c +++ b/tools/testing/selftests/mm/khugepaged.c @@ -1091,7 +1091,7 @@ static void usage(void) fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n"); fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n"); fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n"); - fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n"); + fprintf(stderr, "\tmounted with huge=advise option for khugepaged tests to work\n"); fprintf(stderr, "\n\tSupported Options:\n"); fprintf(stderr, "\t\t-h: This help message.\n"); fprintf(stderr, "\t\t-s: mTHP size, expressed as page order.\n"); diff --git a/tools/testing/selftests/mm/page_frag/Makefile b/tools/testing/selftests/mm/page_frag/Makefile new file mode 100644 index 000000000000..8c8bb39ffa28 --- /dev/null +++ b/tools/testing/selftests/mm/page_frag/Makefile @@ -0,0 +1,18 @@ +PAGE_FRAG_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= /lib/modules/$(shell uname -r)/build + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +MODULES = page_frag_test.ko + +obj-m += page_frag_test.o + +all: + +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(PAGE_FRAG_TEST_DIR) clean diff --git a/tools/testing/selftests/mm/page_frag/page_frag_test.c b/tools/testing/selftests/mm/page_frag/page_frag_test.c new file mode 100644 index 000000000000..e806c1866e36 --- /dev/null +++ b/tools/testing/selftests/mm/page_frag/page_frag_test.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Test module for page_frag cache + * + * Copyright (C) 2024 Yunsheng Lin <linyunsheng@huawei.com> + */ + +#include <linux/module.h> +#include <linux/cpumask.h> +#include <linux/completion.h> +#include <linux/ptr_ring.h> +#include <linux/kthread.h> +#include <linux/page_frag_cache.h> + +#define TEST_FAILED_PREFIX "page_frag_test failed: " + +static struct ptr_ring ptr_ring; +static int nr_objs = 512; +static atomic_t nthreads; +static struct completion wait; +static struct page_frag_cache test_nc; +static int test_popped; +static int test_pushed; +static bool force_exit; + +static int nr_test = 2000000; +module_param(nr_test, int, 0); +MODULE_PARM_DESC(nr_test, "number of iterations to test"); + +static bool test_align; +module_param(test_align, bool, 0); +MODULE_PARM_DESC(test_align, "use align API for testing"); + +static int test_alloc_len = 2048; +module_param(test_alloc_len, int, 0); +MODULE_PARM_DESC(test_alloc_len, "alloc len for testing"); + +static int test_push_cpu; +module_param(test_push_cpu, int, 0); +MODULE_PARM_DESC(test_push_cpu, "test cpu for pushing fragment"); + +static int test_pop_cpu; +module_param(test_pop_cpu, int, 0); +MODULE_PARM_DESC(test_pop_cpu, "test cpu for popping fragment"); + +static int page_frag_pop_thread(void *arg) +{ + struct ptr_ring *ring = arg; + + pr_info("page_frag pop test thread begins on cpu %d\n", + smp_processor_id()); + + while (test_popped < nr_test) { + void *obj = __ptr_ring_consume(ring); + + if (obj) { + test_popped++; + page_frag_free(obj); + } else { + if (force_exit) + break; + + cond_resched(); + } + } + + if (atomic_dec_and_test(&nthreads)) + complete(&wait); + + pr_info("page_frag pop test thread exits on cpu %d\n", + smp_processor_id()); + + return 0; +} + +static int page_frag_push_thread(void *arg) +{ + struct ptr_ring *ring = arg; + + pr_info("page_frag push test thread begins on cpu %d\n", + smp_processor_id()); + + while (test_pushed < nr_test && !force_exit) { + void *va; + int ret; + + if (test_align) { + va = page_frag_alloc_align(&test_nc, test_alloc_len, + GFP_KERNEL, SMP_CACHE_BYTES); + + if ((unsigned long)va & (SMP_CACHE_BYTES - 1)) { + force_exit = true; + WARN_ONCE(true, TEST_FAILED_PREFIX "unaligned va returned\n"); + } + } else { + va = page_frag_alloc(&test_nc, test_alloc_len, GFP_KERNEL); + } + + if (!va) + continue; + + ret = __ptr_ring_produce(ring, va); + if (ret) { + page_frag_free(va); + cond_resched(); + } else { + test_pushed++; + } + } + + pr_info("page_frag push test thread exits on cpu %d\n", + smp_processor_id()); + + if (atomic_dec_and_test(&nthreads)) + complete(&wait); + + return 0; +} + +static int __init page_frag_test_init(void) +{ + struct task_struct *tsk_push, *tsk_pop; + int last_pushed = 0, last_popped = 0; + ktime_t start; + u64 duration; + int ret; + + page_frag_cache_init(&test_nc); + atomic_set(&nthreads, 2); + init_completion(&wait); + + if (test_alloc_len > PAGE_SIZE || test_alloc_len <= 0 || + !cpu_active(test_push_cpu) || !cpu_active(test_pop_cpu)) + return -EINVAL; + + ret = ptr_ring_init(&ptr_ring, nr_objs, GFP_KERNEL); + if (ret) + return ret; + + tsk_push = kthread_create_on_cpu(page_frag_push_thread, &ptr_ring, + test_push_cpu, "page_frag_push"); + if (IS_ERR(tsk_push)) + return PTR_ERR(tsk_push); + + tsk_pop = kthread_create_on_cpu(page_frag_pop_thread, &ptr_ring, + test_pop_cpu, "page_frag_pop"); + if (IS_ERR(tsk_pop)) { + kthread_stop(tsk_push); + return PTR_ERR(tsk_pop); + } + + start = ktime_get(); + wake_up_process(tsk_push); + wake_up_process(tsk_pop); + + pr_info("waiting for test to complete\n"); + + while (!wait_for_completion_timeout(&wait, msecs_to_jiffies(10000))) { + /* exit if there is no progress for push or pop size */ + if (last_pushed == test_pushed || last_popped == test_popped) { + WARN_ONCE(true, TEST_FAILED_PREFIX "no progress\n"); + force_exit = true; + continue; + } + + last_pushed = test_pushed; + last_popped = test_popped; + pr_info("page_frag_test progress: pushed = %d, popped = %d\n", + test_pushed, test_popped); + } + + if (force_exit) { + pr_err(TEST_FAILED_PREFIX "exit with error\n"); + goto out; + } + + duration = (u64)ktime_us_delta(ktime_get(), start); + pr_info("%d of iterations for %s testing took: %lluus\n", nr_test, + test_align ? "aligned" : "non-aligned", duration); + +out: + ptr_ring_cleanup(&ptr_ring, NULL); + page_frag_cache_drain(&test_nc); + + return -EAGAIN; +} + +static void __exit page_frag_test_exit(void) +{ +} + +module_init(page_frag_test_init); +module_exit(page_frag_test_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Yunsheng Lin <linyunsheng@huawei.com>"); +MODULE_DESCRIPTION("Test module for page_frag"); diff --git a/tools/testing/selftests/mm/pkey-arm64.h b/tools/testing/selftests/mm/pkey-arm64.h index 580e1b0bb38e..d9d2100eafc0 100644 --- a/tools/testing/selftests/mm/pkey-arm64.h +++ b/tools/testing/selftests/mm/pkey-arm64.h @@ -31,6 +31,7 @@ #define NR_RESERVED_PKEYS 1 /* pkey-0 */ #define PKEY_ALLOW_ALL 0x77777777 +#define PKEY_REG_ALLOW_NONE 0x0 #define PKEY_BITS_PER_PKEY 4 #define PAGE_SIZE sysconf(_SC_PAGESIZE) @@ -126,7 +127,7 @@ static inline u64 get_pkey_bits(u64 reg, int pkey) return 0; } -static void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey) +static inline void aarch64_write_signal_pkey(ucontext_t *uctxt, u64 pkey) { struct _aarch64_ctx *ctx = GET_UC_RESV_HEAD(uctxt); struct poe_context *poe_ctx = diff --git a/tools/testing/selftests/mm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h index 9ab6a3ee153b..f7cfe163b0ff 100644 --- a/tools/testing/selftests/mm/pkey-helpers.h +++ b/tools/testing/selftests/mm/pkey-helpers.h @@ -112,6 +112,13 @@ void record_pkey_malloc(void *ptr, long size, int prot); #define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE) #endif +/* + * FIXME: Remove once the generic PKEY_UNRESTRICTED definition is merged. + */ +#ifndef PKEY_UNRESTRICTED +#define PKEY_UNRESTRICTED 0x0 +#endif + #ifndef set_pkey_bits static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags) { diff --git a/tools/testing/selftests/mm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h index 5f28e26a2511..ac91777c8917 100644 --- a/tools/testing/selftests/mm/pkey-x86.h +++ b/tools/testing/selftests/mm/pkey-x86.h @@ -34,6 +34,8 @@ #define PAGE_SIZE 4096 #define MB (1<<20) +#define PKEY_REG_ALLOW_NONE 0x55555555 + static inline void __page_o_noops(void) { /* 8-bytes of instruction * 512 bytes = 1 page */ diff --git a/tools/testing/selftests/mm/pkey_sighandler_tests.c b/tools/testing/selftests/mm/pkey_sighandler_tests.c index a8088b645ad6..c593a426341c 100644 --- a/tools/testing/selftests/mm/pkey_sighandler_tests.c +++ b/tools/testing/selftests/mm/pkey_sighandler_tests.c @@ -11,6 +11,7 @@ */ #define _GNU_SOURCE #define __SANE_USERSPACE_TYPES__ +#include <linux/mman.h> #include <errno.h> #include <sys/syscall.h> #include <string.h> @@ -59,12 +60,58 @@ long syscall_raw(long n, long a1, long a2, long a3, long a4, long a5, long a6) : "=a"(ret) : "a"(n), "b"(a1), "c"(a2), "d"(a3), "S"(a4), "D"(a5) : "memory"); +#elif defined __aarch64__ + register long x0 asm("x0") = a1; + register long x1 asm("x1") = a2; + register long x2 asm("x2") = a3; + register long x3 asm("x3") = a4; + register long x4 asm("x4") = a5; + register long x5 asm("x5") = a6; + register long x8 asm("x8") = n; + asm volatile ("svc #0" + : "=r"(x0) + : "r"(x0), "r"(x1), "r"(x2), "r"(x3), "r"(x4), "r"(x5), "r"(x8) + : "memory"); + ret = x0; #else # error syscall_raw() not implemented #endif return ret; } +static inline long clone_raw(unsigned long flags, void *stack, + int *parent_tid, int *child_tid) +{ + long a1 = flags; + long a2 = (long)stack; + long a3 = (long)parent_tid; +#if defined(__x86_64__) || defined(__i386) + long a4 = (long)child_tid; + long a5 = 0; +#elif defined(__aarch64__) + long a4 = 0; + long a5 = (long)child_tid; +#else +# error clone_raw() not implemented +#endif + + return syscall_raw(SYS_clone, a1, a2, a3, a4, a5, 0); +} + +/* + * Returns the most restrictive pkey register value that can be used by the + * tests. + */ +static inline u64 pkey_reg_restrictive_default(void) +{ + /* + * Disallow everything except execution on pkey 0, so that each caller + * doesn't need to enable it explicitly (the selftest code runs with + * its code mapped with pkey 0). + */ + return set_pkey_bits(PKEY_REG_ALLOW_NONE, 0, PKEY_DISABLE_ACCESS); +} + static void sigsegv_handler(int signo, siginfo_t *info, void *ucontext) { pthread_mutex_lock(&mutex); @@ -113,7 +160,7 @@ static void raise_sigusr2(void) static void *thread_segv_with_pkey0_disabled(void *ptr) { /* Disable MPK 0 (and all others too) */ - __write_pkey_reg(0x55555555); + __write_pkey_reg(pkey_reg_restrictive_default()); /* Segfault (with SEGV_MAPERR) */ *(int *) (0x1) = 1; @@ -123,7 +170,7 @@ static void *thread_segv_with_pkey0_disabled(void *ptr) static void *thread_segv_pkuerr_stack(void *ptr) { /* Disable MPK 0 (and all others too) */ - __write_pkey_reg(0x55555555); + __write_pkey_reg(pkey_reg_restrictive_default()); /* After we disable MPK 0, we can't access the stack to return */ return NULL; @@ -133,6 +180,7 @@ static void *thread_segv_maperr_ptr(void *ptr) { stack_t *stack = ptr; int *bad = (int *)1; + u64 pkey_reg; /* * Setup alternate signal stack, which should be pkey_mprotect()ed by @@ -142,7 +190,9 @@ static void *thread_segv_maperr_ptr(void *ptr) syscall_raw(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0); /* Disable MPK 0. Only MPK 1 is enabled. */ - __write_pkey_reg(0x55555551); + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 1, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); /* Segfault */ *bad = 1; @@ -240,6 +290,7 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) int pkey; int parent_pid = 0; int child_pid = 0; + u64 pkey_reg; sa.sa_flags = SA_SIGINFO | SA_ONSTACK; @@ -257,7 +308,10 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) assert(stack != MAP_FAILED); /* Allow access to MPK 0 and MPK 1 */ - __write_pkey_reg(0x55555550); + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 1, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); /* Protect the new stack with MPK 1 */ pkey = pkey_alloc(0, 0); @@ -272,14 +326,13 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) memset(&siginfo, 0, sizeof(siginfo)); /* Use clone to avoid newer glibcs using rseq on new threads */ - long ret = syscall_raw(SYS_clone, - CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | - CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | - CLONE_DETACHED, - (long) ((char *)(stack) + STACK_SIZE), - (long) &parent_pid, - (long) &child_pid, 0, 0); + long ret = clone_raw(CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | + CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | + CLONE_DETACHED, + stack + STACK_SIZE, + &parent_pid, + &child_pid); if (ret < 0) { errno = -ret; @@ -307,7 +360,13 @@ static void test_sigsegv_handler_with_different_pkey_for_stack(void) static void test_pkru_preserved_after_sigusr1(void) { struct sigaction sa; - unsigned long pkru = 0x45454544; + u64 pkey_reg; + + /* Allow access to MPK 0 and an arbitrary set of keys */ + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 3, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 7, PKEY_UNRESTRICTED); sa.sa_flags = SA_SIGINFO; @@ -320,7 +379,7 @@ static void test_pkru_preserved_after_sigusr1(void) memset(&siginfo, 0, sizeof(siginfo)); - __write_pkey_reg(pkru); + __write_pkey_reg(pkey_reg); raise(SIGUSR1); @@ -330,7 +389,7 @@ static void test_pkru_preserved_after_sigusr1(void) pthread_mutex_unlock(&mutex); /* Ensure the pkru value is the same after returning from signal. */ - ksft_test_result(pkru == __read_pkey_reg() && + ksft_test_result(pkey_reg == __read_pkey_reg() && siginfo.si_signo == SIGUSR1, "%s\n", __func__); } @@ -347,6 +406,7 @@ static noinline void *thread_sigusr2_self(void *ptr) 'S', 'I', 'G', 'U', 'S', 'R', '2', '.', '.', '.', '\n', '\0'}; stack_t *stack = ptr; + u64 pkey_reg; /* * Setup alternate signal stack, which should be pkey_mprotect()ed by @@ -356,7 +416,9 @@ static noinline void *thread_sigusr2_self(void *ptr) syscall(SYS_sigaltstack, (long)stack, 0, 0, 0, 0, 0); /* Disable MPK 0. Only MPK 2 is enabled. */ - __write_pkey_reg(0x55555545); + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 2, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); raise_sigusr2(); @@ -384,6 +446,7 @@ static void test_pkru_sigreturn(void) int pkey; int parent_pid = 0; int child_pid = 0; + u64 pkey_reg; sa.sa_handler = SIG_DFL; sa.sa_flags = 0; @@ -418,7 +481,10 @@ static void test_pkru_sigreturn(void) * the current thread's stack is protected by the default MPK 0. Hence * both need to be enabled. */ - __write_pkey_reg(0x55555544); + pkey_reg = pkey_reg_restrictive_default(); + pkey_reg = set_pkey_bits(pkey_reg, 0, PKEY_UNRESTRICTED); + pkey_reg = set_pkey_bits(pkey_reg, 2, PKEY_UNRESTRICTED); + __write_pkey_reg(pkey_reg); /* Protect the stack with MPK 2 */ pkey = pkey_alloc(0, 0); @@ -431,14 +497,13 @@ static void test_pkru_sigreturn(void) sigstack.ss_size = STACK_SIZE; /* Use clone to avoid newer glibcs using rseq on new threads */ - long ret = syscall_raw(SYS_clone, - CLONE_VM | CLONE_FS | CLONE_FILES | - CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | - CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | - CLONE_DETACHED, - (long) ((char *)(stack) + STACK_SIZE), - (long) &parent_pid, - (long) &child_pid, 0, 0); + long ret = clone_raw(CLONE_VM | CLONE_FS | CLONE_FILES | + CLONE_SIGHAND | CLONE_THREAD | CLONE_SYSVSEM | + CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | + CLONE_DETACHED, + stack + STACK_SIZE, + &parent_pid, + &child_pid); if (ret < 0) { errno = -ret; diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index c5797ad1d37b..2fc290d9430c 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -75,6 +75,8 @@ separated by spaces: read-only VMAs - mdwe test prctl(PR_SET_MDWE, ...) +- page_frag + test handling of page fragment allocation and freeing example: ./run_vmtests.sh -t "hmm mmap ksm" EOF @@ -347,10 +349,12 @@ if [ $VADDR64 -ne 0 ]; then # allows high virtual address allocation requests independent # of platform's physical memory. - prev_policy=$(cat /proc/sys/vm/overcommit_memory) - echo 1 > /proc/sys/vm/overcommit_memory - CATEGORY="hugevm" run_test ./virtual_address_range - echo $prev_policy > /proc/sys/vm/overcommit_memory + if [ -x ./virtual_address_range ]; then + prev_policy=$(cat /proc/sys/vm/overcommit_memory) + echo 1 > /proc/sys/vm/overcommit_memory + CATEGORY="hugevm" run_test ./virtual_address_range + echo $prev_policy > /proc/sys/vm/overcommit_memory + fi # va high address boundary switch test ARCH_ARM64="arm64" @@ -456,6 +460,12 @@ CATEGORY="mkdirty" run_test ./mkdirty CATEGORY="mdwe" run_test ./mdwe_test +CATEGORY="page_frag" run_test ./test_page_frag.sh smoke + +CATEGORY="page_frag" run_test ./test_page_frag.sh aligned + +CATEGORY="page_frag" run_test ./test_page_frag.sh nonaligned + echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix echo "1..${count_total}" | tap_output diff --git a/tools/testing/selftests/mm/test_page_frag.sh b/tools/testing/selftests/mm/test_page_frag.sh new file mode 100755 index 000000000000..f55b105084cf --- /dev/null +++ b/tools/testing/selftests/mm/test_page_frag.sh @@ -0,0 +1,175 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (C) 2024 Yunsheng Lin <linyunsheng@huawei.com> +# Copyright (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com> +# +# This is a test script for the kernel test driver to test the +# correctness and performance of page_frag's implementation. +# Therefore it is just a kernel module loader. You can specify +# and pass different parameters in order to: +# a) analyse performance of page fragment allocations; +# b) stressing and stability check of page_frag subsystem. + +DRIVER="./page_frag/page_frag_test.ko" +CPU_LIST=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2) +TEST_CPU_0=$(echo $CPU_LIST | awk '{print $1}') + +if [ $(echo $CPU_LIST | wc -w) -gt 1 ]; then + TEST_CPU_1=$(echo $CPU_LIST | awk '{print $2}') + NR_TEST=100000000 +else + TEST_CPU_1=$TEST_CPU_0 + NR_TEST=1000000 +fi + +# 1 if fails +exitcode=1 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +check_test_failed_prefix() { + if dmesg | grep -q 'page_frag_test failed:';then + echo "page_frag_test failed, please check dmesg" + exit $exitcode + fi +} + +# +# Static templates for testing of page_frag APIs. +# Also it is possible to pass any supported parameters manually. +# +SMOKE_PARAM="test_push_cpu=$TEST_CPU_0 test_pop_cpu=$TEST_CPU_1" +NONALIGNED_PARAM="$SMOKE_PARAM test_alloc_len=75 nr_test=$NR_TEST" +ALIGNED_PARAM="$NONALIGNED_PARAM test_align=1" + +check_test_requirements() +{ + uid=$(id -u) + if [ $uid -ne 0 ]; then + echo "$0: Must be run as root" + exit $ksft_skip + fi + + if ! which insmod > /dev/null 2>&1; then + echo "$0: You need insmod installed" + exit $ksft_skip + fi + + if [ ! -f $DRIVER ]; then + echo "$0: You need to compile page_frag_test module" + exit $ksft_skip + fi +} + +run_nonaligned_check() +{ + echo "Run performance tests to evaluate how fast nonaligned alloc API is." + + insmod $DRIVER $NONALIGNED_PARAM > /dev/null 2>&1 +} + +run_aligned_check() +{ + echo "Run performance tests to evaluate how fast aligned alloc API is." + + insmod $DRIVER $ALIGNED_PARAM > /dev/null 2>&1 +} + +run_smoke_check() +{ + echo "Run smoke test." + + insmod $DRIVER $SMOKE_PARAM > /dev/null 2>&1 +} + +usage() +{ + echo -n "Usage: $0 [ aligned ] | [ nonaligned ] | | [ smoke ] | " + echo "manual parameters" + echo + echo "Valid tests and parameters:" + echo + modinfo $DRIVER + echo + echo "Example usage:" + echo + echo "# Shows help message" + echo "$0" + echo + echo "# Smoke testing" + echo "$0 smoke" + echo + echo "# Performance testing for nonaligned alloc API" + echo "$0 nonaligned" + echo + echo "# Performance testing for aligned alloc API" + echo "$0 aligned" + echo + exit 0 +} + +function validate_passed_args() +{ + VALID_ARGS=`modinfo $DRIVER | awk '/parm:/ {print $2}' | sed 's/:.*//'` + + # + # Something has been passed, check it. + # + for passed_arg in $@; do + key=${passed_arg//=*/} + valid=0 + + for valid_arg in $VALID_ARGS; do + if [[ $key = $valid_arg ]]; then + valid=1 + break + fi + done + + if [[ $valid -ne 1 ]]; then + echo "Error: key is not correct: ${key}" + exit $exitcode + fi + done +} + +function run_manual_check() +{ + # + # Validate passed parameters. If there is wrong one, + # the script exists and does not execute further. + # + validate_passed_args $@ + + echo "Run the test with following parameters: $@" + insmod $DRIVER $@ > /dev/null 2>&1 +} + +function run_test() +{ + if [ $# -eq 0 ]; then + usage + else + if [[ "$1" = "smoke" ]]; then + run_smoke_check + elif [[ "$1" = "nonaligned" ]]; then + run_nonaligned_check + elif [[ "$1" = "aligned" ]]; then + run_aligned_check + else + run_manual_check $@ + fi + fi + + check_test_failed_prefix + + echo "Done." + echo "Check the kernel ring buffer to see the summary." +} + +check_test_requirements +run_test $@ + +exit 0 diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c index b3d21eed203d..a2e71b1636e7 100644 --- a/tools/testing/selftests/mm/uffd-unit-tests.c +++ b/tools/testing/selftests/mm/uffd-unit-tests.c @@ -241,6 +241,8 @@ static void *fork_event_consumer(void *data) fork_event_args *args = data; struct uffd_msg msg = { 0 }; + ready_for_fork = true; + /* Read until a full msg received */ while (uffd_read_msg(args->parent_uffd, &msg)); @@ -308,8 +310,11 @@ static int pagemap_test_fork(int uffd, bool with_event, bool test_pin) /* Prepare a thread to resolve EVENT_FORK */ if (with_event) { + ready_for_fork = false; if (pthread_create(&thread, NULL, fork_event_consumer, &args)) err("pthread_create()"); + while (!ready_for_fork) + ; /* Wait for the poll_thread to start executing before forking */ } child = fork(); diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index 4e4c1e311247..2a2b69e91950 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -64,7 +64,7 @@ #define NR_CHUNKS_HIGH NR_CHUNKS_384TB #endif -static char *hind_addr(void) +static char *hint_addr(void) { int bits = HIGH_ADDR_SHIFT + rand() % (63 - HIGH_ADDR_SHIFT); @@ -185,7 +185,7 @@ int main(int argc, char *argv[]) } for (i = 0; i < NR_CHUNKS_HIGH; i++) { - hint = hind_addr(); + hint = hint_addr(); hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index c6a8c732b802..68801e1a9ec2 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -1414,6 +1414,13 @@ TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid) ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0); ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0); + ASSERT_EQ(mount("testing", "/mnt/A", "ramfs", MS_NOATIME | MS_NODEV, + "size=100000,mode=700"), 0); + + ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0); + + ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0); + open_tree_fd = sys_open_tree(-EBADF, "/mnt/A", AT_RECURSIVE | AT_EMPTY_PATH | @@ -1433,6 +1440,8 @@ TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid) ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0); ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0); ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0); + + (void)umount2("/mnt/A", MNT_DETACH); } TEST_F(mount_setattr, mount_attr_nosymfollow) diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 1c04c780db66..28a715a8ef2b 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -2,6 +2,7 @@ bind_bhash bind_timewait bind_wildcard +busy_poller cmsg_sender diag_uid epoll_busy_poll @@ -16,8 +17,9 @@ ipsec ipv6_flowlabel ipv6_flowlabel_mgr log.txt +msg_oob msg_zerocopy -ncdevmem +netlink-dumps nettest psock_fanout psock_snd diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 649f1fe0dc46..3d487b03c4a0 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -78,6 +78,7 @@ TEST_PROGS += test_vxlan_vnifiltering.sh TEST_GEN_FILES += io_uring_zerocopy_tx TEST_PROGS += io_uring_zerocopy_tx.sh TEST_GEN_FILES += bind_bhash +TEST_GEN_PROGS += netlink-dumps TEST_GEN_PROGS += sk_bind_sendto_listen TEST_GEN_PROGS += sk_connect_zero_addr TEST_GEN_PROGS += sk_so_peek_off @@ -92,14 +93,15 @@ TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh -TEST_PROGS += fdb_flush.sh +TEST_PROGS += fdb_flush.sh fdb_notify.sh TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh TEST_PROGS += bpf_offload.py +TEST_PROGS += ipv6_route_update_soft_lockup.sh +TEST_PROGS += busy_poll_test.sh # YNL files, must be before "include ..lib.mk" -EXTRA_CLEAN += $(OUTPUT)/libynl.a -YNL_GEN_FILES := ncdevmem +YNL_GEN_FILES := busy_poller TEST_GEN_FILES += $(YNL_GEN_FILES) TEST_FILES := settings diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py index 3efe44f6e92a..d10f420e4ef6 100755 --- a/tools/testing/selftests/net/bpf_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -594,8 +594,9 @@ def check_extack_nsim(output, reference, args): check_extack(output, "netdevsim: " + reference, args) def check_no_extack(res, needle): - fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"), - "Found '%s' in command output, leaky extack?" % (needle)) + haystack = (res[1] + res[2]).strip() + fail(haystack.count(needle) or haystack.count("Warning:"), + "Unexpected command output, leaky extack? ('%s', '%s')" % (needle, haystack)) def check_verifier_log(output, reference): lines = output.split("\n") diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh new file mode 100755 index 000000000000..7db292ec4884 --- /dev/null +++ b/tools/testing/selftests/net/busy_poll_test.sh @@ -0,0 +1,165 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source net_helper.sh + +NSIM_SV_ID=$((256 + RANDOM % 256)) +NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID +NSIM_CL_ID=$((512 + RANDOM % 256)) +NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device +NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device +NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device + +SERVER_IP=192.168.1.1 +CLIENT_IP=192.168.1.2 +SERVER_PORT=48675 + +# busy poll config +MAX_EVENTS=8 +BUSY_POLL_USECS=0 +BUSY_POLL_BUDGET=16 +PREFER_BUSY_POLL=1 + +# IRQ deferral config +NAPI_DEFER_HARD_IRQS=100 +GRO_FLUSH_TIMEOUT=50000 +SUSPEND_TIMEOUT=20000000 + +setup_ns() +{ + set -e + ip netns add nssv + ip netns add nscl + + NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_SV_SYS/net -exec basename {} \;) + NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_CL_SYS/net -exec basename {} \;) + + # ensure the server has 1 queue + ethtool -L $NSIM_SV_NAME combined 1 2>/dev/null + + ip link set $NSIM_SV_NAME netns nssv + ip link set $NSIM_CL_NAME netns nscl + + ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME + ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME + + ip netns exec nssv ip link set dev $NSIM_SV_NAME up + ip netns exec nscl ip link set dev $NSIM_CL_NAME up + + set +e +} + +cleanup_ns() +{ + ip netns del nscl + ip netns del nssv +} + +test_busypoll() +{ + suspend_value=${1:-0} + tmp_file=$(mktemp) + out_file=$(mktemp) + + # fill a test file with random data + dd if=/dev/urandom of=${tmp_file} bs=1M count=1 2> /dev/null + + timeout -k 1s 30s ip netns exec nssv ./busy_poller \ + -p${SERVER_PORT} \ + -b${SERVER_IP} \ + -m${MAX_EVENTS} \ + -u${BUSY_POLL_USECS} \ + -P${PREFER_BUSY_POLL} \ + -g${BUSY_POLL_BUDGET} \ + -i${NSIM_SV_IFIDX} \ + -s${suspend_value} \ + -o${out_file}& + + wait_local_port_listen nssv ${SERVER_PORT} tcp + + ip netns exec nscl socat -u $tmp_file TCP:${SERVER_IP}:${SERVER_PORT} + + wait + + tmp_file_md5sum=$(md5sum $tmp_file | cut -f1 -d' ') + out_file_md5sum=$(md5sum $out_file | cut -f1 -d' ') + + if [ "$tmp_file_md5sum" = "$out_file_md5sum" ]; then + res=0 + else + echo "md5sum mismatch" + echo "input file md5sum: ${tmp_file_md5sum}"; + echo "output file md5sum: ${out_file_md5sum}"; + res=1 + fi + + rm $out_file $tmp_file + + return $res +} + +test_busypoll_with_suspend() +{ + test_busypoll ${SUSPEND_TIMEOUT} + + return $? +} + +### +### Code start +### + +modprobe netdevsim + +# linking + +echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW +echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW +udevadm settle + +setup_ns + +NSIM_SV_FD=$((256 + RANDOM % 256)) +exec {NSIM_SV_FD}</var/run/netns/nssv +NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex) + +NSIM_CL_FD=$((256 + RANDOM % 256)) +exec {NSIM_CL_FD}</var/run/netns/nscl +NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex) + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \ + $NSIM_DEV_SYS_LINK + +if [ $? -ne 0 ]; then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup_ns + exit 1 +fi + +test_busypoll +if [ $? -ne 0 ]; then + echo "test_busypoll failed" + cleanup_ns + exit 1 +fi + +test_busypoll_with_suspend +if [ $? -ne 0 ]; then + echo "test_busypoll_with_suspend failed" + cleanup_ns + exit 1 +fi + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK + +echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL + +cleanup_ns + +modprobe -r netdevsim + +exit 0 diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c new file mode 100644 index 000000000000..99b0e8c17fca --- /dev/null +++ b/tools/testing/selftests/net/busy_poller.c @@ -0,0 +1,346 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <inttypes.h> +#include <limits.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <ynl.h> + +#include <arpa/inet.h> +#include <netinet/in.h> + +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include <linux/genetlink.h> +#include <linux/netlink.h> + +#include "netdev-user.h" + +/* The below ifdef blob is required because: + * + * - sys/epoll.h does not (yet) have the ioctl definitions included. So, + * systems with older glibcs will not have them available. However, + * sys/epoll.h does include the type definition for epoll_data, which is + * needed by the user program (e.g. epoll_event.data.fd) + * + * - linux/eventpoll.h does not define the epoll_data type, it is simply an + * opaque __u64. It does, however, include the ioctl definition. + * + * Including both headers is impossible (types would be redefined), so I've + * opted instead to take sys/epoll.h, and include the blob below. + * + * Someday, when glibc is globally up to date, the blob below can be removed. + */ +#if !defined(EPOLL_IOC_TYPE) +struct epoll_params { + uint32_t busy_poll_usecs; + uint16_t busy_poll_budget; + uint8_t prefer_busy_poll; + + /* pad the struct to a multiple of 64bits */ + uint8_t __pad; +}; + +#define EPOLL_IOC_TYPE 0x8A +#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params) +#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) +#endif + +static uint32_t cfg_port = 8000; +static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY }; +static char *cfg_outfile; +static int cfg_max_events = 8; +static int cfg_ifindex; + +/* busy poll params */ +static uint32_t cfg_busy_poll_usecs; +static uint32_t cfg_busy_poll_budget; +static uint32_t cfg_prefer_busy_poll; + +/* IRQ params */ +static uint32_t cfg_defer_hard_irqs; +static uint64_t cfg_gro_flush_timeout; +static uint64_t cfg_irq_suspend_timeout; + +static void usage(const char *filepath) +{ + error(1, 0, + "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>", + filepath); +} + +static void parse_opts(int argc, char **argv) +{ + int ret; + int c; + + if (argc <= 1) + usage(argv[0]); + + while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) { + switch (c) { + case 'u': + cfg_busy_poll_usecs = strtoul(optarg, NULL, 0); + if (cfg_busy_poll_usecs == ULONG_MAX || + cfg_busy_poll_usecs > UINT32_MAX) + error(1, ERANGE, "busy_poll_usecs too large"); + break; + case 'P': + cfg_prefer_busy_poll = strtoul(optarg, NULL, 0); + if (cfg_prefer_busy_poll == ULONG_MAX || + cfg_prefer_busy_poll > 1) + error(1, ERANGE, + "prefer busy poll should be 0 or 1"); + break; + case 'g': + cfg_busy_poll_budget = strtoul(optarg, NULL, 0); + if (cfg_busy_poll_budget == ULONG_MAX || + cfg_busy_poll_budget > UINT16_MAX) + error(1, ERANGE, + "busy poll budget must be [0, UINT16_MAX]"); + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + if (cfg_port > UINT16_MAX) + error(1, ERANGE, "port must be <= 65535"); + break; + case 'b': + ret = inet_aton(optarg, &cfg_bind_addr); + if (ret == 0) + error(1, errno, + "bind address %s invalid", optarg); + break; + case 'o': + cfg_outfile = strdup(optarg); + if (!cfg_outfile) + error(1, 0, "outfile invalid"); + break; + case 'm': + cfg_max_events = strtol(optarg, NULL, 0); + + if (cfg_max_events == LONG_MIN || + cfg_max_events == LONG_MAX || + cfg_max_events <= 0) + error(1, ERANGE, + "max events must be > 0 and < LONG_MAX"); + break; + case 'd': + cfg_defer_hard_irqs = strtoul(optarg, NULL, 0); + + if (cfg_defer_hard_irqs == ULONG_MAX || + cfg_defer_hard_irqs > INT32_MAX) + error(1, ERANGE, + "defer_hard_irqs must be <= INT32_MAX"); + break; + case 'r': + cfg_gro_flush_timeout = strtoull(optarg, NULL, 0); + + if (cfg_gro_flush_timeout == ULLONG_MAX) + error(1, ERANGE, + "gro_flush_timeout must be < ULLONG_MAX"); + break; + case 's': + cfg_irq_suspend_timeout = strtoull(optarg, NULL, 0); + + if (cfg_irq_suspend_timeout == ULLONG_MAX) + error(1, ERANGE, + "irq_suspend_timeout must be < ULLONG_MAX"); + break; + case 'i': + cfg_ifindex = strtoul(optarg, NULL, 0); + if (cfg_ifindex == ULONG_MAX) + error(1, ERANGE, + "ifindex must be < ULONG_MAX"); + break; + } + } + + if (!cfg_ifindex) + usage(argv[0]); + + if (optind != argc) + usage(argv[0]); +} + +static void epoll_ctl_add(int epfd, int fd, uint32_t events) +{ + struct epoll_event ev; + + ev.events = events; + ev.data.fd = fd; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1) + error(1, errno, "epoll_ctl add fd: %d", fd); +} + +static void setnonblock(int sockfd) +{ + int flags; + + flags = fcntl(sockfd, F_GETFL, 0); + + if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1) + error(1, errno, "unable to set socket to nonblocking mode"); +} + +static void write_chunk(int fd, char *buf, ssize_t buflen) +{ + ssize_t remaining = buflen; + char *buf_offset = buf; + ssize_t writelen = 0; + ssize_t write_result; + + while (writelen < buflen) { + write_result = write(fd, buf_offset, remaining); + if (write_result == -1) + error(1, errno, "unable to write data to outfile"); + + writelen += write_result; + remaining -= write_result; + buf_offset += write_result; + } +} + +static void setup_queue(void) +{ + struct netdev_napi_get_list *napi_list = NULL; + struct netdev_napi_get_req_dump *req = NULL; + struct netdev_napi_set_req *set_req = NULL; + struct ynl_sock *ys; + struct ynl_error yerr; + uint32_t napi_id; + + ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!ys) + error(1, 0, "YNL: %s", yerr.msg); + + req = netdev_napi_get_req_dump_alloc(); + netdev_napi_get_req_dump_set_ifindex(req, cfg_ifindex); + napi_list = netdev_napi_get_dump(ys, req); + + /* assume there is 1 NAPI configured and take the first */ + if (napi_list->obj._present.id) + napi_id = napi_list->obj.id; + else + error(1, 0, "napi ID not present?"); + + set_req = netdev_napi_set_req_alloc(); + netdev_napi_set_req_set_id(set_req, napi_id); + netdev_napi_set_req_set_defer_hard_irqs(set_req, cfg_defer_hard_irqs); + netdev_napi_set_req_set_gro_flush_timeout(set_req, + cfg_gro_flush_timeout); + netdev_napi_set_req_set_irq_suspend_timeout(set_req, + cfg_irq_suspend_timeout); + + if (netdev_napi_set(ys, set_req)) + error(1, 0, "can't set NAPI params: %s\n", yerr.msg); + + netdev_napi_get_list_free(napi_list); + netdev_napi_get_req_dump_free(req); + netdev_napi_set_req_free(set_req); + ynl_sock_destroy(ys); +} + +static void run_poller(void) +{ + struct epoll_event events[cfg_max_events]; + struct epoll_params epoll_params = {0}; + struct sockaddr_in server_addr; + int i, epfd, nfds; + ssize_t readlen; + int outfile_fd; + char buf[1024]; + int sockfd; + int conn; + int val; + + outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644); + if (outfile_fd == -1) + error(1, errno, "unable to open outfile: %s", cfg_outfile); + + sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sockfd == -1) + error(1, errno, "unable to create listen socket"); + + server_addr.sin_family = AF_INET; + server_addr.sin_port = htons(cfg_port); + server_addr.sin_addr = cfg_bind_addr; + + /* these values are range checked during parse_opts, so casting is safe + * here + */ + epoll_params.busy_poll_usecs = cfg_busy_poll_usecs; + epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget; + epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll; + epoll_params.__pad = 0; + + val = 1; + if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val))) + error(1, errno, "poller setsockopt reuseaddr"); + + setnonblock(sockfd); + + if (bind(sockfd, (struct sockaddr *)&server_addr, + sizeof(struct sockaddr_in))) + error(0, errno, "poller bind to port: %d\n", cfg_port); + + if (listen(sockfd, 1)) + error(1, errno, "poller listen"); + + epfd = epoll_create1(0); + if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1) + error(1, errno, "unable to set busy poll params"); + + epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET); + + for (;;) { + nfds = epoll_wait(epfd, events, cfg_max_events, -1); + for (i = 0; i < nfds; i++) { + if (events[i].data.fd == sockfd) { + conn = accept(sockfd, NULL, NULL); + if (conn == -1) + error(1, errno, + "accepting incoming connection failed"); + + setnonblock(conn); + epoll_ctl_add(epfd, conn, + EPOLLIN | EPOLLET | EPOLLRDHUP | + EPOLLHUP); + } else if (events[i].events & EPOLLIN) { + for (;;) { + readlen = read(events[i].data.fd, buf, + sizeof(buf)); + if (readlen > 0) + write_chunk(outfile_fd, buf, + readlen); + else + break; + } + } else { + /* spurious event ? */ + } + if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) { + epoll_ctl(epfd, EPOLL_CTL_DEL, + events[i].data.fd, NULL); + close(events[i].data.fd); + close(outfile_fd); + return; + } + } + } +} + +int main(int argc, char *argv[]) +{ + parse_opts(argc, argv); + setup_queue(); + run_poller(); + return 0; +} diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh index 7c4818c971fc..507d0a82f5f0 100755 --- a/tools/testing/selftests/net/drop_monitor_tests.sh +++ b/tools/testing/selftests/net/drop_monitor_tests.sh @@ -77,7 +77,7 @@ sw_drops_test() rm ${dir}/packets.pcap - { kill %% && wait %%; } 2>/dev/null + kill_process %% timeout 5 dwdump -o sw -w ${dir}/packets.pcap (( $(tshark -r ${dir}/packets.pcap \ -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) == 0)) diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh new file mode 100755 index 000000000000..c03151e7791c --- /dev/null +++ b/tools/testing/selftests/net/fdb_notify.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +ALL_TESTS=" + test_dup_bridge + test_dup_vxlan_self + test_dup_vxlan_master + test_dup_macvlan_self + test_dup_macvlan_master +" + +do_test_dup() +{ + local op=$1; shift + local what=$1; shift + local tmpf + + RET=0 + + tmpf=$(mktemp) + defer rm "$tmpf" + + defer_scope_push + bridge monitor fdb &> "$tmpf" & + defer kill_process $! + + sleep 0.5 + bridge fdb "$op" 00:11:22:33:44:55 vlan 1 "$@" + sleep 0.5 + defer_scope_pop + + local count=$(grep -c -e 00:11:22:33:44:55 $tmpf) + ((count == 1)) + check_err $? "Got $count notifications, expected 1" + + log_test "$what $op: Duplicate notifications" +} + +test_dup_bridge() +{ + ip_link_add br up type bridge vlan_filtering 1 + do_test_dup add "bridge" dev br self + do_test_dup del "bridge" dev br self +} + +test_dup_vxlan_self() +{ + ip_link_add br up type bridge vlan_filtering 1 + ip_link_add vx up type vxlan id 2000 dstport 4789 + ip_link_master vx br + + do_test_dup add "vxlan" dev vx self dst 192.0.2.1 + do_test_dup del "vxlan" dev vx self dst 192.0.2.1 +} + +test_dup_vxlan_master() +{ + ip_link_add br up type bridge vlan_filtering 1 + ip_link_add vx up type vxlan id 2000 dstport 4789 + ip_link_master vx br + + do_test_dup add "vxlan master" dev vx master + do_test_dup del "vxlan master" dev vx master +} + +test_dup_macvlan_self() +{ + ip_link_add dd up type dummy + ip_link_add mv up link dd type macvlan mode passthru + + do_test_dup add "macvlan self" dev mv self + do_test_dup del "macvlan self" dev mv self +} + +test_dup_macvlan_master() +{ + ip_link_add br up type bridge vlan_filtering 1 + ip_link_add dd up type dummy + ip_link_add mv up link dd type macvlan mode passthru + ip_link_master mv br + + do_test_dup add "macvlan master" dev mv self + do_test_dup del "macvlan master" dev mv self +} + +cleanup() +{ + defer_scopes_cleanup +} + +trap cleanup EXIT +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 5f3c28fc8624..3ea6f886a210 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -689,7 +689,7 @@ fib6_notify_test() log_test $ret 0 "ipv6 route add notify" - { kill %% && wait %%; } 2>/dev/null + kill_process %% #rm errors.txt @@ -736,7 +736,7 @@ fib_notify_test() log_test $ret 0 "ipv4 route add notify" - { kill %% && wait %%; } 2>/dev/null + kill_process %% rm errors.txt @@ -2328,7 +2328,7 @@ ipv4_mangle_test() $IP route del table 123 172.16.101.0/24 dev veth1 $IP rule del pref 100 - { kill %% && wait %%; } 2>/dev/null + kill_process %% rm $tmp_file route_cleanup @@ -2386,7 +2386,7 @@ ipv6_mangle_test() $IP -6 route del table 123 2001:db8:101::/64 dev veth1 $IP -6 rule del pref 100 - { kill %% && wait %%; } 2>/dev/null + kill_process %% rm $tmp_file route_cleanup diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 224346426ef2..7d885cff8d79 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -126,6 +126,7 @@ TEST_FILES := devlink_lib.sh \ tc_common.sh TEST_INCLUDES := \ - ../lib.sh + ../lib.sh \ + $(wildcard ../lib/sh/*.sh) include ../../lib.mk diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 62a05bca1e82..18afa89ebbcc 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -501,7 +501,7 @@ devlink_trap_drop_cleanup() local pref=$1; shift local handle=$1; shift - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower } diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh index 96c97064f2d3..becc7c3fc809 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_flat gre_mtu_change + gre_flat_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change } +gre_flat_remote_change() +{ + flat_remote_change + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 (new remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 (new remote)" + + flat_remote_restore + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 (old remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh index ff9fb0db9bd1..e5335116a2fd 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_flat gre_mtu_change + gre_flat_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change } +gre_flat_remote_change() +{ + flat_remote_change + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key (new remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key (new remote)" + + flat_remote_restore + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key (old remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh index 12c138785242..7e0cbfdefab0 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_flat gre_mtu_change + gre_flat_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change gre } +gre_flat_remote_change() +{ + flat_remote_change + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey (new remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey (new remote)" + + flat_remote_restore + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey (old remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh index 83b55c30a5c3..e0844495f3d1 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_hier gre_mtu_change + gre_hier_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change gre } +gre_hier_remote_change() +{ + hier_remote_change + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 (new remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 (new remote)" + + hier_remote_restore + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 (old remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh index 256607916d92..741bc9c928eb 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_hier gre_mtu_change + gre_hier_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change gre } +gre_hier_remote_change() +{ + hier_remote_change + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key (new remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key (new remote)" + + hier_remote_restore + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key (old remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh index ad1bcd6334a8..ad9eab4b1367 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_hier gre_mtu_change + gre_hier_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change gre } +gre_hier_remote_change() +{ + hier_remote_change + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey (new remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey (new remote)" + + hier_remote_restore + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey (old remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh index 24f4ab328bd2..2d91281dc5b7 100644 --- a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh @@ -436,3 +436,83 @@ test_mtu_change() check_err $? log_test "ping GRE IPv6, packet size 1800 after MTU change" } + +topo_flat_remote_change() +{ + local old1=$1; shift + local new1=$1; shift + local old2=$1; shift + local new2=$1; shift + + ip link set dev g1a type ip6gre local $new1 remote $new2 + __addr_add_del g1a add "$new1/128" + __addr_add_del g1a del "$old1/128" + ip -6 route add $new2/128 via 2001:db8:10::2 + ip -6 route del $old2/128 + + ip link set dev g2a type ip6gre local $new2 remote $new1 + __addr_add_del g2a add "$new2/128" + __addr_add_del g2a del "$old2/128" + ip -6 route add vrf v$ol2 $new1/128 via 2001:db8:10::1 + ip -6 route del vrf v$ol2 $old1/128 +} + +flat_remote_change() +{ + local old1=2001:db8:3::1 + local new1=2001:db8:3::10 + local old2=2001:db8:3::2 + local new2=2001:db8:3::20 + + topo_flat_remote_change $old1 $new1 $old2 $new2 +} + +flat_remote_restore() +{ + local old1=2001:db8:3::10 + local new1=2001:db8:3::1 + local old2=2001:db8:3::20 + local new2=2001:db8:3::2 + + topo_flat_remote_change $old1 $new1 $old2 $new2 +} + +topo_hier_remote_change() +{ + local old1=$1; shift + local new1=$1; shift + local old2=$1; shift + local new2=$1; shift + + __addr_add_del dummy1 del "$old1/64" + __addr_add_del dummy1 add "$new1/64" + ip link set dev g1a type ip6gre local $new1 remote $new2 + ip -6 route add vrf v$ul1 $new2/128 via 2001:db8:10::2 + ip -6 route del vrf v$ul1 $old2/128 + + __addr_add_del dummy2 del "$old2/64" + __addr_add_del dummy2 add "$new2/64" + ip link set dev g2a type ip6gre local $new2 remote $new1 + ip -6 route add vrf v$ul2 $new1/128 via 2001:db8:10::1 + ip -6 route del vrf v$ul2 $old1/128 +} + +hier_remote_change() +{ + local old1=2001:db8:3::1 + local new1=2001:db8:3::10 + local old2=2001:db8:3::2 + local new2=2001:db8:3::20 + + topo_hier_remote_change $old1 $new1 $old2 $new2 +} + +hier_remote_restore() +{ + local old1=2001:db8:3::10 + local new1=2001:db8:3::1 + local old2=2001:db8:3::20 + local new2=2001:db8:3::2 + + topo_hier_remote_change $old1 $new1 $old2 $new2 +} diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index c992e385159c..7337f398f9cc 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -48,7 +48,6 @@ declare -A NETIFS=( : "${WAIT_TIME:=5}" # Whether to pause on, respectively, after a failure and before cleanup. -: "${PAUSE_ON_FAIL:=no}" : "${PAUSE_ON_CLEANUP:=no}" # Whether to create virtual interfaces, and what netdevice type they should be. @@ -446,191 +445,6 @@ done ############################################################################## # Helpers -# Exit status to return at the end. Set in case one of the tests fails. -EXIT_STATUS=0 -# Per-test return value. Clear at the beginning of each test. -RET=0 - -ret_set_ksft_status() -{ - local ksft_status=$1; shift - local msg=$1; shift - - RET=$(ksft_status_merge $RET $ksft_status) - if (( $? )); then - retmsg=$msg - fi -} - -# Whether FAILs should be interpreted as XFAILs. Internal. -FAIL_TO_XFAIL= - -check_err() -{ - local err=$1 - local msg=$2 - - if ((err)); then - if [[ $FAIL_TO_XFAIL = yes ]]; then - ret_set_ksft_status $ksft_xfail "$msg" - else - ret_set_ksft_status $ksft_fail "$msg" - fi - fi -} - -check_fail() -{ - local err=$1 - local msg=$2 - - check_err $((!err)) "$msg" -} - -check_err_fail() -{ - local should_fail=$1; shift - local err=$1; shift - local what=$1; shift - - if ((should_fail)); then - check_fail $err "$what succeeded, but should have failed" - else - check_err $err "$what failed" - fi -} - -xfail() -{ - FAIL_TO_XFAIL=yes "$@" -} - -xfail_on_slow() -{ - if [[ $KSFT_MACHINE_SLOW = yes ]]; then - FAIL_TO_XFAIL=yes "$@" - else - "$@" - fi -} - -omit_on_slow() -{ - if [[ $KSFT_MACHINE_SLOW != yes ]]; then - "$@" - fi -} - -xfail_on_veth() -{ - local dev=$1; shift - local kind - - kind=$(ip -j -d link show dev $dev | - jq -r '.[].linkinfo.info_kind') - if [[ $kind = veth ]]; then - FAIL_TO_XFAIL=yes "$@" - else - "$@" - fi -} - -log_test_result() -{ - local test_name=$1; shift - local opt_str=$1; shift - local result=$1; shift - local retmsg=$1; shift - - printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" - if [[ $retmsg ]]; then - printf "\t%s\n" "$retmsg" - fi -} - -pause_on_fail() -{ - if [[ $PAUSE_ON_FAIL == yes ]]; then - echo "Hit enter to continue, 'q' to quit" - read a - [[ $a == q ]] && exit 1 - fi -} - -handle_test_result_pass() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" " OK " -} - -handle_test_result_fail() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" FAIL "$retmsg" - pause_on_fail -} - -handle_test_result_xfail() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" XFAIL "$retmsg" - pause_on_fail -} - -handle_test_result_skip() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" SKIP "$retmsg" -} - -log_test() -{ - local test_name=$1 - local opt_str=$2 - - if [[ $# -eq 2 ]]; then - opt_str="($opt_str)" - fi - - if ((RET == ksft_pass)); then - handle_test_result_pass "$test_name" "$opt_str" - elif ((RET == ksft_xfail)); then - handle_test_result_xfail "$test_name" "$opt_str" - elif ((RET == ksft_skip)); then - handle_test_result_skip "$test_name" "$opt_str" - else - handle_test_result_fail "$test_name" "$opt_str" - fi - - EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET) - return $RET -} - -log_test_skip() -{ - RET=$ksft_skip retmsg= log_test "$@" -} - -log_test_xfail() -{ - RET=$ksft_xfail retmsg= log_test "$@" -} - -log_info() -{ - local msg=$1 - - echo "INFO: $msg" -} - not() { "$@" @@ -1398,13 +1212,10 @@ matchall_sink_create() action drop } -tests_run() +cleanup() { - local current_test - - for current_test in ${TESTS:-$ALL_TESTS}; do - $current_test - done + pre_cleanup + defer_scopes_cleanup } multipath_eval() @@ -1761,8 +1572,9 @@ start_tcp_traffic() stop_traffic() { - # Suppress noise from killing mausezahn. - { kill %% && wait %%; } 2>/dev/null + local pid=${1-%%}; shift + + kill_process "$pid" } declare -A cappid diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh index 9e677aa64a06..694ece9ba3a7 100755 --- a/tools/testing/selftests/net/forwarding/no_forwarding.sh +++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh @@ -202,7 +202,7 @@ one_bridge_two_pvids() ip link set $swp2 master br0 bridge vlan add dev $swp1 vid 1 pvid untagged - bridge vlan add dev $swp1 vid 2 pvid untagged + bridge vlan add dev $swp2 vid 2 pvid untagged run_test "Switch ports in VLAN-aware bridge with different PVIDs" diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh index e60c8b4818cc..1f6f53e284b5 100755 --- a/tools/testing/selftests/net/forwarding/sch_ets.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets.sh @@ -24,15 +24,10 @@ switch_create() # Create a bottleneck so that the DWRR process can kick in. tc qdisc add dev $swp2 root handle 1: tbf \ rate 1Gbit burst 1Mbit latency 100ms + defer tc qdisc del dev $swp2 root PARENT="parent 1:" } -switch_destroy() -{ - ets_switch_destroy - tc qdisc del dev $swp2 root -} - # Callback from sch_ets_tests.sh collect_stats() { diff --git a/tools/testing/selftests/net/forwarding/sch_ets_core.sh b/tools/testing/selftests/net/forwarding/sch_ets_core.sh index f906fcc66572..8f9922c695b0 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_core.sh @@ -166,44 +166,32 @@ h1_create() local i; simple_if_init $h1 + defer simple_if_fini $h1 + mtu_set $h1 9900 + defer mtu_restore $h1 + for i in {0..2}; do vlan_create $h1 1$i v$h1 $(sip $i)/28 + defer vlan_destroy $h1 1$i ip link set dev $h1.1$i type vlan egress 0:$i done } -h1_destroy() -{ - local i - - for i in {0..2}; do - vlan_destroy $h1 1$i - done - mtu_restore $h1 - simple_if_fini $h1 -} - h2_create() { local i simple_if_init $h2 - mtu_set $h2 9900 - for i in {0..2}; do - vlan_create $h2 1$i v$h2 $(dip $i)/28 - done -} + defer simple_if_fini $h2 -h2_destroy() -{ - local i + mtu_set $h2 9900 + defer mtu_restore $h2 for i in {0..2}; do - vlan_destroy $h2 1$i + vlan_create $h2 1$i v$h2 $(dip $i)/28 + defer vlan_destroy $h2 1$i done - mtu_restore $h2 - simple_if_fini $h2 } ets_switch_create() @@ -211,44 +199,45 @@ ets_switch_create() local i ip link set dev $swp1 up + defer ip link set dev $swp1 down + mtu_set $swp1 9900 + defer mtu_restore $swp1 ip link set dev $swp2 up + defer ip link set dev $swp2 down + mtu_set $swp2 9900 + defer mtu_restore $swp2 for i in {0..2}; do vlan_create $swp1 1$i + defer vlan_destroy $swp1 1$i ip link set dev $swp1.1$i type vlan ingress 0:0 1:1 2:2 vlan_create $swp2 1$i + defer vlan_destroy $swp2 1$i ip link add dev br1$i type bridge + defer ip link del dev br1$i + ip link set dev $swp1.1$i master br1$i + defer ip link set dev $swp1.1$i nomaster + ip link set dev $swp2.1$i master br1$i + defer ip link set dev $swp2.1$i nomaster ip link set dev br1$i up - ip link set dev $swp1.1$i up - ip link set dev $swp2.1$i up - done -} + defer ip link set dev br1$i down -ets_switch_destroy() -{ - local i - - ets_delete_qdisc + ip link set dev $swp1.1$i up + defer ip link set dev $swp1.1$i down - for i in {0..2}; do - ip link del dev br1$i - vlan_destroy $swp2 1$i - vlan_destroy $swp1 1$i + ip link set dev $swp2.1$i up + defer ip link set dev $swp2.1$i down done - mtu_restore $swp2 - ip link set dev $swp2 down - - mtu_restore $swp1 - ip link set dev $swp1 down + defer ets_delete_qdisc } setup_prepare() @@ -263,23 +252,13 @@ setup_prepare() hut=$h2 vrf_prepare + defer vrf_cleanup h1_create h2_create switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1.10 $(dip 0) " vlan 10" diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh index f9d26a7911bb..08240d3e3c87 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -90,6 +90,7 @@ __ets_dwrr_test() for stream in ${streams[@]}; do ets_start_traffic $stream + defer stop_traffic $! done sleep 10 @@ -120,25 +121,24 @@ __ets_dwrr_test() ${d[0]} ${d[$i]} fi done - - for stream in ${streams[@]}; do - stop_traffic - done } ets_dwrr_test_012() { - __ets_dwrr_test 0 1 2 + in_defer_scope \ + __ets_dwrr_test 0 1 2 } ets_dwrr_test_01() { - __ets_dwrr_test 0 1 + in_defer_scope \ + __ets_dwrr_test 0 1 } ets_dwrr_test_12() { - __ets_dwrr_test 1 2 + in_defer_scope \ + __ets_dwrr_test 1 2 } ets_qdisc_setup() diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh index 17f28644568e..af166662b78a 100755 --- a/tools/testing/selftests/net/forwarding/sch_red.sh +++ b/tools/testing/selftests/net/forwarding/sch_red.sh @@ -53,71 +53,63 @@ PKTSZ=1400 h1_create() { simple_if_init $h1 192.0.2.1/28 + defer simple_if_fini $h1 192.0.2.1/28 + mtu_set $h1 10000 + defer mtu_restore $h1 + tc qdisc replace dev $h1 root handle 1: tbf \ rate 10Mbit burst 10K limit 1M -} - -h1_destroy() -{ - tc qdisc del dev $h1 root - mtu_restore $h1 - simple_if_fini $h1 192.0.2.1/28 + defer tc qdisc del dev $h1 root } h2_create() { simple_if_init $h2 192.0.2.2/28 - mtu_set $h2 10000 -} + defer simple_if_fini $h2 192.0.2.2/28 -h2_destroy() -{ - mtu_restore $h2 - simple_if_fini $h2 192.0.2.2/28 + mtu_set $h2 10000 + defer mtu_restore $h2 } h3_create() { simple_if_init $h3 192.0.2.3/28 - mtu_set $h3 10000 -} + defer simple_if_fini $h3 192.0.2.3/28 -h3_destroy() -{ - mtu_restore $h3 - simple_if_fini $h3 192.0.2.3/28 + mtu_set $h3 10000 + defer mtu_restore $h3 } switch_create() { ip link add dev br up type bridge + defer ip link del dev br + ip link set dev $swp1 up master br + defer ip link set dev $swp1 down nomaster + ip link set dev $swp2 up master br + defer ip link set dev $swp2 down nomaster + ip link set dev $swp3 up master br + defer ip link set dev $swp3 down nomaster mtu_set $swp1 10000 + defer mtu_restore $h1 + mtu_set $swp2 10000 + defer mtu_restore $h2 + mtu_set $swp3 10000 + defer mtu_restore $h3 tc qdisc replace dev $swp3 root handle 1: tbf \ rate 10Mbit burst 10K limit 1M - ip link add name _drop_test up type dummy -} + defer tc qdisc del dev $swp3 root -switch_destroy() -{ - ip link del dev _drop_test - tc qdisc del dev $swp3 root - - mtu_restore $h3 - mtu_restore $h2 - mtu_restore $h1 - - ip link set dev $swp3 down nomaster - ip link set dev $swp2 down nomaster - ip link set dev $swp1 down nomaster - ip link del dev br + ip link add name _drop_test up type dummy + defer ip link del dev _drop_test } setup_prepare() @@ -134,6 +126,7 @@ setup_prepare() h3_mac=$(mac_get $h3) vrf_prepare + defer vrf_cleanup h1_create h2_create @@ -141,18 +134,6 @@ setup_prepare() switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h3_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1 192.0.2.3 " from host 1" @@ -287,6 +268,7 @@ do_ecn_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! sleep 1 ecn_test_common "$name" $limit @@ -298,9 +280,6 @@ do_ecn_test() build_backlog $((2 * limit)) udp >/dev/null check_fail $? "UDP traffic went into backlog instead of being early-dropped" log_test "$name backlog > limit: UDP early-dropped" - - stop_traffic - sleep 1 } do_ecn_nodrop_test() @@ -310,6 +289,7 @@ do_ecn_nodrop_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! sleep 1 ecn_test_common "$name" $limit @@ -321,9 +301,6 @@ do_ecn_nodrop_test() build_backlog $((2 * limit)) udp >/dev/null check_err $? "UDP traffic was early-dropped instead of getting into backlog" log_test "$name backlog > limit: UDP not dropped" - - stop_traffic - sleep 1 } do_red_test() @@ -336,6 +313,7 @@ do_red_test() # is above limit. $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! # Pushing below the queue limit should work. RET=0 @@ -352,9 +330,6 @@ do_red_test() pct=$(check_marking "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." log_test "RED backlog > limit" - - stop_traffic - sleep 1 } do_red_qevent_test() @@ -369,6 +344,7 @@ do_red_qevent_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t udp -q & + defer stop_traffic $! sleep 1 tc filter add block 10 pref 1234 handle 102 matchall skip_hw \ @@ -396,9 +372,6 @@ do_red_qevent_test() check_err $? "Dropped packets still observed: 0 expected, $((now - base)) seen" log_test "RED early_dropped packets mirrored" - - stop_traffic - sleep 1 } do_ecn_qevent_test() @@ -410,6 +383,7 @@ do_ecn_qevent_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! sleep 1 tc filter add block 10 pref 1234 handle 102 matchall skip_hw \ @@ -428,9 +402,6 @@ do_ecn_qevent_test() tc filter del block 10 pref 1234 handle 102 matchall log_test "ECN marked packets mirrored" - - stop_traffic - sleep 1 } install_qdisc() @@ -451,36 +422,36 @@ uninstall_qdisc() ecn_test() { install_qdisc ecn + defer uninstall_qdisc xfail_on_slow do_ecn_test $BACKLOG - uninstall_qdisc } ecn_nodrop_test() { install_qdisc ecn nodrop + defer uninstall_qdisc xfail_on_slow do_ecn_nodrop_test $BACKLOG - uninstall_qdisc } red_test() { install_qdisc + defer uninstall_qdisc xfail_on_slow do_red_test $BACKLOG - uninstall_qdisc } red_qevent_test() { install_qdisc qevent early_drop block 10 + defer uninstall_qdisc xfail_on_slow do_red_qevent_test $BACKLOG - uninstall_qdisc } ecn_qevent_test() { install_qdisc ecn qevent mark block 10 + defer uninstall_qdisc xfail_on_slow do_ecn_qevent_test $BACKLOG - uninstall_qdisc } trap cleanup EXIT diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh index 9cd884d4a5de..ec309a5086bc 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh @@ -60,68 +60,65 @@ host_create() local host=$1; shift simple_if_init $dev + defer simple_if_fini $dev + mtu_set $dev 10000 + defer mtu_restore $dev vlan_create $dev 10 v$dev $(ipaddr $host 10)/28 + defer vlan_destroy $dev 10 ip link set dev $dev.10 type vlan egress 0:0 vlan_create $dev 11 v$dev $(ipaddr $host 11)/28 + defer vlan_destroy $dev 11 ip link set dev $dev.11 type vlan egress 0:1 } -host_destroy() -{ - local dev=$1; shift - - vlan_destroy $dev 11 - vlan_destroy $dev 10 - mtu_restore $dev - simple_if_fini $dev -} - h1_create() { host_create $h1 1 } -h1_destroy() -{ - host_destroy $h1 -} - h2_create() { host_create $h2 2 tc qdisc add dev $h2 clsact + defer tc qdisc del dev $h2 clsact + tc filter add dev $h2 ingress pref 1010 prot 802.1q \ flower $TCFLAGS vlan_id 10 action pass tc filter add dev $h2 ingress pref 1011 prot 802.1q \ flower $TCFLAGS vlan_id 11 action pass } -h2_destroy() -{ - tc qdisc del dev $h2 clsact - host_destroy $h2 -} - switch_create() { local intf local vlan ip link add dev br10 type bridge + defer ip link del dev br10 + ip link add dev br11 type bridge + defer ip link del dev br11 for intf in $swp1 $swp2; do ip link set dev $intf up + defer ip link set dev $intf down + mtu_set $intf 10000 + defer mtu_restore $intf for vlan in 10 11; do vlan_create $intf $vlan + defer vlan_destroy $intf $vlan + ip link set dev $intf.$vlan master br$vlan + defer ip link set dev $intf.$vlan nomaster + ip link set dev $intf.$vlan up + defer ip link set dev $intf.$vlan down done done @@ -130,34 +127,10 @@ switch_create() done ip link set dev br10 up - ip link set dev br11 up -} - -switch_destroy() -{ - local intf - local vlan - - # A test may have been interrupted mid-run, with Qdisc installed. Delete - # it here. - tc qdisc del dev $swp2 root 2>/dev/null - - ip link set dev br11 down - ip link set dev br10 down + defer ip link set dev br10 down - for intf in $swp2 $swp1; do - for vlan in 11 10; do - ip link set dev $intf.$vlan down - ip link set dev $intf.$vlan nomaster - vlan_destroy $intf $vlan - done - - mtu_restore $intf - ip link set dev $intf down - done - - ip link del dev br11 - ip link del dev br10 + ip link set dev br11 up + defer ip link set dev br11 down } setup_prepare() @@ -177,23 +150,13 @@ setup_prepare() h2_mac=$(mac_get $h2) vrf_prepare + defer vrf_cleanup h1_create h2_create switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1.10 $(ipaddr 2 10) " vlan 10" @@ -207,18 +170,18 @@ tbf_get_counter() tc_rule_stats_get $h2 10$vlan ingress .bytes } -do_tbf_test() +__tbf_test() { local vlan=$1; shift local mbit=$1; shift start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 2 $vlan) $h2_mac + defer stop_traffic $! sleep 5 # Wait for the burst to dwindle local t2=$(busywait_for_counter 1000 +1 tbf_get_counter $vlan) sleep 10 local t3=$(tbf_get_counter $vlan) - stop_traffic RET=0 @@ -231,3 +194,9 @@ do_tbf_test() log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit" } + +do_tbf_test() +{ + in_defer_scope \ + __tbf_test "$@" +} diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh index df9bcd6a811a..c182a04282bc 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh @@ -30,8 +30,9 @@ tbf_test() # This test is used for both ETS and PRIO. Even though we only need two # bands, PRIO demands a minimum of three. tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0 + defer tc qdisc del dev $swp2 root + tbf_test_one 128K - tc qdisc del dev $swp2 root } tbf_root_test() @@ -42,6 +43,8 @@ tbf_root_test() tc qdisc replace dev $swp2 root handle 1: \ tbf rate 400Mbit burst $bs limit 1M + defer tc qdisc del dev $swp2 root + tc qdisc replace dev $swp2 parent 1:1 handle 10: \ $QDISC 3 priomap 2 1 0 tc qdisc replace dev $swp2 parent 10:3 handle 103: \ @@ -53,8 +56,6 @@ tbf_root_test() do_tbf_test 10 400 $bs do_tbf_test 11 400 $bs - - tc qdisc del dev $swp2 root } if type -t sch_tbf_pre_hook >/dev/null; then diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh index 96c997be0d03..9f20320f8d84 100755 --- a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh @@ -14,13 +14,14 @@ tbf_test_one() tc qdisc replace dev $swp2 root handle 108: tbf \ rate 400Mbit burst $bs limit 1M + defer tc qdisc del dev $swp2 root + do_tbf_test 10 400 $bs } tbf_test() { tbf_test_one 128K - tc qdisc del dev $swp2 root } if type -t sch_tbf_pre_hook >/dev/null; then diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh index 5103f64a71d6..509fdedfcfa1 100755 --- a/tools/testing/selftests/net/forwarding/tc_police.sh +++ b/tools/testing/selftests/net/forwarding/tc_police.sh @@ -148,7 +148,7 @@ police_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower } @@ -198,7 +198,7 @@ police_shared_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% } police_shared_test() @@ -278,7 +278,7 @@ police_mirror_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% tc filter del dev $pol_if $dir protocol ip pref 1 handle 101 flower tc filter del dev $h3 ingress protocol ip pref 1 handle 101 flower tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower @@ -320,7 +320,7 @@ police_pps_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower } diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config index 241542441c51..555a868743f0 100644 --- a/tools/testing/selftests/net/hsr/config +++ b/tools/testing/selftests/net/hsr/config @@ -3,3 +3,4 @@ CONFIG_NET_SCH_NETEM=m CONFIG_HSR=y CONFIG_VETH=y CONFIG_BRIDGE=y +CONFIG_VLAN_8021Q=m diff --git a/tools/testing/selftests/net/hsr/hsr_common.sh b/tools/testing/selftests/net/hsr/hsr_common.sh index 8e97b1f2e7e5..1dc882ac1c74 100644 --- a/tools/testing/selftests/net/hsr/hsr_common.sh +++ b/tools/testing/selftests/net/hsr/hsr_common.sh @@ -15,7 +15,7 @@ do_ping() { local netns="$1" local connect_addr="$2" - local ping_args="-q -c 2" + local ping_args="-q -c 2 -i 0.1" if is_v6 "${connect_addr}"; then $ipv6 || return 0 @@ -36,7 +36,7 @@ do_ping_long() { local netns="$1" local connect_addr="$2" - local ping_args="-q -c 10" + local ping_args="-q -c 10 -i 0.1" if is_v6 "${connect_addr}"; then $ipv6 || return 0 diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index f5d207fc770a..5a65f4f836be 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -175,6 +175,100 @@ setup_hsr_interfaces() ip -net "$ns3" link set hsr3 up } +setup_vlan_interfaces() { + ip -net "$ns1" link add link hsr1 name hsr1.2 type vlan id 2 + ip -net "$ns1" link add link hsr1 name hsr1.3 type vlan id 3 + ip -net "$ns1" link add link hsr1 name hsr1.4 type vlan id 4 + ip -net "$ns1" link add link hsr1 name hsr1.5 type vlan id 5 + + ip -net "$ns2" link add link hsr2 name hsr2.2 type vlan id 2 + ip -net "$ns2" link add link hsr2 name hsr2.3 type vlan id 3 + ip -net "$ns2" link add link hsr2 name hsr2.4 type vlan id 4 + ip -net "$ns2" link add link hsr2 name hsr2.5 type vlan id 5 + + ip -net "$ns3" link add link hsr3 name hsr3.2 type vlan id 2 + ip -net "$ns3" link add link hsr3 name hsr3.3 type vlan id 3 + ip -net "$ns3" link add link hsr3 name hsr3.4 type vlan id 4 + ip -net "$ns3" link add link hsr3 name hsr3.5 type vlan id 5 + + ip -net "$ns1" addr add 100.64.2.1/24 dev hsr1.2 + ip -net "$ns1" addr add 100.64.3.1/24 dev hsr1.3 + ip -net "$ns1" addr add 100.64.4.1/24 dev hsr1.4 + ip -net "$ns1" addr add 100.64.5.1/24 dev hsr1.5 + + ip -net "$ns2" addr add 100.64.2.2/24 dev hsr2.2 + ip -net "$ns2" addr add 100.64.3.2/24 dev hsr2.3 + ip -net "$ns2" addr add 100.64.4.2/24 dev hsr2.4 + ip -net "$ns2" addr add 100.64.5.2/24 dev hsr2.5 + + ip -net "$ns3" addr add 100.64.2.3/24 dev hsr3.2 + ip -net "$ns3" addr add 100.64.3.3/24 dev hsr3.3 + ip -net "$ns3" addr add 100.64.4.3/24 dev hsr3.4 + ip -net "$ns3" addr add 100.64.5.3/24 dev hsr3.5 + + ip -net "$ns1" link set dev hsr1.2 up + ip -net "$ns1" link set dev hsr1.3 up + ip -net "$ns1" link set dev hsr1.4 up + ip -net "$ns1" link set dev hsr1.5 up + + ip -net "$ns2" link set dev hsr2.2 up + ip -net "$ns2" link set dev hsr2.3 up + ip -net "$ns2" link set dev hsr2.4 up + ip -net "$ns2" link set dev hsr2.5 up + + ip -net "$ns3" link set dev hsr3.2 up + ip -net "$ns3" link set dev hsr3.3 up + ip -net "$ns3" link set dev hsr3.4 up + ip -net "$ns3" link set dev hsr3.5 up + +} + +hsr_vlan_ping() { + do_ping "$ns1" 100.64.2.2 + do_ping "$ns1" 100.64.3.2 + do_ping "$ns1" 100.64.4.2 + do_ping "$ns1" 100.64.5.2 + + do_ping "$ns1" 100.64.2.3 + do_ping "$ns1" 100.64.3.3 + do_ping "$ns1" 100.64.4.3 + do_ping "$ns1" 100.64.5.3 + + do_ping "$ns2" 100.64.2.1 + do_ping "$ns2" 100.64.3.1 + do_ping "$ns2" 100.64.4.1 + do_ping "$ns2" 100.64.5.1 + + do_ping "$ns2" 100.64.2.3 + do_ping "$ns2" 100.64.3.3 + do_ping "$ns2" 100.64.4.3 + do_ping "$ns2" 100.64.5.3 + + do_ping "$ns3" 100.64.2.1 + do_ping "$ns3" 100.64.3.1 + do_ping "$ns3" 100.64.4.1 + do_ping "$ns3" 100.64.5.1 + + do_ping "$ns3" 100.64.2.2 + do_ping "$ns3" 100.64.3.2 + do_ping "$ns3" 100.64.4.2 + do_ping "$ns3" 100.64.5.2 +} + +run_vlan_tests() { + vlan_challenged_hsr1=$(ip net exec "$ns1" ethtool -k hsr1 | grep "vlan-challenged" | awk '{print $2}') + vlan_challenged_hsr2=$(ip net exec "$ns2" ethtool -k hsr2 | grep "vlan-challenged" | awk '{print $2}') + vlan_challenged_hsr3=$(ip net exec "$ns3" ethtool -k hsr3 | grep "vlan-challenged" | awk '{print $2}') + + if [[ "$vlan_challenged_hsr1" = "off" || "$vlan_challenged_hsr2" = "off" || "$vlan_challenged_hsr3" = "off" ]]; then + echo "INFO: Running VLAN tests" + setup_vlan_interfaces + hsr_vlan_ping + else + echo "INFO: Not Running VLAN tests as the device does not support VLAN" + fi +} + check_prerequisites setup_ns ns1 ns2 ns3 @@ -183,9 +277,13 @@ trap cleanup_all_ns EXIT setup_hsr_interfaces 0 do_complete_ping_test +run_vlan_tests + setup_ns ns1 ns2 ns3 setup_hsr_interfaces 1 do_complete_ping_test +run_vlan_tests + exit $ret diff --git a/tools/testing/selftests/net/hsr/settings b/tools/testing/selftests/net/hsr/settings new file mode 100644 index 000000000000..0fbc037f2aa8 --- /dev/null +++ b/tools/testing/selftests/net/hsr/settings @@ -0,0 +1 @@ +timeout=50 diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index 12491850ae98..845c26dd01a9 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -3,119 +3,106 @@ # # Author: Justin Iurman <justin.iurman@uliege.be> # -# This script evaluates the IOAM insertion for IPv6 by checking the IOAM data -# consistency directly inside packets on the receiver side. Tests are divided -# into three categories: OUTPUT (evaluates the IOAM processing by the sender), -# INPUT (evaluates the IOAM processing by a receiver) and GLOBAL (evaluates -# wider use cases that do not fall into the other two categories). Both OUTPUT -# and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL -# tests use the entire three-node topology (alpha, beta, gamma). Each test is -# documented inside its own handler in the code below. +# This script evaluates IOAM for IPv6 by checking local IOAM configurations and +# IOAM data inside packets. There are three categories of tests: LOCAL, OUTPUT, +# and INPUT. The former (LOCAL) checks all IOAM related configurations locally +# without sending packets. OUTPUT tests verify the processing of an IOAM +# encapsulating node, while INPUT tests verify the processing of an IOAM transit +# node. Both OUTPUT and INPUT tests send packets. Each test is documented inside +# its own handler. # -# An IOAM domain is configured from Alpha to Gamma but not on the reverse path. -# When either Beta or Gamma is the destination (depending on the test category), -# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop. +# The topology used for OUTPUT and INPUT tests is made of three nodes: +# - Alpha (the IOAM encapsulating node) +# - Beta (the IOAM transit node) +# - Gamma (the receiver) ** # +# An IOAM domain is configured from Alpha to Beta, but not on the reverse path. +# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop. # -# +-------------------+ +-------------------+ -# | | | | -# | Alpha netns | | Gamma netns | -# | | | | -# | +-------------+ | | +-------------+ | -# | | veth0 | | | | veth0 | | -# | | db01::2/64 | | | | db02::2/64 | | -# | +-------------+ | | +-------------+ | -# | . | | . | -# +-------------------+ +-------------------+ -# . . -# . . -# . . -# +----------------------------------------------------+ -# | . . | -# | +-------------+ +-------------+ | -# | | veth0 | | veth1 | | -# | | db01::1/64 | ................ | db02::1/64 | | -# | +-------------+ +-------------+ | -# | | -# | Beta netns | -# | | -# +----------------------------------------------------+ +# ** Gamma is required because ioam6_parser.c uses a packet socket and we need +# to see IOAM data inserted by the very last node (Beta), which would happen +# _after_ we get a copy of the packet on Beta. Note that using an +# IPv6 raw socket with IPV6_RECVHOPOPTS on Beta would not be enough: we also +# need to access the IPv6 header to check some fields (e.g., source and +# destination addresses), which is not possible in that case. As a +# consequence, we need Gamma as a receiver to run ioam6_parser.c which uses a +# packet socket. # # +# +-----------------------+ +-----------------------+ +# | | | | +# | Alpha netns | | Gamma netns | +# | | | | +# | +-------------------+ | | +-------------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:db8:1::2/64 | | | | 2001:db8:2::2/64 | | +# | +-------------------+ | | +-------------------+ | +# | . | | . | +# +-----------.-----------+ +-----------.-----------+ +# . . +# . . +# . . +# +-----------.----------------------------------.-----------+ +# | . . | +# | +-------------------+ +-------------------+ | +# | | veth0 | | veth1 | | +# | | 2001:db8:1::1/64 | ............ | 2001:db8:2::1/64 | | +# | +-------------------+ +-------------------+ | +# | | +# | Beta netns | +# | | +# +----------------------------------------------------------+ # -# ============================================================= -# | Alpha - IOAM configuration | -# +===========================================================+ -# | Node ID | 1 | -# +-----------------------------------------------------------+ -# | Node Wide ID | 11111111 | -# +-----------------------------------------------------------+ -# | Ingress ID | 0xffff (default value) | -# +-----------------------------------------------------------+ -# | Ingress Wide ID | 0xffffffff (default value) | -# +-----------------------------------------------------------+ -# | Egress ID | 101 | -# +-----------------------------------------------------------+ -# | Egress Wide ID | 101101 | -# +-----------------------------------------------------------+ -# | Namespace Data | 0xdeadbee0 | -# +-----------------------------------------------------------+ -# | Namespace Wide Data | 0xcafec0caf00dc0de | -# +-----------------------------------------------------------+ -# | Schema ID | 777 | -# +-----------------------------------------------------------+ -# | Schema Data | something that will be 4n-aligned | -# +-----------------------------------------------------------+ # # -# ============================================================= -# | Beta - IOAM configuration | -# +===========================================================+ -# | Node ID | 2 | -# +-----------------------------------------------------------+ -# | Node Wide ID | 22222222 | -# +-----------------------------------------------------------+ -# | Ingress ID | 201 | -# +-----------------------------------------------------------+ -# | Ingress Wide ID | 201201 | -# +-----------------------------------------------------------+ -# | Egress ID | 202 | -# +-----------------------------------------------------------+ -# | Egress Wide ID | 202202 | -# +-----------------------------------------------------------+ -# | Namespace Data | 0xdeadbee1 | -# +-----------------------------------------------------------+ -# | Namespace Wide Data | 0xcafec0caf11dc0de | -# +-----------------------------------------------------------+ -# | Schema ID | 666 | -# +-----------------------------------------------------------+ -# | Schema Data | Hello there -Obi | -# +-----------------------------------------------------------+ +# +==========================================================+ +# | Alpha - IOAM configuration | +# +=====================+====================================+ +# | Node ID | 1 | +# +---------------------+------------------------------------+ +# | Node Wide ID | 11111111 | +# +---------------------+------------------------------------+ +# | Ingress ID | 0xffff (default value) | +# +---------------------+------------------------------------+ +# | Ingress Wide ID | 0xffffffff (default value) | +# +---------------------+------------------------------------+ +# | Egress ID | 101 | +# +---------------------+------------------------------------+ +# | Egress Wide ID | 101101 | +# +---------------------+------------------------------------+ +# | Namespace Data | 0xdeadbeef | +# +---------------------+------------------------------------+ +# | Namespace Wide Data | 0xcafec0caf00dc0de | +# +---------------------+------------------------------------+ +# | Schema ID | 777 | +# +---------------------+------------------------------------+ +# | Schema Data | something that will be 4n-aligned | +# +---------------------+------------------------------------+ # # -# ============================================================= -# | Gamma - IOAM configuration | -# +===========================================================+ -# | Node ID | 3 | -# +-----------------------------------------------------------+ -# | Node Wide ID | 33333333 | -# +-----------------------------------------------------------+ -# | Ingress ID | 301 | -# +-----------------------------------------------------------+ -# | Ingress Wide ID | 301301 | -# +-----------------------------------------------------------+ -# | Egress ID | 0xffff (default value) | -# +-----------------------------------------------------------+ -# | Egress Wide ID | 0xffffffff (default value) | -# +-----------------------------------------------------------+ -# | Namespace Data | 0xdeadbee2 | -# +-----------------------------------------------------------+ -# | Namespace Wide Data | 0xcafec0caf22dc0de | -# +-----------------------------------------------------------+ -# | Schema ID | 0xffffff (= None) | -# +-----------------------------------------------------------+ -# | Schema Data | | -# +-----------------------------------------------------------+ +# +==========================================================+ +# | Beta - IOAM configuration | +# +=====================+====================================+ +# | Node ID | 2 | +# +---------------------+------------------------------------+ +# | Node Wide ID | 22222222 | +# +---------------------+------------------------------------+ +# | Ingress ID | 201 | +# +---------------------+------------------------------------+ +# | Ingress Wide ID | 201201 | +# +---------------------+------------------------------------+ +# | Egress ID | 202 | +# +---------------------+------------------------------------+ +# | Egress Wide ID | 202202 | +# +---------------------+------------------------------------+ +# | Namespace Data | 0xffffffff (default value) | +# +---------------------+------------------------------------+ +# | Namespace Wide Data | 0xffffffffffffffff (default value) | +# +---------------------+------------------------------------+ +# | Schema ID | 0xffffff (= None) | +# +---------------------+------------------------------------+ +# | Schema Data | | +# +---------------------+------------------------------------+ source lib.sh @@ -128,64 +115,69 @@ source lib.sh ################################################################################ ALPHA=( - 1 # ID - 11111111 # Wide ID - 0xffff # Ingress ID - 0xffffffff # Ingress Wide ID - 101 # Egress ID - 101101 # Egress Wide ID - 0xdeadbee0 # Namespace Data - 0xcafec0caf00dc0de # Namespace Wide Data - 777 # Schema ID (0xffffff = None) - "something that will be 4n-aligned" # Schema Data + 1 # ID + 11111111 # Wide ID + 0xffff # Ingress ID (default value) + 0xffffffff # Ingress Wide ID (default value) + 101 # Egress ID + 101101 # Egress Wide ID + 0xdeadbeef # Namespace Data + 0xcafec0caf00dc0de # Namespace Wide Data + 777 # Schema ID + "something that will be 4n-aligned" # Schema Data ) BETA=( - 2 - 22222222 - 201 - 201201 - 202 - 202202 - 0xdeadbee1 - 0xcafec0caf11dc0de - 666 - "Hello there -Obi" + 2 # ID + 22222222 # Wide ID + 201 # Ingress ID + 201201 # Ingress Wide ID + 202 # Egress ID + 202202 # Egress Wide ID + 0xffffffff # Namespace Data (empty value) + 0xffffffffffffffff # Namespace Wide Data (empty value) + 0xffffff # Schema ID (empty value) + "" # Schema Data (empty value) ) -GAMMA=( - 3 - 33333333 - 301 - 301301 - 0xffff - 0xffffffff - 0xdeadbee2 - 0xcafec0caf22dc0de - 0xffffff - "" -) +TESTS_LOCAL=" + local_sysctl_ioam_id + local_sysctl_ioam_id_wide + local_sysctl_ioam_intf_id + local_sysctl_ioam_intf_id_wide + local_sysctl_ioam_intf_enabled + local_ioam_namespace + local_ioam_schema + local_ioam_schema_namespace + local_route_ns + local_route_tunsrc + local_route_tundst + local_route_trace_type + local_route_trace_size + local_route_trace_type_bits + local_route_trace_size_values +" TESTS_OUTPUT=" - out_undef_ns - out_no_room - out_bits - out_full_supp_trace + output_undef_ns + output_no_room + output_no_room_oss + output_bits + output_sizes + output_full_supp_trace " TESTS_INPUT=" - in_undef_ns - in_no_room - in_oflag - in_bits - in_full_supp_trace + input_undef_ns + input_no_room + input_no_room_oss + input_disabled + input_oflag + input_bits + input_sizes + input_full_supp_trace " -TESTS_GLOBAL=" - fwd_full_supp_trace -" - - ################################################################################ # # # LIBRARY # @@ -194,66 +186,64 @@ TESTS_GLOBAL=" check_kernel_compatibility() { - setup_ns ioam_tmp_node - ip link add name veth0 netns $ioam_tmp_node type veth \ - peer name veth1 netns $ioam_tmp_node + setup_ns ioam_tmp_node &>/dev/null + local ret=$? - ip -netns $ioam_tmp_node link set veth0 up - ip -netns $ioam_tmp_node link set veth1 up + ip link add name veth0 netns $ioam_tmp_node type veth \ + peer name veth1 netns $ioam_tmp_node &>/dev/null + ret=$((ret + $?)) - ip -netns $ioam_tmp_node ioam namespace add 0 - ns_ad=$? + ip -netns $ioam_tmp_node link set veth0 up &>/dev/null + ret=$((ret + $?)) - ip -netns $ioam_tmp_node ioam namespace show | grep -q "namespace 0" - ns_sh=$? + ip -netns $ioam_tmp_node link set veth1 up &>/dev/null + ret=$((ret + $?)) - if [[ $ns_ad != 0 || $ns_sh != 0 ]] + if [ $ret != 0 ] then - echo "SKIP: kernel version probably too old, missing ioam support" - ip link del veth0 2>/dev/null || true - cleanup_ns $ioam_tmp_node || true + echo "SKIP: Setup failed." + cleanup_ns $ioam_tmp_node exit $ksft_skip fi - ip -netns $ioam_tmp_node route add db02::/64 encap ioam6 mode inline \ - trace prealloc type 0x800000 ns 0 size 4 dev veth0 - tr_ad=$? + ip -netns $ioam_tmp_node route add 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 0 size 4 dev veth0 &>/dev/null + ret=$? - ip -netns $ioam_tmp_node -6 route | grep -q "encap ioam6" - tr_sh=$? + ip -netns $ioam_tmp_node -6 route 2>/dev/null | grep -q "encap ioam6" + ret=$((ret + $?)) - if [[ $tr_ad != 0 || $tr_sh != 0 ]] + if [ $ret != 0 ] then - echo "SKIP: cannot attach an ioam trace to a route, did you compile" \ - "without CONFIG_IPV6_IOAM6_LWTUNNEL?" - ip link del veth0 2>/dev/null || true - cleanup_ns $ioam_tmp_node || true + echo "SKIP: Cannot attach an IOAM trace to a route. Was your kernel" \ + "compiled without CONFIG_IPV6_IOAM6_LWTUNNEL? Are you running an" \ + "old kernel? Are you using an old version of iproute2?" + cleanup_ns $ioam_tmp_node exit $ksft_skip fi - ip link del veth0 2>/dev/null || true - cleanup_ns $ioam_tmp_node || true + cleanup_ns $ioam_tmp_node - lsmod | grep -q "ip6_tunnel" + lsmod 2>/dev/null | grep -q "ip6_tunnel" ip6tnl_loaded=$? - if [ $ip6tnl_loaded = 0 ] + if [ $ip6tnl_loaded == 0 ] then encap_tests=0 else modprobe ip6_tunnel &>/dev/null - lsmod | grep -q "ip6_tunnel" + lsmod 2>/dev/null | grep -q "ip6_tunnel" encap_tests=$? if [ $encap_tests != 0 ] then - ip a | grep -q "ip6tnl0" + ip a 2>/dev/null | grep -q "ip6tnl0" encap_tests=$? if [ $encap_tests != 0 ] then echo "Note: ip6_tunnel not found neither as a module nor inside the" \ - "kernel, tests that require it (encap mode) will be omitted" + "kernel. Any tests that require it will be skipped." fi fi fi @@ -261,477 +251,1400 @@ check_kernel_compatibility() cleanup() { - ip link del ioam-veth-alpha 2>/dev/null || true - ip link del ioam-veth-gamma 2>/dev/null || true - - cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma || true + cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma if [ $ip6tnl_loaded != 0 ] then - modprobe -r ip6_tunnel 2>/dev/null || true + modprobe -r ip6_tunnel &>/dev/null fi } setup() { - setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma + setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma &>/dev/null ip link add name ioam-veth-alpha netns $ioam_node_alpha type veth \ - peer name ioam-veth-betaL netns $ioam_node_beta + peer name ioam-veth-betaL netns $ioam_node_beta &>/dev/null ip link add name ioam-veth-betaR netns $ioam_node_beta type veth \ - peer name ioam-veth-gamma netns $ioam_node_gamma - - ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0 - ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0 - ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 - ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 - - ip -netns $ioam_node_alpha addr add db01::2/64 dev veth0 - ip -netns $ioam_node_alpha link set veth0 up - ip -netns $ioam_node_alpha link set lo up - ip -netns $ioam_node_alpha route add db02::/64 via db01::1 dev veth0 - ip -netns $ioam_node_alpha route del db01::/64 - ip -netns $ioam_node_alpha route add db01::/64 dev veth0 - - ip -netns $ioam_node_beta addr add db01::1/64 dev veth0 - ip -netns $ioam_node_beta addr add db02::1/64 dev veth1 - ip -netns $ioam_node_beta link set veth0 up - ip -netns $ioam_node_beta link set veth1 up - ip -netns $ioam_node_beta link set lo up - - ip -netns $ioam_node_gamma addr add db02::2/64 dev veth0 - ip -netns $ioam_node_gamma link set veth0 up - ip -netns $ioam_node_gamma link set lo up - ip -netns $ioam_node_gamma route add db01::/64 via db02::1 dev veth0 - - # - IOAM config - - ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} - ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} - ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} - ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} - ip -netns $ioam_node_alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} - ip -netns $ioam_node_alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}" - ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} - - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.all.forwarding=1 - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} - ip -netns $ioam_node_beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]} - ip -netns $ioam_node_beta ioam schema add ${BETA[8]} "${BETA[9]}" - ip -netns $ioam_node_beta ioam namespace set 123 schema ${BETA[8]} - - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]} - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]} - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]} - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]} - ip -netns $ioam_node_gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]} + peer name ioam-veth-gamma netns $ioam_node_gamma &>/dev/null + + ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0 &>/dev/null + ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0 &>/dev/null + ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 &>/dev/null + ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 &>/dev/null + + ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null + ip -netns $ioam_node_alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null + ip -netns $ioam_node_alpha link set veth0 up &>/dev/null + ip -netns $ioam_node_alpha link set lo up &>/dev/null + ip -netns $ioam_node_alpha route add 2001:db8:2::/64 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + ip -netns $ioam_node_beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null + ip -netns $ioam_node_beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null + ip -netns $ioam_node_beta link set veth0 up &>/dev/null + ip -netns $ioam_node_beta link set veth1 up &>/dev/null + ip -netns $ioam_node_beta link set lo up &>/dev/null + + ip -netns $ioam_node_gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null + ip -netns $ioam_node_gamma link set veth0 up &>/dev/null + ip -netns $ioam_node_gamma link set lo up &>/dev/null + ip -netns $ioam_node_gamma route add 2001:db8:1::/64 \ + via 2001:db8:2::1 dev veth0 &>/dev/null + + # - Alpha: IOAM config - + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} &>/dev/null + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} &>/dev/null + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} &>/dev/null + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} &>/dev/null + ip -netns $ioam_node_alpha \ + ioam schema add ${ALPHA[8]} "${ALPHA[9]}" &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace set 123 schema ${ALPHA[8]} &>/dev/null + + # - Beta: IOAM config - + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.ioam6_id=${BETA[0]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} &>/dev/null + ip -netns $ioam_node_beta ioam namespace add 123 &>/dev/null sleep 1 - ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 db02::2 &>/dev/null + ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null if [ $? != 0 ] then - echo "Setup FAILED" - cleanup &>/dev/null - exit 0 + echo "SKIP: Setup failed." + cleanup + exit $ksft_skip fi } log_test_passed() { - local desc=$1 - printf "TEST: %-60s [ OK ]\n" "${desc}" + printf " - TEST: %-57s [ OK ]\n" "$1" + npassed=$((npassed+1)) } -log_test_failed() +log_test_skipped() { - local desc=$1 - printf "TEST: %-60s [FAIL]\n" "${desc}" + printf " - TEST: %-57s [SKIP]\n" "$1" + nskipped=$((nskipped+1)) } -log_results() +log_test_failed() { - echo "- Tests passed: ${npassed}" - echo "- Tests failed: ${nfailed}" + printf " - TEST: %-57s [FAIL]\n" "$1" + nfailed=$((nfailed+1)) } run_test() { local name=$1 local desc=$2 - local node_src=$3 - local node_dst=$4 - local ip6_dst=$5 - local trace_type=$6 - local ioam_ns=$7 - local type=$8 - - ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type & + local ip6_src=$3 + local trace_type=$4 + local trace_size=$5 + local ioam_ns=$6 + local type=$7 + + ip netns exec $ioam_node_gamma \ + ./ioam6_parser veth0 $name $ip6_src 2001:db8:2::2 \ + $trace_type $trace_size $ioam_ns $type & local spid=$! sleep 0.1 - ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null + ip netns exec $ioam_node_alpha ping6 -t 64 -c 1 -W 1 2001:db8:2::2 &>/dev/null if [ $? != 0 ] then - nfailed=$((nfailed+1)) log_test_failed "${desc}" kill -2 $spid &>/dev/null else wait $spid - if [ $? = 0 ] - then - npassed=$((npassed+1)) - log_test_passed "${desc}" - else - nfailed=$((nfailed+1)) - log_test_failed "${desc}" - fi + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" fi } run() { + local test + + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" + echo + printf "| %-28s LOCAL tests %-29s |" echo - printf "%0.s-" {1..74} + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - echo "OUTPUT tests" - printf "%0.s-" {1..74} + + echo + echo "Global config" + for test in $TESTS_LOCAL + do + $test + done + + echo + echo "Inline mode" + for test in $TESTS_LOCAL + do + $test "inline" + done + + echo + echo "Encap mode" + for test in $TESTS_LOCAL + do + $test "encap" + done + + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" + echo + printf "| %-28s OUTPUT tests %-28s |" + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" echo # set OUTPUT settings - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 &>/dev/null - for t in $TESTS_OUTPUT + echo + echo "Inline mode" + for test in $TESTS_OUTPUT do - $t "inline" - [ $encap_tests = 0 ] && $t "encap" + $test "inline" done - # clean OUTPUT settings - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip -netns $ioam_node_alpha route change db01::/64 dev veth0 + echo + echo "Encap mode" + for test in $TESTS_OUTPUT + do + $test "encap" + done + echo + echo "Encap mode (with tunsrc)" + for test in $TESTS_OUTPUT + do + $test "encap" "tunsrc" + done + + # clean OUTPUT settings + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 &>/dev/null echo - printf "%0.s-" {1..74} + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - echo "INPUT tests" - printf "%0.s-" {1..74} + printf "| %-28s INPUT tests %-29s |" + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" echo # set INPUT settings - ip -netns $ioam_node_alpha ioam namespace del 123 + ip -netns $ioam_node_alpha ioam namespace del 123 &>/dev/null - for t in $TESTS_INPUT + echo + echo "Inline mode" + for test in $TESTS_INPUT do - $t "inline" - [ $encap_tests = 0 ] && $t "encap" + $test "inline" + done + + echo + echo "Encap mode" + for test in $TESTS_INPUT + do + $test "encap" done # clean INPUT settings - ip -netns $ioam_node_alpha ioam namespace add 123 \ - data ${ALPHA[6]} wide ${ALPHA[7]} - ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} - ip -netns $ioam_node_alpha route change db01::/64 dev veth0 + ip -netns $ioam_node_alpha \ + ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace set 123 schema ${ALPHA[8]} &>/dev/null echo - printf "%0.s-" {1..74} + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - echo "GLOBAL tests" - printf "%0.s-" {1..74} + printf "| %-30s Results %-31s |" + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - for t in $TESTS_GLOBAL - do - $t "inline" - [ $encap_tests = 0 ] && $t "encap" - done - echo - log_results + echo "- Passed: ${npassed}" + echo "- Skipped: ${nskipped}" + echo "- Failed: ${nfailed}" + echo } bit2type=( 0x800000 0x400000 0x200000 0x100000 0x080000 0x040000 0x020000 0x010000 0x008000 0x004000 0x002000 0x001000 0x000800 0x000400 0x000200 0x000100 - 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002 + 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002 0x000001 ) -bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 ) +bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 0 ) ################################################################################ # # -# OUTPUT tests # +# LOCAL tests # # # -# Two nodes (sender/receiver), IOAM disabled on ingress for the receiver. # ################################################################################ -out_undef_ns() +local_sysctl_ioam_id() +{ + ############################################################################## + # Make sure the sysctl "net.ipv6.ioam6_id" works as expected. # + ############################################################################## + local desc="Sysctl net.ipv6.ioam6_id" + + [ ! -z $1 ] && return + + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.ioam6_id 2>/dev/null | grep -wq ${ALPHA[0]} + + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_sysctl_ioam_id_wide() { ############################################################################## - # Make sure that the encap node won't fill the trace if the chosen IOAM # - # namespace is not configured locally. # + # Make sure the sysctl "net.ipv6.ioam6_id_wide" works as expected. # ############################################################################## - local desc="Unknown IOAM namespace" + local desc="Sysctl net.ipv6.ioam6_id_wide" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + [ ! -z $1 ] && return - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0x800000 ns 0 size 4 dev veth0 + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.ioam6_id_wide 2>/dev/null | grep -wq ${ALPHA[1]} - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0x800000 0 $1 + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down +local_sysctl_ioam_intf_id() +{ + ############################################################################## + # Make sure the sysctl "net.ipv6.conf.XX.ioam6_id" works as expected. # + ############################################################################## + local desc="Sysctl net.ipv6.conf.XX.ioam6_id" + + [ ! -z $1 ] && return + + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.conf.veth0.ioam6_id 2>/dev/null | grep -wq ${ALPHA[4]} + + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" } -out_no_room() +local_sysctl_ioam_intf_id_wide() { ############################################################################## - # Make sure that the encap node won't fill the trace and will set the # - # Overflow flag since there is no room enough for its data. # + # Make sure the sysctl "net.ipv6.conf.XX.ioam6_id_wide" works as expected. # ############################################################################## - local desc="Missing trace room" + local desc="Sysctl net.ipv6.conf.XX.ioam6_id_wide" + + [ ! -z $1 ] && return + + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.conf.veth0.ioam6_id_wide 2>/dev/null | grep -wq ${ALPHA[5]} + + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up +local_sysctl_ioam_intf_enabled() +{ + ############################################################################## + # Make sure the sysctl "net.ipv6.conf.XX.ioam6_enabled" works as expected. # + ############################################################################## + local desc="Sysctl net.ipv6.conf.XX.ioam6_enabled" - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + [ ! -z $1 ] && return - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xc00000 123 $1 + ip netns exec $ioam_node_beta \ + sysctl net.ipv6.conf.veth0.ioam6_enabled 2>/dev/null | grep -wq 1 - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" } -out_bits() +local_ioam_namespace() { ############################################################################## - # Make sure that, for each trace type bit, the encap node will either: # - # (i) fill the trace with its data when it is a supported bit # - # (ii) not fill the trace with its data when it is an unsupported bit # + # Make sure the creation of an IOAM Namespace works as expected. # ############################################################################## - local desc="Trace type with bit <n> only" + local desc="Create an IOAM Namespace" - local tmp=${bit2size[22]} - bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) + [ ! -z $1 ] && return - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq 123 + local ret=$? - for i in {0..22} - do - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ - dev veth0 &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq ${ALPHA[6]} + ret=$((ret + $?)) - local cmd_res=$? - local descr="${desc/<n>/$i}" + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq ${ALPHA[7]} + ret=$((ret + $?)) + + [ $ret == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_ioam_schema() +{ + ############################################################################## + # Make sure the creation of an IOAM Schema works as expected. # + ############################################################################## + local desc="Create an IOAM Schema" + + [ ! -z $1 ] && return + + ip -netns $ioam_node_alpha \ + ioam schema show 2>/dev/null | grep -wq ${ALPHA[8]} + local ret=$? + + local sc_data=$( + for i in `seq 0 $((${#ALPHA[9]}-1))` + do + chr=${ALPHA[9]:i:1} + printf "%x " "'${chr}" + done + ) + + ip -netns $ioam_node_alpha \ + ioam schema show 2>/dev/null | grep -q "$sc_data" + ret=$((ret + $?)) + + [ $ret == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_ioam_schema_namespace() +{ + ############################################################################## + # Make sure the binding of a Schema to a Namespace works as expected. # + ############################################################################## + local desc="Bind an IOAM Schema to an IOAM Namespace" + + [ ! -z $1 ] && return + + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq ${ALPHA[8]} + local ret=$? + + ip -netns $ioam_node_alpha \ + ioam schema show 2>/dev/null | grep -wq 123 + ret=$((ret + $?)) + + [ $ret == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_route_ns() +{ + ############################################################################## + # Make sure the Namespace-ID is always provided, whatever the mode. # + ############################################################################## + local desc="Mandatory Namespace-ID" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_tunsrc() +{ + ############################################################################## + # Make sure the Tunnel Source is only (and possibly) used with encap mode. # + ############################################################################## + local desc + local mode + local mode_tunsrc - if [[ $i -ge 12 && $i -le 21 ]] + [ -z $1 ] && return + + if [ "$1" == "encap" ] + then + desc="Optional Tunnel Source" + mode="$1 tundst 2001:db8:2::2" + mode_tunsrc="$1 tunsrc 2001:db8:1::50 tundst 2001:db8:2::2" + else + desc="Unneeded Tunnel Source" + mode="$1" + mode_tunsrc="$1 tunsrc 2001:db8:1::50" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode_tunsrc trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + if [ "$1" == "encap" ] + then + [[ $ret1 != 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + else + [[ $ret1 != 0 || $ret2 == 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_tundst() +{ + ############################################################################## + # Make sure the Tunnel Destination is only (and always) used with encap mode.# + ############################################################################## + local desc + + [ -z $1 ] && return + + [ "$1" == "encap" ] && desc="Mandatory Tunnel Destination" \ + || desc="Unneeded Tunnel Destination" + + local mode="$1" + local mode_tundst="$1 tundst 2001:db8:2::2" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode_tundst trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + if [ "$1" == "encap" ] + then + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + else + [[ $ret1 != 0 || $ret2 == 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_trace_type() +{ + ############################################################################## + # Make sure the Trace Type is always provided, whatever the mode. # + ############################################################################## + local desc="Mandatory Trace Type" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_trace_size() +{ + ############################################################################## + # Make sure the Trace Size is always provided, whatever the mode. # + ############################################################################## + local desc="Mandatory Trace Size" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_trace_type_bits() +{ + ############################################################################## + # Make sure only allowed bits (0-11 and 22) are accepted. # + ############################################################################## + local desc="Trace Type bits" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + local i + for i in {0..23} + do + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type ${bit2type[$i]} ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [[ ($? == 0 && (($i -ge 12 && $i -le 21) || $i == 23)) || + ($? != 0 && (($i -ge 0 && $i -le 11) || $i == 22)) ]] then - if [ $cmd_res != 0 ] - then - npassed=$((npassed+1)) - log_test_passed "$descr ($1 mode)" - else - nfailed=$((nfailed+1)) - log_test_failed "$descr ($1 mode)" - fi - else - run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 + local err=1 + break fi done - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + [ -z $err ] && log_test_passed "${desc}" || log_test_failed "${desc}" - bit2size[22]=$tmp + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null } -out_full_supp_trace() +local_route_trace_size_values() { ############################################################################## - # Make sure that the encap node will correctly fill a full trace. Be careful,# - # "full trace" here does NOT mean all bits (only supported ones). # + # Make sure only allowed sizes (multiples of four in [4,244]) are accepted. # ############################################################################## - local desc="Full supported trace" + local desc="Trace Size values" + local mode - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + [ -z $1 ] && return - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xfff002 ns 123 size 100 dev veth0 + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xfff002 123 $1 + # we also try the next multiple of four after the MAX to check it's refused + local i + for i in {0..248} + do + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size $i \ + via 2001:db8:1::1 dev veth0 &>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + if [[ ($? == 0 && ($i == 0 || $i == 248 || $(( $i % 4 )) != 0)) || + ($? != 0 && $i != 0 && $i != 248 && $(( $i % 4 )) == 0) ]] + then + local err=1 + break + fi + done + + [ -z $err ] && log_test_passed "${desc}" || log_test_failed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null } ################################################################################ # # -# INPUT tests # +# OUTPUT tests # # # -# Two nodes (sender/receiver), the sender MUST NOT fill the trace upon # -# insertion -> the IOAM namespace configured on the sender is removed # -# and is used in the inserted trace to force the sender not to fill it. # ################################################################################ -in_undef_ns() +output_undef_ns() { ############################################################################## - # Make sure that the receiving node won't fill the trace if the related IOAM # - # namespace is not configured locally. # + # Make sure an IOAM encapsulating node does NOT fill the trace when the # + # corresponding IOAM Namespace-ID is not configured locally. # ############################################################################## - local desc="Unknown IOAM namespace" + local desc="Unknown IOAM Namespace-ID" + local ns=0 + local tr_type=0x800000 + local tr_size=4 + local mode="$1" + local saddr="2001:db8:1::2" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0x800000 ns 0 size 4 dev veth0 + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0x800000 0 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } -in_no_room() +output_no_room() { ############################################################################## - # Make sure that the receiving node won't fill the trace and will set the # - # Overflow flag if there is no room enough for its data. # + # Make sure an IOAM encapsulating node does NOT fill the trace AND sets the # + # Overflow flag when there is not enough room for its data. # ############################################################################## - local desc="Missing trace room" + local desc="Missing room for data" + local ns=123 + local tr_type=0xc00000 + local tr_size=4 + local mode="$1" + local saddr="2001:db8:1::2" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xc00000 123 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } -in_bits() +output_no_room_oss() { ############################################################################## - # Make sure that, for each trace type bit, the receiving node will either: # - # (i) fill the trace with its data when it is a supported bit # - # (ii) not fill the trace with its data when it is an unsupported bit # + # Make sure an IOAM encapsulating node does NOT fill the trace AND sets the # + # Overflow flag when there is not enough room for the Opaque State Snapshot. # ############################################################################## - local desc="Trace type with bit <n> only" + local desc="Missing room for Opaque State Snapshot" + local ns=123 + local tr_type=0x000002 + local tr_size=4 + local mode="$1" + local saddr="2001:db8:1::2" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi - local tmp=${bit2size[22]} - bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +output_bits() +{ + ############################################################################## + # Make sure an IOAM encapsulating node implements all supported bits by # + # checking it correctly fills the trace with its data. # + ############################################################################## + local desc="Trace Type with supported bit <n> only" + local ns=123 + local mode="$1" + local saddr="2001:db8:1::2" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + if [ "$1" == "encap" ] + then + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + local tmp=${bit2size[22]} + bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) + local i for i in {0..11} {22..22} do - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ - dev veth0 + local descr="${desc/<n>/$i}" + + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi - run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc \ + type ${bit2type[$i]} ns $ns size ${bit2size[$i]} \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test "output_bit$i" "${descr}" $saddr \ + ${bit2type[$i]} ${bit2size[$i]} $ns $1 + else + log_test_failed "${descr}" + fi done - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null bit2size[22]=$tmp } -in_oflag() +output_sizes() { ############################################################################## - # Make sure that the receiving node won't fill the trace since the Overflow # - # flag is set. # + # Make sure an IOAM encapsulating node allocates supported sizes correctly. # ############################################################################## - local desc="Overflow flag is set" + local desc="Trace Size of <n> bytes" + local ns=0 + local tr_type=0x800000 + local mode="$1" + local saddr="2001:db8:1::2" - # Exception: - # Here, we need the sender to set the Overflow flag. For that, we will add - # back the IOAM namespace that was previously configured on the sender. - ip -netns $ioam_node_alpha ioam namespace add 123 + if [ "$1" == "encap" ] + then + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + local i + for i in $(seq 4 4 244) + do + local descr="${desc/<n>/$i}" - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xc00000 123 $1 + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $i \ + via 2001:db8:1::1 dev veth0 &>/dev/null - # And we clean the exception for this test to get things back to normal for - # other INPUT tests - ip -netns $ioam_node_alpha ioam namespace del 123 + if [ $? == 0 ] + then + run_test "output_size$i" "${descr}" $saddr $tr_type $i $ns $1 + else + log_test_failed "${descr}" + fi + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } -in_full_supp_trace() +output_full_supp_trace() { ############################################################################## - # Make sure that the receiving node will correctly fill a full trace. Be # - # careful, "full trace" here does NOT mean all bits (only supported ones). # + # Make sure an IOAM encapsulating node correctly fills a trace when all # + # supported bits are set. # ############################################################################## local desc="Full supported trace" + local ns=123 + local tr_type=0xfff002 + local tr_size + local mode="$1" + local saddr="2001:db8:1::2" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xfff002 ns 123 size 80 dev veth0 + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xfff002 123 $1 + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + local i + tr_size=$(( ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) + for i in {0..11} {22..22} + do + tr_size=$((tr_size + bit2size[$i])) + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } ################################################################################ # # -# GLOBAL tests # +# INPUT tests # # # -# Three nodes (sender/router/receiver), IOAM fully enabled on every node. # ################################################################################ -fwd_full_supp_trace() +input_undef_ns() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace when the corresponding IOAM # + # Namespace-ID is not configured locally. # + ############################################################################## + local desc="Unknown IOAM Namespace-ID" + local ns=0 + local tr_type=0x800000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_no_room() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace AND sets the Overflow flag # + # when there is not enough room for its data. # + ############################################################################## + local desc="Missing room for data" + local ns=123 + local tr_type=0xc00000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_no_room_oss() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace AND sets the Overflow flag # + # when there is not enough room for the Opaque State Snapshot. # + ############################################################################## + local desc="Missing room for Opaque State Snapshot" + local ns=123 + local tr_type=0x000002 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_disabled() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace when IOAM is not enabled on # + # the corresponding (ingress) interface. # + ############################################################################## + local desc="IOAM disabled on ingress interface" + local ns=123 + local tr_type=0x800000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + # Exception: disable IOAM on ingress interface + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 &>/dev/null + local ret=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + ret=$((ret + $?)) + + if [ $ret == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + # Clean Exception + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 &>/dev/null + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_oflag() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace when the Overflow flag is # + # set. # + ############################################################################## + local desc="Overflow flag is set" + local ns=123 + local tr_type=0xc00000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + # Exception: + # Here, we need the sender to set the Overflow flag. For that, we will add + # back the IOAM namespace that was previously configured on the sender. + ip -netns $ioam_node_alpha ioam namespace add 123 &>/dev/null + local ret=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + ret=$((ret + $?)) + + if [ $ret == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + # Clean Exception + ip -netns $ioam_node_alpha ioam namespace del 123 &>/dev/null + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_bits() +{ + ############################################################################## + # Make sure an IOAM node implements all supported bits by checking it # + # correctly fills the trace with its data. # + ############################################################################## + local desc="Trace Type with supported bit <n> only" + local ns=123 + local mode="$1" + + if [ "$1" == "encap" ] + then + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + local tmp=${bit2size[22]} + bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) + + local i + for i in {0..11} {22..22} + do + local descr="${desc/<n>/$i}" + + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc \ + type ${bit2type[$i]} ns $ns size ${bit2size[$i]} \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test "input_bit$i" "${descr}" 2001:db8:1::2 \ + ${bit2type[$i]} ${bit2size[$i]} $ns $1 + else + log_test_failed "${descr}" + fi + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null + + bit2size[22]=$tmp +} + +input_sizes() { ############################################################################## - # Make sure that all three nodes correctly filled the full supported trace # - # by checking that the trace data is consistent with the predefined config. # + # Make sure an IOAM node handles all supported sizes correctly. # ############################################################################## - local desc="Forward - Full supported trace" + local desc="Trace Size of <n> bytes" + local ns=123 + local tr_type=0x800000 + local mode="$1" + + if [ "$1" == "encap" ] + then + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - [ "$1" = "encap" ] && mode="$1 tundst db02::2" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 up + local i + for i in $(seq 4 4 244) + do + local descr="${desc/<n>/$i}" - ip -netns $ioam_node_alpha route change db02::/64 encap ioam6 mode $mode \ - trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0 + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \ - db02::2 0xfff002 123 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $i \ + via 2001:db8:1::1 dev veth0 &>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down + if [ $? == 0 ] + then + run_test "input_size$i" "${descr}" 2001:db8:1::2 $tr_type $i $ns $1 + else + log_test_failed "${descr}" + fi + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_full_supp_trace() +{ + ############################################################################## + # Make sure an IOAM node correctly fills a trace when all supported bits are # + # set. # + ############################################################################## + local desc="Full supported trace" + local ns=123 + local tr_type=0xfff002 + local tr_size + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + local i + tr_size=$(( ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) + for i in {0..11} {22..22} + do + tr_size=$((tr_size + bit2size[$i])) + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } @@ -742,30 +1655,29 @@ fwd_full_supp_trace() ################################################################################ npassed=0 +nskipped=0 nfailed=0 if [ "$(id -u)" -ne 0 ] then - echo "SKIP: Need root privileges" + echo "SKIP: Need root privileges." exit $ksft_skip fi if [ ! -x "$(command -v ip)" ] then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -ip ioam &>/dev/null -if [ $? = 1 ] -then - echo "SKIP: iproute2 too old, missing ioam command" + echo "SKIP: Could not run test without ip tool." exit $ksft_skip fi check_kernel_compatibility - -cleanup &>/dev/null setup run -cleanup &>/dev/null +cleanup + +if [ $nfailed != 0 ] +then + exit $ksft_fail +fi + +exit $ksft_pass diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index 895e5bb5044b..de4b5c9e8a74 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -8,8 +8,10 @@ #include <errno.h> #include <limits.h> #include <linux/const.h> +#include <linux/if_ether.h> #include <linux/ioam6.h> #include <linux/ipv6.h> +#include <stdbool.h> #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -40,7 +42,7 @@ static struct ioam_config node1 = { .egr_id = 101, .ingr_wide = 0xffffffff, /* default value */ .egr_wide = 101101, - .ns_data = 0xdeadbee0, + .ns_data = 0xdeadbeef, .ns_wide = 0xcafec0caf00dc0de, .sc_id = 777, .sc_data = "something that will be 4n-aligned", @@ -54,33 +56,22 @@ static struct ioam_config node2 = { .egr_id = 202, .ingr_wide = 201201, .egr_wide = 202202, - .ns_data = 0xdeadbee1, - .ns_wide = 0xcafec0caf11dc0de, - .sc_id = 666, - .sc_data = "Hello there -Obi", - .hlim = 63, -}; - -static struct ioam_config node3 = { - .id = 3, - .wide = 33333333, - .ingr_id = 301, - .egr_id = 0xffff, /* default value */ - .ingr_wide = 301301, - .egr_wide = 0xffffffff, /* default value */ - .ns_data = 0xdeadbee2, - .ns_wide = 0xcafec0caf22dc0de, + .ns_data = 0xffffffff, /* default value */ + .ns_wide = 0xffffffffffffffff, /* default value */ .sc_id = 0xffffff, /* default value */ .sc_data = NULL, - .hlim = 62, + .hlim = 63, }; enum { /********** * OUTPUT * **********/ + __TEST_OUT_MIN, + TEST_OUT_UNDEF_NS, TEST_OUT_NO_ROOM, + TEST_OUT_NO_ROOM_OSS, TEST_OUT_BIT0, TEST_OUT_BIT1, TEST_OUT_BIT2, @@ -94,13 +85,80 @@ enum { TEST_OUT_BIT10, TEST_OUT_BIT11, TEST_OUT_BIT22, + TEST_OUT_SIZE4, + TEST_OUT_SIZE8, + TEST_OUT_SIZE12, + TEST_OUT_SIZE16, + TEST_OUT_SIZE20, + TEST_OUT_SIZE24, + TEST_OUT_SIZE28, + TEST_OUT_SIZE32, + TEST_OUT_SIZE36, + TEST_OUT_SIZE40, + TEST_OUT_SIZE44, + TEST_OUT_SIZE48, + TEST_OUT_SIZE52, + TEST_OUT_SIZE56, + TEST_OUT_SIZE60, + TEST_OUT_SIZE64, + TEST_OUT_SIZE68, + TEST_OUT_SIZE72, + TEST_OUT_SIZE76, + TEST_OUT_SIZE80, + TEST_OUT_SIZE84, + TEST_OUT_SIZE88, + TEST_OUT_SIZE92, + TEST_OUT_SIZE96, + TEST_OUT_SIZE100, + TEST_OUT_SIZE104, + TEST_OUT_SIZE108, + TEST_OUT_SIZE112, + TEST_OUT_SIZE116, + TEST_OUT_SIZE120, + TEST_OUT_SIZE124, + TEST_OUT_SIZE128, + TEST_OUT_SIZE132, + TEST_OUT_SIZE136, + TEST_OUT_SIZE140, + TEST_OUT_SIZE144, + TEST_OUT_SIZE148, + TEST_OUT_SIZE152, + TEST_OUT_SIZE156, + TEST_OUT_SIZE160, + TEST_OUT_SIZE164, + TEST_OUT_SIZE168, + TEST_OUT_SIZE172, + TEST_OUT_SIZE176, + TEST_OUT_SIZE180, + TEST_OUT_SIZE184, + TEST_OUT_SIZE188, + TEST_OUT_SIZE192, + TEST_OUT_SIZE196, + TEST_OUT_SIZE200, + TEST_OUT_SIZE204, + TEST_OUT_SIZE208, + TEST_OUT_SIZE212, + TEST_OUT_SIZE216, + TEST_OUT_SIZE220, + TEST_OUT_SIZE224, + TEST_OUT_SIZE228, + TEST_OUT_SIZE232, + TEST_OUT_SIZE236, + TEST_OUT_SIZE240, + TEST_OUT_SIZE244, TEST_OUT_FULL_SUPP_TRACE, + __TEST_OUT_MAX, + /********* * INPUT * *********/ + __TEST_IN_MIN, + TEST_IN_UNDEF_NS, TEST_IN_NO_ROOM, + TEST_IN_NO_ROOM_OSS, + TEST_IN_DISABLED, TEST_IN_OFLAG, TEST_IN_BIT0, TEST_IN_BIT1, @@ -115,36 +173,107 @@ enum { TEST_IN_BIT10, TEST_IN_BIT11, TEST_IN_BIT22, + TEST_IN_SIZE4, + TEST_IN_SIZE8, + TEST_IN_SIZE12, + TEST_IN_SIZE16, + TEST_IN_SIZE20, + TEST_IN_SIZE24, + TEST_IN_SIZE28, + TEST_IN_SIZE32, + TEST_IN_SIZE36, + TEST_IN_SIZE40, + TEST_IN_SIZE44, + TEST_IN_SIZE48, + TEST_IN_SIZE52, + TEST_IN_SIZE56, + TEST_IN_SIZE60, + TEST_IN_SIZE64, + TEST_IN_SIZE68, + TEST_IN_SIZE72, + TEST_IN_SIZE76, + TEST_IN_SIZE80, + TEST_IN_SIZE84, + TEST_IN_SIZE88, + TEST_IN_SIZE92, + TEST_IN_SIZE96, + TEST_IN_SIZE100, + TEST_IN_SIZE104, + TEST_IN_SIZE108, + TEST_IN_SIZE112, + TEST_IN_SIZE116, + TEST_IN_SIZE120, + TEST_IN_SIZE124, + TEST_IN_SIZE128, + TEST_IN_SIZE132, + TEST_IN_SIZE136, + TEST_IN_SIZE140, + TEST_IN_SIZE144, + TEST_IN_SIZE148, + TEST_IN_SIZE152, + TEST_IN_SIZE156, + TEST_IN_SIZE160, + TEST_IN_SIZE164, + TEST_IN_SIZE168, + TEST_IN_SIZE172, + TEST_IN_SIZE176, + TEST_IN_SIZE180, + TEST_IN_SIZE184, + TEST_IN_SIZE188, + TEST_IN_SIZE192, + TEST_IN_SIZE196, + TEST_IN_SIZE200, + TEST_IN_SIZE204, + TEST_IN_SIZE208, + TEST_IN_SIZE212, + TEST_IN_SIZE216, + TEST_IN_SIZE220, + TEST_IN_SIZE224, + TEST_IN_SIZE228, + TEST_IN_SIZE232, + TEST_IN_SIZE236, + TEST_IN_SIZE240, + TEST_IN_SIZE244, TEST_IN_FULL_SUPP_TRACE, - /********** - * GLOBAL * - **********/ - TEST_FWD_FULL_SUPP_TRACE, + __TEST_IN_MAX, __TEST_MAX, }; -static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, - __u32 trace_type, __u16 ioam_ns) +static int check_header(int tid, struct ioam6_trace_hdr *trace, + __u32 trace_type, __u8 trace_size, __u16 ioam_ns) { - if (__be16_to_cpu(ioam6h->namespace_id) != ioam_ns || - __be32_to_cpu(ioam6h->type_be32) != (trace_type << 8)) + if (__be16_to_cpu(trace->namespace_id) != ioam_ns || + __be32_to_cpu(trace->type_be32) != (trace_type << 8)) return 1; switch (tid) { case TEST_OUT_UNDEF_NS: case TEST_IN_UNDEF_NS: - return ioam6h->overflow || - ioam6h->nodelen != 1 || - ioam6h->remlen != 1; + case TEST_IN_DISABLED: + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != 1; case TEST_OUT_NO_ROOM: case TEST_IN_NO_ROOM: case TEST_IN_OFLAG: - return !ioam6h->overflow || - ioam6h->nodelen != 2 || - ioam6h->remlen != 1; + return trace->overflow == 0 || + trace->nodelen != 2 || + trace->remlen != 1; + + case TEST_OUT_NO_ROOM_OSS: + return trace->overflow == 0 || + trace->nodelen != 0 || + trace->remlen != 1; + + case TEST_IN_NO_ROOM_OSS: + case TEST_OUT_BIT22: + case TEST_IN_BIT22: + return trace->overflow == 1 || + trace->nodelen != 0 || + trace->remlen != 0; case TEST_OUT_BIT0: case TEST_IN_BIT0: @@ -164,9 +293,9 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, case TEST_IN_BIT7: case TEST_OUT_BIT11: case TEST_IN_BIT11: - return ioam6h->overflow || - ioam6h->nodelen != 1 || - ioam6h->remlen; + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != 0; case TEST_OUT_BIT8: case TEST_IN_BIT8: @@ -174,22 +303,145 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, case TEST_IN_BIT9: case TEST_OUT_BIT10: case TEST_IN_BIT10: - return ioam6h->overflow || - ioam6h->nodelen != 2 || - ioam6h->remlen; - - case TEST_OUT_BIT22: - case TEST_IN_BIT22: - return ioam6h->overflow || - ioam6h->nodelen || - ioam6h->remlen; + return trace->overflow == 1 || + trace->nodelen != 2 || + trace->remlen != 0; + + case TEST_OUT_SIZE4: + case TEST_OUT_SIZE8: + case TEST_OUT_SIZE12: + case TEST_OUT_SIZE16: + case TEST_OUT_SIZE20: + case TEST_OUT_SIZE24: + case TEST_OUT_SIZE28: + case TEST_OUT_SIZE32: + case TEST_OUT_SIZE36: + case TEST_OUT_SIZE40: + case TEST_OUT_SIZE44: + case TEST_OUT_SIZE48: + case TEST_OUT_SIZE52: + case TEST_OUT_SIZE56: + case TEST_OUT_SIZE60: + case TEST_OUT_SIZE64: + case TEST_OUT_SIZE68: + case TEST_OUT_SIZE72: + case TEST_OUT_SIZE76: + case TEST_OUT_SIZE80: + case TEST_OUT_SIZE84: + case TEST_OUT_SIZE88: + case TEST_OUT_SIZE92: + case TEST_OUT_SIZE96: + case TEST_OUT_SIZE100: + case TEST_OUT_SIZE104: + case TEST_OUT_SIZE108: + case TEST_OUT_SIZE112: + case TEST_OUT_SIZE116: + case TEST_OUT_SIZE120: + case TEST_OUT_SIZE124: + case TEST_OUT_SIZE128: + case TEST_OUT_SIZE132: + case TEST_OUT_SIZE136: + case TEST_OUT_SIZE140: + case TEST_OUT_SIZE144: + case TEST_OUT_SIZE148: + case TEST_OUT_SIZE152: + case TEST_OUT_SIZE156: + case TEST_OUT_SIZE160: + case TEST_OUT_SIZE164: + case TEST_OUT_SIZE168: + case TEST_OUT_SIZE172: + case TEST_OUT_SIZE176: + case TEST_OUT_SIZE180: + case TEST_OUT_SIZE184: + case TEST_OUT_SIZE188: + case TEST_OUT_SIZE192: + case TEST_OUT_SIZE196: + case TEST_OUT_SIZE200: + case TEST_OUT_SIZE204: + case TEST_OUT_SIZE208: + case TEST_OUT_SIZE212: + case TEST_OUT_SIZE216: + case TEST_OUT_SIZE220: + case TEST_OUT_SIZE224: + case TEST_OUT_SIZE228: + case TEST_OUT_SIZE232: + case TEST_OUT_SIZE236: + case TEST_OUT_SIZE240: + case TEST_OUT_SIZE244: + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != trace_size / 4; + + case TEST_IN_SIZE4: + case TEST_IN_SIZE8: + case TEST_IN_SIZE12: + case TEST_IN_SIZE16: + case TEST_IN_SIZE20: + case TEST_IN_SIZE24: + case TEST_IN_SIZE28: + case TEST_IN_SIZE32: + case TEST_IN_SIZE36: + case TEST_IN_SIZE40: + case TEST_IN_SIZE44: + case TEST_IN_SIZE48: + case TEST_IN_SIZE52: + case TEST_IN_SIZE56: + case TEST_IN_SIZE60: + case TEST_IN_SIZE64: + case TEST_IN_SIZE68: + case TEST_IN_SIZE72: + case TEST_IN_SIZE76: + case TEST_IN_SIZE80: + case TEST_IN_SIZE84: + case TEST_IN_SIZE88: + case TEST_IN_SIZE92: + case TEST_IN_SIZE96: + case TEST_IN_SIZE100: + case TEST_IN_SIZE104: + case TEST_IN_SIZE108: + case TEST_IN_SIZE112: + case TEST_IN_SIZE116: + case TEST_IN_SIZE120: + case TEST_IN_SIZE124: + case TEST_IN_SIZE128: + case TEST_IN_SIZE132: + case TEST_IN_SIZE136: + case TEST_IN_SIZE140: + case TEST_IN_SIZE144: + case TEST_IN_SIZE148: + case TEST_IN_SIZE152: + case TEST_IN_SIZE156: + case TEST_IN_SIZE160: + case TEST_IN_SIZE164: + case TEST_IN_SIZE168: + case TEST_IN_SIZE172: + case TEST_IN_SIZE176: + case TEST_IN_SIZE180: + case TEST_IN_SIZE184: + case TEST_IN_SIZE188: + case TEST_IN_SIZE192: + case TEST_IN_SIZE196: + case TEST_IN_SIZE200: + case TEST_IN_SIZE204: + case TEST_IN_SIZE208: + case TEST_IN_SIZE212: + case TEST_IN_SIZE216: + case TEST_IN_SIZE220: + case TEST_IN_SIZE224: + case TEST_IN_SIZE228: + case TEST_IN_SIZE232: + case TEST_IN_SIZE236: + case TEST_IN_SIZE240: + case TEST_IN_SIZE244: + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != (trace_size / 4) - trace->nodelen; case TEST_OUT_FULL_SUPP_TRACE: case TEST_IN_FULL_SUPP_TRACE: - case TEST_FWD_FULL_SUPP_TRACE: - return ioam6h->overflow || - ioam6h->nodelen != 15 || - ioam6h->remlen; + return trace->overflow == 1 || + trace->nodelen != 15 || + trace->remlen != 0; default: break; @@ -198,167 +450,137 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, return 1; } -static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, - const struct ioam_config cnf) +static int check_data(struct ioam6_trace_hdr *trace, __u8 trace_size, + const struct ioam_config cnf, bool is_output) { - unsigned int len; + unsigned int len, i; __u8 aligned; __u64 raw64; __u32 raw32; + __u8 *p; - if (ioam6h->type.bit0) { - raw32 = __be32_to_cpu(*((__u32 *)*p)); - if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff)) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit1) { - raw32 = __be32_to_cpu(*((__u32 *)*p)); - if (cnf.ingr_id != (raw32 >> 16) || - cnf.egr_id != (raw32 & 0xffff)) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit2) - *p += sizeof(__u32); - - if (ioam6h->type.bit3) - *p += sizeof(__u32); - - if (ioam6h->type.bit4) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit5) { - if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ns_data) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit6) - *p += sizeof(__u32); + if (trace->type.bit12 | trace->type.bit13 | trace->type.bit14 | + trace->type.bit15 | trace->type.bit16 | trace->type.bit17 | + trace->type.bit18 | trace->type.bit19 | trace->type.bit20 | + trace->type.bit21 | trace->type.bit23) + return 1; - if (ioam6h->type.bit7) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + for (i = 0; i < trace->remlen * 4; i++) { + if (trace->data[i] != 0) return 1; - *p += sizeof(__u32); } - if (ioam6h->type.bit8) { - raw64 = __be64_to_cpu(*((__u64 *)*p)); - if (cnf.hlim != (raw64 >> 56) || - cnf.wide != (raw64 & 0xffffffffffffff)) - return 1; - *p += sizeof(__u64); - } + if (trace->remlen * 4 == trace_size) + return 0; - if (ioam6h->type.bit9) { - if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ingr_wide) - return 1; - *p += sizeof(__u32); + p = trace->data + trace->remlen * 4; - if (__be32_to_cpu(*((__u32 *)*p)) != cnf.egr_wide) + if (trace->type.bit0) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff)) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit10) { - if (__be64_to_cpu(*((__u64 *)*p)) != cnf.ns_wide) + if (trace->type.bit1) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if (cnf.ingr_id != (raw32 >> 16) || + cnf.egr_id != (raw32 & 0xffff)) return 1; - *p += sizeof(__u64); + p += sizeof(__u32); } - if (ioam6h->type.bit11) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit2) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if ((is_output && raw32 != 0xffffffff) || + (!is_output && (raw32 == 0 || raw32 == 0xffffffff))) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit12) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit3) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if ((is_output && raw32 != 0xffffffff) || + (!is_output && (raw32 == 0 || raw32 == 0xffffffff))) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit13) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit4) { + if (__be32_to_cpu(*((__u32 *)p)) != 0xffffffff) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit14) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit5) { + if (__be32_to_cpu(*((__u32 *)p)) != cnf.ns_data) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit15) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit6) { + if (__be32_to_cpu(*((__u32 *)p)) == 0xffffffff) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit16) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit7) { + if (__be32_to_cpu(*((__u32 *)p)) != 0xffffffff) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit17) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit8) { + raw64 = __be64_to_cpu(*((__u64 *)p)); + if (cnf.hlim != (raw64 >> 56) || + cnf.wide != (raw64 & 0xffffffffffffff)) return 1; - *p += sizeof(__u32); + p += sizeof(__u64); } - if (ioam6h->type.bit18) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit9) { + if (__be32_to_cpu(*((__u32 *)p)) != cnf.ingr_wide) return 1; - *p += sizeof(__u32); - } + p += sizeof(__u32); - if (ioam6h->type.bit19) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (__be32_to_cpu(*((__u32 *)p)) != cnf.egr_wide) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit20) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit10) { + if (__be64_to_cpu(*((__u64 *)p)) != cnf.ns_wide) return 1; - *p += sizeof(__u32); + p += sizeof(__u64); } - if (ioam6h->type.bit21) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit11) { + if (__be32_to_cpu(*((__u32 *)p)) != 0xffffffff) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit22) { + if (trace->type.bit22) { len = cnf.sc_data ? strlen(cnf.sc_data) : 0; aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0; - raw32 = __be32_to_cpu(*((__u32 *)*p)); + raw32 = __be32_to_cpu(*((__u32 *)p)); if (aligned != (raw32 >> 24) * 4 || cnf.sc_id != (raw32 & 0xffffff)) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); if (cnf.sc_data) { - if (strncmp((char *)*p, cnf.sc_data, len)) + if (strncmp((char *)p, cnf.sc_data, len)) return 1; - *p += len; + p += len; aligned -= len; while (aligned--) { - if (**p != '\0') + if (*p != '\0') return 1; - *p += sizeof(__u8); + p += sizeof(__u8); } } } @@ -366,151 +588,351 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, return 0; } -static int check_ioam_header_and_data(int tid, struct ioam6_trace_hdr *ioam6h, - __u32 trace_type, __u16 ioam_ns) +static int check_ioam_trace(int tid, struct ioam6_trace_hdr *trace, + __u32 trace_type, __u8 trace_size, __u16 ioam_ns) { - __u8 *p; - - if (check_ioam_header(tid, ioam6h, trace_type, ioam_ns)) + if (check_header(tid, trace, trace_type, trace_size, ioam_ns)) return 1; - p = ioam6h->data + ioam6h->remlen * 4; - - switch (tid) { - case TEST_OUT_BIT0: - case TEST_OUT_BIT1: - case TEST_OUT_BIT2: - case TEST_OUT_BIT3: - case TEST_OUT_BIT4: - case TEST_OUT_BIT5: - case TEST_OUT_BIT6: - case TEST_OUT_BIT7: - case TEST_OUT_BIT8: - case TEST_OUT_BIT9: - case TEST_OUT_BIT10: - case TEST_OUT_BIT11: - case TEST_OUT_BIT22: - case TEST_OUT_FULL_SUPP_TRACE: - return check_ioam6_data(&p, ioam6h, node1); - - case TEST_IN_BIT0: - case TEST_IN_BIT1: - case TEST_IN_BIT2: - case TEST_IN_BIT3: - case TEST_IN_BIT4: - case TEST_IN_BIT5: - case TEST_IN_BIT6: - case TEST_IN_BIT7: - case TEST_IN_BIT8: - case TEST_IN_BIT9: - case TEST_IN_BIT10: - case TEST_IN_BIT11: - case TEST_IN_BIT22: - case TEST_IN_FULL_SUPP_TRACE: - { - __u32 tmp32 = node2.egr_wide; - __u16 tmp16 = node2.egr_id; - int res; - - node2.egr_id = 0xffff; - node2.egr_wide = 0xffffffff; + if (tid > __TEST_OUT_MIN && tid < __TEST_OUT_MAX) + return check_data(trace, trace_size, node1, true); - res = check_ioam6_data(&p, ioam6h, node2); - - node2.egr_id = tmp16; - node2.egr_wide = tmp32; - - return res; - } - - case TEST_FWD_FULL_SUPP_TRACE: - if (check_ioam6_data(&p, ioam6h, node3)) - return 1; - if (check_ioam6_data(&p, ioam6h, node2)) - return 1; - return check_ioam6_data(&p, ioam6h, node1); - - default: - break; - } + if (tid > __TEST_IN_MIN && tid < __TEST_IN_MAX) + return check_data(trace, trace_size, node2, false); return 1; } static int str2id(const char *tname) { - if (!strcmp("out_undef_ns", tname)) + if (!strcmp("output_undef_ns", tname)) return TEST_OUT_UNDEF_NS; - if (!strcmp("out_no_room", tname)) + if (!strcmp("output_no_room", tname)) return TEST_OUT_NO_ROOM; - if (!strcmp("out_bit0", tname)) + if (!strcmp("output_no_room_oss", tname)) + return TEST_OUT_NO_ROOM_OSS; + if (!strcmp("output_bit0", tname)) return TEST_OUT_BIT0; - if (!strcmp("out_bit1", tname)) + if (!strcmp("output_bit1", tname)) return TEST_OUT_BIT1; - if (!strcmp("out_bit2", tname)) + if (!strcmp("output_bit2", tname)) return TEST_OUT_BIT2; - if (!strcmp("out_bit3", tname)) + if (!strcmp("output_bit3", tname)) return TEST_OUT_BIT3; - if (!strcmp("out_bit4", tname)) + if (!strcmp("output_bit4", tname)) return TEST_OUT_BIT4; - if (!strcmp("out_bit5", tname)) + if (!strcmp("output_bit5", tname)) return TEST_OUT_BIT5; - if (!strcmp("out_bit6", tname)) + if (!strcmp("output_bit6", tname)) return TEST_OUT_BIT6; - if (!strcmp("out_bit7", tname)) + if (!strcmp("output_bit7", tname)) return TEST_OUT_BIT7; - if (!strcmp("out_bit8", tname)) + if (!strcmp("output_bit8", tname)) return TEST_OUT_BIT8; - if (!strcmp("out_bit9", tname)) + if (!strcmp("output_bit9", tname)) return TEST_OUT_BIT9; - if (!strcmp("out_bit10", tname)) + if (!strcmp("output_bit10", tname)) return TEST_OUT_BIT10; - if (!strcmp("out_bit11", tname)) + if (!strcmp("output_bit11", tname)) return TEST_OUT_BIT11; - if (!strcmp("out_bit22", tname)) + if (!strcmp("output_bit22", tname)) return TEST_OUT_BIT22; - if (!strcmp("out_full_supp_trace", tname)) + if (!strcmp("output_size4", tname)) + return TEST_OUT_SIZE4; + if (!strcmp("output_size8", tname)) + return TEST_OUT_SIZE8; + if (!strcmp("output_size12", tname)) + return TEST_OUT_SIZE12; + if (!strcmp("output_size16", tname)) + return TEST_OUT_SIZE16; + if (!strcmp("output_size20", tname)) + return TEST_OUT_SIZE20; + if (!strcmp("output_size24", tname)) + return TEST_OUT_SIZE24; + if (!strcmp("output_size28", tname)) + return TEST_OUT_SIZE28; + if (!strcmp("output_size32", tname)) + return TEST_OUT_SIZE32; + if (!strcmp("output_size36", tname)) + return TEST_OUT_SIZE36; + if (!strcmp("output_size40", tname)) + return TEST_OUT_SIZE40; + if (!strcmp("output_size44", tname)) + return TEST_OUT_SIZE44; + if (!strcmp("output_size48", tname)) + return TEST_OUT_SIZE48; + if (!strcmp("output_size52", tname)) + return TEST_OUT_SIZE52; + if (!strcmp("output_size56", tname)) + return TEST_OUT_SIZE56; + if (!strcmp("output_size60", tname)) + return TEST_OUT_SIZE60; + if (!strcmp("output_size64", tname)) + return TEST_OUT_SIZE64; + if (!strcmp("output_size68", tname)) + return TEST_OUT_SIZE68; + if (!strcmp("output_size72", tname)) + return TEST_OUT_SIZE72; + if (!strcmp("output_size76", tname)) + return TEST_OUT_SIZE76; + if (!strcmp("output_size80", tname)) + return TEST_OUT_SIZE80; + if (!strcmp("output_size84", tname)) + return TEST_OUT_SIZE84; + if (!strcmp("output_size88", tname)) + return TEST_OUT_SIZE88; + if (!strcmp("output_size92", tname)) + return TEST_OUT_SIZE92; + if (!strcmp("output_size96", tname)) + return TEST_OUT_SIZE96; + if (!strcmp("output_size100", tname)) + return TEST_OUT_SIZE100; + if (!strcmp("output_size104", tname)) + return TEST_OUT_SIZE104; + if (!strcmp("output_size108", tname)) + return TEST_OUT_SIZE108; + if (!strcmp("output_size112", tname)) + return TEST_OUT_SIZE112; + if (!strcmp("output_size116", tname)) + return TEST_OUT_SIZE116; + if (!strcmp("output_size120", tname)) + return TEST_OUT_SIZE120; + if (!strcmp("output_size124", tname)) + return TEST_OUT_SIZE124; + if (!strcmp("output_size128", tname)) + return TEST_OUT_SIZE128; + if (!strcmp("output_size132", tname)) + return TEST_OUT_SIZE132; + if (!strcmp("output_size136", tname)) + return TEST_OUT_SIZE136; + if (!strcmp("output_size140", tname)) + return TEST_OUT_SIZE140; + if (!strcmp("output_size144", tname)) + return TEST_OUT_SIZE144; + if (!strcmp("output_size148", tname)) + return TEST_OUT_SIZE148; + if (!strcmp("output_size152", tname)) + return TEST_OUT_SIZE152; + if (!strcmp("output_size156", tname)) + return TEST_OUT_SIZE156; + if (!strcmp("output_size160", tname)) + return TEST_OUT_SIZE160; + if (!strcmp("output_size164", tname)) + return TEST_OUT_SIZE164; + if (!strcmp("output_size168", tname)) + return TEST_OUT_SIZE168; + if (!strcmp("output_size172", tname)) + return TEST_OUT_SIZE172; + if (!strcmp("output_size176", tname)) + return TEST_OUT_SIZE176; + if (!strcmp("output_size180", tname)) + return TEST_OUT_SIZE180; + if (!strcmp("output_size184", tname)) + return TEST_OUT_SIZE184; + if (!strcmp("output_size188", tname)) + return TEST_OUT_SIZE188; + if (!strcmp("output_size192", tname)) + return TEST_OUT_SIZE192; + if (!strcmp("output_size196", tname)) + return TEST_OUT_SIZE196; + if (!strcmp("output_size200", tname)) + return TEST_OUT_SIZE200; + if (!strcmp("output_size204", tname)) + return TEST_OUT_SIZE204; + if (!strcmp("output_size208", tname)) + return TEST_OUT_SIZE208; + if (!strcmp("output_size212", tname)) + return TEST_OUT_SIZE212; + if (!strcmp("output_size216", tname)) + return TEST_OUT_SIZE216; + if (!strcmp("output_size220", tname)) + return TEST_OUT_SIZE220; + if (!strcmp("output_size224", tname)) + return TEST_OUT_SIZE224; + if (!strcmp("output_size228", tname)) + return TEST_OUT_SIZE228; + if (!strcmp("output_size232", tname)) + return TEST_OUT_SIZE232; + if (!strcmp("output_size236", tname)) + return TEST_OUT_SIZE236; + if (!strcmp("output_size240", tname)) + return TEST_OUT_SIZE240; + if (!strcmp("output_size244", tname)) + return TEST_OUT_SIZE244; + if (!strcmp("output_full_supp_trace", tname)) return TEST_OUT_FULL_SUPP_TRACE; - if (!strcmp("in_undef_ns", tname)) + if (!strcmp("input_undef_ns", tname)) return TEST_IN_UNDEF_NS; - if (!strcmp("in_no_room", tname)) + if (!strcmp("input_no_room", tname)) return TEST_IN_NO_ROOM; - if (!strcmp("in_oflag", tname)) + if (!strcmp("input_no_room_oss", tname)) + return TEST_IN_NO_ROOM_OSS; + if (!strcmp("input_disabled", tname)) + return TEST_IN_DISABLED; + if (!strcmp("input_oflag", tname)) return TEST_IN_OFLAG; - if (!strcmp("in_bit0", tname)) + if (!strcmp("input_bit0", tname)) return TEST_IN_BIT0; - if (!strcmp("in_bit1", tname)) + if (!strcmp("input_bit1", tname)) return TEST_IN_BIT1; - if (!strcmp("in_bit2", tname)) + if (!strcmp("input_bit2", tname)) return TEST_IN_BIT2; - if (!strcmp("in_bit3", tname)) + if (!strcmp("input_bit3", tname)) return TEST_IN_BIT3; - if (!strcmp("in_bit4", tname)) + if (!strcmp("input_bit4", tname)) return TEST_IN_BIT4; - if (!strcmp("in_bit5", tname)) + if (!strcmp("input_bit5", tname)) return TEST_IN_BIT5; - if (!strcmp("in_bit6", tname)) + if (!strcmp("input_bit6", tname)) return TEST_IN_BIT6; - if (!strcmp("in_bit7", tname)) + if (!strcmp("input_bit7", tname)) return TEST_IN_BIT7; - if (!strcmp("in_bit8", tname)) + if (!strcmp("input_bit8", tname)) return TEST_IN_BIT8; - if (!strcmp("in_bit9", tname)) + if (!strcmp("input_bit9", tname)) return TEST_IN_BIT9; - if (!strcmp("in_bit10", tname)) + if (!strcmp("input_bit10", tname)) return TEST_IN_BIT10; - if (!strcmp("in_bit11", tname)) + if (!strcmp("input_bit11", tname)) return TEST_IN_BIT11; - if (!strcmp("in_bit22", tname)) + if (!strcmp("input_bit22", tname)) return TEST_IN_BIT22; - if (!strcmp("in_full_supp_trace", tname)) + if (!strcmp("input_size4", tname)) + return TEST_IN_SIZE4; + if (!strcmp("input_size8", tname)) + return TEST_IN_SIZE8; + if (!strcmp("input_size12", tname)) + return TEST_IN_SIZE12; + if (!strcmp("input_size16", tname)) + return TEST_IN_SIZE16; + if (!strcmp("input_size20", tname)) + return TEST_IN_SIZE20; + if (!strcmp("input_size24", tname)) + return TEST_IN_SIZE24; + if (!strcmp("input_size28", tname)) + return TEST_IN_SIZE28; + if (!strcmp("input_size32", tname)) + return TEST_IN_SIZE32; + if (!strcmp("input_size36", tname)) + return TEST_IN_SIZE36; + if (!strcmp("input_size40", tname)) + return TEST_IN_SIZE40; + if (!strcmp("input_size44", tname)) + return TEST_IN_SIZE44; + if (!strcmp("input_size48", tname)) + return TEST_IN_SIZE48; + if (!strcmp("input_size52", tname)) + return TEST_IN_SIZE52; + if (!strcmp("input_size56", tname)) + return TEST_IN_SIZE56; + if (!strcmp("input_size60", tname)) + return TEST_IN_SIZE60; + if (!strcmp("input_size64", tname)) + return TEST_IN_SIZE64; + if (!strcmp("input_size68", tname)) + return TEST_IN_SIZE68; + if (!strcmp("input_size72", tname)) + return TEST_IN_SIZE72; + if (!strcmp("input_size76", tname)) + return TEST_IN_SIZE76; + if (!strcmp("input_size80", tname)) + return TEST_IN_SIZE80; + if (!strcmp("input_size84", tname)) + return TEST_IN_SIZE84; + if (!strcmp("input_size88", tname)) + return TEST_IN_SIZE88; + if (!strcmp("input_size92", tname)) + return TEST_IN_SIZE92; + if (!strcmp("input_size96", tname)) + return TEST_IN_SIZE96; + if (!strcmp("input_size100", tname)) + return TEST_IN_SIZE100; + if (!strcmp("input_size104", tname)) + return TEST_IN_SIZE104; + if (!strcmp("input_size108", tname)) + return TEST_IN_SIZE108; + if (!strcmp("input_size112", tname)) + return TEST_IN_SIZE112; + if (!strcmp("input_size116", tname)) + return TEST_IN_SIZE116; + if (!strcmp("input_size120", tname)) + return TEST_IN_SIZE120; + if (!strcmp("input_size124", tname)) + return TEST_IN_SIZE124; + if (!strcmp("input_size128", tname)) + return TEST_IN_SIZE128; + if (!strcmp("input_size132", tname)) + return TEST_IN_SIZE132; + if (!strcmp("input_size136", tname)) + return TEST_IN_SIZE136; + if (!strcmp("input_size140", tname)) + return TEST_IN_SIZE140; + if (!strcmp("input_size144", tname)) + return TEST_IN_SIZE144; + if (!strcmp("input_size148", tname)) + return TEST_IN_SIZE148; + if (!strcmp("input_size152", tname)) + return TEST_IN_SIZE152; + if (!strcmp("input_size156", tname)) + return TEST_IN_SIZE156; + if (!strcmp("input_size160", tname)) + return TEST_IN_SIZE160; + if (!strcmp("input_size164", tname)) + return TEST_IN_SIZE164; + if (!strcmp("input_size168", tname)) + return TEST_IN_SIZE168; + if (!strcmp("input_size172", tname)) + return TEST_IN_SIZE172; + if (!strcmp("input_size176", tname)) + return TEST_IN_SIZE176; + if (!strcmp("input_size180", tname)) + return TEST_IN_SIZE180; + if (!strcmp("input_size184", tname)) + return TEST_IN_SIZE184; + if (!strcmp("input_size188", tname)) + return TEST_IN_SIZE188; + if (!strcmp("input_size192", tname)) + return TEST_IN_SIZE192; + if (!strcmp("input_size196", tname)) + return TEST_IN_SIZE196; + if (!strcmp("input_size200", tname)) + return TEST_IN_SIZE200; + if (!strcmp("input_size204", tname)) + return TEST_IN_SIZE204; + if (!strcmp("input_size208", tname)) + return TEST_IN_SIZE208; + if (!strcmp("input_size212", tname)) + return TEST_IN_SIZE212; + if (!strcmp("input_size216", tname)) + return TEST_IN_SIZE216; + if (!strcmp("input_size220", tname)) + return TEST_IN_SIZE220; + if (!strcmp("input_size224", tname)) + return TEST_IN_SIZE224; + if (!strcmp("input_size228", tname)) + return TEST_IN_SIZE228; + if (!strcmp("input_size232", tname)) + return TEST_IN_SIZE232; + if (!strcmp("input_size236", tname)) + return TEST_IN_SIZE236; + if (!strcmp("input_size240", tname)) + return TEST_IN_SIZE240; + if (!strcmp("input_size244", tname)) + return TEST_IN_SIZE244; + if (!strcmp("input_full_supp_trace", tname)) return TEST_IN_FULL_SUPP_TRACE; - if (!strcmp("fwd_full_supp_trace", tname)) - return TEST_FWD_FULL_SUPP_TRACE; return -1; } +static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) +{ + return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | + (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | + (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | + (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; +} + static int get_u32(__u32 *val, const char *arg, int base) { unsigned long res; @@ -555,119 +977,124 @@ static int get_u16(__u16 *val, const char *arg, int base) return 0; } -static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { - [TEST_OUT_UNDEF_NS] = check_ioam_header, - [TEST_OUT_NO_ROOM] = check_ioam_header, - [TEST_OUT_BIT0] = check_ioam_header_and_data, - [TEST_OUT_BIT1] = check_ioam_header_and_data, - [TEST_OUT_BIT2] = check_ioam_header_and_data, - [TEST_OUT_BIT3] = check_ioam_header_and_data, - [TEST_OUT_BIT4] = check_ioam_header_and_data, - [TEST_OUT_BIT5] = check_ioam_header_and_data, - [TEST_OUT_BIT6] = check_ioam_header_and_data, - [TEST_OUT_BIT7] = check_ioam_header_and_data, - [TEST_OUT_BIT8] = check_ioam_header_and_data, - [TEST_OUT_BIT9] = check_ioam_header_and_data, - [TEST_OUT_BIT10] = check_ioam_header_and_data, - [TEST_OUT_BIT11] = check_ioam_header_and_data, - [TEST_OUT_BIT22] = check_ioam_header_and_data, - [TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data, - [TEST_IN_UNDEF_NS] = check_ioam_header, - [TEST_IN_NO_ROOM] = check_ioam_header, - [TEST_IN_OFLAG] = check_ioam_header, - [TEST_IN_BIT0] = check_ioam_header_and_data, - [TEST_IN_BIT1] = check_ioam_header_and_data, - [TEST_IN_BIT2] = check_ioam_header_and_data, - [TEST_IN_BIT3] = check_ioam_header_and_data, - [TEST_IN_BIT4] = check_ioam_header_and_data, - [TEST_IN_BIT5] = check_ioam_header_and_data, - [TEST_IN_BIT6] = check_ioam_header_and_data, - [TEST_IN_BIT7] = check_ioam_header_and_data, - [TEST_IN_BIT8] = check_ioam_header_and_data, - [TEST_IN_BIT9] = check_ioam_header_and_data, - [TEST_IN_BIT10] = check_ioam_header_and_data, - [TEST_IN_BIT11] = check_ioam_header_and_data, - [TEST_IN_BIT22] = check_ioam_header_and_data, - [TEST_IN_FULL_SUPP_TRACE] = check_ioam_header_and_data, - [TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data, -}; +static int get_u8(__u8 *val, const char *arg, int base) +{ + unsigned long res; + char *ptr; + + if (!arg || !*arg) + return -1; + res = strtoul(arg, &ptr, base); + + if (!ptr || ptr == arg || *ptr) + return -1; + + if (res == ULONG_MAX && errno == ERANGE) + return -1; + + if (res > 0xFFUL) + return -1; + + *val = res; + return 0; +} int main(int argc, char **argv) { - int fd, size, hoplen, tid, ret = 1, on = 1; - struct ioam6_hdr *opt; - struct cmsghdr *cmsg; - struct msghdr msg; - struct iovec iov; - __u8 buffer[512]; + __u8 buffer[512], *ptr, nexthdr, tr_size; + struct ioam6_trace_hdr *trace; + unsigned int hoplen, ret = 1; + struct ipv6_hopopt_hdr *hbh; + int fd, size, testname_id; + struct in6_addr src, dst; + struct ioam6_hdr *ioam6; + struct timeval timeout; + struct ipv6hdr *ipv6; __u32 tr_type; __u16 ioam_ns; - __u8 *ptr; - if (argc != 5) + if (argc != 9) goto out; - tid = str2id(argv[1]); - if (tid < 0 || !func[tid]) - goto out; + testname_id = str2id(argv[2]); - if (get_u32(&tr_type, argv[2], 16) || - get_u16(&ioam_ns, argv[3], 0)) + if (testname_id < 0 || + inet_pton(AF_INET6, argv[3], &src) != 1 || + inet_pton(AF_INET6, argv[4], &dst) != 1 || + get_u32(&tr_type, argv[5], 16) || + get_u8(&tr_size, argv[6], 0) || + get_u16(&ioam_ns, argv[7], 0)) goto out; - fd = socket(PF_INET6, SOCK_RAW, - !strcmp(argv[4], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6); + nexthdr = (!strcmp(argv[8], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6); + + hoplen = sizeof(*hbh); + hoplen += 2; // 2-byte padding for alignment + hoplen += sizeof(*ioam6); // IOAM option header + hoplen += sizeof(*trace); // IOAM trace header + hoplen += tr_size; // IOAM trace size + hoplen += (tr_size % 8); // optional padding + + fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6)); if (fd < 0) goto out; - setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, &on, sizeof(on)); + if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, + argv[1], strlen(argv[1]))) + goto close; - iov.iov_len = 1; - iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer))); - if (!iov.iov_base) + timeout.tv_sec = 1; + timeout.tv_usec = 0; + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, + (const char *)&timeout, sizeof(timeout))) goto close; recv: - memset(&msg, 0, sizeof(msg)); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = buffer; - msg.msg_controllen = CMSG_SPACE(sizeof(buffer)); - - size = recvmsg(fd, &msg, 0); + size = recv(fd, buffer, sizeof(buffer), 0); if (size <= 0) goto close; - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { - if (cmsg->cmsg_level != IPPROTO_IPV6 || - cmsg->cmsg_type != IPV6_HOPOPTS || - cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr)) - continue; + ipv6 = (struct ipv6hdr *)buffer; + + /* Skip packets that do not have the expected src/dst address or that + * do not have a Hop-by-hop. + */ + if (!ipv6_addr_equal(&ipv6->saddr, &src) || + !ipv6_addr_equal(&ipv6->daddr, &dst) || + ipv6->nexthdr != IPPROTO_HOPOPTS) + goto recv; + + /* Check Hbh's Next Header and Size. */ + hbh = (struct ipv6_hopopt_hdr *)(buffer + sizeof(*ipv6)); + if (hbh->nexthdr != nexthdr || hbh->hdrlen != (hoplen >> 3) - 1) + goto close; - ptr = (__u8 *)CMSG_DATA(cmsg); + /* Check we have a 2-byte padding for alignment. */ + ptr = (__u8 *)hbh + sizeof(*hbh); + if (ptr[0] != IPV6_TLV_PADN && ptr[1] != 0) + goto close; - hoplen = (ptr[1] + 1) << 3; - ptr += sizeof(struct ipv6_hopopt_hdr); + /* Check we now have the IOAM option. */ + ptr += 2; + if (ptr[0] != IPV6_TLV_IOAM) + goto close; - while (hoplen > 0) { - opt = (struct ioam6_hdr *)ptr; + /* Check its size and the IOAM option type. */ + ioam6 = (struct ioam6_hdr *)ptr; + if (ioam6->opt_len != sizeof(*ioam6) - 2 + sizeof(*trace) + tr_size || + ioam6->type != IOAM6_TYPE_PREALLOC) + goto close; - if (opt->opt_type == IPV6_TLV_IOAM && - opt->type == IOAM6_TYPE_PREALLOC) { - ptr += sizeof(*opt); - ret = func[tid](tid, - (struct ioam6_trace_hdr *)ptr, - tr_type, ioam_ns); - goto close; - } + trace = (struct ioam6_trace_hdr *)(ptr + sizeof(*ioam6)); - ptr += opt->opt_len + 2; - hoplen -= opt->opt_len + 2; - } - } + /* Check the trailing 4-byte padding (potentially). */ + ptr = (__u8 *)trace + sizeof(*trace) + tr_size; + if (tr_size % 8 && ptr[0] != IPV6_TLV_PADN && ptr[1] != 2 && + ptr[2] != 0 && ptr[3] != 0) + goto close; - goto recv; + /* Check the IOAM header and data. */ + ret = check_ioam_trace(testname_id, trace, tr_type, tr_size, ioam_ns); close: - free(iov.iov_base); close(fd); out: return ret; diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh new file mode 100755 index 000000000000..a6b2b1f9c641 --- /dev/null +++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh @@ -0,0 +1,262 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing for potential kernel soft lockup during IPv6 routing table +# refresh under heavy outgoing IPv6 traffic. If a kernel soft lockup +# occurs, a kernel panic will be triggered to prevent associated issues. +# +# +# Test Environment Layout +# +# ┌----------------┐ ┌----------------┐ +# | SOURCE_NS | | SINK_NS | +# | NAMESPACE | | NAMESPACE | +# |(iperf3 clients)| |(iperf3 servers)| +# | | | | +# | | | | +# | ┌-----------| nexthops |---------┐ | +# | |veth_source|<--------------------------------------->|veth_sink|<┐ | +# | └-----------|2001:0DB8:1::0:1/96 2001:0DB8:1::1:1/96 |---------┘ | | +# | | ^ 2001:0DB8:1::1:2/96 | | | +# | | . . | fwd | | +# | ┌---------┐ | . . | | | +# | | IPv6 | | . . | V | +# | | routing | | . 2001:0DB8:1::1:80/96| ┌-----┐ | +# | | table | | . | | lo | | +# | | nexthop | | . └--------┴-----┴-┘ +# | | update | | ............................> 2001:0DB8:2::1:1/128 +# | └-------- ┘ | +# └----------------┘ +# +# The test script sets up two network namespaces, source_ns and sink_ns, +# connected via a veth link. Within source_ns, it continuously updates the +# IPv6 routing table by flushing and inserting IPV6_NEXTHOP_ADDR_COUNT nexthop +# IPs destined for SINK_LOOPBACK_IP_ADDR in sink_ns. This refresh occurs at a +# rate of 1/ROUTING_TABLE_REFRESH_PERIOD per second for TEST_DURATION seconds. +# +# Simultaneously, multiple iperf3 clients within source_ns generate heavy +# outgoing IPv6 traffic. Each client is assigned a unique port number starting +# at 5000 and incrementing sequentially. Each client targets a unique iperf3 +# server running in sink_ns, connected to the SINK_LOOPBACK_IFACE interface +# using the same port number. +# +# The number of iperf3 servers and clients is set to half of the total +# available cores on each machine. +# +# NOTE: We have tested this script on machines with various CPU specifications, +# ranging from lower to higher performance as listed below. The test script +# effectively triggered a kernel soft lockup on machines running an unpatched +# kernel in under a minute: +# +# - 1x Intel Xeon E-2278G 8-Core Processor @ 3.40GHz +# - 1x Intel Xeon E-2378G Processor 8-Core @ 2.80GHz +# - 1x AMD EPYC 7401P 24-Core Processor @ 2.00GHz +# - 1x AMD EPYC 7402P 24-Core Processor @ 2.80GHz +# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz +# - 1x Ampere Altra Q80-30 80-Core Processor @ 3.00GHz +# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz +# - 2x Intel Xeon Silver 4214 24-Core Processor @ 2.20GHz +# - 1x AMD EPYC 7502P 32-Core @ 2.50GHz +# - 1x Intel Xeon Gold 6314U 32-Core Processor @ 2.30GHz +# - 2x Intel Xeon Gold 6338 32-Core Processor @ 2.00GHz +# +# On less performant machines, you may need to increase the TEST_DURATION +# parameter to enhance the likelihood of encountering a race condition leading +# to a kernel soft lockup and avoid a false negative result. +# +# NOTE: The test may not produce the expected result in virtualized +# environments (e.g., qemu) due to differences in timing and CPU handling, +# which can affect the conditions needed to trigger a soft lockup. + +source lib.sh +source net_helper.sh + +TEST_DURATION=300 +ROUTING_TABLE_REFRESH_PERIOD=0.01 + +IPERF3_BITRATE="300m" + + +IPV6_NEXTHOP_ADDR_COUNT="128" +IPV6_NEXTHOP_ADDR_MASK="96" +IPV6_NEXTHOP_PREFIX="2001:0DB8:1" + + +SOURCE_TEST_IFACE="veth_source" +SOURCE_TEST_IP_ADDR="2001:0DB8:1::0:1/96" + +SINK_TEST_IFACE="veth_sink" +# ${SINK_TEST_IFACE} is populated with the following range of IPv6 addresses: +# 2001:0DB8:1::1:1 to 2001:0DB8:1::1:${IPV6_NEXTHOP_ADDR_COUNT} +SINK_LOOPBACK_IFACE="lo" +SINK_LOOPBACK_IP_MASK="128" +SINK_LOOPBACK_IP_ADDR="2001:0DB8:2::1:1" + +nexthop_ip_list="" +termination_signal="" +kernel_softlokup_panic_prev_val="" + +terminate_ns_processes_by_pattern() { + local ns=$1 + local pattern=$2 + + for pid in $(ip netns pids ${ns}); do + [ -e /proc/$pid/cmdline ] && grep -qe "${pattern}" /proc/$pid/cmdline && kill -9 $pid + done +} + +cleanup() { + echo "info: cleaning up namespaces and terminating all processes within them..." + + + # Terminate iperf3 instances running in the source_ns. To avoid race + # conditions, first iterate over the PIDs and terminate those + # associated with the bash shells running the + # `while true; do iperf3 -c ...; done` loops. In a second iteration, + # terminate the individual `iperf3 -c ...` instances. + terminate_ns_processes_by_pattern ${source_ns} while + terminate_ns_processes_by_pattern ${source_ns} iperf3 + + # Repeat the same process for sink_ns + terminate_ns_processes_by_pattern ${sink_ns} while + terminate_ns_processes_by_pattern ${sink_ns} iperf3 + + # Check if any iperf3 instances are still running. This could happen + # if a core has entered an infinite loop and the timeout for detecting + # the soft lockup has not expired, but either the test interval has + # already elapsed or the test was terminated manually (e.g., with ^C) + for pid in $(ip netns pids ${source_ns}); do + if [ -e /proc/$pid/cmdline ] && grep -qe 'iperf3' /proc/$pid/cmdline; then + echo "FAIL: unable to terminate some iperf3 instances. Soft lockup is underway. A kernel panic is on the way!" + exit ${ksft_fail} + fi + done + + if [ "$termination_signal" == "SIGINT" ]; then + echo "SKIP: Termination due to ^C (SIGINT)" + elif [ "$termination_signal" == "SIGALRM" ]; then + echo "PASS: No kernel soft lockup occurred during this ${TEST_DURATION} second test" + fi + + cleanup_ns ${source_ns} ${sink_ns} + + sysctl -qw kernel.softlockup_panic=${kernel_softlokup_panic_prev_val} +} + +setup_prepare() { + setup_ns source_ns sink_ns + + ip -n ${source_ns} link add name ${SOURCE_TEST_IFACE} type veth peer name ${SINK_TEST_IFACE} netns ${sink_ns} + + # Setting up the Source namespace + ip -n ${source_ns} addr add ${SOURCE_TEST_IP_ADDR} dev ${SOURCE_TEST_IFACE} + ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} qlen 10000 + ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} up + ip netns exec ${source_ns} sysctl -qw net.ipv6.fib_multipath_hash_policy=1 + + # Setting up the Sink namespace + ip -n ${sink_ns} addr add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} dev ${SINK_LOOPBACK_IFACE} + ip -n ${sink_ns} link set dev ${SINK_LOOPBACK_IFACE} up + ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_LOOPBACK_IFACE}.forwarding=1 + + ip -n ${sink_ns} link set ${SINK_TEST_IFACE} up + ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_TEST_IFACE}.forwarding=1 + + + # Populate nexthop IPv6 addresses on the test interface in the sink_ns + echo "info: populating ${IPV6_NEXTHOP_ADDR_COUNT} IPv6 addresses on the ${SINK_TEST_IFACE} interface ..." + for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do + ip -n ${sink_ns} addr add ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" "${IP}")/${IPV6_NEXTHOP_ADDR_MASK} dev ${SINK_TEST_IFACE}; + done + + # Preparing list of nexthops + for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do + nexthop_ip_list=$nexthop_ip_list" nexthop via ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" $IP) dev ${SOURCE_TEST_IFACE} weight 1" + done +} + + +test_soft_lockup_during_routing_table_refresh() { + # Start num_of_iperf_servers iperf3 servers in the sink_ns namespace, + # each listening on ports starting at 5001 and incrementing + # sequentially. Since iperf3 instances may terminate unexpectedly, a + # while loop is used to automatically restart them in such cases. + echo "info: starting ${num_of_iperf_servers} iperf3 servers in the sink_ns namespace ..." + for i in $(seq 1 ${num_of_iperf_servers}); do + cmd="iperf3 --bind ${SINK_LOOPBACK_IP_ADDR} -s -p $(printf '5%03d' ${i}) --rcv-timeout 200 &>/dev/null" + ip netns exec ${sink_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null + done + + # Wait for the iperf3 servers to be ready + for i in $(seq ${num_of_iperf_servers}); do + port=$(printf '5%03d' ${i}); + wait_local_port_listen ${sink_ns} ${port} tcp + done + + # Continuously refresh the routing table in the background within + # the source_ns namespace + ip netns exec ${source_ns} bash -c " + while \$(ip netns list | grep -q ${source_ns}); do + ip -6 route add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} ${nexthop_ip_list}; + sleep ${ROUTING_TABLE_REFRESH_PERIOD}; + ip -6 route delete ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK}; + done &" + + # Start num_of_iperf_servers iperf3 clients in the source_ns namespace, + # each sending TCP traffic on sequential ports starting at 5001. + # Since iperf3 instances may terminate unexpectedly (e.g., if the route + # to the server is deleted in the background during a route refresh), a + # while loop is used to automatically restart them in such cases. + echo "info: starting ${num_of_iperf_servers} iperf3 clients in the source_ns namespace ..." + for i in $(seq 1 ${num_of_iperf_servers}); do + cmd="iperf3 -c ${SINK_LOOPBACK_IP_ADDR} -p $(printf '5%03d' ${i}) --length 64 --bitrate ${IPERF3_BITRATE} -t 0 --connect-timeout 150 &>/dev/null" + ip netns exec ${source_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null + done + + echo "info: IPv6 routing table is being updated at the rate of $(echo "1/${ROUTING_TABLE_REFRESH_PERIOD}" | bc)/s for ${TEST_DURATION} seconds ..." + echo "info: A kernel soft lockup, if detected, results in a kernel panic!" + + wait +} + +# Make sure 'iperf3' is installed, skip the test otherwise +if [ ! -x "$(command -v "iperf3")" ]; then + echo "SKIP: 'iperf3' is not installed. Skipping the test." + exit ${ksft_skip} +fi + +# Determine the number of cores on the machine +num_of_iperf_servers=$(( $(nproc)/2 )) + +# Check if we are running on a multi-core machine, skip the test otherwise +if [ "${num_of_iperf_servers}" -eq 0 ]; then + echo "SKIP: This test is not valid on a single core machine!" + exit ${ksft_skip} +fi + +# Since the kernel soft lockup we're testing causes at least one core to enter +# an infinite loop, destabilizing the host and likely affecting subsequent +# tests, we trigger a kernel panic instead of reporting a failure and +# continuing +kernel_softlokup_panic_prev_val=$(sysctl -n kernel.softlockup_panic) +sysctl -qw kernel.softlockup_panic=1 + +handle_sigint() { + termination_signal="SIGINT" + cleanup + exit ${ksft_skip} +} + +handle_sigalrm() { + termination_signal="SIGALRM" + cleanup + exit ${ksft_pass} +} + +trap handle_sigint SIGINT +trap handle_sigalrm SIGALRM + +(sleep ${TEST_DURATION} && kill -s SIGALRM $$)& + +setup_prepare +test_soft_lockup_during_routing_table_refresh diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index be8707bfb46e..8994fec1c38f 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -1,11 +1,17 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +net_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") +source "$net_dir/lib/sh/defer.sh" + ############################################################################## # Defines : "${WAIT_TIMEOUT:=20}" +# Whether to pause on after a failure. +: "${PAUSE_ON_FAIL:=no}" + BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms # Kselftest framework constants. @@ -17,6 +23,11 @@ ksft_skip=4 # namespace list created by setup_ns NS_LIST=() +# Exit status to return at the end. Set in case one of the tests fails. +EXIT_STATUS=0 +# Per-test return value. Clear at the beginning of each test. +RET=0 + ############################################################################## # Helpers @@ -233,3 +244,218 @@ tc_rule_handle_stats_get() | jq ".[] | select(.options.handle == $handle) | \ .options.actions[0].stats$selector" } + +ret_set_ksft_status() +{ + local ksft_status=$1; shift + local msg=$1; shift + + RET=$(ksft_status_merge $RET $ksft_status) + if (( $? )); then + retmsg=$msg + fi +} + +log_test_result() +{ + local test_name=$1; shift + local opt_str=$1; shift + local result=$1; shift + local retmsg=$1; shift + + printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" + if [[ $retmsg ]]; then + printf "\t%s\n" "$retmsg" + fi +} + +pause_on_fail() +{ + if [[ $PAUSE_ON_FAIL == yes ]]; then + echo "Hit enter to continue, 'q' to quit" + read a + [[ $a == q ]] && exit 1 + fi +} + +handle_test_result_pass() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" " OK " +} + +handle_test_result_fail() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" FAIL "$retmsg" + pause_on_fail +} + +handle_test_result_xfail() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" XFAIL "$retmsg" + pause_on_fail +} + +handle_test_result_skip() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" SKIP "$retmsg" +} + +log_test() +{ + local test_name=$1 + local opt_str=$2 + + if [[ $# -eq 2 ]]; then + opt_str="($opt_str)" + fi + + if ((RET == ksft_pass)); then + handle_test_result_pass "$test_name" "$opt_str" + elif ((RET == ksft_xfail)); then + handle_test_result_xfail "$test_name" "$opt_str" + elif ((RET == ksft_skip)); then + handle_test_result_skip "$test_name" "$opt_str" + else + handle_test_result_fail "$test_name" "$opt_str" + fi + + EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET) + return $RET +} + +log_test_skip() +{ + RET=$ksft_skip retmsg= log_test "$@" +} + +log_test_xfail() +{ + RET=$ksft_xfail retmsg= log_test "$@" +} + +log_info() +{ + local msg=$1 + + echo "INFO: $msg" +} + +tests_run() +{ + local current_test + + for current_test in ${TESTS:-$ALL_TESTS}; do + in_defer_scope \ + $current_test + done +} + +# Whether FAILs should be interpreted as XFAILs. Internal. +FAIL_TO_XFAIL= + +check_err() +{ + local err=$1 + local msg=$2 + + if ((err)); then + if [[ $FAIL_TO_XFAIL = yes ]]; then + ret_set_ksft_status $ksft_xfail "$msg" + else + ret_set_ksft_status $ksft_fail "$msg" + fi + fi +} + +check_fail() +{ + local err=$1 + local msg=$2 + + check_err $((!err)) "$msg" +} + +check_err_fail() +{ + local should_fail=$1; shift + local err=$1; shift + local what=$1; shift + + if ((should_fail)); then + check_fail $err "$what succeeded, but should have failed" + else + check_err $err "$what failed" + fi +} + +xfail() +{ + FAIL_TO_XFAIL=yes "$@" +} + +xfail_on_slow() +{ + if [[ $KSFT_MACHINE_SLOW = yes ]]; then + FAIL_TO_XFAIL=yes "$@" + else + "$@" + fi +} + +omit_on_slow() +{ + if [[ $KSFT_MACHINE_SLOW != yes ]]; then + "$@" + fi +} + +xfail_on_veth() +{ + local dev=$1; shift + local kind + + kind=$(ip -j -d link show dev $dev | + jq -r '.[].linkinfo.info_kind') + if [[ $kind = veth ]]; then + FAIL_TO_XFAIL=yes "$@" + else + "$@" + fi +} + +kill_process() +{ + local pid=$1; shift + + # Suppress noise from killing the process. + { kill $pid && wait $pid; } 2>/dev/null +} + +ip_link_add() +{ + local name=$1; shift + + ip link add name "$name" "$@" + defer ip link del dev "$name" +} + +ip_link_master() +{ + local member=$1; shift + local master=$1; shift + + ip link set dev "$member" master "$master" + defer ip link set dev "$member" nomaster +} diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index 82c3264b115e..18b9443454a9 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -10,6 +10,6 @@ TEST_FILES += ../../../../net/ynl TEST_GEN_FILES += csum -TEST_INCLUDES := $(wildcard py/*.py) +TEST_INCLUDES := $(wildcard py/*.py sh/*.sh) include ../../lib.mk diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c index e0a34e5e8dd5..27437590eeb5 100644 --- a/tools/testing/selftests/net/lib/csum.c +++ b/tools/testing/selftests/net/lib/csum.c @@ -675,22 +675,20 @@ static int recv_verify_packet_ipv6(void *nh, int len) { struct ipv6hdr *ip6h = nh; uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; - uint16_t ip_len; + uint16_t payload_len; if (len < sizeof(*ip6h) || ip6h->nexthdr != proto) return -1; - ip_len = ntohs(ip6h->payload_len); - if (ip_len > len - sizeof(*ip6h)) + payload_len = ntohs(ip6h->payload_len); + if (payload_len > len - sizeof(*ip6h)) return -1; - len = ip_len; iph_addr_p = &ip6h->saddr; - if (proto == IPPROTO_TCP) - return recv_verify_packet_tcp(ip6h + 1, len); + return recv_verify_packet_tcp(ip6h + 1, payload_len); else - return recv_verify_packet_udp(ip6h + 1, len); + return recv_verify_packet_udp(ip6h + 1, payload_len); } /* return whether auxdata includes TP_STATUS_CSUM_VALID */ diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index b6d498d125fe..54d8f5eba810 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -6,3 +6,4 @@ from .netns import NetNS from .nsim import * from .utils import * from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily +from .ynl import NetshaperFamily diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py index f571a8b3139b..1a8cbe9acc48 100644 --- a/tools/testing/selftests/net/lib/py/nsim.py +++ b/tools/testing/selftests/net/lib/py/nsim.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +import errno import json import os import random diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index 1ace58370c06..a0d689d58c57 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -47,3 +47,8 @@ class NetdevFamily(YnlFamily): def __init__(self): super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(), schema='') + +class NetshaperFamily(YnlFamily): + def __init__(self): + super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(), + schema='') diff --git a/tools/testing/selftests/net/lib/sh/defer.sh b/tools/testing/selftests/net/lib/sh/defer.sh new file mode 100644 index 000000000000..082f5d38321b --- /dev/null +++ b/tools/testing/selftests/net/lib/sh/defer.sh @@ -0,0 +1,115 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# map[(scope_id,track,cleanup_id) -> cleanup_command] +# track={d=default | p=priority} +declare -A __DEFER__JOBS + +# map[(scope_id,track) -> # cleanup_commands] +declare -A __DEFER__NJOBS + +# scope_id of the topmost scope. +__DEFER__SCOPE_ID=0 + +__defer__ndefer_key() +{ + local track=$1; shift + + echo $__DEFER__SCOPE_ID,$track +} + +__defer__defer_key() +{ + local track=$1; shift + local defer_ix=$1; shift + + echo $__DEFER__SCOPE_ID,$track,$defer_ix +} + +__defer__ndefers() +{ + local track=$1; shift + + echo ${__DEFER__NJOBS[$(__defer__ndefer_key $track)]} +} + +__defer__run() +{ + local track=$1; shift + local defer_ix=$1; shift + local defer_key=$(__defer__defer_key $track $defer_ix) + + ${__DEFER__JOBS[$defer_key]} + unset __DEFER__JOBS[$defer_key] +} + +__defer__schedule() +{ + local track=$1; shift + local ndefers=$(__defer__ndefers $track) + local ndefers_key=$(__defer__ndefer_key $track) + local defer_key=$(__defer__defer_key $track $ndefers) + local defer="$@" + + __DEFER__JOBS[$defer_key]="$defer" + __DEFER__NJOBS[$ndefers_key]=$((ndefers + 1)) +} + +__defer__scope_wipe() +{ + __DEFER__NJOBS[$(__defer__ndefer_key d)]=0 + __DEFER__NJOBS[$(__defer__ndefer_key p)]=0 +} + +defer_scope_push() +{ + ((__DEFER__SCOPE_ID++)) + __defer__scope_wipe +} + +defer_scope_pop() +{ + local defer_ix + + for ((defer_ix=$(__defer__ndefers p); defer_ix-->0; )); do + __defer__run p $defer_ix + done + + for ((defer_ix=$(__defer__ndefers d); defer_ix-->0; )); do + __defer__run d $defer_ix + done + + __defer__scope_wipe + ((__DEFER__SCOPE_ID--)) +} + +defer() +{ + __defer__schedule d "$@" +} + +defer_prio() +{ + __defer__schedule p "$@" +} + +defer_scopes_cleanup() +{ + while ((__DEFER__SCOPE_ID >= 0)); do + defer_scope_pop + done +} + +in_defer_scope() +{ + local ret + + defer_scope_push + "$@" + ret=$? + defer_scope_pop + + return $ret +} + +__defer__scope_wipe diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 5d796622e730..8e3fc05a5397 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -11,7 +11,7 @@ TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq TEST_FILES := mptcp_lib.sh settings -TEST_INCLUDES := ../lib.sh ../net_helper.sh +TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh) ../net_helper.sh EXTRA_CLEAN := *.pcap diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 57325d57e4c6..b48b4e56826a 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -259,6 +259,15 @@ check_mptcp_disabled() mptcp_lib_ns_init disabled_ns print_larger_title "New MPTCP socket can be blocked via sysctl" + + # mainly to cover more code + if ! ip netns exec ${disabled_ns} sysctl net.mptcp >/dev/null; then + mptcp_lib_pr_fail "not able to list net.mptcp sysctl knobs" + mptcp_lib_result_fail "not able to list net.mptcp sysctl knobs" + ret=${KSFT_FAIL} + return 1 + fi + # net.mptcp.enabled should be enabled by default if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then mptcp_lib_pr_fail "net.mptcp.enabled sysctl is not 1 by default" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index e8d0a01b4144..c07e2bd3a315 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -23,6 +23,7 @@ tmpfile="" cout="" err="" capout="" +cappid="" ns1="" ns2="" iptables="iptables" @@ -887,40 +888,62 @@ check_cestab() fi } -do_transfer() +cond_start_capture() { - local listener_ns="$1" - local connector_ns="$2" - local cl_proto="$3" - local srv_proto="$4" - local connect_addr="$5" - - local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1)) - local cappid - local FAILING_LINKS=${FAILING_LINKS:-""} - local fastclose=${fastclose:-""} - local speed=${speed:-"fast"} + local ns="$1" - :> "$cout" - :> "$sout" :> "$capout" if $capture; then - local capuser - if [ -z $SUDO_USER ] ; then + local capuser capfile + if [ -z $SUDO_USER ]; then capuser="" else capuser="-Z $SUDO_USER" fi - capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}") + capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "$ns") echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile" - ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & + ip netns exec "$ns" tcpdump -i any -s 65535 -B 32768 $capuser -w "$capfile" > "$capout" 2>&1 & cappid=$! sleep 1 fi +} + +cond_stop_capture() +{ + if $capture; then + sleep 1 + kill $cappid + cat "$capout" + fi +} + +get_port() +{ + echo "$((10000 + MPTCP_LIB_TEST_COUNTER - 1))" +} + +do_transfer() +{ + local listener_ns="$1" + local connector_ns="$2" + local cl_proto="$3" + local srv_proto="$4" + local connect_addr="$5" + local port + + local FAILING_LINKS=${FAILING_LINKS:-""} + local fastclose=${fastclose:-""} + local speed=${speed:-"fast"} + port=$(get_port) + + :> "$cout" + :> "$sout" + + cond_start_capture ${listener_ns} NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat -n @@ -1007,10 +1030,7 @@ do_transfer() wait $spid local rets=$? - if $capture; then - sleep 1 - kill $cappid - fi + cond_stop_capture NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ nstat | grep Tcp > /tmp/${listener_ns}.out @@ -1026,7 +1046,6 @@ do_transfer() ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" cat /tmp/${connector_ns}.out - cat "$capout" return 1 fi @@ -1043,13 +1062,7 @@ do_transfer() fi rets=$? - if [ $retc -eq 0 ] && [ $rets -eq 0 ];then - cat "$capout" - return 0 - fi - - cat "$capout" - return 1 + [ $retc -eq 0 ] && [ $rets -eq 0 ] } make_file() @@ -2873,6 +2886,32 @@ verify_listener_events() fail_test } +chk_mpc_endp_attempt() +{ + local retl=$1 + local attempts=$2 + + print_check "Connect" + + if [ ${retl} = 124 ]; then + fail_test "timeout on connect" + elif [ ${retl} = 0 ]; then + fail_test "unexpected successful connect" + else + print_ok + + print_check "Attempts" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPCapableEndpAttempt") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$attempts" ]; then + fail_test "got ${count} MPC attempt[s] on port-based endpoint, expected ${attempts}" + else + print_ok + fi + fi +} + add_addr_ports_tests() { # signal address with port @@ -2963,6 +3002,22 @@ add_addr_ports_tests() chk_join_nr 2 2 2 chk_add_nr 2 2 2 fi + + if reset "port-based signal endpoint must not accept mpc"; then + local port retl count + port=$(get_port) + + cond_start_capture ${ns1} + pm_nl_add_endpoint ${ns1} 10.0.2.1 flags signal port ${port} + mptcp_lib_wait_local_port_listen ${ns1} ${port} + + timeout 1 ip netns exec ${ns2} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s MPTCP 10.0.2.1 >/dev/null 2>&1 + retl=$? + cond_stop_capture + + chk_mpc_endp_attempt ${retl} 1 + fi } syncookies_tests() diff --git a/tools/testing/selftests/net/ncdevmem.c b/tools/testing/selftests/net/ncdevmem.c deleted file mode 100644 index 64d6805381c5..000000000000 --- a/tools/testing/selftests/net/ncdevmem.c +++ /dev/null @@ -1,570 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#define _GNU_SOURCE -#define __EXPORTED_HEADERS__ - -#include <linux/uio.h> -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <stdbool.h> -#include <string.h> -#include <errno.h> -#define __iovec_defined -#include <fcntl.h> -#include <malloc.h> -#include <error.h> - -#include <arpa/inet.h> -#include <sys/socket.h> -#include <sys/mman.h> -#include <sys/ioctl.h> -#include <sys/syscall.h> - -#include <linux/memfd.h> -#include <linux/dma-buf.h> -#include <linux/udmabuf.h> -#include <libmnl/libmnl.h> -#include <linux/types.h> -#include <linux/netlink.h> -#include <linux/genetlink.h> -#include <linux/netdev.h> -#include <time.h> -#include <net/if.h> - -#include "netdev-user.h" -#include <ynl.h> - -#define PAGE_SHIFT 12 -#define TEST_PREFIX "ncdevmem" -#define NUM_PAGES 16000 - -#ifndef MSG_SOCK_DEVMEM -#define MSG_SOCK_DEVMEM 0x2000000 -#endif - -/* - * tcpdevmem netcat. Works similarly to netcat but does device memory TCP - * instead of regular TCP. Uses udmabuf to mock a dmabuf provider. - * - * Usage: - * - * On server: - * ncdevmem -s <server IP> -c <client IP> -f eth1 -l -p 5201 -v 7 - * - * On client: - * yes $(echo -e \\x01\\x02\\x03\\x04\\x05\\x06) | \ - * tr \\n \\0 | \ - * head -c 5G | \ - * nc <server IP> 5201 -p 5201 - * - * Note this is compatible with regular netcat. i.e. the sender or receiver can - * be replaced with regular netcat to test the RX or TX path in isolation. - */ - -static char *server_ip = "192.168.1.4"; -static char *client_ip = "192.168.1.2"; -static char *port = "5201"; -static size_t do_validation; -static int start_queue = 8; -static int num_queues = 8; -static char *ifname = "eth1"; -static unsigned int ifindex; -static unsigned int dmabuf_id; - -void print_bytes(void *ptr, size_t size) -{ - unsigned char *p = ptr; - int i; - - for (i = 0; i < size; i++) - printf("%02hhX ", p[i]); - printf("\n"); -} - -void print_nonzero_bytes(void *ptr, size_t size) -{ - unsigned char *p = ptr; - unsigned int i; - - for (i = 0; i < size; i++) - putchar(p[i]); - printf("\n"); -} - -void validate_buffer(void *line, size_t size) -{ - static unsigned char seed = 1; - unsigned char *ptr = line; - int errors = 0; - size_t i; - - for (i = 0; i < size; i++) { - if (ptr[i] != seed) { - fprintf(stderr, - "Failed validation: expected=%u, actual=%u, index=%lu\n", - seed, ptr[i], i); - errors++; - if (errors > 20) - error(1, 0, "validation failed."); - } - seed++; - if (seed == do_validation) - seed = 0; - } - - fprintf(stdout, "Validated buffer\n"); -} - -#define run_command(cmd, ...) \ - ({ \ - char command[256]; \ - memset(command, 0, sizeof(command)); \ - snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \ - printf("Running: %s\n", command); \ - system(command); \ - }) - -static int reset_flow_steering(void) -{ - int ret = 0; - - ret = run_command("sudo ethtool -K %s ntuple off", ifname); - if (ret) - return ret; - - return run_command("sudo ethtool -K %s ntuple on", ifname); -} - -static int configure_headersplit(bool on) -{ - return run_command("sudo ethtool -G %s tcp-data-split %s", ifname, - on ? "on" : "off"); -} - -static int configure_rss(void) -{ - return run_command("sudo ethtool -X %s equal %d", ifname, start_queue); -} - -static int configure_channels(unsigned int rx, unsigned int tx) -{ - return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx); -} - -static int configure_flow_steering(void) -{ - return run_command("sudo ethtool -N %s flow-type tcp4 src-ip %s dst-ip %s src-port %s dst-port %s queue %d", - ifname, client_ip, server_ip, port, port, start_queue); -} - -static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd, - struct netdev_queue_id *queues, - unsigned int n_queue_index, struct ynl_sock **ys) -{ - struct netdev_bind_rx_req *req = NULL; - struct netdev_bind_rx_rsp *rsp = NULL; - struct ynl_error yerr; - - *ys = ynl_sock_create(&ynl_netdev_family, &yerr); - if (!*ys) { - fprintf(stderr, "YNL: %s\n", yerr.msg); - return -1; - } - - req = netdev_bind_rx_req_alloc(); - netdev_bind_rx_req_set_ifindex(req, ifindex); - netdev_bind_rx_req_set_fd(req, dmabuf_fd); - __netdev_bind_rx_req_set_queues(req, queues, n_queue_index); - - rsp = netdev_bind_rx(*ys, req); - if (!rsp) { - perror("netdev_bind_rx"); - goto err_close; - } - - if (!rsp->_present.id) { - perror("id not present"); - goto err_close; - } - - printf("got dmabuf id=%d\n", rsp->id); - dmabuf_id = rsp->id; - - netdev_bind_rx_req_free(req); - netdev_bind_rx_rsp_free(rsp); - - return 0; - -err_close: - fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg); - netdev_bind_rx_req_free(req); - ynl_sock_destroy(*ys); - return -1; -} - -static void create_udmabuf(int *devfd, int *memfd, int *buf, size_t dmabuf_size) -{ - struct udmabuf_create create; - int ret; - - *devfd = open("/dev/udmabuf", O_RDWR); - if (*devfd < 0) { - error(70, 0, - "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n", - TEST_PREFIX); - } - - *memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING); - if (*memfd < 0) - error(70, 0, "%s: [skip,no-memfd]\n", TEST_PREFIX); - - /* Required for udmabuf */ - ret = fcntl(*memfd, F_ADD_SEALS, F_SEAL_SHRINK); - if (ret < 0) - error(73, 0, "%s: [skip,fcntl-add-seals]\n", TEST_PREFIX); - - ret = ftruncate(*memfd, dmabuf_size); - if (ret == -1) - error(74, 0, "%s: [FAIL,memfd-truncate]\n", TEST_PREFIX); - - memset(&create, 0, sizeof(create)); - - create.memfd = *memfd; - create.offset = 0; - create.size = dmabuf_size; - *buf = ioctl(*devfd, UDMABUF_CREATE, &create); - if (*buf < 0) - error(75, 0, "%s: [FAIL, create udmabuf]\n", TEST_PREFIX); -} - -int do_server(void) -{ - char ctrl_data[sizeof(int) * 20000]; - struct netdev_queue_id *queues; - size_t non_page_aligned_frags = 0; - struct sockaddr_in client_addr; - struct sockaddr_in server_sin; - size_t page_aligned_frags = 0; - int devfd, memfd, buf, ret; - size_t total_received = 0; - socklen_t client_addr_len; - bool is_devmem = false; - char *buf_mem = NULL; - struct ynl_sock *ys; - size_t dmabuf_size; - char iobuf[819200]; - char buffer[256]; - int socket_fd; - int client_fd; - size_t i = 0; - int opt = 1; - - dmabuf_size = getpagesize() * NUM_PAGES; - - create_udmabuf(&devfd, &memfd, &buf, dmabuf_size); - - if (reset_flow_steering()) - error(1, 0, "Failed to reset flow steering\n"); - - /* Configure RSS to divert all traffic from our devmem queues */ - if (configure_rss()) - error(1, 0, "Failed to configure rss\n"); - - /* Flow steer our devmem flows to start_queue */ - if (configure_flow_steering()) - error(1, 0, "Failed to configure flow steering\n"); - - sleep(1); - - queues = malloc(sizeof(*queues) * num_queues); - - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) - error(1, 0, "Failed to bind\n"); - - buf_mem = mmap(NULL, dmabuf_size, PROT_READ | PROT_WRITE, MAP_SHARED, - buf, 0); - if (buf_mem == MAP_FAILED) - error(1, 0, "mmap()"); - - server_sin.sin_family = AF_INET; - server_sin.sin_port = htons(atoi(port)); - - ret = inet_pton(server_sin.sin_family, server_ip, &server_sin.sin_addr); - if (socket < 0) - error(79, 0, "%s: [FAIL, create socket]\n", TEST_PREFIX); - - socket_fd = socket(server_sin.sin_family, SOCK_STREAM, 0); - if (socket < 0) - error(errno, errno, "%s: [FAIL, create socket]\n", TEST_PREFIX); - - ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEPORT, &opt, - sizeof(opt)); - if (ret) - error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); - - ret = setsockopt(socket_fd, SOL_SOCKET, SO_REUSEADDR, &opt, - sizeof(opt)); - if (ret) - error(errno, errno, "%s: [FAIL, set sock opt]\n", TEST_PREFIX); - - printf("binding to address %s:%d\n", server_ip, - ntohs(server_sin.sin_port)); - - ret = bind(socket_fd, &server_sin, sizeof(server_sin)); - if (ret) - error(errno, errno, "%s: [FAIL, bind]\n", TEST_PREFIX); - - ret = listen(socket_fd, 1); - if (ret) - error(errno, errno, "%s: [FAIL, listen]\n", TEST_PREFIX); - - client_addr_len = sizeof(client_addr); - - inet_ntop(server_sin.sin_family, &server_sin.sin_addr, buffer, - sizeof(buffer)); - printf("Waiting or connection on %s:%d\n", buffer, - ntohs(server_sin.sin_port)); - client_fd = accept(socket_fd, &client_addr, &client_addr_len); - - inet_ntop(client_addr.sin_family, &client_addr.sin_addr, buffer, - sizeof(buffer)); - printf("Got connection from %s:%d\n", buffer, - ntohs(client_addr.sin_port)); - - while (1) { - struct iovec iov = { .iov_base = iobuf, - .iov_len = sizeof(iobuf) }; - struct dmabuf_cmsg *dmabuf_cmsg = NULL; - struct dma_buf_sync sync = { 0 }; - struct cmsghdr *cm = NULL; - struct msghdr msg = { 0 }; - struct dmabuf_token token; - ssize_t ret; - - is_devmem = false; - printf("\n\n"); - - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = ctrl_data; - msg.msg_controllen = sizeof(ctrl_data); - ret = recvmsg(client_fd, &msg, MSG_SOCK_DEVMEM); - printf("recvmsg ret=%ld\n", ret); - if (ret < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) - continue; - if (ret < 0) { - perror("recvmsg"); - continue; - } - if (ret == 0) { - printf("client exited\n"); - goto cleanup; - } - - i++; - for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { - if (cm->cmsg_level != SOL_SOCKET || - (cm->cmsg_type != SCM_DEVMEM_DMABUF && - cm->cmsg_type != SCM_DEVMEM_LINEAR)) { - fprintf(stdout, "skipping non-devmem cmsg\n"); - continue; - } - - dmabuf_cmsg = (struct dmabuf_cmsg *)CMSG_DATA(cm); - is_devmem = true; - - if (cm->cmsg_type == SCM_DEVMEM_LINEAR) { - /* TODO: process data copied from skb's linear - * buffer. - */ - fprintf(stdout, - "SCM_DEVMEM_LINEAR. dmabuf_cmsg->frag_size=%u\n", - dmabuf_cmsg->frag_size); - - continue; - } - - token.token_start = dmabuf_cmsg->frag_token; - token.token_count = 1; - - total_received += dmabuf_cmsg->frag_size; - printf("received frag_page=%llu, in_page_offset=%llu, frag_offset=%llu, frag_size=%u, token=%u, total_received=%lu, dmabuf_id=%u\n", - dmabuf_cmsg->frag_offset >> PAGE_SHIFT, - dmabuf_cmsg->frag_offset % getpagesize(), - dmabuf_cmsg->frag_offset, dmabuf_cmsg->frag_size, - dmabuf_cmsg->frag_token, total_received, - dmabuf_cmsg->dmabuf_id); - - if (dmabuf_cmsg->dmabuf_id != dmabuf_id) - error(1, 0, - "received on wrong dmabuf_id: flow steering error\n"); - - if (dmabuf_cmsg->frag_size % getpagesize()) - non_page_aligned_frags++; - else - page_aligned_frags++; - - sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_START; - ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync); - - if (do_validation) - validate_buffer( - ((unsigned char *)buf_mem) + - dmabuf_cmsg->frag_offset, - dmabuf_cmsg->frag_size); - else - print_nonzero_bytes( - ((unsigned char *)buf_mem) + - dmabuf_cmsg->frag_offset, - dmabuf_cmsg->frag_size); - - sync.flags = DMA_BUF_SYNC_READ | DMA_BUF_SYNC_END; - ioctl(buf, DMA_BUF_IOCTL_SYNC, &sync); - - ret = setsockopt(client_fd, SOL_SOCKET, - SO_DEVMEM_DONTNEED, &token, - sizeof(token)); - if (ret != 1) - error(1, 0, - "SO_DEVMEM_DONTNEED not enough tokens"); - } - if (!is_devmem) - error(1, 0, "flow steering error\n"); - - printf("total_received=%lu\n", total_received); - } - - fprintf(stdout, "%s: ok\n", TEST_PREFIX); - - fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", - page_aligned_frags, non_page_aligned_frags); - - fprintf(stdout, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n", - page_aligned_frags, non_page_aligned_frags); - -cleanup: - - munmap(buf_mem, dmabuf_size); - close(client_fd); - close(socket_fd); - close(buf); - close(memfd); - close(devfd); - ynl_sock_destroy(ys); - - return 0; -} - -void run_devmem_tests(void) -{ - struct netdev_queue_id *queues; - int devfd, memfd, buf; - struct ynl_sock *ys; - size_t dmabuf_size; - size_t i = 0; - - dmabuf_size = getpagesize() * NUM_PAGES; - - create_udmabuf(&devfd, &memfd, &buf, dmabuf_size); - - /* Configure RSS to divert all traffic from our devmem queues */ - if (configure_rss()) - error(1, 0, "rss error\n"); - - queues = calloc(num_queues, sizeof(*queues)); - - if (configure_headersplit(1)) - error(1, 0, "Failed to configure header split\n"); - - if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) - error(1, 0, "Binding empty queues array should have failed\n"); - - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (configure_headersplit(0)) - error(1, 0, "Failed to configure header split\n"); - - if (!bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) - error(1, 0, "Configure dmabuf with header split off should have failed\n"); - - if (configure_headersplit(1)) - error(1, 0, "Failed to configure header split\n"); - - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, buf, queues, num_queues, &ys)) - error(1, 0, "Failed to bind\n"); - - /* Deactivating a bound queue should not be legal */ - if (!configure_channels(num_queues, num_queues - 1)) - error(1, 0, "Deactivating a bound queue should be illegal.\n"); - - /* Closing the netlink socket does an implicit unbind */ - ynl_sock_destroy(ys); -} - -int main(int argc, char *argv[]) -{ - int is_server = 0, opt; - - while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) { - switch (opt) { - case 'l': - is_server = 1; - break; - case 's': - server_ip = optarg; - break; - case 'c': - client_ip = optarg; - break; - case 'p': - port = optarg; - break; - case 'v': - do_validation = atoll(optarg); - break; - case 'q': - num_queues = atoi(optarg); - break; - case 't': - start_queue = atoi(optarg); - break; - case 'f': - ifname = optarg; - break; - case '?': - printf("unknown option: %c\n", optopt); - break; - } - } - - ifindex = if_nametoindex(ifname); - - for (; optind < argc; optind++) - printf("extra arguments: %s\n", argv[optind]); - - run_devmem_tests(); - - if (is_server) - return do_server(); - - return 0; -} diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore index 0a64d6d0e29a..64c4f8d9aa6c 100644 --- a/tools/testing/selftests/net/netfilter/.gitignore +++ b/tools/testing/selftests/net/netfilter/.gitignore @@ -2,5 +2,6 @@ audit_logread connect_close conntrack_dump_flush +conntrack_reverse_clash sctp_collision nf_queue diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index e6c9e777fead..ffe161fac8b5 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -8,6 +8,7 @@ MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) TEST_PROGS := br_netfilter.sh bridge_brouter.sh TEST_PROGS += br_netfilter_queue.sh +TEST_PROGS += conntrack_dump_flush.sh TEST_PROGS += conntrack_icmp_related.sh TEST_PROGS += conntrack_ipip_mtu.sh TEST_PROGS += conntrack_tcp_unreplied.sh @@ -31,14 +32,14 @@ TEST_PROGS += nft_tproxy_tcp.sh TEST_PROGS += nft_tproxy_udp.sh TEST_PROGS += nft_zones_many.sh TEST_PROGS += rpath.sh +TEST_PROGS += vxlan_mtu_frag.sh TEST_PROGS += xt_string.sh TEST_PROGS_EXTENDED = nft_concat_range_perf.sh -TEST_GEN_PROGS = conntrack_dump_flush - TEST_GEN_FILES = audit_logread TEST_GEN_FILES += connect_close nf_queue +TEST_GEN_FILES += conntrack_dump_flush TEST_GEN_FILES += conntrack_reverse_clash TEST_GEN_FILES += sctp_collision @@ -54,4 +55,5 @@ TEST_FILES := lib.sh TEST_FILES += packetdrill TEST_INCLUDES := \ - ../lib.sh + ../lib.sh \ + $(wildcard ../lib/sh/*.sh) diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index c5fe7b34eaf1..43d8b500d391 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -7,6 +7,7 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_T_FILTER=m CONFIG_BRIDGE_NETFILTER=m CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_CGROUP_BPF=y CONFIG_DUMMY=m CONFIG_INET_ESP=m @@ -84,6 +85,7 @@ CONFIG_NFT_SYNPROXY=m CONFIG_NFT_TPROXY=m CONFIG_VETH=m CONFIG_VLAN_8021Q=m +CONFIG_VXLAN=m CONFIG_XFRM_USER=m CONFIG_XFRM_STATISTICS=y CONFIG_NET_PKTGEN=m diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c index bd9317bf5ada..5f827e10717d 100644 --- a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c +++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c @@ -43,6 +43,8 @@ static int build_cta_tuple_v4(struct nlmsghdr *nlh, int type, mnl_attr_nest_end(nlh, nest_proto); mnl_attr_nest_end(nlh, nest); + + return 0; } static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type, @@ -71,6 +73,8 @@ static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type, mnl_attr_nest_end(nlh, nest_proto); mnl_attr_nest_end(nlh, nest); + + return 0; } static int build_cta_proto(struct nlmsghdr *nlh) @@ -90,6 +94,8 @@ static int build_cta_proto(struct nlmsghdr *nlh) mnl_attr_nest_end(nlh, nest_proto); mnl_attr_nest_end(nlh, nest); + + return 0; } static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh, @@ -98,7 +104,7 @@ static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh, char buf[MNL_SOCKET_BUFFER_SIZE]; struct nlmsghdr *rplnlh; unsigned int portid; - int err, ret; + int ret; portid = mnl_socket_get_portid(sock); @@ -207,6 +213,7 @@ static int conntrack_data_generate_v6(struct mnl_socket *sock, static int count_entries(const struct nlmsghdr *nlh, void *data) { reply_counter++; + return MNL_CB_OK; } static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone) @@ -216,7 +223,7 @@ static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone) struct nfgenmsg *nfh; struct nlattr *nest; unsigned int portid; - int err, ret; + int ret; portid = mnl_socket_get_portid(sock); @@ -263,7 +270,7 @@ static int conntrack_flush_zone(struct mnl_socket *sock, uint16_t zone) struct nfgenmsg *nfh; struct nlattr *nest; unsigned int portid; - int err, ret; + int ret; portid = mnl_socket_get_portid(sock); diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.sh b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.sh new file mode 100755 index 000000000000..8b0935385849 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +exec unshare -n ./conntrack_dump_flush diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh index 073e8e62d350..e95ecb37c2b1 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh @@ -32,6 +32,7 @@ source lib.sh IP0=172.30.30.1 IP1=172.30.30.2 +DUMMYNET=10.9.9 PFXL=30 ret=0 @@ -54,6 +55,7 @@ setup_ns ns0 ns1 ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0 ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0 ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0 +ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.forwarding=1 if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then echo "SKIP: Could not add veth device" @@ -65,13 +67,18 @@ if ! ip -net "$ns0" li add tvrf type vrf table 9876; then exit $ksft_skip fi +ip -net "$ns0" link add dummy0 type dummy + ip -net "$ns0" li set veth0 master tvrf +ip -net "$ns0" li set dummy0 master tvrf ip -net "$ns0" li set tvrf up ip -net "$ns0" li set veth0 up +ip -net "$ns0" li set dummy0 up ip -net "$ns1" li set veth0 up ip -net "$ns0" addr add $IP0/$PFXL dev veth0 ip -net "$ns1" addr add $IP1/$PFXL dev veth0 +ip -net "$ns0" addr add $DUMMYNET.1/$PFXL dev dummy0 listener_ready() { @@ -212,9 +219,35 @@ EOF fi } +test_fib() +{ +ip netns exec "$ns0" nft -f - <<EOF +flush ruleset +table ip t { + counter fibcount { } + + chain prerouting { + type filter hook prerouting priority 0; + meta iifname veth0 ip daddr $DUMMYNET.2 fib daddr oif dummy0 counter name fibcount notrack + } +} +EOF + ip -net "$ns1" route add 10.9.9.0/24 via "$IP0" dev veth0 + ip netns exec "$ns1" ping -q -w 1 -c 1 "$DUMMYNET".2 > /dev/null + + if ip netns exec "$ns0" nft list counter t fibcount | grep -q "packets 1"; then + echo "PASS: fib lookup returned exepected output interface" + else + echo "FAIL: fib lookup did not return exepected output interface" + ret=1 + return + fi +} + test_ct_zone_in test_masquerade_vrf "default" test_masquerade_vrf "pfifo" test_masquerade_veth +test_fib exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_audit.sh b/tools/testing/selftests/net/netfilter/nft_audit.sh index 902f8114bc80..87f2b4c725aa 100755 --- a/tools/testing/selftests/net/netfilter/nft_audit.sh +++ b/tools/testing/selftests/net/netfilter/nft_audit.sh @@ -48,12 +48,31 @@ logread_pid=$! trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT exec 3<"$logfile" +lsplit='s/^\(.*\) entries=\([^ ]*\) \(.*\)$/pfx="\1"\nval="\2"\nsfx="\3"/' +summarize_logs() { + sum=0 + while read line; do + eval $(sed "$lsplit" <<< "$line") + [[ $sum -gt 0 ]] && { + [[ "$pfx $sfx" == "$tpfx $tsfx" ]] && { + let "sum += val" + continue + } + echo "$tpfx entries=$sum $tsfx" + } + tpfx="$pfx" + tsfx="$sfx" + sum=$val + done + echo "$tpfx entries=$sum $tsfx" +} + do_test() { # (cmd, log) echo -n "testing for cmd: $1 ... " cat <&3 >/dev/null $1 >/dev/null || exit 1 sleep 0.1 - res=$(diff -a -u <(echo "$2") - <&3) + res=$(diff -a -u <(echo "$2") <(summarize_logs <&3)) [ $? -eq 0 ] && { echo "OK"; return; } echo "FAIL" grep -v '^\(---\|+++\|@@\)' <<< "$res" @@ -152,31 +171,17 @@ do_test 'nft reset rules t1 c2' \ 'table=t1 family=2 entries=3 op=nft_reset_rule' do_test 'nft reset rules table t1' \ -'table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule' +'table=t1 family=2 entries=9 op=nft_reset_rule' do_test 'nft reset rules t2 c3' \ -'table=t2 family=2 entries=189 op=nft_reset_rule -table=t2 family=2 entries=188 op=nft_reset_rule -table=t2 family=2 entries=126 op=nft_reset_rule' +'table=t2 family=2 entries=503 op=nft_reset_rule' do_test 'nft reset rules t2' \ -'table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=186 op=nft_reset_rule -table=t2 family=2 entries=188 op=nft_reset_rule -table=t2 family=2 entries=129 op=nft_reset_rule' +'table=t2 family=2 entries=509 op=nft_reset_rule' do_test 'nft reset rules' \ -'table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=180 op=nft_reset_rule -table=t2 family=2 entries=188 op=nft_reset_rule -table=t2 family=2 entries=135 op=nft_reset_rule' +'table=t1 family=2 entries=9 op=nft_reset_rule +table=t2 family=2 entries=509 op=nft_reset_rule' # resetting sets and elements @@ -200,13 +205,11 @@ do_test 'nft reset counters t1' \ 'table=t1 family=2 entries=1 op=nft_reset_obj' do_test 'nft reset counters t2' \ -'table=t2 family=2 entries=342 op=nft_reset_obj -table=t2 family=2 entries=158 op=nft_reset_obj' +'table=t2 family=2 entries=500 op=nft_reset_obj' do_test 'nft reset counters' \ 'table=t1 family=2 entries=1 op=nft_reset_obj -table=t2 family=2 entries=341 op=nft_reset_obj -table=t2 family=2 entries=159 op=nft_reset_obj' +table=t2 family=2 entries=500 op=nft_reset_obj' # resetting quotas @@ -217,13 +220,11 @@ do_test 'nft reset quotas t1' \ 'table=t1 family=2 entries=1 op=nft_reset_obj' do_test 'nft reset quotas t2' \ -'table=t2 family=2 entries=315 op=nft_reset_obj -table=t2 family=2 entries=185 op=nft_reset_obj' +'table=t2 family=2 entries=500 op=nft_reset_obj' do_test 'nft reset quotas' \ 'table=t1 family=2 entries=1 op=nft_reset_obj -table=t2 family=2 entries=314 op=nft_reset_obj -table=t2 family=2 entries=186 op=nft_reset_obj' +table=t2 family=2 entries=500 op=nft_reset_obj' # deleting rules diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh index b3995550856a..a4ee5496f2a1 100755 --- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh @@ -71,6 +71,8 @@ omtu=9000 lmtu=1500 rmtu=2000 +filesize=$((2 * 1024 * 1024)) + usage(){ echo "nft_flowtable.sh [OPTIONS]" echo @@ -81,12 +83,13 @@ usage(){ exit 1 } -while getopts "o:l:r:" o +while getopts "o:l:r:s:" o do case $o in o) omtu=$OPTARG;; l) lmtu=$OPTARG;; r) rmtu=$OPTARG;; + s) filesize=$OPTARG;; *) usage;; esac done @@ -217,18 +220,10 @@ ns2out=$(mktemp) make_file() { - name=$1 - - SIZE=$((RANDOM % (1024 * 128))) - SIZE=$((SIZE + (1024 * 8))) - TSIZE=$((SIZE * 1024)) - - dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null + name="$1" + sz="$2" - SIZE=$((RANDOM % 1024)) - SIZE=$((SIZE + 128)) - TSIZE=$((TSIZE + SIZE)) - dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null + head -c "$sz" < /dev/urandom > "$name" } check_counters() @@ -246,18 +241,18 @@ check_counters() local fs fs=$(du -sb "$nsin") local max_orig=${fs%%/*} - local max_repl=$((max_orig/4)) + local max_repl=$((max_orig)) # flowtable fastpath should bypass normal routing one, i.e. the counters in forward hook # should always be lower than the size of the transmitted file (max_orig). if [ "$orig_cnt" -gt "$max_orig" ];then - echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2 + echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig, reply counter $repl_cnt" 1>&2 ret=1 ok=0 fi if [ "$repl_cnt" -gt $max_repl ];then - echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2 + echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl, original counter $orig_cnt" 1>&2 ret=1 ok=0 fi @@ -455,7 +450,7 @@ test_tcp_forwarding_nat() return $lret } -make_file "$nsin" +make_file "$nsin" "$filesize" # First test: # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. @@ -664,8 +659,16 @@ if [ "$1" = "" ]; then l=$(((RANDOM%mtu) + low)) r=$(((RANDOM%mtu) + low)) - echo "re-run with random mtus: -o $o -l $l -r $r" - $0 -o "$o" -l "$l" -r "$r" + MINSIZE=$((2 * 1000 * 1000)) + MAXSIZE=$((64 * 1000 * 1000)) + + filesize=$(((RANDOM * RANDOM) % MAXSIZE)) + if [ "$filesize" -lt "$MINSIZE" ]; then + filesize=$((filesize+MINSIZE)) + fi + + echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize" + $0 -o "$o" -l "$l" -r "$r" -s "$filesize" fi exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index a9d109fcc15c..785e3875a6da 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -512,10 +512,10 @@ EOF :> "$TMPFILE1" :> "$TMPFILE2" - timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12345,fork OPEN:"$TMPFILE1",trunc & + timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12345,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc & local rpid1=$! - timeout 10 ip netns exec "$ns3" socat UDP-LISTEN:12345,fork OPEN:"$TMPFILE2",trunc & + timeout 10 ip netns exec "$ns3" socat UDP-LISTEN:12345,fork,pf=ipv4 OPEN:"$TMPFILE2",trunc & local rpid2=$! ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 & @@ -528,8 +528,8 @@ EOF # Send two packets, one should end up in ns1, other in ns2. # This is because nfqueue will delay packet for long enough so that # second packet will not find existing conntrack entry. - echo "Packet 1" | ip netns exec "$ns1" socat STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221 - echo "Packet 2" | ip netns exec "$ns1" socat STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221 + echo "Packet 1" | ip netns exec "$ns1" socat -u STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221 + echo "Packet 2" | ip netns exec "$ns1" socat -u STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221 busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2" diff --git a/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh b/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh new file mode 100755 index 000000000000..912cb9583af1 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +if ! modprobe -q -n br_netfilter 2>&1; then + echo "SKIP: Test needs br_netfilter kernel module" + exit $ksft_skip +fi + +cleanup() +{ + cleanup_all_ns +} + +trap cleanup EXIT + +setup_ns host vtep router + +create_topology() +{ + ip link add host-eth0 netns "$host" type veth peer name vtep-host netns "$vtep" + ip link add vtep-router netns "$vtep" type veth peer name router-vtep netns "$router" +} + +setup_host() +{ + # bring ports up + ip -n "$host" addr add 10.0.0.1/24 dev host-eth0 + ip -n "$host" link set host-eth0 up + + # Add VLAN 10,20 + for vid in 10 20; do + ip -n "$host" link add link host-eth0 name host-eth0.$vid type vlan id $vid + ip -n "$host" addr add 10.0.$vid.1/24 dev host-eth0.$vid + ip -n "$host" link set host-eth0.$vid up + done +} + +setup_vtep() +{ + # create bridge on vtep + ip -n "$vtep" link add name br0 type bridge + ip -n "$vtep" link set br0 type bridge vlan_filtering 1 + + # VLAN 10 is untagged PVID + ip -n "$vtep" link set dev vtep-host master br0 + bridge -n "$vtep" vlan add dev vtep-host vid 10 pvid untagged + + # VLAN 20 as other VID + ip -n "$vtep" link set dev vtep-host master br0 + bridge -n "$vtep" vlan add dev vtep-host vid 20 + + # single-vxlan device on vtep + ip -n "$vtep" address add dev vtep-router 60.0.0.1/24 + ip -n "$vtep" link add dev vxd type vxlan external \ + vnifilter local 60.0.0.1 remote 60.0.0.2 dstport 4789 ttl 64 + ip -n "$vtep" link set vxd master br0 + + # Add VLAN-VNI 1-1 mappings + bridge -n "$vtep" link set dev vxd vlan_tunnel on + for vid in 10 20; do + bridge -n "$vtep" vlan add dev vxd vid $vid + bridge -n "$vtep" vlan add dev vxd vid $vid tunnel_info id $vid + bridge -n "$vtep" vni add dev vxd vni $vid + done + + # bring ports up + ip -n "$vtep" link set vxd up + ip -n "$vtep" link set vtep-router up + ip -n "$vtep" link set vtep-host up + ip -n "$vtep" link set dev br0 up +} + +setup_router() +{ + # bring ports up + ip -n "$router" link set router-vtep up +} + +setup() +{ + modprobe -q br_netfilter + create_topology + setup_host + setup_vtep + setup_router +} + +test_large_mtu_untagged_traffic() +{ + ip -n "$vtep" link set vxd mtu 1000 + ip -n "$host" neigh add 10.0.0.2 lladdr ca:fe:ba:be:00:01 dev host-eth0 + ip netns exec "$host" \ + ping -q 10.0.0.2 -I host-eth0 -c 1 -W 0.5 -s2000 > /dev/null 2>&1 + return 0 +} + +test_large_mtu_tagged_traffic() +{ + for vid in 10 20; do + ip -n "$vtep" link set vxd mtu 1000 + ip -n "$host" neigh add 10.0.$vid.2 lladdr ca:fe:ba:be:00:01 dev host-eth0.$vid + ip netns exec "$host" \ + ping -q 10.0.$vid.2 -I host-eth0.$vid -c 1 -W 0.5 -s2000 > /dev/null 2>&1 + done + return 0 +} + +do_test() +{ + # Frames will be dropped so ping will not succeed + # If it doesn't panic, it passes + test_large_mtu_tagged_traffic + test_large_mtu_untagged_traffic +} + +setup && \ +echo "Test for VxLAN fragmentation with large MTU in br_netfilter:" && \ +do_test && echo "PASS!" +exit $? diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c new file mode 100644 index 000000000000..84e29b7dffb6 --- /dev/null +++ b/tools/testing/selftests/net/netlink-dumps.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <linux/genetlink.h> +#include <linux/netlink.h> +#include <linux/mqueue.h> + +#include "../kselftest_harness.h" + +static const struct { + struct nlmsghdr nlhdr; + struct genlmsghdr genlhdr; + struct nlattr ahdr; + __u16 val; + __u16 pad; +} dump_policies = { + .nlhdr = { + .nlmsg_len = sizeof(dump_policies), + .nlmsg_type = GENL_ID_CTRL, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, + .nlmsg_seq = 1, + }, + .genlhdr = { + .cmd = CTRL_CMD_GETPOLICY, + .version = 2, + }, + .ahdr = { + .nla_len = 6, + .nla_type = CTRL_ATTR_FAMILY_ID, + }, + .val = GENL_ID_CTRL, + .pad = 0, +}; + +// Sanity check for the test itself, make sure the dump doesn't fit in one msg +TEST(test_sanity) +{ + int netlink_sock; + char buf[8192]; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + ASSERT_GE(netlink_sock, 0); + + n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0); + ASSERT_EQ(n, sizeof(dump_policies)); + + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); + + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); + + close(netlink_sock); +} + +TEST(close_in_progress) +{ + int netlink_sock; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + ASSERT_GE(netlink_sock, 0); + + n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0); + ASSERT_EQ(n, sizeof(dump_policies)); + + close(netlink_sock); +} + +TEST(close_with_ref) +{ + char cookie[NOTIFY_COOKIE_LEN] = {}; + int netlink_sock, mq_fd; + struct sigevent sigev; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + ASSERT_GE(netlink_sock, 0); + + n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0); + ASSERT_EQ(n, sizeof(dump_policies)); + + mq_fd = syscall(__NR_mq_open, "sed", O_CREAT | O_WRONLY, 0600, 0); + ASSERT_GE(mq_fd, 0); + + memset(&sigev, 0, sizeof(sigev)); + sigev.sigev_notify = SIGEV_THREAD; + sigev.sigev_value.sival_ptr = cookie; + sigev.sigev_signo = netlink_sock; + + syscall(__NR_mq_notify, mq_fd, &sigev); + + close(netlink_sock); + + // give mqueue time to fire + usleep(100 * 1000); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 569bce8b6383..66be7699c72c 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -197,6 +197,12 @@ # # - pmtu_ipv6_route_change # Same as above but with IPv6 +# +# - pmtu_ipv4_mp_exceptions +# Use the same topology as in pmtu_ipv4, but add routeable addresses +# on host A and B on lo reachable via both routers. Host A and B +# addresses have multipath routes to each other, b_r1 mtu = 1500. +# Check that PMTU exceptions are created for both paths. source lib.sh source net_helper.sh @@ -266,7 +272,8 @@ tests=" list_flush_ipv4_exception ipv4: list and flush cached exceptions 1 list_flush_ipv6_exception ipv6: list and flush cached exceptions 1 pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 - pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" + pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1 + pmtu_ipv4_mp_exceptions ipv4: PMTU multipath nh exceptions 1" # Addressing and routing for tests with routers: four network segments, with # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an @@ -343,6 +350,9 @@ tunnel6_a_addr="fd00:2::a" tunnel6_b_addr="fd00:2::b" tunnel6_mask="64" +host4_a_addr="192.168.99.99" +host4_b_addr="192.168.88.88" + dummy6_0_prefix="fc00:1000::" dummy6_1_prefix="fc00:1001::" dummy6_mask="64" @@ -984,6 +994,52 @@ setup_ovs_bridge() { run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2 } +setup_multipath_new() { + # Set up host A with multipath routes to host B host4_b_addr + run_cmd ${ns_a} ip addr add ${host4_a_addr} dev lo + run_cmd ${ns_a} ip nexthop add id 401 via ${prefix4}.${a_r1}.2 dev veth_A-R1 + run_cmd ${ns_a} ip nexthop add id 402 via ${prefix4}.${a_r2}.2 dev veth_A-R2 + run_cmd ${ns_a} ip nexthop add id 403 group 401/402 + run_cmd ${ns_a} ip route add ${host4_b_addr} src ${host4_a_addr} nhid 403 + + # Set up host B with multipath routes to host A host4_a_addr + run_cmd ${ns_b} ip addr add ${host4_b_addr} dev lo + run_cmd ${ns_b} ip nexthop add id 401 via ${prefix4}.${b_r1}.2 dev veth_B-R1 + run_cmd ${ns_b} ip nexthop add id 402 via ${prefix4}.${b_r2}.2 dev veth_B-R2 + run_cmd ${ns_b} ip nexthop add id 403 group 401/402 + run_cmd ${ns_b} ip route add ${host4_a_addr} src ${host4_b_addr} nhid 403 +} + +setup_multipath_old() { + # Set up host A with multipath routes to host B host4_b_addr + run_cmd ${ns_a} ip addr add ${host4_a_addr} dev lo + run_cmd ${ns_a} ip route add ${host4_b_addr} \ + src ${host4_a_addr} \ + nexthop via ${prefix4}.${a_r1}.2 weight 1 \ + nexthop via ${prefix4}.${a_r2}.2 weight 1 + + # Set up host B with multipath routes to host A host4_a_addr + run_cmd ${ns_b} ip addr add ${host4_b_addr} dev lo + run_cmd ${ns_b} ip route add ${host4_a_addr} \ + src ${host4_b_addr} \ + nexthop via ${prefix4}.${b_r1}.2 weight 1 \ + nexthop via ${prefix4}.${b_r2}.2 weight 1 +} + +setup_multipath() { + if [ "$USE_NH" = "yes" ]; then + setup_multipath_new + else + setup_multipath_old + fi + + # Set up routers with routes to dummies + run_cmd ${ns_r1} ip route add ${host4_a_addr} via ${prefix4}.${a_r1}.1 + run_cmd ${ns_r2} ip route add ${host4_a_addr} via ${prefix4}.${a_r2}.1 + run_cmd ${ns_r1} ip route add ${host4_b_addr} via ${prefix4}.${b_r1}.1 + run_cmd ${ns_r2} ip route add ${host4_b_addr} via ${prefix4}.${b_r2}.1 +} + setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip @@ -1076,23 +1132,15 @@ link_get_mtu() { } route_get_dst_exception() { - ns_cmd="${1}" - dst="${2}" - dsfield="${3}" + ns_cmd="${1}"; shift - if [ -z "${dsfield}" ]; then - dsfield=0 - fi - - ${ns_cmd} ip route get "${dst}" dsfield "${dsfield}" + ${ns_cmd} ip route get "$@" } route_get_dst_pmtu_from_exception() { - ns_cmd="${1}" - dst="${2}" - dsfield="${3}" + ns_cmd="${1}"; shift - mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")" + mtu_parse "$(route_get_dst_exception "${ns_cmd}" "$@")" } check_pmtu_value() { @@ -1235,10 +1283,10 @@ test_pmtu_ipv4_dscp_icmp_exception() { run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}" # Check that exceptions have been created with the correct PMTU - pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" dsfield "${policy_mark}")" check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 - pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" dsfield "${policy_mark}")" check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 } @@ -1285,9 +1333,9 @@ test_pmtu_ipv4_dscp_udp_exception() { UDP:"${dst2}":50000,tos="${dsfield}" # Check that exceptions have been created with the correct PMTU - pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" dsfield "${policy_mark}")" check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 - pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" dsfield "${policy_mark}")" check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 } @@ -2056,7 +2104,7 @@ check_running() { pid=${1} cmd=${2} - [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ] + [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "${cmd}" ] } test_cleanup_vxlanX_exception() { @@ -2329,6 +2377,36 @@ test_pmtu_ipv6_route_change() { test_pmtu_ipvX_route_change 6 } +test_pmtu_ipv4_mp_exceptions() { + setup namespaces routing multipath || return $ksft_skip + + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1500 + mtu "${ns_b}" veth_B-R1 1500 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + # Ping and expect two nexthop exceptions for two routes + run_cmd ${ns_a} ping -q -M want -i 0.1 -c 1 -s 1800 "${host4_b_addr}" + + # Check that exceptions have been created with the correct PMTU + pmtu_a_R1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${host4_b_addr}" oif veth_A-R1)" + pmtu_a_R2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${host4_b_addr}" oif veth_A-R2)" + + check_pmtu_value "1500" "${pmtu_a_R1}" "exceeding MTU (veth_A-R1)" || return 1 + check_pmtu_value "1500" "${pmtu_a_R2}" "exceeding MTU (veth_A-R2)" || return 1 +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 4f31e92ebd96..84c524357075 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -48,6 +48,7 @@ #include <string.h> #include <sys/mman.h> #include <sys/socket.h> +#include <sys/ioctl.h> #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> @@ -59,6 +60,33 @@ static uint32_t cfg_max_num_members; +static void loopback_set_up_down(int state_up) +{ + struct ifreq ifreq = {}; + int fd, err; + + fd = socket(AF_PACKET, SOCK_RAW, 0); + if (fd < 0) { + perror("socket loopback"); + exit(1); + } + strcpy(ifreq.ifr_name, "lo"); + err = ioctl(fd, SIOCGIFFLAGS, &ifreq); + if (err) { + perror("SIOCGIFFLAGS"); + exit(1); + } + if (state_up != !!(ifreq.ifr_flags & IFF_UP)) { + ifreq.ifr_flags ^= IFF_UP; + err = ioctl(fd, SIOCSIFFLAGS, &ifreq); + if (err) { + perror("SIOCSIFFLAGS"); + exit(1); + } + } + close(fd); +} + /* Open a socket in a given fanout mode. * @return -1 if mode is bad, a valid socket otherwise */ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) @@ -251,6 +279,41 @@ static int sock_fanout_read(int fds[], char *rings[], const int expect[]) return 0; } +/* Test that creating/joining a fanout group fails for unbound socket without + * a specified protocol + */ +static void test_unbound_fanout(void) +{ + int val, fd0, fd1, err; + + fprintf(stderr, "test: unbound fanout\n"); + fd0 = socket(PF_PACKET, SOCK_RAW, 0); + if (fd0 < 0) { + perror("socket packet"); + exit(1); + } + /* Try to create a new fanout group. Should fail. */ + val = (PACKET_FANOUT_HASH << 16) | 1; + err = setsockopt(fd0, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val)); + if (!err) { + fprintf(stderr, "ERROR: unbound socket fanout create\n"); + exit(1); + } + fd1 = sock_fanout_open(PACKET_FANOUT_HASH, 1); + if (fd1 == -1) { + fprintf(stderr, "ERROR: failed to open HASH socket\n"); + exit(1); + } + /* Try to join an existing fanout group. Should fail. */ + err = setsockopt(fd0, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val)); + if (!err) { + fprintf(stderr, "ERROR: unbound socket fanout join\n"); + exit(1); + } + close(fd0); + close(fd1); +} + /* Test illegal mode + flag combination */ static void test_control_single(void) { @@ -264,17 +327,22 @@ static void test_control_single(void) } /* Test illegal group with different modes or flags */ -static void test_control_group(void) +static void test_control_group(int toggle) { int fds[2]; - fprintf(stderr, "test: control multiple sockets\n"); + if (toggle) + fprintf(stderr, "test: control multiple sockets with link down toggle\n"); + else + fprintf(stderr, "test: control multiple sockets\n"); fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0); if (fds[0] == -1) { fprintf(stderr, "ERROR: failed to open HASH socket\n"); exit(1); } + if (toggle) + loopback_set_up_down(0); if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) { fprintf(stderr, "ERROR: joined group with wrong flag defrag\n"); @@ -294,6 +362,8 @@ static void test_control_group(void) fprintf(stderr, "ERROR: failed to join group\n"); exit(1); } + if (toggle) + loopback_set_up_down(1); if (close(fds[1]) || close(fds[0])) { fprintf(stderr, "ERROR: closing sockets\n"); exit(1); @@ -488,8 +558,10 @@ int main(int argc, char **argv) const int expect_uniqueid[2][2] = { { 20, 20}, { 20, 20 } }; int port_off = 2, tries = 20, ret; + test_unbound_fanout(); test_control_single(); - test_control_group(); + test_control_group(0); + test_control_group(1); test_control_group_max_num_members(); test_unique_fanout_group_ids(); diff --git a/tools/testing/selftests/net/rds/.gitignore b/tools/testing/selftests/net/rds/.gitignore new file mode 100644 index 000000000000..1c6f04e2aa11 --- /dev/null +++ b/tools/testing/selftests/net/rds/.gitignore @@ -0,0 +1 @@ +include.sh diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile index da9714bc7aad..1803c39dbacb 100644 --- a/tools/testing/selftests/net/rds/Makefile +++ b/tools/testing/selftests/net/rds/Makefile @@ -4,9 +4,10 @@ all: @echo mk_build_dir="$(shell pwd)" > include.sh TEST_PROGS := run.sh \ - include.sh \ test.py -EXTRA_CLEAN := /tmp/rds_logs +TEST_FILES := include.sh + +EXTRA_CLEAN := /tmp/rds_logs include.sh include ../../lib.mk diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py index e6bb109bcead..4a7178d11193 100644..100755 --- a/tools/testing/selftests/net/rds/test.py +++ b/tools/testing/selftests/net/rds/test.py @@ -14,8 +14,11 @@ import sys import atexit from pwd import getpwuid from os import stat -from lib.py import ip +# Allow utils module to be imported from different directory +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(this_dir, "../")) +from lib.py.utils import ip libc = ctypes.cdll.LoadLibrary('libc.so.6') setns = libc.setns diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index bdf6f10d0558..7f05b5f9b76f 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -21,10 +21,10 @@ ALL_TESTS=" kci_test_vrf kci_test_encap kci_test_macsec - kci_test_macsec_offload kci_test_ipsec kci_test_ipsec_offload kci_test_fdb_get + kci_test_fdb_del kci_test_neigh_get kci_test_bridge_parent_id kci_test_address_proto @@ -559,73 +559,6 @@ kci_test_macsec() end_test "PASS: macsec" } -kci_test_macsec_offload() -{ - sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ - sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ - probed=false - local ret=0 - run_cmd_grep "^Usage: ip macsec" ip macsec help - if [ $? -ne 0 ]; then - end_test "SKIP: macsec: iproute2 too old" - return $ksft_skip - fi - - if ! mount | grep -q debugfs; then - mount -t debugfs none /sys/kernel/debug/ &> /dev/null - fi - - # setup netdevsim since dummydev doesn't have offload support - if [ ! -w /sys/bus/netdevsim/new_device ] ; then - run_cmd modprobe -q netdevsim - - if [ $ret -ne 0 ]; then - end_test "SKIP: macsec_offload can't load netdevsim" - return $ksft_skip - fi - probed=true - fi - - echo "0" > /sys/bus/netdevsim/new_device - while [ ! -d $sysfsnet ] ; do :; done - udevadm settle - dev=`ls $sysfsnet` - - ip link set $dev up - if [ ! -d $sysfsd ] ; then - end_test "FAIL: macsec_offload can't create device $dev" - return 1 - fi - run_cmd_grep 'macsec-hw-offload: on' ethtool -k $dev - if [ $? -eq 1 ] ; then - end_test "FAIL: macsec_offload netdevsim doesn't support MACsec offload" - return 1 - fi - run_cmd ip link add link $dev kci_macsec1 type macsec port 4 offload mac - run_cmd ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac - run_cmd ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac - run_cmd_fail ip link add link $dev kci_macsec4 type macsec port 8 offload mac - - msname=kci_macsec1 - run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 - run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" - run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \ - key 00 0123456789abcdef0123456789abcdef - run_cmd_fail ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef" - # clean up any leftovers - for msdev in kci_macsec{1,2,3,4} ; do - ip link del $msdev 2> /dev/null - done - echo 0 > /sys/bus/netdevsim/del_device - $probed && rmmod netdevsim - - if [ $ret -ne 0 ]; then - end_test "FAIL: macsec_offload" - return 1 - fi - end_test "PASS: macsec_offload" -} - #------------------------------------------------------------------- # Example commands # ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \ @@ -809,10 +742,10 @@ kci_test_ipsec_offload() # does driver have correct offload info run_cmd diff $sysfsf - << EOF SA count=2 tx=3 -sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000 +sa[0] tx ipaddr=$dstip sa[0] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1 sa[0] key=0x34333231 38373635 32313039 36353433 -sa[1] rx ipaddr=0x00000000 00000000 00000000 037ba8c0 +sa[1] rx ipaddr=$srcip sa[1] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1 sa[1] key=0x34333231 38373635 32313039 36353433 EOF @@ -1065,6 +998,45 @@ kci_test_fdb_get() end_test "PASS: bridge fdb get" } +kci_test_fdb_del() +{ + local test_mac=de:ad:be:ef:13:37 + local dummydev="dummy1" + local brdev="test-br0" + local ret=0 + + run_cmd_grep 'bridge fdb get' bridge fdb help + if [ $? -ne 0 ]; then + end_test "SKIP: fdb del tests: iproute2 too old" + return $ksft_skip + fi + + setup_ns testns + if [ $? -ne 0 ]; then + end_test "SKIP fdb del tests: cannot add net namespace $testns" + return $ksft_skip + fi + IP="ip -netns $testns" + BRIDGE="bridge -netns $testns" + run_cmd $IP link add $dummydev type dummy + run_cmd $IP link add name $brdev type bridge vlan_filtering 1 + run_cmd $IP link set dev $dummydev master $brdev + run_cmd $BRIDGE fdb add $test_mac dev $dummydev master static vlan 1 + run_cmd $BRIDGE vlan del vid 1 dev $dummydev + run_cmd $BRIDGE fdb get $test_mac br $brdev vlan 1 + run_cmd $BRIDGE fdb del $test_mac dev $dummydev master vlan 1 + run_cmd_fail $BRIDGE fdb get $test_mac br $brdev vlan 1 + + ip netns del $testns &>/dev/null + + if [ $ret -ne 0 ]; then + end_test "FAIL: bridge fdb del" + return 1 + fi + + end_test "PASS: bridge fdb del" +} + kci_test_neigh_get() { dstmac=de:ad:be:ef:13:37 diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h index db44e77428dd..5db2f65cddc4 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/aolib.h +++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h @@ -46,6 +46,7 @@ static inline char *test_snprintf(const char *fmt, va_list vargs) va_copy(tmp, vargs); n = vsnprintf(ret, size, fmt, tmp); + va_end(tmp); if (n < 0) return NULL; diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c index 084db4ecdff6..0abb9807d742 100644 --- a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c +++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c @@ -6,6 +6,8 @@ static union tcp_addr tcp_md5_client; +#define FILTER_TEST_NKEYS 16 + static int test_port = 7788; static void make_listen(int sk) { @@ -813,23 +815,197 @@ static void duplicate_tests(void) setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: SendID differs"); } +static void fetch_all_keys(int sk, struct tcp_ao_getsockopt *keys) +{ + socklen_t optlen = sizeof(struct tcp_ao_getsockopt); + + memset(keys, 0, sizeof(struct tcp_ao_getsockopt) * FILTER_TEST_NKEYS); + keys[0].get_all = 1; + keys[0].nkeys = FILTER_TEST_NKEYS; + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &keys[0], &optlen)) + test_error("getsockopt"); +} + +static int prepare_test_keys(struct tcp_ao_getsockopt *keys) +{ + const char *test_password = "Test password number "; + struct tcp_ao_add test_ao[FILTER_TEST_NKEYS]; + char test_password_scratch[64] = {}; + u8 rcvid = 100, sndid = 100; + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + for (int i = 0; i < FILTER_TEST_NKEYS; i++) { + snprintf(test_password_scratch, 64, "%s %d", test_password, i); + test_prepare_key(&test_ao[i], DEFAULT_TEST_ALGO, this_ip_dest, + false, false, DEFAULT_TEST_PREFIX, 0, sndid++, + rcvid++, 0, 0, strlen(test_password_scratch), + test_password_scratch); + } + test_ao[0].set_current = 1; + test_ao[1].set_rnext = 1; + /* One key with a different addr and overlapping sndid, rcvid */ + tcp_addr_to_sockaddr_in(&test_ao[2].addr, &this_ip_addr, 0); + test_ao[2].sndid = 100; + test_ao[2].rcvid = 100; + + /* Add keys in a random order */ + for (int i = 0; i < FILTER_TEST_NKEYS; i++) { + int randidx = rand() % (FILTER_TEST_NKEYS - i); + + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, + &test_ao[randidx], sizeof(struct tcp_ao_add))) + test_error("setsockopt()"); + memcpy(&test_ao[randidx], &test_ao[FILTER_TEST_NKEYS - 1 - i], + sizeof(struct tcp_ao_add)); + } + + fetch_all_keys(sk, keys); + + return sk; +} + +/* Assumes passwords are unique */ +static int compare_mkts(struct tcp_ao_getsockopt *expected, int nexpected, + struct tcp_ao_getsockopt *actual, int nactual) +{ + int matches = 0; + + for (int i = 0; i < nexpected; i++) { + for (int j = 0; j < nactual; j++) { + if (memcmp(expected[i].key, actual[j].key, + TCP_AO_MAXKEYLEN) == 0) + matches++; + } + } + return nexpected - matches; +} + +static void filter_keys_checked(int sk, struct tcp_ao_getsockopt *filter, + struct tcp_ao_getsockopt *expected, + unsigned int nexpected, const char *tst) +{ + struct tcp_ao_getsockopt filtered_keys[FILTER_TEST_NKEYS] = {}; + struct tcp_ao_getsockopt all_keys[FILTER_TEST_NKEYS] = {}; + socklen_t len = sizeof(struct tcp_ao_getsockopt); + + fetch_all_keys(sk, all_keys); + memcpy(&filtered_keys[0], filter, sizeof(struct tcp_ao_getsockopt)); + filtered_keys[0].nkeys = FILTER_TEST_NKEYS; + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, filtered_keys, &len)) + test_error("getsockopt"); + if (filtered_keys[0].nkeys != nexpected) { + test_fail("wrong nr of keys, expected %u got %u", nexpected, + filtered_keys[0].nkeys); + goto out_close; + } + if (compare_mkts(expected, nexpected, filtered_keys, + filtered_keys[0].nkeys)) { + test_fail("got wrong keys back"); + goto out_close; + } + test_ok("filter keys: %s", tst); + +out_close: + close(sk); + memset(filter, 0, sizeof(struct tcp_ao_getsockopt)); +} + +static void filter_tests(void) +{ + struct tcp_ao_getsockopt original_keys[FILTER_TEST_NKEYS]; + struct tcp_ao_getsockopt expected_keys[FILTER_TEST_NKEYS]; + struct tcp_ao_getsockopt filter = {}; + int sk, f, nmatches; + socklen_t len; + + f = 2; + sk = prepare_test_keys(original_keys); + filter.rcvid = original_keys[f].rcvid; + filter.sndid = original_keys[f].sndid; + memcpy(&filter.addr, &original_keys[f].addr, + sizeof(original_keys[f].addr)); + filter.prefix = original_keys[f].prefix; + filter_keys_checked(sk, &filter, &original_keys[f], 1, + "by sndid, rcvid, address"); + + f = -1; + sk = prepare_test_keys(original_keys); + for (int i = 0; i < original_keys[0].nkeys; i++) { + if (original_keys[i].is_current) { + f = i; + break; + } + } + if (f < 0) + test_error("No current key after adding one"); + filter.is_current = 1; + filter_keys_checked(sk, &filter, &original_keys[f], 1, "by is_current"); + + f = -1; + sk = prepare_test_keys(original_keys); + for (int i = 0; i < original_keys[0].nkeys; i++) { + if (original_keys[i].is_rnext) { + f = i; + break; + } + } + if (f < 0) + test_error("No rnext key after adding one"); + filter.is_rnext = 1; + filter_keys_checked(sk, &filter, &original_keys[f], 1, "by is_rnext"); + + f = -1; + nmatches = 0; + sk = prepare_test_keys(original_keys); + for (int i = 0; i < original_keys[0].nkeys; i++) { + if (original_keys[i].sndid == 100) { + f = i; + memcpy(&expected_keys[nmatches], &original_keys[i], + sizeof(struct tcp_ao_getsockopt)); + nmatches++; + } + } + if (f < 0) + test_error("No key for sndid 100"); + if (nmatches != 2) + test_error("Should have 2 keys with sndid 100"); + filter.rcvid = original_keys[f].rcvid; + filter.sndid = original_keys[f].sndid; + filter.addr.ss_family = test_family; + filter_keys_checked(sk, &filter, expected_keys, nmatches, + "by sndid, rcvid"); + + sk = prepare_test_keys(original_keys); + filter.get_all = 1; + filter.nkeys = FILTER_TEST_NKEYS / 2; + len = sizeof(struct tcp_ao_getsockopt); + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &filter, &len)) + test_error("getsockopt"); + if (filter.nkeys == FILTER_TEST_NKEYS) + test_ok("filter keys: correct nkeys when in.nkeys < matches"); + else + test_fail("filter keys: wrong nkeys, expected %u got %u", + FILTER_TEST_NKEYS, filter.nkeys); +} + static void *client_fn(void *arg) { if (inet_pton(TEST_FAMILY, __TEST_CLIENT_IP(2), &tcp_md5_client) != 1) test_error("Can't convert ip address"); extend_tests(); einval_tests(); + filter_tests(); duplicate_tests(); - /* - * TODO: check getsockopt(TCP_AO_GET_KEYS) with different filters - * returning proper nr & keys; - */ return NULL; } int main(int argc, char *argv[]) { - test_init(121, client_fn, NULL); + test_init(126, client_fn, NULL); return 0; } diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index f27a12d2a2c9..1a706d03bb6b 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -266,6 +266,25 @@ TEST_F(tls_basic, bad_cipher) EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1); } +TEST_F(tls_basic, recseq_wrap) +{ + struct tls_crypto_info_keys tls12; + char const *test_str = "test_read"; + int send_len = 10; + + if (self->notls) + SKIP(return, "no TLS support"); + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + memset(&tls12.aes128.rec_seq, 0xff, sizeof(tls12.aes128.rec_seq)); + + ASSERT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + ASSERT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + FIXTURE(tls) { int fd, cfd; diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index d626f22f9550..dae91eb97d69 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -77,6 +77,8 @@ static bool cfg_epollet; static bool cfg_do_listen; static uint16_t dest_port = 9000; static bool cfg_print_nsec; +static uint32_t ts_opt_id; +static bool cfg_use_cmsg_opt_id; static struct sockaddr_in daddr; static struct sockaddr_in6 daddr6; @@ -136,12 +138,13 @@ static void validate_key(int tskey, int tstype) /* compare key for each subsequent request * must only test for one type, the first one requested */ - if (saved_tskey == -1) + if (saved_tskey == -1 || cfg_use_cmsg_opt_id) saved_tskey_type = tstype; else if (saved_tskey_type != tstype) return; stepsize = cfg_proto == SOCK_STREAM ? cfg_payload_len : 1; + stepsize = cfg_use_cmsg_opt_id ? 0 : stepsize; if (tskey != saved_tskey + stepsize) { fprintf(stderr, "ERROR: key %d, expected %d\n", tskey, saved_tskey + stepsize); @@ -484,7 +487,7 @@ static void fill_header_udp(void *p, bool is_ipv4) static void do_test(int family, unsigned int report_opt) { - char control[CMSG_SPACE(sizeof(uint32_t))]; + char control[2 * CMSG_SPACE(sizeof(uint32_t))]; struct sockaddr_ll laddr; unsigned int sock_opt; struct cmsghdr *cmsg; @@ -624,18 +627,32 @@ static void do_test(int family, unsigned int report_opt) msg.msg_iov = &iov; msg.msg_iovlen = 1; - if (cfg_use_cmsg) { + if (cfg_use_cmsg || cfg_use_cmsg_opt_id) { memset(control, 0, sizeof(control)); msg.msg_control = control; - msg.msg_controllen = sizeof(control); + msg.msg_controllen = cfg_use_cmsg * CMSG_SPACE(sizeof(uint32_t)); + msg.msg_controllen += cfg_use_cmsg_opt_id * CMSG_SPACE(sizeof(uint32_t)); - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SO_TIMESTAMPING; - cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + cmsg = NULL; + if (cfg_use_cmsg) { + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SO_TIMESTAMPING; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + + *((uint32_t *)CMSG_DATA(cmsg)) = report_opt; + } + if (cfg_use_cmsg_opt_id) { + cmsg = cmsg ? CMSG_NXTHDR(&msg, cmsg) : CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_TS_OPT_ID; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + + *((uint32_t *)CMSG_DATA(cmsg)) = ts_opt_id; + saved_tskey = ts_opt_id; + } - *((uint32_t *) CMSG_DATA(cmsg)) = report_opt; } val = sendmsg(fd, &msg, 0); @@ -685,6 +702,7 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -L listen on hostname and port\n" " -n: set no-payload option\n" " -N: print timestamps and durations in nsec (instead of usec)\n" + " -o N: use SCM_TS_OPT_ID control message to provide N as tskey\n" " -p N: connect to port N\n" " -P: use PF_PACKET\n" " -r: use raw\n" @@ -705,7 +723,7 @@ static void parse_opt(int argc, char **argv) int c; while ((c = getopt(argc, argv, - "46bc:CeEFhIl:LnNp:PrRS:t:uv:V:x")) != -1) { + "46bc:CeEFhIl:LnNo:p:PrRS:t:uv:V:x")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -746,6 +764,10 @@ static void parse_opt(int argc, char **argv) case 'N': cfg_print_nsec = true; break; + case 'o': + ts_opt_id = strtoul(optarg, NULL, 10); + cfg_use_cmsg_opt_id = true; + break; case 'p': dest_port = strtoul(optarg, NULL, 10); break; @@ -803,6 +825,8 @@ static void parse_opt(int argc, char **argv) error(1, 0, "cannot ask for pktinfo over pf_packet"); if (cfg_busy_poll && cfg_use_epoll) error(1, 0, "pass epoll or busy_poll, not both"); + if (cfg_proto == SOCK_STREAM && cfg_use_cmsg_opt_id) + error(1, 0, "TCP sockets don't support SCM_TS_OPT_ID"); if (optind != argc - 1) error(1, 0, "missing required hostname argument"); diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh index 25baca4b148e..fe4649bb8786 100755 --- a/tools/testing/selftests/net/txtimestamp.sh +++ b/tools/testing/selftests/net/txtimestamp.sh @@ -37,11 +37,13 @@ run_test_v4v6() { run_test_tcpudpraw() { local -r args=$@ - run_test_v4v6 ${args} # tcp - run_test_v4v6 ${args} -u # udp - run_test_v4v6 ${args} -r # raw - run_test_v4v6 ${args} -R # raw (IPPROTO_RAW) - run_test_v4v6 ${args} -P # pf_packet + run_test_v4v6 ${args} # tcp + run_test_v4v6 ${args} -u # udp + run_test_v4v6 ${args} -u -o 42 # udp with fixed tskey + run_test_v4v6 ${args} -r # raw + run_test_v4v6 ${args} -r -o 42 # raw + run_test_v4v6 ${args} -R # raw (IPPROTO_RAW) + run_test_v4v6 ${args} -P # pf_packet } run_test_all() { diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 4f1edbafb946..6bb7dfaa30b6 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -46,8 +46,6 @@ create_ns() { ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad done - echo "#kernel" > $BASE - chmod go-rw $BASE } __chk_flag() { diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk index 59cb26cf3f73..d43afe243779 100644 --- a/tools/testing/selftests/net/ynl.mk +++ b/tools/testing/selftests/net/ynl.mk @@ -9,6 +9,8 @@ # YNL_GEN_FILES: TEST_GEN_FILES which need YNL YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) +YNL_SPECS := \ + $(patsubst %,$(top_srcdir)/Documentation/netlink/specs/%.yaml,$(YNL_GENS)) $(YNL_OUTPUTS): $(OUTPUT)/libynl.a $(YNL_OUTPUTS): CFLAGS += \ @@ -16,6 +18,20 @@ $(YNL_OUTPUTS): CFLAGS += \ -I$(top_srcdir)/tools/net/ynl/lib/ \ -I$(top_srcdir)/tools/net/ynl/generated/ -$(OUTPUT)/libynl.a: +# Make sure we rebuild libynl if user added a new family. We can't easily +# depend on the contents of a variable so create a fake file with a hash. +YNL_GENS_HASH := $(shell echo $(YNL_GENS) | sha1sum | cut -c1-8) +$(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig: + $(Q)rm -f $(OUTPUT)/.libynl-*.sig + $(Q)touch $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig + +$(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig + $(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a + +EXTRA_CLEAN += \ + $(top_srcdir)/tools/net/ynl/lib/__pycache__ \ + $(top_srcdir)/tools/net/ynl/lib/*.[ado] \ + $(OUTPUT)/.libynl-*.sig \ + $(OUTPUT)/libynl.a diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 8de98ea7af80..e92e0b885861 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -130,9 +130,9 @@ QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIB QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_s390 = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS = $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) +QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) # OUTPUT is only set when run from the main makefile, otherwise # it defaults to this nolibc directory. diff --git a/tools/testing/selftests/pcie_bwctrl/Makefile b/tools/testing/selftests/pcie_bwctrl/Makefile new file mode 100644 index 000000000000..3e84e26341d1 --- /dev/null +++ b/tools/testing/selftests/pcie_bwctrl/Makefile @@ -0,0 +1,2 @@ +TEST_PROGS = set_pcie_cooling_state.sh +include ../lib.mk diff --git a/tools/testing/selftests/pcie_bwctrl/set_pcie_cooling_state.sh b/tools/testing/selftests/pcie_bwctrl/set_pcie_cooling_state.sh new file mode 100755 index 000000000000..9df606552af3 --- /dev/null +++ b/tools/testing/selftests/pcie_bwctrl/set_pcie_cooling_state.sh @@ -0,0 +1,122 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later + +SYSFS= +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +retval=0 +skipmsg="skip all tests:" + +PCIEPORTTYPE="PCIe_Port_Link_Speed" + +prerequisite() +{ + local ports + + if [ $UID != 0 ]; then + echo $skipmsg must be run as root >&2 + exit $ksft_skip + fi + + SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'` + + if [ ! -d "$SYSFS" ]; then + echo $skipmsg sysfs is not mounted >&2 + exit $ksft_skip + fi + + if ! ls $SYSFS/class/thermal/cooling_device* > /dev/null 2>&1; then + echo $skipmsg thermal cooling devices missing >&2 + exit $ksft_skip + fi + + ports=`grep -e "^$PCIEPORTTYPE" $SYSFS/class/thermal/cooling_device*/type | wc -l` + if [ $ports -eq 0 ]; then + echo $skipmsg pcie cooling devices missing >&2 + exit $ksft_skip + fi +} + +testport= +find_pcie_port() +{ + local patt="$1" + local pcieports + local max + local cur + local delta + local bestdelta=-1 + + pcieports=`grep -l -F -e "$patt" /sys/class/thermal/cooling_device*/type` + if [ -z "$pcieports" ]; then + return + fi + pcieports=${pcieports//\/type/} + # Find the port with the highest PCIe Link Speed + for port in $pcieports; do + max=`cat $port/max_state` + cur=`cat $port/cur_state` + delta=$((max-cur)) + if [ $delta -gt $bestdelta ]; then + testport="$port" + bestdelta=$delta + fi + done +} + +sysfspcidev= +find_sysfs_pci_dev() +{ + local typefile="$1/type" + local pcidir + + pcidir="$SYSFS/bus/pci/devices/`sed -e "s|^${PCIEPORTTYPE}_||g" $typefile`" + + if [ -r "$pcidir/current_link_speed" ]; then + sysfspcidev="$pcidir/current_link_speed" + fi +} + +usage() +{ + echo "Usage $0 [ -d dev ]" + echo -e "\t-d: PCIe port BDF string (e.g., 0000:00:04.0)" +} + +pattern="$PCIEPORTTYPE" +parse_arguments() +{ + while getopts d:h opt; do + case $opt in + h) + usage "$0" + exit 0 + ;; + d) + pattern="$PCIEPORTTYPE_$OPTARG" + ;; + *) + usage "$0" + exit 0 + ;; + esac + done +} + +parse_arguments "$@" +prerequisite +find_pcie_port "$pattern" +if [ -z "$testport" ]; then + echo $skipmsg "pcie cooling device not found from sysfs" >&2 + exit $ksft_skip +fi +find_sysfs_pci_dev "$testport" +if [ -z "$sysfspcidev" ]; then + echo $skipmsg "PCIe port device not found from sysfs" >&2 + exit $ksft_skip +fi + +./set_pcie_speed.sh "$testport" "$sysfspcidev" +retval=$? + +exit $retval diff --git a/tools/testing/selftests/pcie_bwctrl/set_pcie_speed.sh b/tools/testing/selftests/pcie_bwctrl/set_pcie_speed.sh new file mode 100755 index 000000000000..584596949312 --- /dev/null +++ b/tools/testing/selftests/pcie_bwctrl/set_pcie_speed.sh @@ -0,0 +1,67 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later + +set -e + +TESTNAME=set_pcie_speed + +declare -a PCIELINKSPEED=( + "2.5 GT/s PCIe" + "5.0 GT/s PCIe" + "8.0 GT/s PCIe" + "16.0 GT/s PCIe" + "32.0 GT/s PCIe" + "64.0 GT/s PCIe" +) + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 +retval=0 + +coolingdev="$1" +statefile="$coolingdev/cur_state" +maxfile="$coolingdev/max_state" +linkspeedfile="$2" + +oldstate=`cat $statefile` +maxstate=`cat $maxfile` + +set_state() +{ + local state=$1 + local linkspeed + local expected_linkspeed + + echo $state > $statefile + + sleep 1 + + linkspeed="`cat $linkspeedfile`" + expected_linkspeed=$((maxstate-state)) + expected_str="${PCIELINKSPEED[$expected_linkspeed]}" + if [ ! "${expected_str}" = "${linkspeed}" ]; then + echo "$TESTNAME failed: expected: ${expected_str}; got ${linkspeed}" + retval=1 + fi +} + +cleanup_skip () +{ + set_state $oldstate + exit $ksft_skip +} + +trap cleanup_skip EXIT + +echo "$TESTNAME: testing states $maxstate .. $oldstate with $coolingdev" +for i in $(seq $maxstate -1 $oldstate); do + set_state "$i" +done + +trap EXIT +if [ $retval -eq 0 ]; then + echo "$TESTNAME [PASS]" +else + echo "$TESTNAME [FAIL]" +fi +exit $retval diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c index c62564c264b1..ce413a221bac 100644 --- a/tools/testing/selftests/pidfd/pidfd_open_test.c +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c @@ -13,6 +13,7 @@ #include <stdlib.h> #include <string.h> #include <syscall.h> +#include <sys/ioctl.h> #include <sys/mount.h> #include <sys/prctl.h> #include <sys/wait.h> @@ -21,6 +22,32 @@ #include "pidfd.h" #include "../kselftest.h" +#ifndef PIDFS_IOCTL_MAGIC +#define PIDFS_IOCTL_MAGIC 0xFF +#endif + +#ifndef PIDFD_GET_INFO +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) +#define PIDFD_INFO_CGROUPID (1UL << 0) + +struct pidfd_info { + __u64 request_mask; + __u64 cgroupid; + __u32 pid; + __u32 tgid; + __u32 ppid; + __u32 ruid; + __u32 rgid; + __u32 euid; + __u32 egid; + __u32 suid; + __u32 sgid; + __u32 fsuid; + __u32 fsgid; + __u32 spare0[1]; +}; +#endif + static int safe_int(const char *numstr, int *converted) { char *err = NULL; @@ -120,10 +147,13 @@ out: int main(int argc, char **argv) { + struct pidfd_info info = { + .request_mask = PIDFD_INFO_CGROUPID, + }; int pidfd = -1, ret = 1; pid_t pid; - ksft_set_plan(3); + ksft_set_plan(4); pidfd = sys_pidfd_open(-1, 0); if (pidfd >= 0) { @@ -153,6 +183,56 @@ int main(int argc, char **argv) pid = get_pid_from_fdinfo_file(pidfd, "Pid:", sizeof("Pid:") - 1); ksft_print_msg("pidfd %d refers to process with pid %d\n", pidfd, pid); + if (ioctl(pidfd, PIDFD_GET_INFO, &info) < 0) { + ksft_print_msg("%s - failed to get info from pidfd\n", strerror(errno)); + goto on_error; + } + if (info.pid != pid) { + ksft_print_msg("pid from fdinfo file %d does not match pid from ioctl %d\n", + pid, info.pid); + goto on_error; + } + if (info.ppid != getppid()) { + ksft_print_msg("ppid %d does not match ppid from ioctl %d\n", + pid, info.pid); + goto on_error; + } + if (info.ruid != getuid()) { + ksft_print_msg("uid %d does not match uid from ioctl %d\n", + getuid(), info.ruid); + goto on_error; + } + if (info.rgid != getgid()) { + ksft_print_msg("gid %d does not match gid from ioctl %d\n", + getgid(), info.rgid); + goto on_error; + } + if (info.euid != geteuid()) { + ksft_print_msg("euid %d does not match euid from ioctl %d\n", + geteuid(), info.euid); + goto on_error; + } + if (info.egid != getegid()) { + ksft_print_msg("egid %d does not match egid from ioctl %d\n", + getegid(), info.egid); + goto on_error; + } + if (info.suid != geteuid()) { + ksft_print_msg("suid %d does not match suid from ioctl %d\n", + geteuid(), info.suid); + goto on_error; + } + if (info.sgid != getegid()) { + ksft_print_msg("sgid %d does not match sgid from ioctl %d\n", + getegid(), info.sgid); + goto on_error; + } + if ((info.request_mask & PIDFD_INFO_CGROUPID) && info.cgroupid == 0) { + ksft_print_msg("cgroupid should not be 0 when PIDFD_INFO_CGROUPID is set\n"); + goto on_error; + } + ksft_test_result_pass("get info from pidfd test: passed\n"); + ret = 0; on_error: diff --git a/tools/testing/selftests/powerpc/alignment/settings b/tools/testing/selftests/powerpc/alignment/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/cache_shape/settings b/tools/testing/selftests/powerpc/cache_shape/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/cache_shape/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/copyloops/settings b/tools/testing/selftests/powerpc/copyloops/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/dexcr/settings b/tools/testing/selftests/powerpc/dexcr/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/dexcr/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/dscr/settings b/tools/testing/selftests/powerpc/dscr/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/lib/settings b/tools/testing/selftests/powerpc/lib/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/lib/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/math/settings b/tools/testing/selftests/powerpc/math/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/mce/settings b/tools/testing/selftests/powerpc/mce/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/mm/settings b/tools/testing/selftests/powerpc/mm/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c index ed9143990888..9c0d343d7137 100644 --- a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c +++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c @@ -175,7 +175,7 @@ static int test(void) page_size = getpagesize(); getrlimit(RLIMIT_STACK, &rlimit); - printf("Stack rlimit is 0x%lx\n", rlimit.rlim_cur); + printf("Stack rlimit is 0x%llx\n", (unsigned long long)rlimit.rlim_cur); printf("Testing loads ...\n"); test_one_type(LOAD, page_size, rlimit.rlim_cur); diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c index 3ae77ba93208..8cf9fd5fed1c 100644 --- a/tools/testing/selftests/powerpc/mm/subpage_prot.c +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c @@ -211,8 +211,8 @@ int test_file(void) perror("failed to map file"); return 1; } - printf("allocated %s for 0x%lx bytes at %p\n", - file_name, filesize, fileblock); + printf("allocated %s for 0x%llx bytes at %p\n", + file_name, (long long)filesize, fileblock); printf("testing file map...\n"); diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c index 48344a74b212..35f0098399cc 100644 --- a/tools/testing/selftests/powerpc/mm/tlbie_test.c +++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c @@ -313,16 +313,16 @@ static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies) fclose(f); - if (nr_anamolies == 0) { - remove(path); - return; - } - sprintf(logfile, logfilename, tid); strcpy(path, logdir); strcat(path, separator); strcat(path, logfile); + if (nr_anamolies == 0) { + remove(path); + return; + } + printf("Thread %02d chunk has %d corrupted words. For details check %s\n", tid, nr_anamolies, path); } diff --git a/tools/testing/selftests/powerpc/nx-gzip/settings b/tools/testing/selftests/powerpc/nx-gzip/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/nx-gzip/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/papr_attributes/settings b/tools/testing/selftests/powerpc/papr_attributes/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_attributes/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/papr_sysparm/settings b/tools/testing/selftests/powerpc/papr_sysparm/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_sysparm/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/papr_vpd/settings b/tools/testing/selftests/powerpc/papr_vpd/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_vpd/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c index 2070a1e2b3a5..d8dd9a9c6c1b 100644 --- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c +++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c @@ -144,9 +144,6 @@ static int test_body(void) /* Run for 16Bi instructions */ FAIL_IF(do_count_loop(events, 16000000000, overhead, true)); - /* Run for 64Bi instructions */ - FAIL_IF(do_count_loop(events, 64000000000, overhead, true)); - event_close(&events[0]); event_close(&events[1]); diff --git a/tools/testing/selftests/powerpc/pmu/settings b/tools/testing/selftests/powerpc/pmu/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/primitives/settings b/tools/testing/selftests/powerpc/primitives/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/primitives/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/ptrace/settings b/tools/testing/selftests/powerpc/ptrace/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/scripts/settings b/tools/testing/selftests/powerpc/scripts/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/scripts/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh index f43aa4b77fba..9a4612e2e953 100755 --- a/tools/testing/selftests/powerpc/security/mitigation-patching.sh +++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh @@ -36,8 +36,7 @@ fi tainted=$(cat /proc/sys/kernel/tainted) if [[ "$tainted" -ne 0 ]]; then - echo "Error: kernel already tainted!" >&2 - exit 1 + echo "Warning: kernel already tainted! ($tainted)" >&2 fi mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush uaccess_flush" @@ -68,9 +67,10 @@ fi echo "Waiting for timeout ..." wait +orig_tainted=$tainted tainted=$(cat /proc/sys/kernel/tainted) -if [[ "$tainted" -ne 0 ]]; then - echo "Error: kernel became tainted!" >&2 +if [[ "$tainted" != "$orig_tainted" ]]; then + echo "Error: kernel newly tainted, before ($orig_tainted) after ($tainted)" >&2 exit 1 fi diff --git a/tools/testing/selftests/powerpc/security/settings b/tools/testing/selftests/powerpc/security/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/signal/sigfuz.c b/tools/testing/selftests/powerpc/signal/sigfuz.c index 08f9afe3b95c..c101b1391696 100644 --- a/tools/testing/selftests/powerpc/signal/sigfuz.c +++ b/tools/testing/selftests/powerpc/signal/sigfuz.c @@ -321,5 +321,5 @@ int main(int argc, char **argv) if (!args) args = ARG_COMPLETE; - test_harness(signal_fuzzer, "signal_fuzzer"); + return test_harness(signal_fuzzer, "signal_fuzzer"); } diff --git a/tools/testing/selftests/powerpc/stringloops/settings b/tools/testing/selftests/powerpc/stringloops/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/switch_endian/settings b/tools/testing/selftests/powerpc/switch_endian/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/syscalls/settings b/tools/testing/selftests/powerpc/syscalls/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/syscalls/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c index 421cb082f6be..0a4bc479ae39 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c @@ -176,5 +176,5 @@ int tm_signal_context_force_tm(void) int main(int argc, char **argv) { - test_harness(tm_signal_context_force_tm, "tm_signal_context_force_tm"); + return test_harness(tm_signal_context_force_tm, "tm_signal_context_force_tm"); } diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c index 06b801906f27..968864b052ec 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c @@ -46,6 +46,5 @@ int tm_signal_sigreturn_nt(void) int main(int argc, char **argv) { - test_harness(tm_signal_sigreturn_nt, "tm_signal_sigreturn_nt"); + return test_harness(tm_signal_sigreturn_nt, "tm_signal_sigreturn_nt"); } - diff --git a/tools/testing/selftests/powerpc/vphn/settings b/tools/testing/selftests/powerpc/vphn/settings new file mode 100644 index 000000000000..2e8566183318 --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c index 011252fe238c..58064151f2c8 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -146,6 +146,7 @@ static void usage(char *progname) " -T val set the ptp clock time to 'val' seconds\n" " -x val get an extended ptp clock time with the desired number of samples (up to %d)\n" " -X get a ptp clock cross timestamp\n" + " -y val pre/post tstamp timebase to use {realtime|monotonic|monotonic-raw}\n" " -z test combinations of rising/falling external time stamp flags\n", progname, PTP_MAX_SAMPLES); } @@ -189,6 +190,7 @@ int main(int argc, char *argv[]) int seconds = 0; int settime = 0; int channel = -1; + clockid_t ext_clockid = CLOCK_REALTIME; int64_t t1, t2, tp; int64_t interval, offset; @@ -198,7 +200,7 @@ int main(int argc, char *argv[]) progname = strrchr(argv[0], '/'); progname = progname ? 1+progname : argv[0]; - while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:sSt:T:w:x:Xz"))) { + while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:sSt:T:w:x:Xy:z"))) { switch (c) { case 'c': capabilities = 1; @@ -278,6 +280,21 @@ int main(int argc, char *argv[]) case 'X': getcross = 1; break; + case 'y': + if (!strcasecmp(optarg, "realtime")) + ext_clockid = CLOCK_REALTIME; + else if (!strcasecmp(optarg, "monotonic")) + ext_clockid = CLOCK_MONOTONIC; + else if (!strcasecmp(optarg, "monotonic-raw")) + ext_clockid = CLOCK_MONOTONIC_RAW; + else { + fprintf(stderr, + "type needs to be realtime, monotonic or monotonic-raw; was given %s\n", + optarg); + return -1; + } + break; + case 'z': flagtest = 1; break; @@ -566,6 +583,7 @@ int main(int argc, char *argv[]) } soe->n_samples = getextended; + soe->clockid = ext_clockid; if (ioctl(fd, PTP_SYS_OFFSET_EXTENDED, soe)) { perror("PTP_SYS_OFFSET_EXTENDED"); @@ -574,12 +592,46 @@ int main(int argc, char *argv[]) getextended); for (i = 0; i < getextended; i++) { - printf("sample #%2d: system time before: %lld.%09u\n", - i, soe->ts[i][0].sec, soe->ts[i][0].nsec); + switch (ext_clockid) { + case CLOCK_REALTIME: + printf("sample #%2d: real time before: %lld.%09u\n", + i, soe->ts[i][0].sec, + soe->ts[i][0].nsec); + break; + case CLOCK_MONOTONIC: + printf("sample #%2d: monotonic time before: %lld.%09u\n", + i, soe->ts[i][0].sec, + soe->ts[i][0].nsec); + break; + case CLOCK_MONOTONIC_RAW: + printf("sample #%2d: monotonic-raw time before: %lld.%09u\n", + i, soe->ts[i][0].sec, + soe->ts[i][0].nsec); + break; + default: + break; + } printf(" phc time: %lld.%09u\n", soe->ts[i][1].sec, soe->ts[i][1].nsec); - printf(" system time after: %lld.%09u\n", - soe->ts[i][2].sec, soe->ts[i][2].nsec); + switch (ext_clockid) { + case CLOCK_REALTIME: + printf(" real time after: %lld.%09u\n", + soe->ts[i][2].sec, + soe->ts[i][2].nsec); + break; + case CLOCK_MONOTONIC: + printf(" monotonic time after: %lld.%09u\n", + soe->ts[i][2].sec, + soe->ts[i][2].nsec); + break; + case CLOCK_MONOTONIC_RAW: + printf(" monotonic-raw time after: %lld.%09u\n", + soe->ts[i][2].sec, + soe->ts[i][2].nsec); + break; + default: + break; + } } } diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh index c3808c490d92..f87046b702d8 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh @@ -56,27 +56,30 @@ do echo > $i/kvm-test-1-run-qemu.sh.out export TORTURE_AFFINITY= kvm-get-cpus-script.sh $T/cpuarray.awk $T/cpubatches.awk $T/cpustate - cat << ' ___EOF___' >> $T/cpubatches.awk - END { - affinitylist = ""; - if (!gotcpus()) { - print "echo No CPU-affinity information, so no taskset command."; - } else if (cpu_count !~ /^[0-9][0-9]*$/) { - print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command."; - } else { - affinitylist = nextcpus(cpu_count); - if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/)) - print "echo " scenario ": Bogus CPU-affinity information, so no taskset command."; - else if (!dumpcpustate()) - print "echo " scenario ": Could not dump state, so no taskset command."; - else - print "export TORTURE_AFFINITY=" affinitylist; + if test -z "${TORTURE_NO_AFFINITY}" + then + cat << ' ___EOF___' >> $T/cpubatches.awk + END { + affinitylist = ""; + if (!gotcpus()) { + print "echo No CPU-affinity information, so no taskset command."; + } else if (cpu_count !~ /^[0-9][0-9]*$/) { + print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command."; + } else { + affinitylist = nextcpus(cpu_count); + if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/)) + print "echo " scenario ": Bogus CPU-affinity information, so no taskset command."; + else if (!dumpcpustate()) + print "echo " scenario ": Could not dump state, so no taskset command."; + else + print "export TORTURE_AFFINITY=" affinitylist; + } } - } - ___EOF___ - cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`" - affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`" - $affinity_export + ___EOF___ + cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`" + affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`" + $affinity_export + fi kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 & done for i in $runfiles diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 7af73ddc148d..42e5e8597a1a 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -42,6 +42,7 @@ TORTURE_JITTER_STOP="" TORTURE_KCONFIG_KASAN_ARG="" TORTURE_KCONFIG_KCSAN_ARG="" TORTURE_KMAKE_ARG="" +TORTURE_NO_AFFINITY="" TORTURE_QEMU_MEM=512 torture_qemu_mem_default=1 TORTURE_REMOTE= @@ -82,6 +83,7 @@ usage () { echo " --kmake-arg kernel-make-arguments" echo " --mac nn:nn:nn:nn:nn:nn" echo " --memory megabytes|nnnG" + echo " --no-affinity" echo " --no-initrd" echo " --qemu-args qemu-arguments" echo " --qemu-cmd qemu-system-..." @@ -220,6 +222,9 @@ do torture_qemu_mem_default= shift ;; + --no-affinity) + TORTURE_NO_AFFINITY="no-affinity" + ;; --no-initrd) TORTURE_INITRD=""; export TORTURE_INITRD ;; @@ -417,6 +422,7 @@ TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_K TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG TORTURE_MOD="$TORTURE_MOD"; export TORTURE_MOD +TORTURE_NO_AFFINITY="$TORTURE_NO_AFFINITY"; export TORTURE_NO_AFFINITY TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST index 98b6175e5aa0..45f572570a8c 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST @@ -5,6 +5,7 @@ TREE04 TREE05 TREE07 TREE09 +SRCU-L SRCU-N SRCU-P SRCU-T diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L new file mode 100644 index 000000000000..3b4fa8dbef8a --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L @@ -0,0 +1,10 @@ +CONFIG_RCU_TRACE=n +CONFIG_SMP=y +CONFIG_NR_CPUS=6 +CONFIG_HOTPLUG_CPU=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +#CHECK#CONFIG_RCU_EXPERT=n +CONFIG_KPROBES=n +CONFIG_FTRACE=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot new file mode 100644 index 000000000000..0207b3138c5b --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot @@ -0,0 +1,3 @@ +rcutorture.torture_type=srcu +rcutorture.reader_flavor=0x4 +rcutorture.fwd_progress=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot index ce0694fd9b92..b54cf87dc110 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot @@ -1,2 +1,3 @@ rcutorture.torture_type=srcu +rcutorture.reader_flavor=0x2 rcutorture.fwd_progress=3 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 index a323d8948b7c..759ee51d3ddc 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=56 +CONFIG_NR_CPUS=74 CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c index 0c045080d808..3bbf3042fb06 100644 --- a/tools/testing/selftests/resctrl/cmt_test.c +++ b/tools/testing/selftests/resctrl/cmt_test.c @@ -99,14 +99,13 @@ static int check_results(struct resctrl_val_param *param, size_t span, int no_of } /* Field 3 is llc occ resc value */ - if (runs > 0) - sum_llc_occu_resc += strtoul(token_array[3], NULL, 0); + sum_llc_occu_resc += strtoul(token_array[3], NULL, 0); runs++; } fclose(fp); return show_results_info(sum_llc_occu_resc, no_of_bits, span, - MAX_DIFF, MAX_DIFF_PERCENT, runs - 1, true); + MAX_DIFF, MAX_DIFF_PERCENT, runs, true); } static void cmt_test_cleanup(void) @@ -116,15 +115,13 @@ static void cmt_test_cleanup(void) static int cmt_run_test(const struct resctrl_test *test, const struct user_params *uparams) { - const char * const *cmd = uparams->benchmark_cmd; - const char *new_cmd[BENCHMARK_ARGS]; + struct fill_buf_param fill_buf = {}; unsigned long cache_total_size = 0; int n = uparams->bits ? : 5; unsigned long long_mask; - char *span_str = NULL; int count_of_bits; size_t span; - int ret, i; + int ret; ret = get_full_cbm("L3", &long_mask); if (ret) @@ -155,32 +152,26 @@ static int cmt_run_test(const struct resctrl_test *test, const struct user_param span = cache_portion_size(cache_total_size, param.mask, long_mask); - if (strcmp(cmd[0], "fill_buf") == 0) { - /* Duplicate the command to be able to replace span in it */ - for (i = 0; uparams->benchmark_cmd[i]; i++) - new_cmd[i] = uparams->benchmark_cmd[i]; - new_cmd[i] = NULL; - - ret = asprintf(&span_str, "%zu", span); - if (ret < 0) - return -1; - new_cmd[1] = span_str; - cmd = new_cmd; + if (uparams->fill_buf) { + fill_buf.buf_size = span; + fill_buf.memflush = uparams->fill_buf->memflush; + param.fill_buf = &fill_buf; + } else if (!uparams->benchmark_cmd[0]) { + fill_buf.buf_size = span; + fill_buf.memflush = true; + param.fill_buf = &fill_buf; } remove(RESULT_FILE_NAME); - ret = resctrl_val(test, uparams, cmd, ¶m); + ret = resctrl_val(test, uparams, ¶m); if (ret) - goto out; + return ret; ret = check_results(¶m, span, n); if (ret && (get_vendor() == ARCH_INTEL)) ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); -out: - free(span_str); - return ret; } diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c index ae120f1735c0..19a01a52dc1a 100644 --- a/tools/testing/selftests/resctrl/fill_buf.c +++ b/tools/testing/selftests/resctrl/fill_buf.c @@ -88,18 +88,6 @@ static int fill_one_span_read(unsigned char *buf, size_t buf_size) return sum; } -static void fill_one_span_write(unsigned char *buf, size_t buf_size) -{ - unsigned char *end_ptr = buf + buf_size; - unsigned char *p; - - p = buf; - while (p < end_ptr) { - *p = '1'; - p += (CL_SIZE / 2); - } -} - void fill_cache_read(unsigned char *buf, size_t buf_size, bool once) { int ret = 0; @@ -114,20 +102,11 @@ void fill_cache_read(unsigned char *buf, size_t buf_size, bool once) *value_sink = ret; } -static void fill_cache_write(unsigned char *buf, size_t buf_size, bool once) -{ - while (1) { - fill_one_span_write(buf, buf_size); - if (once) - break; - } -} - -unsigned char *alloc_buffer(size_t buf_size, int memflush) +unsigned char *alloc_buffer(size_t buf_size, bool memflush) { void *buf = NULL; uint64_t *p64; - size_t s64; + ssize_t s64; int ret; ret = posix_memalign(&buf, PAGE_SIZE, buf_size); @@ -151,19 +130,15 @@ unsigned char *alloc_buffer(size_t buf_size, int memflush) return buf; } -int run_fill_buf(size_t buf_size, int memflush, int op, bool once) +ssize_t get_fill_buf_size(int cpu_no, const char *cache_type) { - unsigned char *buf; - - buf = alloc_buffer(buf_size, memflush); - if (!buf) - return -1; + unsigned long cache_total_size = 0; + int ret; - if (op == 0) - fill_cache_read(buf, buf_size, once); - else - fill_cache_write(buf, buf_size, once); - free(buf); + ret = get_cache_size(cpu_no, cache_type, &cache_total_size); + if (ret) + return ret; - return 0; + return cache_total_size * 2 > MINIMUM_SPAN ? + cache_total_size * 2 : MINIMUM_SPAN; } diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c index ab8496a4925b..536d9089d2f6 100644 --- a/tools/testing/selftests/resctrl/mba_test.c +++ b/tools/testing/selftests/resctrl/mba_test.c @@ -21,7 +21,7 @@ static int mba_init(const struct resctrl_val_param *param, int domain_id) { int ret; - ret = initialize_mem_bw_imc(); + ret = initialize_read_mem_bw_imc(); if (ret) return ret; @@ -39,7 +39,8 @@ static int mba_setup(const struct resctrl_test *test, const struct user_params *uparams, struct resctrl_val_param *p) { - static int runs_per_allocation, allocation = 100; + static unsigned int allocation = ALLOCATION_MIN; + static int runs_per_allocation; char allocation_str[64]; int ret; @@ -50,7 +51,7 @@ static int mba_setup(const struct resctrl_test *test, if (runs_per_allocation++ != 0) return 0; - if (allocation < ALLOCATION_MIN || allocation > ALLOCATION_MAX) + if (allocation > ALLOCATION_MAX) return END_OF_TESTS; sprintf(allocation_str, "%d", allocation); @@ -59,7 +60,7 @@ static int mba_setup(const struct resctrl_test *test, if (ret < 0) return ret; - allocation -= ALLOCATION_STEP; + allocation += ALLOCATION_STEP; return 0; } @@ -67,13 +68,14 @@ static int mba_setup(const struct resctrl_test *test, static int mba_measure(const struct user_params *uparams, struct resctrl_val_param *param, pid_t bm_pid) { - return measure_mem_bw(uparams, param, bm_pid, "reads"); + return measure_read_mem_bw(uparams, param, bm_pid); } static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) { - int allocation, runs; + unsigned int allocation; bool ret = false; + int runs; ksft_print_msg("Results are displayed in (MB)\n"); /* Memory bandwidth from 100% down to 10% */ @@ -84,18 +86,21 @@ static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) int avg_diff_per; float avg_diff; - /* - * The first run is discarded due to inaccurate value from - * phase transition. - */ - for (runs = NUM_OF_RUNS * allocation + 1; + for (runs = NUM_OF_RUNS * allocation; runs < NUM_OF_RUNS * allocation + NUM_OF_RUNS ; runs++) { sum_bw_imc += bw_imc[runs]; sum_bw_resc += bw_resc[runs]; } - avg_bw_imc = sum_bw_imc / (NUM_OF_RUNS - 1); - avg_bw_resc = sum_bw_resc / (NUM_OF_RUNS - 1); + avg_bw_imc = sum_bw_imc / NUM_OF_RUNS; + avg_bw_resc = sum_bw_resc / NUM_OF_RUNS; + if (avg_bw_imc < THROTTLE_THRESHOLD || avg_bw_resc < THROTTLE_THRESHOLD) { + ksft_print_msg("Bandwidth below threshold (%d MiB). Dropping results from MBA schemata %u.\n", + THROTTLE_THRESHOLD, + ALLOCATION_MIN + ALLOCATION_STEP * allocation); + continue; + } + avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc; avg_diff_per = (int)(avg_diff * 100); @@ -103,7 +108,7 @@ static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) avg_diff_per > MAX_DIFF_PERCENT ? "Fail:" : "Pass:", MAX_DIFF_PERCENT, - ALLOCATION_MAX - ALLOCATION_STEP * allocation); + ALLOCATION_MIN + ALLOCATION_STEP * allocation); ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per); ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc); @@ -122,8 +127,9 @@ static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc) static int check_results(void) { + unsigned long bw_resc[NUM_OF_RUNS * ALLOCATION_MAX / ALLOCATION_STEP]; + unsigned long bw_imc[NUM_OF_RUNS * ALLOCATION_MAX / ALLOCATION_STEP]; char *token_array[8], output[] = RESULT_FILE_NAME, temp[512]; - unsigned long bw_imc[1024], bw_resc[1024]; int runs; FILE *fp; @@ -170,11 +176,27 @@ static int mba_run_test(const struct resctrl_test *test, const struct user_param .setup = mba_setup, .measure = mba_measure, }; + struct fill_buf_param fill_buf = {}; int ret; remove(RESULT_FILE_NAME); - ret = resctrl_val(test, uparams, uparams->benchmark_cmd, ¶m); + if (uparams->fill_buf) { + fill_buf.buf_size = uparams->fill_buf->buf_size; + fill_buf.memflush = uparams->fill_buf->memflush; + param.fill_buf = &fill_buf; + } else if (!uparams->benchmark_cmd[0]) { + ssize_t buf_size; + + buf_size = get_fill_buf_size(uparams->cpu, "L3"); + if (buf_size < 0) + return buf_size; + fill_buf.buf_size = buf_size; + fill_buf.memflush = true; + param.fill_buf = &fill_buf; + } + + ret = resctrl_val(test, uparams, ¶m); if (ret) return ret; diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c index 6b5a3b52d861..315b2ef3b3bc 100644 --- a/tools/testing/selftests/resctrl/mbm_test.c +++ b/tools/testing/selftests/resctrl/mbm_test.c @@ -22,17 +22,13 @@ show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, size_t span) int runs, ret, avg_diff_per; float avg_diff = 0; - /* - * Discard the first value which is inaccurate due to monitoring setup - * transition phase. - */ - for (runs = 1; runs < NUM_OF_RUNS ; runs++) { + for (runs = 0; runs < NUM_OF_RUNS; runs++) { sum_bw_imc += bw_imc[runs]; sum_bw_resc += bw_resc[runs]; } - avg_bw_imc = sum_bw_imc / 4; - avg_bw_resc = sum_bw_resc / 4; + avg_bw_imc = sum_bw_imc / NUM_OF_RUNS; + avg_bw_resc = sum_bw_resc / NUM_OF_RUNS; avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc; avg_diff_per = (int)(avg_diff * 100); @@ -40,7 +36,8 @@ show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, size_t span) ksft_print_msg("%s Check MBM diff within %d%%\n", ret ? "Fail:" : "Pass:", MAX_DIFF_PERCENT); ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per); - ksft_print_msg("Span (MB): %zu\n", span / MB); + if (span) + ksft_print_msg("Span (MB): %zu\n", span / MB); ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc); ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc); @@ -90,7 +87,7 @@ static int mbm_init(const struct resctrl_val_param *param, int domain_id) { int ret; - ret = initialize_mem_bw_imc(); + ret = initialize_read_mem_bw_imc(); if (ret) return ret; @@ -121,7 +118,7 @@ static int mbm_setup(const struct resctrl_test *test, static int mbm_measure(const struct user_params *uparams, struct resctrl_val_param *param, pid_t bm_pid) { - return measure_mem_bw(uparams, param, bm_pid, "reads"); + return measure_read_mem_bw(uparams, param, bm_pid); } static void mbm_test_cleanup(void) @@ -138,15 +135,31 @@ static int mbm_run_test(const struct resctrl_test *test, const struct user_param .setup = mbm_setup, .measure = mbm_measure, }; + struct fill_buf_param fill_buf = {}; int ret; remove(RESULT_FILE_NAME); - ret = resctrl_val(test, uparams, uparams->benchmark_cmd, ¶m); + if (uparams->fill_buf) { + fill_buf.buf_size = uparams->fill_buf->buf_size; + fill_buf.memflush = uparams->fill_buf->memflush; + param.fill_buf = &fill_buf; + } else if (!uparams->benchmark_cmd[0]) { + ssize_t buf_size; + + buf_size = get_fill_buf_size(uparams->cpu, "L3"); + if (buf_size < 0) + return buf_size; + fill_buf.buf_size = buf_size; + fill_buf.memflush = true; + param.fill_buf = &fill_buf; + } + + ret = resctrl_val(test, uparams, ¶m); if (ret) return ret; - ret = check_results(DEFAULT_SPAN); + ret = check_results(param.fill_buf ? param.fill_buf->buf_size : 0); if (ret && (get_vendor() == ARCH_INTEL)) ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n"); diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h index 2dda56084588..dab1953fc7a0 100644 --- a/tools/testing/selftests/resctrl/resctrl.h +++ b/tools/testing/selftests/resctrl/resctrl.h @@ -41,18 +41,48 @@ #define BENCHMARK_ARGS 64 -#define DEFAULT_SPAN (250 * MB) +#define MINIMUM_SPAN (250 * MB) + +/* + * Memory bandwidth (in MiB) below which the bandwidth comparisons + * between iMC and resctrl are considered unreliable. For example RAS + * features or memory performance features that generate memory traffic + * may drive accesses that are counted differently by performance counters + * and MBM respectively, for instance generating "overhead" traffic which + * is not counted against any specific RMID. + */ +#define THROTTLE_THRESHOLD 750 + +/* + * fill_buf_param: "fill_buf" benchmark parameters + * @buf_size: Size (in bytes) of buffer used in benchmark. + * "fill_buf" allocates and initializes buffer of + * @buf_size. User can change value via command line. + * @memflush: If false the buffer will not be flushed after + * allocation and initialization, otherwise the + * buffer will be flushed. User can change value via + * command line (via integers with 0 interpreted as + * false and anything else as true). + */ +struct fill_buf_param { + size_t buf_size; + bool memflush; +}; /* * user_params: User supplied parameters * @cpu: CPU number to which the benchmark will be bound to * @bits: Number of bits used for cache allocation size * @benchmark_cmd: Benchmark command to run during (some of the) tests + * @fill_buf: Pointer to user provided parameters for "fill_buf", + * NULL if user did not provide parameters and test + * specific defaults should be used. */ struct user_params { int cpu; int bits; const char *benchmark_cmd[BENCHMARK_ARGS]; + const struct fill_buf_param *fill_buf; }; /* @@ -87,21 +117,29 @@ struct resctrl_test { * @init: Callback function to initialize test environment * @setup: Callback function to setup per test run environment * @measure: Callback that performs the measurement (a single test) + * @fill_buf: Parameters for default "fill_buf" benchmark. + * Initialized with user provided parameters, possibly + * adapted to be relevant to the test. If user does + * not provide parameters for "fill_buf" nor a + * replacement benchmark then initialized with defaults + * appropriate for test. NULL if user provided + * benchmark. */ struct resctrl_val_param { - const char *ctrlgrp; - const char *mongrp; - char filename[64]; - unsigned long mask; - int num_of_runs; - int (*init)(const struct resctrl_val_param *param, - int domain_id); - int (*setup)(const struct resctrl_test *test, - const struct user_params *uparams, - struct resctrl_val_param *param); - int (*measure)(const struct user_params *uparams, - struct resctrl_val_param *param, - pid_t bm_pid); + const char *ctrlgrp; + const char *mongrp; + char filename[64]; + unsigned long mask; + int num_of_runs; + int (*init)(const struct resctrl_val_param *param, + int domain_id); + int (*setup)(const struct resctrl_test *test, + const struct user_params *uparams, + struct resctrl_val_param *param); + int (*measure)(const struct user_params *uparams, + struct resctrl_val_param *param, + pid_t bm_pid); + struct fill_buf_param *fill_buf; }; struct perf_event_read { @@ -126,7 +164,6 @@ int filter_dmesg(void); int get_domain_id(const char *resource, int cpu_no, int *domain_id); int mount_resctrlfs(void); int umount_resctrlfs(void); -const char *get_bw_report_type(const char *bw_report); bool resctrl_resource_exists(const char *resource); bool resctrl_mon_feature_exists(const char *resource, const char *feature); bool resource_info_file_exists(const char *resource, const char *file); @@ -139,19 +176,17 @@ int write_schemata(const char *ctrlgrp, char *schemata, int cpu_no, int write_bm_pid_to_resctrl(pid_t bm_pid, const char *ctrlgrp, const char *mongrp); int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags); -unsigned char *alloc_buffer(size_t buf_size, int memflush); +unsigned char *alloc_buffer(size_t buf_size, bool memflush); void mem_flush(unsigned char *buf, size_t buf_size); void fill_cache_read(unsigned char *buf, size_t buf_size, bool once); -int run_fill_buf(size_t buf_size, int memflush, int op, bool once); -int initialize_mem_bw_imc(void); -int measure_mem_bw(const struct user_params *uparams, - struct resctrl_val_param *param, pid_t bm_pid, - const char *bw_report); +ssize_t get_fill_buf_size(int cpu_no, const char *cache_type); +int initialize_read_mem_bw_imc(void); +int measure_read_mem_bw(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid); void initialize_mem_bw_resctrl(const struct resctrl_val_param *param, int domain_id); int resctrl_val(const struct resctrl_test *test, const struct user_params *uparams, - const char * const *benchmark_cmd, struct resctrl_val_param *param); unsigned long create_bit_mask(unsigned int start, unsigned int len); unsigned int count_contiguous_bits(unsigned long val, unsigned int *start); diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c index ecbb7605a981..3335af815b21 100644 --- a/tools/testing/selftests/resctrl/resctrl_tests.c +++ b/tools/testing/selftests/resctrl/resctrl_tests.c @@ -148,6 +148,78 @@ cleanup: test_cleanup(test); } +/* + * Allocate and initialize a struct fill_buf_param with user provided + * (via "-b fill_buf <fill_buf parameters>") parameters. + * + * Use defaults (that may not be appropriate for all tests) for any + * fill_buf parameters omitted by the user. + * + * Historically it may have been possible for user space to provide + * additional parameters, "operation" ("read" vs "write") in + * benchmark_cmd[3] and "once" (run "once" or until terminated) in + * benchmark_cmd[4]. Changing these parameters have never been + * supported with the default of "read" operation and running until + * terminated built into the tests. Any unsupported values for + * (original) "fill_buf" parameters are treated as failure. + * + * Return: On failure, forcibly exits the test on any parsing failure, + * returns NULL if no parsing needed (user did not actually provide + * "-b fill_buf"). + * On success, returns pointer to newly allocated and fully + * initialized struct fill_buf_param that caller must free. + */ +static struct fill_buf_param *alloc_fill_buf_param(struct user_params *uparams) +{ + struct fill_buf_param *fill_param = NULL; + char *endptr = NULL; + + if (!uparams->benchmark_cmd[0] || strcmp(uparams->benchmark_cmd[0], "fill_buf")) + return NULL; + + fill_param = malloc(sizeof(*fill_param)); + if (!fill_param) + ksft_exit_skip("Unable to allocate memory for fill_buf parameters.\n"); + + if (uparams->benchmark_cmd[1] && *uparams->benchmark_cmd[1] != '\0') { + errno = 0; + fill_param->buf_size = strtoul(uparams->benchmark_cmd[1], &endptr, 10); + if (errno || *endptr != '\0') { + free(fill_param); + ksft_exit_skip("Unable to parse benchmark buffer size.\n"); + } + } else { + fill_param->buf_size = MINIMUM_SPAN; + } + + if (uparams->benchmark_cmd[2] && *uparams->benchmark_cmd[2] != '\0') { + errno = 0; + fill_param->memflush = strtol(uparams->benchmark_cmd[2], &endptr, 10) != 0; + if (errno || *endptr != '\0') { + free(fill_param); + ksft_exit_skip("Unable to parse benchmark memflush parameter.\n"); + } + } else { + fill_param->memflush = true; + } + + if (uparams->benchmark_cmd[3] && *uparams->benchmark_cmd[3] != '\0') { + if (strcmp(uparams->benchmark_cmd[3], "0")) { + free(fill_param); + ksft_exit_skip("Only read operations supported.\n"); + } + } + + if (uparams->benchmark_cmd[4] && *uparams->benchmark_cmd[4] != '\0') { + if (strcmp(uparams->benchmark_cmd[4], "false")) { + free(fill_param); + ksft_exit_skip("fill_buf is required to run until termination.\n"); + } + } + + return fill_param; +} + static void init_user_params(struct user_params *uparams) { memset(uparams, 0, sizeof(*uparams)); @@ -158,11 +230,11 @@ static void init_user_params(struct user_params *uparams) int main(int argc, char **argv) { + struct fill_buf_param *fill_param = NULL; int tests = ARRAY_SIZE(resctrl_tests); bool test_param_seen = false; struct user_params uparams; - char *span_str = NULL; - int ret, c, i; + int c, i; init_user_params(&uparams); @@ -239,6 +311,10 @@ int main(int argc, char **argv) } last_arg: + fill_param = alloc_fill_buf_param(&uparams); + if (fill_param) + uparams.fill_buf = fill_param; + ksft_print_header(); /* @@ -257,24 +333,11 @@ last_arg: filter_dmesg(); - if (!uparams.benchmark_cmd[0]) { - /* If no benchmark is given by "-b" argument, use fill_buf. */ - uparams.benchmark_cmd[0] = "fill_buf"; - ret = asprintf(&span_str, "%u", DEFAULT_SPAN); - if (ret < 0) - ksft_exit_fail_msg("Out of memory!\n"); - uparams.benchmark_cmd[1] = span_str; - uparams.benchmark_cmd[2] = "1"; - uparams.benchmark_cmd[3] = "0"; - uparams.benchmark_cmd[4] = "false"; - uparams.benchmark_cmd[5] = NULL; - } - ksft_set_plan(tests); for (i = 0; i < ARRAY_SIZE(resctrl_tests); i++) run_single_test(resctrl_tests[i], &uparams); - free(span_str); + free(fill_param); ksft_finished(); } diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c index 8c275f6b4dd7..7c08e936572d 100644 --- a/tools/testing/selftests/resctrl/resctrl_val.c +++ b/tools/testing/selftests/resctrl/resctrl_val.c @@ -12,13 +12,10 @@ #define UNCORE_IMC "uncore_imc" #define READ_FILE_NAME "events/cas_count_read" -#define WRITE_FILE_NAME "events/cas_count_write" #define DYN_PMU_PATH "/sys/bus/event_source/devices" #define SCALE 0.00006103515625 #define MAX_IMCS 20 #define MAX_TOKENS 5 -#define READ 0 -#define WRITE 1 #define CON_MBM_LOCAL_BYTES_PATH \ "%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes" @@ -41,85 +38,71 @@ struct imc_counter_config { static char mbm_total_path[1024]; static int imcs; -static struct imc_counter_config imc_counters_config[MAX_IMCS][2]; +static struct imc_counter_config imc_counters_config[MAX_IMCS]; static const struct resctrl_test *current_test; -void membw_initialize_perf_event_attr(int i, int j) +static void read_mem_bw_initialize_perf_event_attr(int i) { - memset(&imc_counters_config[i][j].pe, 0, + memset(&imc_counters_config[i].pe, 0, sizeof(struct perf_event_attr)); - imc_counters_config[i][j].pe.type = imc_counters_config[i][j].type; - imc_counters_config[i][j].pe.size = sizeof(struct perf_event_attr); - imc_counters_config[i][j].pe.disabled = 1; - imc_counters_config[i][j].pe.inherit = 1; - imc_counters_config[i][j].pe.exclude_guest = 0; - imc_counters_config[i][j].pe.config = - imc_counters_config[i][j].umask << 8 | - imc_counters_config[i][j].event; - imc_counters_config[i][j].pe.sample_type = PERF_SAMPLE_IDENTIFIER; - imc_counters_config[i][j].pe.read_format = + imc_counters_config[i].pe.type = imc_counters_config[i].type; + imc_counters_config[i].pe.size = sizeof(struct perf_event_attr); + imc_counters_config[i].pe.disabled = 1; + imc_counters_config[i].pe.inherit = 1; + imc_counters_config[i].pe.exclude_guest = 0; + imc_counters_config[i].pe.config = + imc_counters_config[i].umask << 8 | + imc_counters_config[i].event; + imc_counters_config[i].pe.sample_type = PERF_SAMPLE_IDENTIFIER; + imc_counters_config[i].pe.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING; } -void membw_ioctl_perf_event_ioc_reset_enable(int i, int j) +static void read_mem_bw_ioctl_perf_event_ioc_reset_enable(int i) { - ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_RESET, 0); - ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_ENABLE, 0); + ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_RESET, 0); + ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_ENABLE, 0); } -void membw_ioctl_perf_event_ioc_disable(int i, int j) +static void read_mem_bw_ioctl_perf_event_ioc_disable(int i) { - ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_DISABLE, 0); + ioctl(imc_counters_config[i].fd, PERF_EVENT_IOC_DISABLE, 0); } /* - * get_event_and_umask: Parse config into event and umask + * get_read_event_and_umask: Parse config into event and umask * @cas_count_cfg: Config * @count: iMC number - * @op: Operation (read/write) */ -void get_event_and_umask(char *cas_count_cfg, int count, bool op) +static void get_read_event_and_umask(char *cas_count_cfg, int count) { char *token[MAX_TOKENS]; int i = 0; - strcat(cas_count_cfg, ","); token[0] = strtok(cas_count_cfg, "=,"); for (i = 1; i < MAX_TOKENS; i++) token[i] = strtok(NULL, "=,"); - for (i = 0; i < MAX_TOKENS; i++) { + for (i = 0; i < MAX_TOKENS - 1; i++) { if (!token[i]) break; - if (strcmp(token[i], "event") == 0) { - if (op == READ) - imc_counters_config[count][READ].event = - strtol(token[i + 1], NULL, 16); - else - imc_counters_config[count][WRITE].event = - strtol(token[i + 1], NULL, 16); - } - if (strcmp(token[i], "umask") == 0) { - if (op == READ) - imc_counters_config[count][READ].umask = - strtol(token[i + 1], NULL, 16); - else - imc_counters_config[count][WRITE].umask = - strtol(token[i + 1], NULL, 16); - } + if (strcmp(token[i], "event") == 0) + imc_counters_config[count].event = strtol(token[i + 1], NULL, 16); + if (strcmp(token[i], "umask") == 0) + imc_counters_config[count].umask = strtol(token[i + 1], NULL, 16); } } -static int open_perf_event(int i, int cpu_no, int j) +static int open_perf_read_event(int i, int cpu_no) { - imc_counters_config[i][j].fd = - perf_event_open(&imc_counters_config[i][j].pe, -1, cpu_no, -1, + imc_counters_config[i].fd = + perf_event_open(&imc_counters_config[i].pe, -1, cpu_no, -1, PERF_FLAG_FD_CLOEXEC); - if (imc_counters_config[i][j].fd == -1) { + if (imc_counters_config[i].fd == -1) { fprintf(stderr, "Error opening leader %llx\n", - imc_counters_config[i][j].pe.config); + imc_counters_config[i].pe.config); return -1; } @@ -127,7 +110,7 @@ static int open_perf_event(int i, int cpu_no, int j) return 0; } -/* Get type and config (read and write) of an iMC counter */ +/* Get type and config of an iMC counter's read event. */ static int read_from_imc_dir(char *imc_dir, int count) { char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024]; @@ -141,7 +124,7 @@ static int read_from_imc_dir(char *imc_dir, int count) return -1; } - if (fscanf(fp, "%u", &imc_counters_config[count][READ].type) <= 0) { + if (fscanf(fp, "%u", &imc_counters_config[count].type) <= 0) { ksft_perror("Could not get iMC type"); fclose(fp); @@ -149,9 +132,6 @@ static int read_from_imc_dir(char *imc_dir, int count) } fclose(fp); - imc_counters_config[count][WRITE].type = - imc_counters_config[count][READ].type; - /* Get read config */ sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME); fp = fopen(imc_counter_cfg, "r"); @@ -160,7 +140,7 @@ static int read_from_imc_dir(char *imc_dir, int count) return -1; } - if (fscanf(fp, "%s", cas_count_cfg) <= 0) { + if (fscanf(fp, "%1023s", cas_count_cfg) <= 0) { ksft_perror("Could not get iMC cas count read"); fclose(fp); @@ -168,34 +148,19 @@ static int read_from_imc_dir(char *imc_dir, int count) } fclose(fp); - get_event_and_umask(cas_count_cfg, count, READ); - - /* Get write config */ - sprintf(imc_counter_cfg, "%s%s", imc_dir, WRITE_FILE_NAME); - fp = fopen(imc_counter_cfg, "r"); - if (!fp) { - ksft_perror("Failed to open iMC config file"); - - return -1; - } - if (fscanf(fp, "%s", cas_count_cfg) <= 0) { - ksft_perror("Could not get iMC cas count write"); - fclose(fp); - - return -1; - } - fclose(fp); - - get_event_and_umask(cas_count_cfg, count, WRITE); + get_read_event_and_umask(cas_count_cfg, count); return 0; } /* * A system can have 'n' number of iMC (Integrated Memory Controller) - * counters, get that 'n'. For each iMC counter get it's type and config. - * Also, each counter has two configs, one for read and the other for write. - * A config again has two parts, event and umask. + * counters, get that 'n'. Discover the properties of the available + * counters in support of needed performance measurement via perf. + * For each iMC counter get it's type and config. Also obtain each + * counter's event and umask for the memory read events that will be + * measured. + * * Enumerate all these details into an array of structures. * * Return: >= 0 on success. < 0 on failure. @@ -256,55 +221,46 @@ static int num_of_imcs(void) return count; } -int initialize_mem_bw_imc(void) +int initialize_read_mem_bw_imc(void) { - int imc, j; + int imc; imcs = num_of_imcs(); if (imcs <= 0) return imcs; /* Initialize perf_event_attr structures for all iMC's */ - for (imc = 0; imc < imcs; imc++) { - for (j = 0; j < 2; j++) - membw_initialize_perf_event_attr(imc, j); - } + for (imc = 0; imc < imcs; imc++) + read_mem_bw_initialize_perf_event_attr(imc); return 0; } -static void perf_close_imc_mem_bw(void) +static void perf_close_imc_read_mem_bw(void) { int mc; for (mc = 0; mc < imcs; mc++) { - if (imc_counters_config[mc][READ].fd != -1) - close(imc_counters_config[mc][READ].fd); - if (imc_counters_config[mc][WRITE].fd != -1) - close(imc_counters_config[mc][WRITE].fd); + if (imc_counters_config[mc].fd != -1) + close(imc_counters_config[mc].fd); } } /* - * perf_open_imc_mem_bw - Open perf fds for IMCs + * perf_open_imc_read_mem_bw - Open perf fds for IMCs * @cpu_no: CPU number that the benchmark PID is bound to * * Return: = 0 on success. < 0 on failure. */ -static int perf_open_imc_mem_bw(int cpu_no) +static int perf_open_imc_read_mem_bw(int cpu_no) { int imc, ret; - for (imc = 0; imc < imcs; imc++) { - imc_counters_config[imc][READ].fd = -1; - imc_counters_config[imc][WRITE].fd = -1; - } + for (imc = 0; imc < imcs; imc++) + imc_counters_config[imc].fd = -1; for (imc = 0; imc < imcs; imc++) { - ret = open_perf_event(imc, cpu_no, READ); - if (ret) - goto close_fds; - ret = open_perf_event(imc, cpu_no, WRITE); + ret = open_perf_read_event(imc, cpu_no); if (ret) goto close_fds; } @@ -312,60 +268,52 @@ static int perf_open_imc_mem_bw(int cpu_no) return 0; close_fds: - perf_close_imc_mem_bw(); + perf_close_imc_read_mem_bw(); return -1; } /* - * do_mem_bw_test - Perform memory bandwidth test + * do_imc_read_mem_bw_test - Perform memory bandwidth test * * Runs memory bandwidth test over one second period. Also, handles starting * and stopping of the IMC perf counters around the test. */ -static void do_imc_mem_bw_test(void) +static void do_imc_read_mem_bw_test(void) { int imc; - for (imc = 0; imc < imcs; imc++) { - membw_ioctl_perf_event_ioc_reset_enable(imc, READ); - membw_ioctl_perf_event_ioc_reset_enable(imc, WRITE); - } + for (imc = 0; imc < imcs; imc++) + read_mem_bw_ioctl_perf_event_ioc_reset_enable(imc); sleep(1); - /* Stop counters after a second to get results (both read and write) */ - for (imc = 0; imc < imcs; imc++) { - membw_ioctl_perf_event_ioc_disable(imc, READ); - membw_ioctl_perf_event_ioc_disable(imc, WRITE); - } + /* Stop counters after a second to get results. */ + for (imc = 0; imc < imcs; imc++) + read_mem_bw_ioctl_perf_event_ioc_disable(imc); } /* - * get_mem_bw_imc - Memory bandwidth as reported by iMC counters - * @bw_report: Bandwidth report type (reads, writes) + * get_read_mem_bw_imc - Memory read bandwidth as reported by iMC counters * - * Memory bandwidth utilized by a process on a socket can be calculated - * using iMC counters. Perf events are used to read these counters. + * Memory read bandwidth utilized by a process on a socket can be calculated + * using iMC counters' read events. Perf events are used to read these + * counters. * * Return: = 0 on success. < 0 on failure. */ -static int get_mem_bw_imc(const char *bw_report, float *bw_imc) +static int get_read_mem_bw_imc(float *bw_imc) { - float reads, writes, of_mul_read, of_mul_write; + float reads = 0, of_mul_read = 1; int imc; - /* Start all iMC counters to log values (both read and write) */ - reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1; - /* - * Get results which are stored in struct type imc_counter_config + * Log read event values from all iMC counters into + * struct imc_counter_config. * Take overflow into consideration before calculating total bandwidth. */ for (imc = 0; imc < imcs; imc++) { struct imc_counter_config *r = - &imc_counters_config[imc][READ]; - struct imc_counter_config *w = - &imc_counters_config[imc][WRITE]; + &imc_counters_config[imc]; if (read(r->fd, &r->return_value, sizeof(struct membw_read_format)) == -1) { @@ -373,12 +321,6 @@ static int get_mem_bw_imc(const char *bw_report, float *bw_imc) return -1; } - if (read(w->fd, &w->return_value, - sizeof(struct membw_read_format)) == -1) { - ksft_perror("Couldn't get write bandwidth through iMC"); - return -1; - } - __u64 r_time_enabled = r->return_value.time_enabled; __u64 r_time_running = r->return_value.time_running; @@ -386,27 +328,10 @@ static int get_mem_bw_imc(const char *bw_report, float *bw_imc) of_mul_read = (float)r_time_enabled / (float)r_time_running; - __u64 w_time_enabled = w->return_value.time_enabled; - __u64 w_time_running = w->return_value.time_running; - - if (w_time_enabled != w_time_running) - of_mul_write = (float)w_time_enabled / - (float)w_time_running; reads += r->return_value.value * of_mul_read * SCALE; - writes += w->return_value.value * of_mul_write * SCALE; } - if (strcmp(bw_report, "reads") == 0) { - *bw_imc = reads; - return 0; - } - - if (strcmp(bw_report, "writes") == 0) { - *bw_imc = writes; - return 0; - } - - *bw_imc = reads + writes; + *bw_imc = reads; return 0; } @@ -448,7 +373,7 @@ static int get_mem_bw_resctrl(FILE *fp, unsigned long *mbm_total) return 0; } -static pid_t bm_pid, ppid; +static pid_t bm_pid; void ctrlc_handler(int signum, siginfo_t *info, void *ptr) { @@ -506,13 +431,6 @@ void signal_handler_unregister(void) } } -static void parent_exit(pid_t ppid) -{ - kill(ppid, SIGKILL); - umount_resctrlfs(); - exit(EXIT_FAILURE); -} - /* * print_results_bw: the memory bandwidth results are stored in a file * @filename: file that stores the results @@ -552,35 +470,31 @@ static int print_results_bw(char *filename, pid_t bm_pid, float bw_imc, } /* - * measure_mem_bw - Measures memory bandwidth numbers while benchmark runs + * measure_read_mem_bw - Measures read memory bandwidth numbers while benchmark runs * @uparams: User supplied parameters * @param: Parameters passed to resctrl_val() * @bm_pid: PID that runs the benchmark - * @bw_report: Bandwidth report type (reads, writes) * * Measure memory bandwidth from resctrl and from another source which is * perf imc value or could be something else if perf imc event is not * available. Compare the two values to validate resctrl value. It takes * 1 sec to measure the data. + * resctrl does not distinguish between read and write operations so + * its data includes all memory operations. */ -int measure_mem_bw(const struct user_params *uparams, - struct resctrl_val_param *param, pid_t bm_pid, - const char *bw_report) +int measure_read_mem_bw(const struct user_params *uparams, + struct resctrl_val_param *param, pid_t bm_pid) { unsigned long bw_resc, bw_resc_start, bw_resc_end; FILE *mem_bw_fp; float bw_imc; int ret; - bw_report = get_bw_report_type(bw_report); - if (!bw_report) - return -1; - mem_bw_fp = open_mem_bw_resctrl(mbm_total_path); if (!mem_bw_fp) return -1; - ret = perf_open_imc_mem_bw(uparams->cpu); + ret = perf_open_imc_read_mem_bw(uparams->cpu); if (ret < 0) goto close_fp; @@ -590,17 +504,17 @@ int measure_mem_bw(const struct user_params *uparams, rewind(mem_bw_fp); - do_imc_mem_bw_test(); + do_imc_read_mem_bw_test(); ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_end); if (ret < 0) goto close_imc; - ret = get_mem_bw_imc(bw_report, &bw_imc); + ret = get_read_mem_bw_imc(&bw_imc); if (ret < 0) goto close_imc; - perf_close_imc_mem_bw(); + perf_close_imc_read_mem_bw(); fclose(mem_bw_fp); bw_resc = (bw_resc_end - bw_resc_start) / MB; @@ -608,87 +522,30 @@ int measure_mem_bw(const struct user_params *uparams, return print_results_bw(param->filename, bm_pid, bw_imc, bw_resc); close_imc: - perf_close_imc_mem_bw(); + perf_close_imc_read_mem_bw(); close_fp: fclose(mem_bw_fp); return ret; } /* - * run_benchmark - Run a specified benchmark or fill_buf (default benchmark) - * in specified signal. Direct benchmark stdio to /dev/null. - * @signum: signal number - * @info: signal info - * @ucontext: user context in signal handling - */ -static void run_benchmark(int signum, siginfo_t *info, void *ucontext) -{ - int operation, ret, memflush; - char **benchmark_cmd; - size_t span; - bool once; - FILE *fp; - - benchmark_cmd = info->si_ptr; - - /* - * Direct stdio of child to /dev/null, so that only parent writes to - * stdio (console) - */ - fp = freopen("/dev/null", "w", stdout); - if (!fp) { - ksft_perror("Unable to direct benchmark status to /dev/null"); - parent_exit(ppid); - } - - if (strcmp(benchmark_cmd[0], "fill_buf") == 0) { - /* Execute default fill_buf benchmark */ - span = strtoul(benchmark_cmd[1], NULL, 10); - memflush = atoi(benchmark_cmd[2]); - operation = atoi(benchmark_cmd[3]); - if (!strcmp(benchmark_cmd[4], "true")) { - once = true; - } else if (!strcmp(benchmark_cmd[4], "false")) { - once = false; - } else { - ksft_print_msg("Invalid once parameter\n"); - parent_exit(ppid); - } - - if (run_fill_buf(span, memflush, operation, once)) - fprintf(stderr, "Error in running fill buffer\n"); - } else { - /* Execute specified benchmark */ - ret = execvp(benchmark_cmd[0], benchmark_cmd); - if (ret) - ksft_perror("execvp"); - } - - fclose(stdout); - ksft_print_msg("Unable to run specified benchmark\n"); - parent_exit(ppid); -} - -/* * resctrl_val: execute benchmark and measure memory bandwidth on * the benchmark * @test: test information structure * @uparams: user supplied parameters - * @benchmark_cmd: benchmark command and its arguments * @param: parameters passed to resctrl_val() * * Return: 0 when the test was run, < 0 on error. */ int resctrl_val(const struct resctrl_test *test, const struct user_params *uparams, - const char * const *benchmark_cmd, struct resctrl_val_param *param) { - struct sigaction sigact; - int ret = 0, pipefd[2]; - char pipe_message = 0; - union sigval value; + unsigned char *buf = NULL; + cpu_set_t old_affinity; int domain_id; + int ret = 0; + pid_t ppid; if (strcmp(param->filename, "") == 0) sprintf(param->filename, "stdio"); @@ -699,111 +556,65 @@ int resctrl_val(const struct resctrl_test *test, return ret; } - /* - * If benchmark wasn't successfully started by child, then child should - * kill parent, so save parent's pid - */ ppid = getpid(); - if (pipe(pipefd)) { - ksft_perror("Unable to create pipe"); + /* Taskset test to specified CPU. */ + ret = taskset_benchmark(ppid, uparams->cpu, &old_affinity); + if (ret) + return ret; - return -1; + /* Write test to specified control & monitoring group in resctrl FS. */ + ret = write_bm_pid_to_resctrl(ppid, param->ctrlgrp, param->mongrp); + if (ret) + goto reset_affinity; + + if (param->init) { + ret = param->init(param, domain_id); + if (ret) + goto reset_affinity; } /* - * Fork to start benchmark, save child's pid so that it can be killed - * when needed + * If not running user provided benchmark, run the default + * "fill_buf". First phase of "fill_buf" is to prepare the + * buffer that the benchmark will operate on. No measurements + * are needed during this phase and prepared memory will be + * passed to next part of benchmark via copy-on-write thus + * no impact on the benchmark that relies on reading from + * memory only. */ + if (param->fill_buf) { + buf = alloc_buffer(param->fill_buf->buf_size, + param->fill_buf->memflush); + if (!buf) { + ret = -ENOMEM; + goto reset_affinity; + } + } + fflush(stdout); bm_pid = fork(); if (bm_pid == -1) { + ret = -errno; ksft_perror("Unable to fork"); - - return -1; + goto free_buf; } - if (bm_pid == 0) { - /* - * Mask all signals except SIGUSR1, parent uses SIGUSR1 to - * start benchmark - */ - sigfillset(&sigact.sa_mask); - sigdelset(&sigact.sa_mask, SIGUSR1); - - sigact.sa_sigaction = run_benchmark; - sigact.sa_flags = SA_SIGINFO; - - /* Register for "SIGUSR1" signal from parent */ - if (sigaction(SIGUSR1, &sigact, NULL)) { - ksft_perror("Can't register child for signal"); - parent_exit(ppid); - } - - /* Tell parent that child is ready */ - close(pipefd[0]); - pipe_message = 1; - if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) < - sizeof(pipe_message)) { - ksft_perror("Failed signaling parent process"); - close(pipefd[1]); - return -1; - } - close(pipefd[1]); - - /* Suspend child until delivery of "SIGUSR1" from parent */ - sigsuspend(&sigact.sa_mask); - - ksft_perror("Child is done"); - parent_exit(ppid); - } - - ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid); - /* - * The cast removes constness but nothing mutates benchmark_cmd within - * the context of this process. At the receiving process, it becomes - * argv, which is mutable, on exec() but that's after fork() so it - * doesn't matter for the process running the tests. + * What needs to be measured runs in separate process until + * terminated. */ - value.sival_ptr = (void *)benchmark_cmd; - - /* Taskset benchmark to specified cpu */ - ret = taskset_benchmark(bm_pid, uparams->cpu, NULL); - if (ret) - goto out; - - /* Write benchmark to specified control&monitoring grp in resctrl FS */ - ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp); - if (ret) - goto out; - - if (param->init) { - ret = param->init(param, domain_id); - if (ret) - goto out; - } - - /* Parent waits for child to be ready. */ - close(pipefd[1]); - while (pipe_message != 1) { - if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) < - sizeof(pipe_message)) { - ksft_perror("Failed reading message from child process"); - close(pipefd[0]); - goto out; - } + if (bm_pid == 0) { + if (param->fill_buf) + fill_cache_read(buf, param->fill_buf->buf_size, false); + else if (uparams->benchmark_cmd[0]) + execvp(uparams->benchmark_cmd[0], (char **)uparams->benchmark_cmd); + exit(EXIT_SUCCESS); } - close(pipefd[0]); - /* Signal child to start benchmark */ - if (sigqueue(bm_pid, SIGUSR1, value) == -1) { - ksft_perror("sigqueue SIGUSR1 to child"); - ret = -1; - goto out; - } + ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid); - /* Give benchmark enough time to fully run */ + /* Give benchmark enough time to fully run. */ sleep(1); /* Test runs until the callback setup() tells the test to stop. */ @@ -821,8 +632,10 @@ int resctrl_val(const struct resctrl_test *test, break; } -out: kill(bm_pid, SIGKILL); - +free_buf: + free(buf); +reset_affinity: + taskset_restore(ppid, &old_affinity); return ret; } diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c index 250c320349a7..d38d6dd90be4 100644 --- a/tools/testing/selftests/resctrl/resctrlfs.c +++ b/tools/testing/selftests/resctrl/resctrlfs.c @@ -182,7 +182,7 @@ int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size return -1; } - if (fscanf(fp, "%s", cache_str) <= 0) { + if (fscanf(fp, "%63s", cache_str) <= 0) { ksft_perror("Could not get cache_size"); fclose(fp); @@ -831,23 +831,6 @@ int filter_dmesg(void) return 0; } -const char *get_bw_report_type(const char *bw_report) -{ - if (strcmp(bw_report, "reads") == 0) - return bw_report; - if (strcmp(bw_report, "writes") == 0) - return bw_report; - if (strcmp(bw_report, "nt-writes") == 0) { - return "writes"; - } - if (strcmp(bw_report, "total") == 0) - return bw_report; - - fprintf(stderr, "Requested iMC bandwidth report type unavailable\n"); - - return NULL; -} - int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags) { diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 96e812bdf8a4..5b9772cdf265 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -60,12 +60,6 @@ unsigned int rseq_size = -1U; /* Flags used during rseq registration. */ unsigned int rseq_flags; -/* - * rseq feature size supported by the kernel. 0 if the registration was - * unsuccessful. - */ -unsigned int rseq_feature_size = -1U; - static int rseq_ownership; static int rseq_reg_success; /* At least one rseq registration has succeded. */ @@ -111,6 +105,43 @@ int rseq_available(void) } } +/* The rseq areas need to be at least 32 bytes. */ +static +unsigned int get_rseq_min_alloc_size(void) +{ + unsigned int alloc_size = rseq_size; + + if (alloc_size < ORIG_RSEQ_ALLOC_SIZE) + alloc_size = ORIG_RSEQ_ALLOC_SIZE; + return alloc_size; +} + +/* + * Return the feature size supported by the kernel. + * + * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE): + * + * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) + * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE). + * + * It should never return a value below ORIG_RSEQ_FEATURE_SIZE. + */ +static +unsigned int get_rseq_kernel_feature_size(void) +{ + unsigned long auxv_rseq_feature_size, auxv_rseq_align; + + auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); + assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); + + auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); + assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); + if (auxv_rseq_feature_size) + return auxv_rseq_feature_size; + else + return ORIG_RSEQ_FEATURE_SIZE; +} + int rseq_register_current_thread(void) { int rc; @@ -119,7 +150,7 @@ int rseq_register_current_thread(void) /* Treat libc's ownership as a successful registration. */ return 0; } - rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); if (rc) { if (RSEQ_READ_ONCE(rseq_reg_success)) { /* Incoherent success/failure within process. */ @@ -140,28 +171,12 @@ int rseq_unregister_current_thread(void) /* Treat libc's ownership as a successful unregistration. */ return 0; } - rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (rc) return -1; return 0; } -static -unsigned int get_rseq_feature_size(void) -{ - unsigned long auxv_rseq_feature_size, auxv_rseq_align; - - auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); - assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); - - auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); - assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); - if (auxv_rseq_feature_size) - return auxv_rseq_feature_size; - else - return ORIG_RSEQ_FEATURE_SIZE; -} - static __attribute__((constructor)) void rseq_init(void) { @@ -178,28 +193,54 @@ void rseq_init(void) } if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && *libc_rseq_size_p != 0) { + unsigned int libc_rseq_size; + /* rseq registration owned by glibc */ rseq_offset = *libc_rseq_offset_p; - rseq_size = *libc_rseq_size_p; + libc_rseq_size = *libc_rseq_size_p; rseq_flags = *libc_rseq_flags_p; - rseq_feature_size = get_rseq_feature_size(); - if (rseq_feature_size > rseq_size) - rseq_feature_size = rseq_size; + + /* + * Previous versions of glibc expose the value + * 32 even though the kernel only supported 20 + * bytes initially. Therefore treat 32 as a + * special-case. glibc 2.40 exposes a 20 bytes + * __rseq_size without using getauxval(3) to + * query the supported size, while still allocating a 32 + * bytes area. Also treat 20 as a special-case. + * + * Special-cases are handled by using the following + * value as active feature set size: + * + * rseq_size = min(32, get_rseq_kernel_feature_size()) + */ + switch (libc_rseq_size) { + case ORIG_RSEQ_FEATURE_SIZE: + fallthrough; + case ORIG_RSEQ_ALLOC_SIZE: + { + unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size(); + + if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE) + rseq_size = rseq_kernel_feature_size; + else + rseq_size = ORIG_RSEQ_ALLOC_SIZE; + break; + } + default: + /* Otherwise just use the __rseq_size from libc as rseq_size. */ + rseq_size = libc_rseq_size; + break; + } return; } rseq_ownership = 1; if (!rseq_available()) { rseq_size = 0; - rseq_feature_size = 0; return; } rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); rseq_flags = 0; - rseq_feature_size = get_rseq_feature_size(); - if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE) - rseq_size = ORIG_RSEQ_ALLOC_SIZE; - else - rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE; } static __attribute__((destructor)) @@ -209,7 +250,6 @@ void rseq_exit(void) return; rseq_offset = 0; rseq_size = -1U; - rseq_feature_size = -1U; rseq_ownership = 0; } diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index d7364ea4d201..4e217b620e0c 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -68,12 +68,6 @@ extern unsigned int rseq_size; /* Flags used during rseq registration. */ extern unsigned int rseq_flags; -/* - * rseq feature size supported by the kernel. 0 if the registration was - * unsuccessful. - */ -extern unsigned int rseq_feature_size; - enum rseq_mo { RSEQ_MO_RELAXED = 0, RSEQ_MO_CONSUME = 1, /* Unused */ @@ -193,7 +187,7 @@ static inline uint32_t rseq_current_cpu(void) static inline bool rseq_node_id_available(void) { - return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, node_id); + return (int) rseq_size >= rseq_offsetofend(struct rseq_abi, node_id); } /* @@ -207,7 +201,7 @@ static inline uint32_t rseq_current_node_id(void) static inline bool rseq_mm_cid_available(void) { - return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, mm_cid); + return (int) rseq_size >= rseq_offsetofend(struct rseq_abi, mm_cid); } static inline uint32_t rseq_current_mm_cid(void) diff --git a/tools/testing/selftests/rtc/Makefile b/tools/testing/selftests/rtc/Makefile index 55198ecc04db..9dbb395c5c79 100644 --- a/tools/testing/selftests/rtc/Makefile +++ b/tools/testing/selftests/rtc/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -O3 -Wl,-no-as-needed -Wall +CFLAGS += -O3 -Wl,-no-as-needed -Wall -I$(top_srcdir)/usr/include LDLIBS += -lrt -lpthread -lm TEST_GEN_PROGS = rtctest diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c index 9647b14b47c5..e103097d0b5b 100644 --- a/tools/testing/selftests/rtc/rtctest.c +++ b/tools/testing/selftests/rtc/rtctest.c @@ -25,6 +25,12 @@ static char *rtc_file = "/dev/rtc0"; +enum rtc_alarm_state { + RTC_ALARM_UNKNOWN, + RTC_ALARM_ENABLED, + RTC_ALARM_DISABLED, +}; + FIXTURE(rtc) { int fd; }; @@ -82,6 +88,24 @@ static void nanosleep_with_retries(long ns) } } +static enum rtc_alarm_state get_rtc_alarm_state(int fd) +{ + struct rtc_param param = { 0 }; + int rc; + + /* Validate kernel reflects unsupported RTC alarm state */ + param.param = RTC_PARAM_FEATURES; + param.index = 0; + rc = ioctl(fd, RTC_PARAM_GET, ¶m); + if (rc < 0) + return RTC_ALARM_UNKNOWN; + + if ((param.uvalue & _BITUL(RTC_FEATURE_ALARM)) == 0) + return RTC_ALARM_DISABLED; + + return RTC_ALARM_ENABLED; +} + TEST_F_TIMEOUT(rtc, date_read_loop, READ_LOOP_DURATION_SEC + 2) { int rc; long iter_count = 0; @@ -197,11 +221,16 @@ TEST_F(rtc, alarm_alm_set) { fd_set readfds; time_t secs, new; int rc; + enum rtc_alarm_state alarm_state = RTC_ALARM_UNKNOWN; if (self->fd == -1 && errno == ENOENT) SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); + alarm_state = get_rtc_alarm_state(self->fd); + if (alarm_state == RTC_ALARM_DISABLED) + SKIP(return, "Skipping test since alarms are not supported."); + rc = ioctl(self->fd, RTC_RD_TIME, &tm); ASSERT_NE(-1, rc); @@ -210,6 +239,11 @@ TEST_F(rtc, alarm_alm_set) { rc = ioctl(self->fd, RTC_ALM_SET, &tm); if (rc == -1) { + /* + * Report error if rtc alarm was enabled. Fallback to check ioctl + * error number if rtc alarm state is unknown. + */ + ASSERT_EQ(RTC_ALARM_UNKNOWN, alarm_state); ASSERT_EQ(EINVAL, errno); TH_LOG("skip alarms are not supported."); return; @@ -255,11 +289,16 @@ TEST_F(rtc, alarm_wkalm_set) { fd_set readfds; time_t secs, new; int rc; + enum rtc_alarm_state alarm_state = RTC_ALARM_UNKNOWN; if (self->fd == -1 && errno == ENOENT) SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); + alarm_state = get_rtc_alarm_state(self->fd); + if (alarm_state == RTC_ALARM_DISABLED) + SKIP(return, "Skipping test since alarms are not supported."); + rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); ASSERT_NE(-1, rc); @@ -270,6 +309,11 @@ TEST_F(rtc, alarm_wkalm_set) { rc = ioctl(self->fd, RTC_WKALM_SET, &alarm); if (rc == -1) { + /* + * Report error if rtc alarm was enabled. Fallback to check ioctl + * error number if rtc alarm state is unknown. + */ + ASSERT_EQ(RTC_ALARM_UNKNOWN, alarm_state); ASSERT_EQ(EINVAL, errno); TH_LOG("skip alarms are not supported."); return; @@ -307,11 +351,16 @@ TEST_F_TIMEOUT(rtc, alarm_alm_set_minute, 65) { fd_set readfds; time_t secs, new; int rc; + enum rtc_alarm_state alarm_state = RTC_ALARM_UNKNOWN; if (self->fd == -1 && errno == ENOENT) SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); + alarm_state = get_rtc_alarm_state(self->fd); + if (alarm_state == RTC_ALARM_DISABLED) + SKIP(return, "Skipping test since alarms are not supported."); + rc = ioctl(self->fd, RTC_RD_TIME, &tm); ASSERT_NE(-1, rc); @@ -320,6 +369,11 @@ TEST_F_TIMEOUT(rtc, alarm_alm_set_minute, 65) { rc = ioctl(self->fd, RTC_ALM_SET, &tm); if (rc == -1) { + /* + * Report error if rtc alarm was enabled. Fallback to check ioctl + * error number if rtc alarm state is unknown. + */ + ASSERT_EQ(RTC_ALARM_UNKNOWN, alarm_state); ASSERT_EQ(EINVAL, errno); TH_LOG("skip alarms are not supported."); return; @@ -365,11 +419,16 @@ TEST_F_TIMEOUT(rtc, alarm_wkalm_set_minute, 65) { fd_set readfds; time_t secs, new; int rc; + enum rtc_alarm_state alarm_state = RTC_ALARM_UNKNOWN; if (self->fd == -1 && errno == ENOENT) SKIP(return, "Skipping test since %s does not exist", rtc_file); ASSERT_NE(-1, self->fd); + alarm_state = get_rtc_alarm_state(self->fd); + if (alarm_state == RTC_ALARM_DISABLED) + SKIP(return, "Skipping test since alarms are not supported."); + rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time); ASSERT_NE(-1, rc); @@ -380,6 +439,11 @@ TEST_F_TIMEOUT(rtc, alarm_wkalm_set_minute, 65) { rc = ioctl(self->fd, RTC_WKALM_SET, &alarm); if (rc == -1) { + /* + * Report error if rtc alarm was enabled. Fallback to check ioctl + * error number if rtc alarm state is unknown. + */ + ASSERT_EQ(RTC_ALARM_UNKNOWN, alarm_state); ASSERT_EQ(EINVAL, errno); TH_LOG("skip alarms are not supported."); return; @@ -412,6 +476,8 @@ TEST_F_TIMEOUT(rtc, alarm_wkalm_set_minute, 65) { int main(int argc, char **argv) { + int ret = -1; + switch (argc) { case 2: rtc_file = argv[1]; @@ -423,5 +489,12 @@ int main(int argc, char **argv) return 1; } - return test_harness_run(argc, argv); + /* Run the test if rtc_file is accessible */ + if (access(rtc_file, R_OK) == 0) + ret = test_harness_run(argc, argv); + else + ksft_exit_skip("[SKIP]: Cannot access rtc file %s - Exiting\n", + rtc_file); + + return ret; } diff --git a/tools/testing/selftests/sched_ext/Makefile b/tools/testing/selftests/sched_ext/Makefile index 0754a2c110a1..011762224600 100644 --- a/tools/testing/selftests/sched_ext/Makefile +++ b/tools/testing/selftests/sched_ext/Makefile @@ -3,23 +3,12 @@ include ../../../build/Build.include include ../../../scripts/Makefile.arch include ../../../scripts/Makefile.include -include ../lib.mk -ifneq ($(LLVM),) -ifneq ($(filter %/,$(LLVM)),) -LLVM_PREFIX := $(LLVM) -else ifneq ($(filter -%,$(LLVM)),) -LLVM_SUFFIX := $(LLVM) -endif - -CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as -else -CC := gcc -endif # LLVM +TEST_GEN_PROGS := runner -ifneq ($(CROSS_COMPILE),) -$(error CROSS_COMPILE not supported for scx selftests) -endif # CROSS_COMPILE +# override lib.mk's default rules +OVERRIDE_TARGETS := 1 +include ../lib.mk CURDIR := $(abspath .) REPOROOT := $(abspath ../../../..) @@ -34,18 +23,23 @@ GENHDR := $(GENDIR)/autoconf.h SCXTOOLSDIR := $(TOOLSDIR)/sched_ext SCXTOOLSINCDIR := $(TOOLSDIR)/sched_ext/include -OUTPUT_DIR := $(CURDIR)/build +OUTPUT_DIR := $(OUTPUT)/build OBJ_DIR := $(OUTPUT_DIR)/obj INCLUDE_DIR := $(OUTPUT_DIR)/include BPFOBJ_DIR := $(OBJ_DIR)/libbpf SCXOBJ_DIR := $(OBJ_DIR)/sched_ext BPFOBJ := $(BPFOBJ_DIR)/libbpf.a LIBBPF_OUTPUT := $(OBJ_DIR)/libbpf/libbpf.a -DEFAULT_BPFTOOL := $(OUTPUT_DIR)/sbin/bpftool -HOST_BUILD_DIR := $(OBJ_DIR) -HOST_OUTPUT_DIR := $(OUTPUT_DIR) -VMLINUX_BTF_PATHS ?= ../../../../vmlinux \ +DEFAULT_BPFTOOL := $(OUTPUT_DIR)/host/sbin/bpftool +HOST_OBJ_DIR := $(OBJ_DIR)/host/bpftool +HOST_LIBBPF_OUTPUT := $(OBJ_DIR)/host/libbpf/ +HOST_LIBBPF_DESTDIR := $(OUTPUT_DIR)/host/ +HOST_DESTDIR := $(OUTPUT_DIR)/host/ + +VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ + ../../../../vmlinux \ /sys/kernel/btf/vmlinux \ /boot/vmlinux-$(shell uname -r) VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) @@ -80,17 +74,23 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \ # Use '-idirafter': Don't interfere with include mechanics except where the # build would have failed anyways. define get_sys_includes -$(shell $(1) -v -E - </dev/null 2>&1 \ +$(shell $(1) $(2) -v -E - </dev/null 2>&1 \ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \ -$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') +$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') endef +ifneq ($(CROSS_COMPILE),) +CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%)) +endif + +CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH)) + BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \ $(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian) \ -I$(CURDIR)/include -I$(CURDIR)/include/bpf-compat \ -I$(INCLUDE_DIR) -I$(APIDIR) -I$(SCXTOOLSINCDIR) \ -I$(REPOROOT)/include \ - $(call get_sys_includes,$(CLANG)) \ + $(CLANG_SYS_INCLUDES) \ -Wall -Wno-compare-distinct-pointer-types \ -Wno-incompatible-function-pointer-types \ -O2 -mcpu=v3 @@ -98,7 +98,7 @@ BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) \ # sort removes libbpf duplicates when not cross-building MAKE_DIRS := $(sort $(OBJ_DIR)/libbpf $(OBJ_DIR)/libbpf \ $(OBJ_DIR)/bpftool $(OBJ_DIR)/resolve_btfids \ - $(INCLUDE_DIR) $(SCXOBJ_DIR)) + $(HOST_OBJ_DIR) $(INCLUDE_DIR) $(SCXOBJ_DIR)) $(MAKE_DIRS): $(call msg,MKDIR,,$@) @@ -108,18 +108,19 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ $(APIDIR)/linux/bpf.h \ | $(OBJ_DIR)/libbpf $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(OBJ_DIR)/libbpf/ \ + ARCH=$(ARCH) CC="$(CC)" CROSS_COMPILE=$(CROSS_COMPILE) \ EXTRA_CFLAGS='-g -O0 -fPIC' \ DESTDIR=$(OUTPUT_DIR) prefix= all install_headers $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ - $(LIBBPF_OUTPUT) | $(OBJ_DIR)/bpftool + $(LIBBPF_OUTPUT) | $(HOST_OBJ_DIR) $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) \ EXTRA_CFLAGS='-g -O0' \ - OUTPUT=$(OBJ_DIR)/bpftool/ \ - LIBBPF_OUTPUT=$(OBJ_DIR)/libbpf/ \ - LIBBPF_DESTDIR=$(OUTPUT_DIR)/ \ - prefix= DESTDIR=$(OUTPUT_DIR)/ install-bin + OUTPUT=$(HOST_OBJ_DIR)/ \ + LIBBPF_OUTPUT=$(HOST_LIBBPF_OUTPUT) \ + LIBBPF_DESTDIR=$(HOST_LIBBPF_DESTDIR) \ + prefix= DESTDIR=$(HOST_DESTDIR) install-bin $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR) ifeq ($(VMLINUX_H),) @@ -150,9 +151,7 @@ $(INCLUDE_DIR)/%.bpf.skel.h: $(SCXOBJ_DIR)/%.bpf.o $(INCLUDE_DIR)/vmlinux.h $(BP override define CLEAN rm -rf $(OUTPUT_DIR) - rm -f *.o *.bpf.o *.bpf.skel.h *.bpf.subskel.h rm -f $(TEST_GEN_PROGS) - rm -f runner endef # Every testcase takes all of the BPF progs are dependencies by default. This @@ -185,7 +184,7 @@ auto-test-targets := \ testcase-targets := $(addsuffix .o,$(addprefix $(SCXOBJ_DIR)/,$(auto-test-targets))) -$(SCXOBJ_DIR)/runner.o: runner.c | $(SCXOBJ_DIR) +$(SCXOBJ_DIR)/runner.o: runner.c | $(SCXOBJ_DIR) $(BPFOBJ) $(CC) $(CFLAGS) -c $< -o $@ # Create all of the test targets object files, whose testcase objects will be @@ -196,21 +195,15 @@ $(SCXOBJ_DIR)/runner.o: runner.c | $(SCXOBJ_DIR) # function doesn't support using implicit rules otherwise. $(testcase-targets): $(SCXOBJ_DIR)/%.o: %.c $(SCXOBJ_DIR)/runner.o $(all_test_bpfprogs) | $(SCXOBJ_DIR) $(eval test=$(patsubst %.o,%.c,$(notdir $@))) - $(CC) $(CFLAGS) -c $< -o $@ $(SCXOBJ_DIR)/runner.o + $(CC) $(CFLAGS) -c $< -o $@ $(SCXOBJ_DIR)/util.o: util.c | $(SCXOBJ_DIR) $(CC) $(CFLAGS) -c $< -o $@ -runner: $(SCXOBJ_DIR)/runner.o $(SCXOBJ_DIR)/util.o $(BPFOBJ) $(testcase-targets) +$(OUTPUT)/runner: $(SCXOBJ_DIR)/runner.o $(SCXOBJ_DIR)/util.o $(BPFOBJ) $(testcase-targets) @echo "$(testcase-targets)" $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) -TEST_GEN_PROGS := runner - -all: runner - -.PHONY: all clean help - .DEFAULT_GOAL := all .DELETE_ON_ERROR: diff --git a/tools/testing/selftests/sched_ext/create_dsq.bpf.c b/tools/testing/selftests/sched_ext/create_dsq.bpf.c index 23f79ed343f0..2cfc4ffd60e2 100644 --- a/tools/testing/selftests/sched_ext/create_dsq.bpf.c +++ b/tools/testing/selftests/sched_ext/create_dsq.bpf.c @@ -51,8 +51,8 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(create_dsq_init) SEC(".struct_ops.link") struct sched_ext_ops create_dsq_ops = { - .init_task = create_dsq_init_task, - .exit_task = create_dsq_exit_task, - .init = create_dsq_init, + .init_task = (void *) create_dsq_init_task, + .exit_task = (void *) create_dsq_exit_task, + .init = (void *) create_dsq_init, .name = "create_dsq", }; diff --git a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c index e97ad41d354a..37d9bf6fb745 100644 --- a/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c +++ b/tools/testing/selftests/sched_ext/ddsp_bogus_dsq_fail.bpf.c @@ -35,8 +35,8 @@ void BPF_STRUCT_OPS(ddsp_bogus_dsq_fail_exit, struct scx_exit_info *ei) SEC(".struct_ops.link") struct sched_ext_ops ddsp_bogus_dsq_fail_ops = { - .select_cpu = ddsp_bogus_dsq_fail_select_cpu, - .exit = ddsp_bogus_dsq_fail_exit, + .select_cpu = (void *) ddsp_bogus_dsq_fail_select_cpu, + .exit = (void *) ddsp_bogus_dsq_fail_exit, .name = "ddsp_bogus_dsq_fail", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c index dde7e7dafbfb..dffc97d9cdf1 100644 --- a/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c +++ b/tools/testing/selftests/sched_ext/ddsp_vtimelocal_fail.bpf.c @@ -32,8 +32,8 @@ void BPF_STRUCT_OPS(ddsp_vtimelocal_fail_exit, struct scx_exit_info *ei) SEC(".struct_ops.link") struct sched_ext_ops ddsp_vtimelocal_fail_ops = { - .select_cpu = ddsp_vtimelocal_fail_select_cpu, - .exit = ddsp_vtimelocal_fail_exit, + .select_cpu = (void *) ddsp_vtimelocal_fail_select_cpu, + .exit = (void *) ddsp_vtimelocal_fail_exit, .name = "ddsp_vtimelocal_fail", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c index efb4672decb4..6a7db1502c29 100644 --- a/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c +++ b/tools/testing/selftests/sched_ext/dsp_local_on.bpf.c @@ -56,10 +56,10 @@ void BPF_STRUCT_OPS(dsp_local_on_exit, struct scx_exit_info *ei) SEC(".struct_ops.link") struct sched_ext_ops dsp_local_on_ops = { - .select_cpu = dsp_local_on_select_cpu, - .enqueue = dsp_local_on_enqueue, - .dispatch = dsp_local_on_dispatch, - .exit = dsp_local_on_exit, + .select_cpu = (void *) dsp_local_on_select_cpu, + .enqueue = (void *) dsp_local_on_enqueue, + .dispatch = (void *) dsp_local_on_dispatch, + .exit = (void *) dsp_local_on_exit, .name = "dsp_local_on", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c index b0b99531d5d5..e1bd13e48889 100644 --- a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c +++ b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.bpf.c @@ -12,10 +12,18 @@ char _license[] SEC("license") = "GPL"; +u32 exit_kind; + +void BPF_STRUCT_OPS_SLEEPABLE(enq_last_no_enq_fails_exit, struct scx_exit_info *info) +{ + exit_kind = info->kind; +} + SEC(".struct_ops.link") struct sched_ext_ops enq_last_no_enq_fails_ops = { .name = "enq_last_no_enq_fails", /* Need to define ops.enqueue() with SCX_OPS_ENQ_LAST */ .flags = SCX_OPS_ENQ_LAST, + .exit = (void *) enq_last_no_enq_fails_exit, .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c index 2a3eda5e2c0b..73e679953e27 100644 --- a/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c +++ b/tools/testing/selftests/sched_ext/enq_last_no_enq_fails.c @@ -31,8 +31,12 @@ static enum scx_test_status run(void *ctx) struct bpf_link *link; link = bpf_map__attach_struct_ops(skel->maps.enq_last_no_enq_fails_ops); - if (link) { - SCX_ERR("Incorrectly succeeded in to attaching scheduler"); + if (!link) { + SCX_ERR("Incorrectly failed at attaching scheduler"); + return SCX_TEST_FAIL; + } + if (!skel->bss->exit_kind) { + SCX_ERR("Incorrectly stayed loaded"); return SCX_TEST_FAIL; } @@ -50,7 +54,7 @@ static void cleanup(void *ctx) struct scx_test enq_last_no_enq_fails = { .name = "enq_last_no_enq_fails", - .description = "Verify we fail to load a scheduler if we specify " + .description = "Verify we eject a scheduler if we specify " "the SCX_OPS_ENQ_LAST flag without defining " "ops.enqueue()", .setup = setup, diff --git a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c index b3dfc1033cd6..1efb50d61040 100644 --- a/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c +++ b/tools/testing/selftests/sched_ext/enq_select_cpu_fails.bpf.c @@ -36,8 +36,8 @@ void BPF_STRUCT_OPS(enq_select_cpu_fails_enqueue, struct task_struct *p, SEC(".struct_ops.link") struct sched_ext_ops enq_select_cpu_fails_ops = { - .select_cpu = enq_select_cpu_fails_select_cpu, - .enqueue = enq_select_cpu_fails_enqueue, + .select_cpu = (void *) enq_select_cpu_fails_select_cpu, + .enqueue = (void *) enq_select_cpu_fails_enqueue, .name = "enq_select_cpu_fails", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/exit.bpf.c b/tools/testing/selftests/sched_ext/exit.bpf.c index ae12ddaac921..d75d4faf07f6 100644 --- a/tools/testing/selftests/sched_ext/exit.bpf.c +++ b/tools/testing/selftests/sched_ext/exit.bpf.c @@ -15,6 +15,8 @@ UEI_DEFINE(uei); #define EXIT_CLEANLY() scx_bpf_exit(exit_point, "%d", exit_point) +#define DSQ_ID 0 + s32 BPF_STRUCT_OPS(exit_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags) { @@ -31,7 +33,7 @@ void BPF_STRUCT_OPS(exit_enqueue, struct task_struct *p, u64 enq_flags) if (exit_point == EXIT_ENQUEUE) EXIT_CLEANLY(); - scx_bpf_dispatch(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, enq_flags); + scx_bpf_dispatch(p, DSQ_ID, SCX_SLICE_DFL, enq_flags); } void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p) @@ -39,7 +41,7 @@ void BPF_STRUCT_OPS(exit_dispatch, s32 cpu, struct task_struct *p) if (exit_point == EXIT_DISPATCH) EXIT_CLEANLY(); - scx_bpf_consume(SCX_DSQ_GLOBAL); + scx_bpf_consume(DSQ_ID); } void BPF_STRUCT_OPS(exit_enable, struct task_struct *p) @@ -67,18 +69,18 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(exit_init) if (exit_point == EXIT_INIT) EXIT_CLEANLY(); - return 0; + return scx_bpf_create_dsq(DSQ_ID, -1); } SEC(".struct_ops.link") struct sched_ext_ops exit_ops = { - .select_cpu = exit_select_cpu, - .enqueue = exit_enqueue, - .dispatch = exit_dispatch, - .init_task = exit_init_task, - .enable = exit_enable, - .exit = exit_exit, - .init = exit_init, + .select_cpu = (void *) exit_select_cpu, + .enqueue = (void *) exit_enqueue, + .dispatch = (void *) exit_dispatch, + .init_task = (void *) exit_init_task, + .enable = (void *) exit_enable, + .exit = (void *) exit_exit, + .init = (void *) exit_init, .name = "exit", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/hotplug.bpf.c b/tools/testing/selftests/sched_ext/hotplug.bpf.c index 8f2601db39f3..6c9f25c9bf53 100644 --- a/tools/testing/selftests/sched_ext/hotplug.bpf.c +++ b/tools/testing/selftests/sched_ext/hotplug.bpf.c @@ -46,16 +46,16 @@ void BPF_STRUCT_OPS_SLEEPABLE(hotplug_cpu_offline, s32 cpu) SEC(".struct_ops.link") struct sched_ext_ops hotplug_cb_ops = { - .cpu_online = hotplug_cpu_online, - .cpu_offline = hotplug_cpu_offline, - .exit = hotplug_exit, + .cpu_online = (void *) hotplug_cpu_online, + .cpu_offline = (void *) hotplug_cpu_offline, + .exit = (void *) hotplug_exit, .name = "hotplug_cbs", .timeout_ms = 1000U, }; SEC(".struct_ops.link") struct sched_ext_ops hotplug_nocb_ops = { - .exit = hotplug_exit, + .exit = (void *) hotplug_exit, .name = "hotplug_nocbs", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/init_enable_count.bpf.c b/tools/testing/selftests/sched_ext/init_enable_count.bpf.c index 47ea89a626c3..5eb9edb1837d 100644 --- a/tools/testing/selftests/sched_ext/init_enable_count.bpf.c +++ b/tools/testing/selftests/sched_ext/init_enable_count.bpf.c @@ -45,9 +45,9 @@ void BPF_STRUCT_OPS(cnt_disable, struct task_struct *p) SEC(".struct_ops.link") struct sched_ext_ops init_enable_count_ops = { - .init_task = cnt_init_task, - .exit_task = cnt_exit_task, - .enable = cnt_enable, - .disable = cnt_disable, + .init_task = (void *) cnt_init_task, + .exit_task = (void *) cnt_exit_task, + .enable = (void *) cnt_enable, + .disable = (void *) cnt_disable, .name = "init_enable_count", }; diff --git a/tools/testing/selftests/sched_ext/maximal.bpf.c b/tools/testing/selftests/sched_ext/maximal.bpf.c index 00bfa9cb95d3..4d4cd8d966db 100644 --- a/tools/testing/selftests/sched_ext/maximal.bpf.c +++ b/tools/testing/selftests/sched_ext/maximal.bpf.c @@ -131,34 +131,34 @@ void BPF_STRUCT_OPS(maximal_exit, struct scx_exit_info *info) SEC(".struct_ops.link") struct sched_ext_ops maximal_ops = { - .select_cpu = maximal_select_cpu, - .enqueue = maximal_enqueue, - .dequeue = maximal_dequeue, - .dispatch = maximal_dispatch, - .runnable = maximal_runnable, - .running = maximal_running, - .stopping = maximal_stopping, - .quiescent = maximal_quiescent, - .yield = maximal_yield, - .core_sched_before = maximal_core_sched_before, - .set_weight = maximal_set_weight, - .set_cpumask = maximal_set_cpumask, - .update_idle = maximal_update_idle, - .cpu_acquire = maximal_cpu_acquire, - .cpu_release = maximal_cpu_release, - .cpu_online = maximal_cpu_online, - .cpu_offline = maximal_cpu_offline, - .init_task = maximal_init_task, - .enable = maximal_enable, - .exit_task = maximal_exit_task, - .disable = maximal_disable, - .cgroup_init = maximal_cgroup_init, - .cgroup_exit = maximal_cgroup_exit, - .cgroup_prep_move = maximal_cgroup_prep_move, - .cgroup_move = maximal_cgroup_move, - .cgroup_cancel_move = maximal_cgroup_cancel_move, - .cgroup_set_weight = maximal_cgroup_set_weight, - .init = maximal_init, - .exit = maximal_exit, + .select_cpu = (void *) maximal_select_cpu, + .enqueue = (void *) maximal_enqueue, + .dequeue = (void *) maximal_dequeue, + .dispatch = (void *) maximal_dispatch, + .runnable = (void *) maximal_runnable, + .running = (void *) maximal_running, + .stopping = (void *) maximal_stopping, + .quiescent = (void *) maximal_quiescent, + .yield = (void *) maximal_yield, + .core_sched_before = (void *) maximal_core_sched_before, + .set_weight = (void *) maximal_set_weight, + .set_cpumask = (void *) maximal_set_cpumask, + .update_idle = (void *) maximal_update_idle, + .cpu_acquire = (void *) maximal_cpu_acquire, + .cpu_release = (void *) maximal_cpu_release, + .cpu_online = (void *) maximal_cpu_online, + .cpu_offline = (void *) maximal_cpu_offline, + .init_task = (void *) maximal_init_task, + .enable = (void *) maximal_enable, + .exit_task = (void *) maximal_exit_task, + .disable = (void *) maximal_disable, + .cgroup_init = (void *) maximal_cgroup_init, + .cgroup_exit = (void *) maximal_cgroup_exit, + .cgroup_prep_move = (void *) maximal_cgroup_prep_move, + .cgroup_move = (void *) maximal_cgroup_move, + .cgroup_cancel_move = (void *) maximal_cgroup_cancel_move, + .cgroup_set_weight = (void *) maximal_cgroup_set_weight, + .init = (void *) maximal_init, + .exit = (void *) maximal_exit, .name = "maximal", }; diff --git a/tools/testing/selftests/sched_ext/maybe_null.bpf.c b/tools/testing/selftests/sched_ext/maybe_null.bpf.c index 27d0f386acfb..cf4ae870cd4e 100644 --- a/tools/testing/selftests/sched_ext/maybe_null.bpf.c +++ b/tools/testing/selftests/sched_ext/maybe_null.bpf.c @@ -29,8 +29,8 @@ bool BPF_STRUCT_OPS(maybe_null_success_yield, struct task_struct *from, SEC(".struct_ops.link") struct sched_ext_ops maybe_null_success = { - .dispatch = maybe_null_success_dispatch, - .yield = maybe_null_success_yield, - .enable = maybe_null_running, + .dispatch = (void *) maybe_null_success_dispatch, + .yield = (void *) maybe_null_success_yield, + .enable = (void *) maybe_null_running, .name = "minimal", }; diff --git a/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c b/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c index c0641050271d..ec724d7b33d1 100644 --- a/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c +++ b/tools/testing/selftests/sched_ext/maybe_null_fail_dsp.bpf.c @@ -19,7 +19,7 @@ void BPF_STRUCT_OPS(maybe_null_fail_dispatch, s32 cpu, struct task_struct *p) SEC(".struct_ops.link") struct sched_ext_ops maybe_null_fail = { - .dispatch = maybe_null_fail_dispatch, - .enable = maybe_null_running, + .dispatch = (void *) maybe_null_fail_dispatch, + .enable = (void *) maybe_null_running, .name = "maybe_null_fail_dispatch", }; diff --git a/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c b/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c index 3c1740028e3b..e6552cace020 100644 --- a/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c +++ b/tools/testing/selftests/sched_ext/maybe_null_fail_yld.bpf.c @@ -22,7 +22,7 @@ bool BPF_STRUCT_OPS(maybe_null_fail_yield, struct task_struct *from, SEC(".struct_ops.link") struct sched_ext_ops maybe_null_fail = { - .yield = maybe_null_fail_yield, - .enable = maybe_null_running, + .yield = (void *) maybe_null_fail_yield, + .enable = (void *) maybe_null_running, .name = "maybe_null_fail_yield", }; diff --git a/tools/testing/selftests/sched_ext/prog_run.bpf.c b/tools/testing/selftests/sched_ext/prog_run.bpf.c index 6a4d7c48e3f2..00c267626a68 100644 --- a/tools/testing/selftests/sched_ext/prog_run.bpf.c +++ b/tools/testing/selftests/sched_ext/prog_run.bpf.c @@ -28,6 +28,6 @@ void BPF_STRUCT_OPS(prog_run_exit, struct scx_exit_info *ei) SEC(".struct_ops.link") struct sched_ext_ops prog_run_ops = { - .exit = prog_run_exit, + .exit = (void *) prog_run_exit, .name = "prog_run", }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c index 2ed2991afafe..f171ac470970 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl.bpf.c @@ -35,6 +35,6 @@ void BPF_STRUCT_OPS(select_cpu_dfl_enqueue, struct task_struct *p, SEC(".struct_ops.link") struct sched_ext_ops select_cpu_dfl_ops = { - .enqueue = select_cpu_dfl_enqueue, + .enqueue = (void *) select_cpu_dfl_enqueue, .name = "select_cpu_dfl", }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c index 4bb5abb2d369..9efdbb7da928 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dfl_nodispatch.bpf.c @@ -82,8 +82,8 @@ s32 BPF_STRUCT_OPS(select_cpu_dfl_nodispatch_init_task, SEC(".struct_ops.link") struct sched_ext_ops select_cpu_dfl_nodispatch_ops = { - .select_cpu = select_cpu_dfl_nodispatch_select_cpu, - .enqueue = select_cpu_dfl_nodispatch_enqueue, - .init_task = select_cpu_dfl_nodispatch_init_task, + .select_cpu = (void *) select_cpu_dfl_nodispatch_select_cpu, + .enqueue = (void *) select_cpu_dfl_nodispatch_enqueue, + .init_task = (void *) select_cpu_dfl_nodispatch_init_task, .name = "select_cpu_dfl_nodispatch", }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c index f0b96a4a04b2..59bfc4f36167 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch.bpf.c @@ -35,7 +35,7 @@ dispatch: SEC(".struct_ops.link") struct sched_ext_ops select_cpu_dispatch_ops = { - .select_cpu = select_cpu_dispatch_select_cpu, + .select_cpu = (void *) select_cpu_dispatch_select_cpu, .name = "select_cpu_dispatch", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c index 7b42ddce0f56..3bbd5fcdfb18 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_bad_dsq.bpf.c @@ -30,8 +30,8 @@ void BPF_STRUCT_OPS(select_cpu_dispatch_bad_dsq_exit, struct scx_exit_info *ei) SEC(".struct_ops.link") struct sched_ext_ops select_cpu_dispatch_bad_dsq_ops = { - .select_cpu = select_cpu_dispatch_bad_dsq_select_cpu, - .exit = select_cpu_dispatch_bad_dsq_exit, + .select_cpu = (void *) select_cpu_dispatch_bad_dsq_select_cpu, + .exit = (void *) select_cpu_dispatch_bad_dsq_exit, .name = "select_cpu_dispatch_bad_dsq", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c index 653e3dc0b4dc..0fda57fe0ecf 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_dispatch_dbl_dsp.bpf.c @@ -31,8 +31,8 @@ void BPF_STRUCT_OPS(select_cpu_dispatch_dbl_dsp_exit, struct scx_exit_info *ei) SEC(".struct_ops.link") struct sched_ext_ops select_cpu_dispatch_dbl_dsp_ops = { - .select_cpu = select_cpu_dispatch_dbl_dsp_select_cpu, - .exit = select_cpu_dispatch_dbl_dsp_exit, + .select_cpu = (void *) select_cpu_dispatch_dbl_dsp_select_cpu, + .exit = (void *) select_cpu_dispatch_dbl_dsp_exit, .name = "select_cpu_dispatch_dbl_dsp", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c index 7f3ebf4fc2ea..e6c67bcf5e6e 100644 --- a/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c +++ b/tools/testing/selftests/sched_ext/select_cpu_vtime.bpf.c @@ -81,12 +81,12 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(select_cpu_vtime_init) SEC(".struct_ops.link") struct sched_ext_ops select_cpu_vtime_ops = { - .select_cpu = select_cpu_vtime_select_cpu, - .dispatch = select_cpu_vtime_dispatch, - .running = select_cpu_vtime_running, - .stopping = select_cpu_vtime_stopping, - .enable = select_cpu_vtime_enable, - .init = select_cpu_vtime_init, + .select_cpu = (void *) select_cpu_vtime_select_cpu, + .dispatch = (void *) select_cpu_vtime_dispatch, + .running = (void *) select_cpu_vtime_running, + .stopping = (void *) select_cpu_vtime_stopping, + .enable = (void *) select_cpu_vtime_enable, + .init = (void *) select_cpu_vtime_init, .name = "select_cpu_vtime", .timeout_ms = 1000U, }; diff --git a/tools/testing/selftests/sigaltstack/.gitignore b/tools/testing/selftests/signal/.gitignore index 50a19a8888ce..3f339865a3b6 100644 --- a/tools/testing/selftests/sigaltstack/.gitignore +++ b/tools/testing/selftests/signal/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only +mangle_uc_sigmask sas diff --git a/tools/testing/selftests/sigaltstack/Makefile b/tools/testing/selftests/signal/Makefile index 3e96d5d47036..e0bf7058d19c 100644 --- a/tools/testing/selftests/sigaltstack/Makefile +++ b/tools/testing/selftests/signal/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only CFLAGS = -Wall -TEST_GEN_PROGS = sas +TEST_GEN_PROGS = mangle_uc_sigmask +TEST_GEN_PROGS += sas include ../lib.mk diff --git a/tools/testing/selftests/sigaltstack/current_stack_pointer.h b/tools/testing/selftests/signal/current_stack_pointer.h index 09da8f1011ce..09da8f1011ce 100644 --- a/tools/testing/selftests/sigaltstack/current_stack_pointer.h +++ b/tools/testing/selftests/signal/current_stack_pointer.h diff --git a/tools/testing/selftests/signal/mangle_uc_sigmask.c b/tools/testing/selftests/signal/mangle_uc_sigmask.c new file mode 100644 index 000000000000..b79ab92178a8 --- /dev/null +++ b/tools/testing/selftests/signal/mangle_uc_sigmask.c @@ -0,0 +1,184 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2024 ARM Ltd. + * + * Author: Dev Jain <dev.jain@arm.com> + * + * Test describing a clear distinction between signal states - delivered and + * blocked, and their relation with ucontext. + * + * A process can request blocking of a signal by masking it into its set of + * blocked signals; such a signal, when sent to the process by the kernel, + * will get blocked by the process and it may later unblock it and take an + * action. At that point, the signal will be delivered. + * + * We test the following functionalities of the kernel: + * + * ucontext_t describes the interrupted context of the thread; this implies + * that, in case of registering a handler and catching the corresponding + * signal, that state is before what was jumping into the handler. + * + * The thread's mask of blocked signals can be permanently changed, i.e, not + * just during the execution of the handler, by mangling with uc_sigmask + * from inside the handler. + * + * Assume that we block the set of signals, S1, by sigaction(), and say, the + * signal for which the handler was installed, is S2. When S2 is sent to the + * program, it will be considered "delivered", since we will act on the + * signal and jump to the handler. Any instances of S1 or S2 raised, while the + * program is executing inside the handler, will be blocked; they will be + * delivered immediately upon termination of the handler. + * + * For standard signals (also see real-time signals in the man page), multiple + * blocked instances of the same signal are not queued; such a signal will + * be delivered just once. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <ucontext.h> + +#include "../kselftest.h" + +void handler_verify_ucontext(int signo, siginfo_t *info, void *uc) +{ + int ret; + + /* Kernel dumps ucontext with USR2 blocked */ + ret = sigismember(&(((ucontext_t *)uc)->uc_sigmask), SIGUSR2); + ksft_test_result(ret == 1, "USR2 blocked in ucontext\n"); + + /* + * USR2 is blocked; can be delivered neither here, nor after + * exit from handler + */ + if (raise(SIGUSR2)) + ksft_exit_fail_perror("raise"); +} + +void handler_segv(int signo, siginfo_t *info, void *uc) +{ + /* + * Three cases possible: + * 1. Program already terminated due to segmentation fault. + * 2. SEGV was blocked even after returning from handler_usr. + * 3. SEGV was delivered on returning from handler_usr. + * The last option must happen. + */ + ksft_test_result_pass("SEGV delivered\n"); +} + +static int cnt; + +void handler_usr(int signo, siginfo_t *info, void *uc) +{ + int ret; + + /* + * Break out of infinite recursion caused by raise(SIGUSR1) invoked + * from inside the handler + */ + ++cnt; + if (cnt > 1) + return; + + /* SEGV blocked during handler execution, delivered on return */ + if (raise(SIGSEGV)) + ksft_exit_fail_perror("raise"); + + ksft_print_msg("SEGV bypassed successfully\n"); + + /* + * Signal responsible for handler invocation is blocked by default; + * delivered on return, leading to recursion + */ + if (raise(SIGUSR1)) + ksft_exit_fail_perror("raise"); + + ksft_test_result(cnt == 1, + "USR1 is blocked, cannot invoke handler right now\n"); + + /* Raise USR1 again; only one instance must be delivered upon exit */ + if (raise(SIGUSR1)) + ksft_exit_fail_perror("raise"); + + /* SEGV has been blocked in sa_mask, but ucontext is empty */ + ret = sigismember(&(((ucontext_t *)uc)->uc_sigmask), SIGSEGV); + ksft_test_result(ret == 0, "SEGV not blocked in ucontext\n"); + + /* USR1 has been blocked, but ucontext is empty */ + ret = sigismember(&(((ucontext_t *)uc)->uc_sigmask), SIGUSR1); + ksft_test_result(ret == 0, "USR1 not blocked in ucontext\n"); + + /* + * Mangle ucontext; this will be copied back into ¤t->blocked + * on return from the handler. + */ + if (sigaddset(&((ucontext_t *)uc)->uc_sigmask, SIGUSR2)) + ksft_exit_fail_perror("sigaddset"); +} + +int main(int argc, char *argv[]) +{ + struct sigaction act, act2; + sigset_t set, oldset; + + ksft_print_header(); + ksft_set_plan(7); + + act.sa_flags = SA_SIGINFO; + act.sa_sigaction = &handler_usr; + + /* Add SEGV to blocked mask */ + if (sigemptyset(&act.sa_mask) || sigaddset(&act.sa_mask, SIGSEGV) + || (sigismember(&act.sa_mask, SIGSEGV) != 1)) + ksft_exit_fail_msg("Cannot add SEGV to blocked mask\n"); + + if (sigaction(SIGUSR1, &act, NULL)) + ksft_exit_fail_perror("Cannot install handler"); + + act2.sa_flags = SA_SIGINFO; + act2.sa_sigaction = &handler_segv; + + if (sigaction(SIGSEGV, &act2, NULL)) + ksft_exit_fail_perror("Cannot install handler"); + + /* Invoke handler */ + if (raise(SIGUSR1)) + ksft_exit_fail_perror("raise"); + + /* USR1 must not be queued */ + ksft_test_result(cnt == 2, "handler invoked only twice\n"); + + /* Mangled ucontext implies USR2 is blocked for current thread */ + if (raise(SIGUSR2)) + ksft_exit_fail_perror("raise"); + + ksft_print_msg("USR2 bypassed successfully\n"); + + act.sa_sigaction = &handler_verify_ucontext; + if (sigaction(SIGUSR1, &act, NULL)) + ksft_exit_fail_perror("Cannot install handler"); + + if (raise(SIGUSR1)) + ksft_exit_fail_perror("raise"); + + /* + * Raising USR2 in handler_verify_ucontext is redundant since it + * is blocked + */ + ksft_print_msg("USR2 still blocked on return from handler\n"); + + /* Confirm USR2 blockage by sigprocmask() too */ + if (sigemptyset(&set)) + ksft_exit_fail_perror("sigemptyset"); + + if (sigprocmask(SIG_BLOCK, &set, &oldset)) + ksft_exit_fail_perror("sigprocmask"); + + ksft_test_result(sigismember(&oldset, SIGUSR2) == 1, + "USR2 present in ¤t->blocked\n"); + + ksft_finished(); +} diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/signal/sas.c index 07227fab1cc9..07227fab1cc9 100644 --- a/tools/testing/selftests/sigaltstack/sas.c +++ b/tools/testing/selftests/signal/sas.c diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json index d1278de8ebc3..c9309a44a87e 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json @@ -67,7 +67,7 @@ }, { "id": "4943", - "name": "Add basic filter with cmp ematch u32/link layer and miltiple actions", + "name": "Add basic filter with cmp ematch u32/link layer and multiple actions", "category": [ "filter", "basic" @@ -155,7 +155,7 @@ }, { "id": "32d8", - "name": "Add basic filter with cmp ematch u32/network layer and miltiple actions", + "name": "Add basic filter with cmp ematch u32/network layer and multiple actions", "category": [ "filter", "basic" @@ -243,7 +243,7 @@ }, { "id": "62d7", - "name": "Add basic filter with cmp ematch u32/transport layer and miltiple actions", + "name": "Add basic filter with cmp ematch u32/transport layer and multiple actions", "category": [ "filter", "basic" diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json b/tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json index 03723cf84379..35c9a7dbe1c4 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json @@ -67,7 +67,7 @@ }, { "id": "0234", - "name": "Add cgroup filter with cmp ematch u32/link layer and miltiple actions", + "name": "Add cgroup filter with cmp ematch u32/link layer and multiple actions", "category": [ "filter", "cgroup" @@ -155,7 +155,7 @@ }, { "id": "2733", - "name": "Add cgroup filter with cmp ematch u32/network layer and miltiple actions", + "name": "Add cgroup filter with cmp ematch u32/network layer and multiple actions", "category": [ "filter", "cgroup" @@ -1189,7 +1189,7 @@ }, { "id": "4319", - "name": "Replace cgroup filter with diffferent match", + "name": "Replace cgroup filter with different match", "category": [ "filter", "cgroup" diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json index 58189327f644..996448afe31b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json @@ -507,7 +507,7 @@ }, { "id": "4341", - "name": "Add flow filter with muliple ops", + "name": "Add flow filter with multiple ops", "category": [ "filter", "flow" diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/route.json b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json index 8d8de8f65aef..05cedca67cca 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/route.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json @@ -111,7 +111,7 @@ }, { "id": "7994", - "name": "Add route filter with miltiple actions", + "name": "Add route filter with multiple actions", "category": [ "filter", "route" diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json index 24bd0c2a3014..b2ca9d4e991b 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json +++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json @@ -329,5 +329,29 @@ "teardown": [ "$TC qdisc del dev $DEV1 parent root drr" ] + }, + { + "id": "1234", + "name": "Exercise IDR leaks by creating/deleting a filter many (2048) times", + "category": [ + "filter", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DEV1 parent root handle 10: drr", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 match ip src 0.0.0.2/32 action drop", + "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 3 u32 match ip src 0.0.0.3/32 action drop" + ], + "cmdUnderTest": "bash -c 'for i in {1..2048} ;do echo filter delete dev $DEV1 pref 3;echo filter add dev $DEV1 parent 10:0 protocol ip prio 3 u32 match ip src 0.0.0.3/32 action drop;done | $TC -b -'", + "expExitCode": "0", + "verifyCmd": "$TC filter show dev $DEV1", + "matchPattern": "protocol ip pref 3 u32", + "matchCount": "3", + "teardown": [ + "$TC qdisc del dev $DEV1 parent root drr" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json new file mode 100644 index 000000000000..d3dd65b05b5f --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -0,0 +1,98 @@ +[ + { + "id": "ca5e", + "name": "Check class delete notification for ffff:", + "category": [ + "qdisc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle ffff: drr", + "$TC filter add dev $DUMMY parent ffff: basic classid ffff:1", + "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr", + "$TC qdisc add dev $DUMMY parent ffff:1 netem delay 1s", + "ping -c1 -W0.01 -I $DUMMY 10.10.10.1 || true", + "$TC class del dev $DUMMY classid ffff:1", + "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -s qdisc ls dev $DUMMY", + "matchPattern": "drr 1: root", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr", + "$IP addr del 10.10.10.10/24 dev $DUMMY" + ] + }, + { + "id": "e4b7", + "name": "Check class delete notification for root ffff:", + "category": [ + "qdisc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle ffff: drr", + "$TC filter add dev $DUMMY parent ffff: basic classid ffff:1", + "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr", + "$TC qdisc add dev $DUMMY parent ffff:1 netem delay 1s", + "ping -c1 -W0.01 -I $DUMMY 10.10.10.1 || true", + "$TC class del dev $DUMMY classid ffff:1", + "$TC class add dev $DUMMY parent ffff: classid ffff:1 drr" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC qdisc ls dev $DUMMY", + "matchPattern": "drr ffff: root", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle ffff: drr", + "$IP addr del 10.10.10.10/24 dev $DUMMY" + ] + }, + { + "id": "33a9", + "name": "Check ingress is not searchable on backlog update", + "category": [ + "qdisc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY ingress", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: drr", + "$TC filter add dev $DUMMY parent 2: basic classid 2:1", + "$TC class add dev $DUMMY parent 2: classid 2:1 drr", + "$TC qdisc add dev $DUMMY parent 2:1 netem delay 1s", + "ping -c1 -W0.01 -I $DUMMY 10.10.10.1 || true" + ], + "cmdUnderTest": "$TC class del dev $DUMMY classid 2:1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc ls dev $DUMMY", + "matchPattern": "drr 1: root", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr", + "$TC qdisc del dev $DUMMY ingress", + "$IP addr del 10.10.10.10/24 dev $DUMMY" + ] + } +] diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index 0e73a16874c4..32203593c62e 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -O3 -Wl,-no-as-needed -Wall +CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir) LDLIBS += -lrt -lpthread -lm # these are all "safe" tests that don't modify diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c index 205b76a4abb4..777d9494b683 100644 --- a/tools/testing/selftests/timers/adjtick.c +++ b/tools/testing/selftests/timers/adjtick.c @@ -22,14 +22,10 @@ #include <sys/time.h> #include <sys/timex.h> #include <time.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define CLOCK_MONOTONIC_RAW 4 - -#define NSEC_PER_SEC 1000000000LL -#define USEC_PER_SEC 1000000 - #define MILLION 1000000 long systick; diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c index ad52e608b88e..a9ef76ea6051 100644 --- a/tools/testing/selftests/timers/alarmtimer-suspend.c +++ b/tools/testing/selftests/timers/alarmtimer-suspend.c @@ -28,24 +28,10 @@ #include <signal.h> #include <stdlib.h> #include <pthread.h> +#include <include/vdso/time64.h> +#include <errno.h> #include "../kselftest.h" -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 -#define CLOCK_PROCESS_CPUTIME_ID 2 -#define CLOCK_THREAD_CPUTIME_ID 3 -#define CLOCK_MONOTONIC_RAW 4 -#define CLOCK_REALTIME_COARSE 5 -#define CLOCK_MONOTONIC_COARSE 6 -#define CLOCK_BOOTTIME 7 -#define CLOCK_REALTIME_ALARM 8 -#define CLOCK_BOOTTIME_ALARM 9 -#define CLOCK_HWSPECIFIC 10 -#define CLOCK_TAI 11 -#define NR_CLOCKIDS 12 - - -#define NSEC_PER_SEC 1000000000ULL #define UNREASONABLE_LAT (NSEC_PER_SEC * 5) /* hopefully we resume in 5 secs */ #define SUSPEND_SECS 15 @@ -142,8 +128,8 @@ int main(void) alarmcount = 0; if (timer_create(alarm_clock_id, &se, &tm1) == -1) { - printf("timer_create failed, %s unsupported?\n", - clockstring(alarm_clock_id)); + printf("timer_create failed, %s unsupported?: %s\n", + clockstring(alarm_clock_id), strerror(errno)); break; } diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c index 36a49fba6c9b..9d1573769d55 100644 --- a/tools/testing/selftests/timers/inconsistency-check.c +++ b/tools/testing/selftests/timers/inconsistency-check.c @@ -28,24 +28,13 @@ #include <sys/timex.h> #include <string.h> #include <signal.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define CALLS_PER_LOOP 64 -#define NSEC_PER_SEC 1000000000ULL - -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 -#define CLOCK_PROCESS_CPUTIME_ID 2 -#define CLOCK_THREAD_CPUTIME_ID 3 -#define CLOCK_MONOTONIC_RAW 4 -#define CLOCK_REALTIME_COARSE 5 -#define CLOCK_MONOTONIC_COARSE 6 -#define CLOCK_BOOTTIME 7 -#define CLOCK_REALTIME_ALARM 8 -#define CLOCK_BOOTTIME_ALARM 9 +/* CLOCK_HWSPECIFIC == CLOCK_SGI_CYCLE (Deprecated) */ #define CLOCK_HWSPECIFIC 10 -#define CLOCK_TAI 11 -#define NR_CLOCKIDS 12 + +#define CALLS_PER_LOOP 64 char *clockstring(int clockid) { @@ -152,7 +141,7 @@ int main(int argc, char *argv[]) { int clockid, opt; int userclock = CLOCK_REALTIME; - int maxclocks = NR_CLOCKIDS; + int maxclocks = CLOCK_TAI + 1; int runtime = 10; struct timespec ts; diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c index 986abbdb1521..04004a7c0934 100644 --- a/tools/testing/selftests/timers/leap-a-day.c +++ b/tools/testing/selftests/timers/leap-a-day.c @@ -48,9 +48,9 @@ #include <string.h> #include <signal.h> #include <unistd.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define NSEC_PER_SEC 1000000000ULL #define CLOCK_TAI 11 time_t next_leap; diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c index f3179a605bba..63de2334a291 100644 --- a/tools/testing/selftests/timers/mqueue-lat.c +++ b/tools/testing/selftests/timers/mqueue-lat.c @@ -29,9 +29,9 @@ #include <signal.h> #include <errno.h> #include <mqueue.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define NSEC_PER_SEC 1000000000ULL #define TARGET_TIMEOUT 100000000 /* 100ms in nanoseconds */ #define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c index df1d03516e7b..252c6308c569 100644 --- a/tools/testing/selftests/timers/nanosleep.c +++ b/tools/testing/selftests/timers/nanosleep.c @@ -27,23 +27,11 @@ #include <sys/timex.h> #include <string.h> #include <signal.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define NSEC_PER_SEC 1000000000ULL - -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 -#define CLOCK_PROCESS_CPUTIME_ID 2 -#define CLOCK_THREAD_CPUTIME_ID 3 -#define CLOCK_MONOTONIC_RAW 4 -#define CLOCK_REALTIME_COARSE 5 -#define CLOCK_MONOTONIC_COARSE 6 -#define CLOCK_BOOTTIME 7 -#define CLOCK_REALTIME_ALARM 8 -#define CLOCK_BOOTTIME_ALARM 9 +/* CLOCK_HWSPECIFIC == CLOCK_SGI_CYCLE (Deprecated) */ #define CLOCK_HWSPECIFIC 10 -#define CLOCK_TAI 11 -#define NR_CLOCKIDS 12 #define UNSUPPORTED 0xf00f @@ -132,11 +120,12 @@ int main(int argc, char **argv) { long long length; int clockid, ret; + int max_clocks = CLOCK_TAI + 1; ksft_print_header(); - ksft_set_plan(NR_CLOCKIDS); + ksft_set_plan(max_clocks); - for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) { + for (clockid = CLOCK_REALTIME; clockid < max_clocks; clockid++) { /* Skip cputime clockids since nanosleep won't increment cputime */ if (clockid == CLOCK_PROCESS_CPUTIME_ID || diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c index edb5acacf214..de23dc0c9f97 100644 --- a/tools/testing/selftests/timers/nsleep-lat.c +++ b/tools/testing/selftests/timers/nsleep-lat.c @@ -24,26 +24,13 @@ #include <sys/timex.h> #include <string.h> #include <signal.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define NSEC_PER_SEC 1000000000ULL - #define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ - -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 -#define CLOCK_PROCESS_CPUTIME_ID 2 -#define CLOCK_THREAD_CPUTIME_ID 3 -#define CLOCK_MONOTONIC_RAW 4 -#define CLOCK_REALTIME_COARSE 5 -#define CLOCK_MONOTONIC_COARSE 6 -#define CLOCK_BOOTTIME 7 -#define CLOCK_REALTIME_ALARM 8 -#define CLOCK_BOOTTIME_ALARM 9 +/* CLOCK_HWSPECIFIC == CLOCK_SGI_CYCLE (Deprecated) */ #define CLOCK_HWSPECIFIC 10 -#define CLOCK_TAI 11 -#define NR_CLOCKIDS 12 #define UNSUPPORTED 0xf00f @@ -145,11 +132,12 @@ int main(int argc, char **argv) { long long length; int clockid, ret; + int max_clocks = CLOCK_TAI + 1; ksft_print_header(); - ksft_set_plan(NR_CLOCKIDS - CLOCK_REALTIME - SKIPPED_CLOCK_COUNT); + ksft_set_plan(max_clocks - CLOCK_REALTIME - SKIPPED_CLOCK_COUNT); - for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) { + for (clockid = CLOCK_REALTIME; clockid < max_clocks; clockid++) { /* Skip cputime clockids since nanosleep won't increment cputime */ if (clockid == CLOCK_PROCESS_CPUTIME_ID || diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 16bd49492efa..9814b3a1c77d 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -15,24 +15,27 @@ #include <string.h> #include <unistd.h> #include <time.h> +#include <include/vdso/time64.h> #include <pthread.h> #include "../kselftest.h" #define DELAY 2 -#define USECS_PER_SEC 1000000 -#define NSECS_PER_SEC 1000000000 static void __fatal_error(const char *test, const char *name, const char *what) { char buf[64]; + char *ret_str = NULL; - strerror_r(errno, buf, sizeof(buf)); + ret_str = strerror_r(errno, buf, sizeof(buf)); - if (name && strlen(name)) - ksft_exit_fail_msg("%s %s %s %s\n", test, name, what, buf); + if (name && strlen(name) && ret_str) + ksft_exit_fail_msg("%s %s %s %s\n", test, name, what, ret_str); + else if (ret_str) + ksft_exit_fail_msg("%s %s %s\n", test, what, ret_str); else - ksft_exit_fail_msg("%s %s %s\n", test, what, buf); + ksft_exit_fail_msg("%s %s\n", test, what); + } #define fatal_error(name, what) __fatal_error(__func__, name, what) @@ -82,9 +85,9 @@ static int check_diff(struct timeval start, struct timeval end) long long diff; diff = end.tv_usec - start.tv_usec; - diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC; + diff += (end.tv_sec - start.tv_sec) * USEC_PER_SEC; - if (llabs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) { + if (llabs(diff - DELAY * USEC_PER_SEC) > USEC_PER_SEC / 2) { printf("Diff too high: %lld..", diff); return -1; } @@ -444,7 +447,7 @@ static inline int64_t calcdiff_ns(struct timespec t1, struct timespec t2) { int64_t diff; - diff = NSECS_PER_SEC * (int64_t)((int) t1.tv_sec - (int) t2.tv_sec); + diff = NSEC_PER_SEC * (int64_t)((int) t1.tv_sec - (int) t2.tv_sec); diff += ((int) t1.tv_nsec - (int) t2.tv_nsec); return diff; } @@ -475,7 +478,7 @@ static void check_sigev_none(int which, const char *name) do { if (clock_gettime(which, &now)) fatal_error(name, "clock_gettime()"); - } while (calcdiff_ns(now, start) < NSECS_PER_SEC); + } while (calcdiff_ns(now, start) < NSEC_PER_SEC); if (timer_gettime(timerid, &its)) fatal_error(name, "timer_gettime()"); @@ -532,7 +535,7 @@ static void check_gettime(int which, const char *name) wraps++; prev = its; - } while (calcdiff_ns(now, start) < NSECS_PER_SEC); + } while (calcdiff_ns(now, start) < NSEC_PER_SEC); if (timer_delete(timerid)) fatal_error(name, "timer_delete()"); @@ -583,7 +586,7 @@ static void check_overrun(int which, const char *name) do { if (clock_gettime(which, &now)) fatal_error(name, "clock_gettime()"); - } while (calcdiff_ns(now, start) < NSECS_PER_SEC); + } while (calcdiff_ns(now, start) < NSEC_PER_SEC); /* Unblock it, which should deliver a signal */ if (sigprocmask(SIG_UNBLOCK, &set, NULL)) diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c index 030143eb09b4..957f7cd29cb1 100644 --- a/tools/testing/selftests/timers/raw_skew.c +++ b/tools/testing/selftests/timers/raw_skew.c @@ -25,11 +25,9 @@ #include <sys/time.h> #include <sys/timex.h> #include <time.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define CLOCK_MONOTONIC_RAW 4 -#define NSEC_PER_SEC 1000000000LL - #define shift_right(x, s) ({ \ __typeof__(x) __x = (x); \ __typeof__(s) __s = (s); \ diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c index f7d978721b9e..ed244315e11c 100644 --- a/tools/testing/selftests/timers/set-2038.c +++ b/tools/testing/selftests/timers/set-2038.c @@ -27,10 +27,9 @@ #include <unistd.h> #include <time.h> #include <sys/time.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define NSEC_PER_SEC 1000000000LL - #define KTIME_MAX ((long long)~((unsigned long long)1 << 63)) #define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c index 7ce240c89b21..9d8437c13929 100644 --- a/tools/testing/selftests/timers/set-timer-lat.c +++ b/tools/testing/selftests/timers/set-timer-lat.c @@ -28,24 +28,12 @@ #include <signal.h> #include <stdlib.h> #include <pthread.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define CLOCK_REALTIME 0 -#define CLOCK_MONOTONIC 1 -#define CLOCK_PROCESS_CPUTIME_ID 2 -#define CLOCK_THREAD_CPUTIME_ID 3 -#define CLOCK_MONOTONIC_RAW 4 -#define CLOCK_REALTIME_COARSE 5 -#define CLOCK_MONOTONIC_COARSE 6 -#define CLOCK_BOOTTIME 7 -#define CLOCK_REALTIME_ALARM 8 -#define CLOCK_BOOTTIME_ALARM 9 +/* CLOCK_HWSPECIFIC == CLOCK_SGI_CYCLE (Deprecated) */ #define CLOCK_HWSPECIFIC 10 -#define CLOCK_TAI 11 -#define NR_CLOCKIDS 12 - -#define NSEC_PER_SEC 1000000000ULL #define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ #define TIMER_SECS 1 @@ -80,7 +68,7 @@ char *clockstring(int clockid) return "CLOCK_BOOTTIME_ALARM"; case CLOCK_TAI: return "CLOCK_TAI"; - }; + } return "UNKNOWN_CLOCKID"; } @@ -254,6 +242,7 @@ int main(void) struct sigaction act; int signum = SIGRTMAX; int ret = 0; + int max_clocks = CLOCK_TAI + 1; /* Set up signal handler: */ sigfillset(&act.sa_mask); @@ -262,7 +251,7 @@ int main(void) sigaction(signum, &act, NULL); printf("Setting timers for every %i seconds\n", TIMER_SECS); - for (clock_id = 0; clock_id < NR_CLOCKIDS; clock_id++) { + for (clock_id = 0; clock_id < max_clocks; clock_id++) { if ((clock_id == CLOCK_PROCESS_CPUTIME_ID) || (clock_id == CLOCK_THREAD_CPUTIME_ID) || diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c index d500884801d8..6b7801055ad1 100644 --- a/tools/testing/selftests/timers/valid-adjtimex.c +++ b/tools/testing/selftests/timers/valid-adjtimex.c @@ -29,11 +29,9 @@ #include <string.h> #include <signal.h> #include <unistd.h> +#include <include/vdso/time64.h> #include "../kselftest.h" -#define NSEC_PER_SEC 1000000000LL -#define USEC_PER_SEC 1000000LL - #define ADJ_SETOFFSET 0x0100 #include <sys/syscall.h> diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index af9cedbf5357..1cf14a8da438 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -9,10 +9,8 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) TEST_GEN_PROGS += vdso_standalone_test_x86 endif TEST_GEN_PROGS += vdso_test_correctness -ifeq ($(ARCH)$(CONFIG_X86_32),$(filter $(ARCH)$(CONFIG_X86_32),x86 x86_64 loongarch arm64 powerpc s390)) TEST_GEN_PROGS += vdso_test_getrandom TEST_GEN_PROGS += vdso_test_chacha -endif CFLAGS := -std=gnu99 -O2 @@ -37,9 +35,9 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \ $(KHDR_INCLUDES) \ -isystem $(top_srcdir)/include/uapi -$(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(SRCARCH)/vdso/vgetrandom-chacha.S +$(OUTPUT)/vdso_test_chacha: vgetrandom-chacha.S $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ -idirafter $(top_srcdir)/tools/include/generated \ -idirafter $(top_srcdir)/arch/$(SRCARCH)/include \ -idirafter $(top_srcdir)/include \ - -D__ASSEMBLY__ -Wa,--noexecstack + -Wa,--noexecstack diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 7dd5668ea8a6..28f35620c499 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -222,8 +222,7 @@ void *vdso_sym(const char *version, const char *name) ELF(Sym) *sym = &vdso_info.symtab[chain]; /* Check for a defined global or weak function w/ right name. */ - if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC && - ELF64_ST_TYPE(sym->st_info) != STT_NOTYPE) + if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) continue; if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && ELF64_ST_BIND(sym->st_info) != STB_WEAK) diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c index b1ea532c5996..8757f738b0b1 100644 --- a/tools/testing/selftests/vDSO/vdso_test_chacha.c +++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c @@ -3,6 +3,7 @@ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ +#include <linux/compiler.h> #include <tools/le_byteshift.h> #include <sys/random.h> #include <sys/auxv.h> @@ -73,10 +74,10 @@ static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, u counter[1] = s[13]; } -typedef uint8_t u8; -typedef uint32_t u32; -typedef uint64_t u64; -#include <vdso/getrandom.h> +void __weak __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, const uint32_t *key, uint32_t *counter, size_t nblocks) +{ + ksft_exit_skip("Not implemented on architecture\n"); +} int main(int argc, char *argv[]) { @@ -90,10 +91,8 @@ int main(int argc, char *argv[]) ksft_set_plan(1); for (unsigned int trial = 0; trial < TRIALS; ++trial) { - if (getrandom(key, sizeof(key), 0) != sizeof(key)) { - printf("getrandom() failed!\n"); - return KSFT_SKIP; - } + if (getrandom(key, sizeof(key), 0) != sizeof(key)) + ksft_exit_skip("getrandom() failed unexpectedly\n"); memset(counter1, 0, sizeof(counter1)); reference_chacha20_blocks(output1, key, counter1, BLOCKS); for (unsigned int split = 0; split < BLOCKS; ++split) { @@ -102,8 +101,10 @@ int main(int argc, char *argv[]) if (split) __arch_chacha20_blocks_nostack(output2, key, counter2, split); __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter2, BLOCKS - split); - if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1))) - return KSFT_FAIL; + if (memcmp(output1, output2, sizeof(output1))) + ksft_exit_fail_msg("Main loop outputs do not match on trial %u, split %u\n", trial, split); + if (memcmp(counter1, counter2, sizeof(counter1))) + ksft_exit_fail_msg("Main loop counters do not match on trial %u, split %u\n", trial, split); } } memset(counter1, 0, sizeof(counter1)); @@ -113,14 +114,19 @@ int main(int argc, char *argv[]) reference_chacha20_blocks(output1, key, counter1, BLOCKS); __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS); - if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1))) - return KSFT_FAIL; + if (memcmp(output1, output2, sizeof(output1))) + ksft_exit_fail_msg("Block limit outputs do not match after first round\n"); + if (memcmp(counter1, counter2, sizeof(counter1))) + ksft_exit_fail_msg("Block limit counters do not match after first round\n"); reference_chacha20_blocks(output1, key, counter1, BLOCKS); __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS); - if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1))) - return KSFT_FAIL; + if (memcmp(output1, output2, sizeof(output1))) + ksft_exit_fail_msg("Block limit outputs do not match after second round\n"); + if (memcmp(counter1, counter2, sizeof(counter1))) + ksft_exit_fail_msg("Block limit counters do not match after second round\n"); ksft_test_result_pass("chacha: PASS\n"); - return KSFT_PASS; + ksft_exit_pass(); + return 0; } diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c index 72a1d9b43a84..95057f7567db 100644 --- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c +++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c @@ -11,6 +11,7 @@ #include <string.h> #include <time.h> #include <unistd.h> +#include <sched.h> #include <signal.h> #include <sys/auxv.h> #include <sys/mman.h> @@ -40,6 +41,9 @@ } while (0) #endif +#define ksft_assert(condition) \ + do { if (!(condition)) ksft_exit_fail_msg("Assertion failed: %s\n", #condition); } while (0) + static struct { pthread_mutex_t lock; void **states; @@ -59,10 +63,12 @@ static void *vgetrandom_get_state(void) size_t page_size = getpagesize(); size_t new_cap; size_t alloc_size, num = sysconf(_SC_NPROCESSORS_ONLN); /* Just a decent heuristic. */ + size_t state_size_aligned, cache_line_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE) ?: 1; void *new_block, *new_states; - alloc_size = (num * vgrnd.params.size_of_opaque_state + page_size - 1) & (~(page_size - 1)); - num = (page_size / vgrnd.params.size_of_opaque_state) * (alloc_size / page_size); + state_size_aligned = (vgrnd.params.size_of_opaque_state + cache_line_size - 1) & (~(cache_line_size - 1)); + alloc_size = (num * state_size_aligned + page_size - 1) & (~(page_size - 1)); + num = (page_size / state_size_aligned) * (alloc_size / page_size); new_block = mmap(0, alloc_size, vgrnd.params.mmap_prot, vgrnd.params.mmap_flags, -1, 0); if (new_block == MAP_FAILED) goto out; @@ -78,7 +84,7 @@ static void *vgetrandom_get_state(void) if (((uintptr_t)new_block & (page_size - 1)) + vgrnd.params.size_of_opaque_state > page_size) new_block = (void *)(((uintptr_t)new_block + page_size - 1) & (~(page_size - 1))); vgrnd.states[i] = new_block; - new_block += vgrnd.params.size_of_opaque_state; + new_block += state_size_aligned; } vgrnd.len = num; goto success; @@ -109,26 +115,19 @@ static void vgetrandom_init(void) const char *version = versions[VDSO_VERSION]; const char *name = names[VDSO_NAMES][6]; unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); - size_t ret; + ssize_t ret; - if (!sysinfo_ehdr) { - printf("AT_SYSINFO_EHDR is not present!\n"); - exit(KSFT_SKIP); - } + if (!sysinfo_ehdr) + ksft_exit_skip("AT_SYSINFO_EHDR is not present\n"); vdso_init_from_sysinfo_ehdr(sysinfo_ehdr); vgrnd.fn = (__typeof__(vgrnd.fn))vdso_sym(version, name); - if (!vgrnd.fn) { - printf("%s is missing!\n", name); - exit(KSFT_FAIL); - } + if (!vgrnd.fn) + ksft_exit_skip("%s@%s symbol is missing from vDSO\n", name, version); ret = VDSO_CALL(vgrnd.fn, 5, NULL, 0, 0, &vgrnd.params, ~0UL); - if (ret == -ENOSYS) { - printf("unsupported architecture\n"); - exit(KSFT_SKIP); - } else if (ret) { - printf("failed to fetch vgetrandom params!\n"); - exit(KSFT_FAIL); - } + if (ret == -ENOSYS) + ksft_exit_skip("CPU does not have runtime support\n"); + else if (ret) + ksft_exit_fail_msg("Failed to fetch vgetrandom params: %zd\n", ret); } static ssize_t vgetrandom(void *buf, size_t len, unsigned long flags) @@ -137,10 +136,7 @@ static ssize_t vgetrandom(void *buf, size_t len, unsigned long flags) if (!state) { state = vgetrandom_get_state(); - if (!state) { - printf("vgetrandom_get_state failed!\n"); - exit(KSFT_FAIL); - } + ksft_assert(state); } return VDSO_CALL(vgrnd.fn, 5, buf, len, flags, state, vgrnd.params.size_of_opaque_state); } @@ -152,7 +148,7 @@ static void *test_vdso_getrandom(void *ctx) for (size_t i = 0; i < TRIALS; ++i) { unsigned int val; ssize_t ret = vgetrandom(&val, sizeof(val), 0); - assert(ret == sizeof(val)); + ksft_assert(ret == sizeof(val)); } return NULL; } @@ -162,7 +158,7 @@ static void *test_libc_getrandom(void *ctx) for (size_t i = 0; i < TRIALS; ++i) { unsigned int val; ssize_t ret = getrandom(&val, sizeof(val), 0); - assert(ret == sizeof(val)); + ksft_assert(ret == sizeof(val)); } return NULL; } @@ -172,7 +168,7 @@ static void *test_syscall_getrandom(void *ctx) for (size_t i = 0; i < TRIALS; ++i) { unsigned int val; ssize_t ret = syscall(__NR_getrandom, &val, sizeof(val), 0); - assert(ret == sizeof(val)); + ksft_assert(ret == sizeof(val)); } return NULL; } @@ -207,7 +203,7 @@ static void bench_multi(void) clock_gettime(CLOCK_MONOTONIC, &start); for (size_t i = 0; i < THREADS; ++i) - assert(pthread_create(&threads[i], NULL, test_vdso_getrandom, NULL) == 0); + ksft_assert(pthread_create(&threads[i], NULL, test_vdso_getrandom, NULL) == 0); for (size_t i = 0; i < THREADS; ++i) pthread_join(threads[i], NULL); clock_gettime(CLOCK_MONOTONIC, &end); @@ -216,7 +212,7 @@ static void bench_multi(void) clock_gettime(CLOCK_MONOTONIC, &start); for (size_t i = 0; i < THREADS; ++i) - assert(pthread_create(&threads[i], NULL, test_libc_getrandom, NULL) == 0); + ksft_assert(pthread_create(&threads[i], NULL, test_libc_getrandom, NULL) == 0); for (size_t i = 0; i < THREADS; ++i) pthread_join(threads[i], NULL); clock_gettime(CLOCK_MONOTONIC, &end); @@ -225,7 +221,7 @@ static void bench_multi(void) clock_gettime(CLOCK_MONOTONIC, &start); for (size_t i = 0; i < THREADS; ++i) - assert(pthread_create(&threads[i], NULL, test_syscall_getrandom, NULL) == 0); + ksft_assert(pthread_create(&threads[i], NULL, test_syscall_getrandom, NULL) == 0); for (size_t i = 0; i < THREADS; ++i) pthread_join(threads[i], NULL); clock_gettime(CLOCK_MONOTONIC, &end); @@ -250,48 +246,46 @@ static void kselftest(void) for (size_t i = 0; i < 1000; ++i) { ssize_t ret = vgetrandom(weird_size, sizeof(weird_size), 0); - if (ret != sizeof(weird_size)) - exit(KSFT_FAIL); + ksft_assert(ret == sizeof(weird_size)); } ksft_test_result_pass("getrandom: PASS\n"); unshare(CLONE_NEWUSER); - assert(unshare(CLONE_NEWTIME) == 0); + ksft_assert(unshare(CLONE_NEWTIME) == 0); child = fork(); - assert(child >= 0); + ksft_assert(child >= 0); if (!child) { vgetrandom_init(); child = getpid(); - assert(ptrace(PTRACE_TRACEME, 0, NULL, NULL) == 0); - assert(kill(child, SIGSTOP) == 0); - assert(vgetrandom(weird_size, sizeof(weird_size), 0) == sizeof(weird_size)); + ksft_assert(ptrace(PTRACE_TRACEME, 0, NULL, NULL) == 0); + ksft_assert(kill(child, SIGSTOP) == 0); + ksft_assert(vgetrandom(weird_size, sizeof(weird_size), 0) == sizeof(weird_size)); _exit(0); } for (;;) { struct ptrace_syscall_info info = { 0 }; int status, ret; - assert(waitpid(child, &status, 0) >= 0); + ksft_assert(waitpid(child, &status, 0) >= 0); if (WIFEXITED(status)) { - if (WEXITSTATUS(status) != 0) - exit(KSFT_FAIL); + ksft_assert(WEXITSTATUS(status) == 0); break; } - assert(WIFSTOPPED(status)); + ksft_assert(WIFSTOPPED(status)); if (WSTOPSIG(status) == SIGSTOP) - assert(ptrace(PTRACE_SETOPTIONS, child, 0, PTRACE_O_TRACESYSGOOD) == 0); + ksft_assert(ptrace(PTRACE_SETOPTIONS, child, 0, PTRACE_O_TRACESYSGOOD) == 0); else if (WSTOPSIG(status) == (SIGTRAP | 0x80)) { - assert(ptrace(PTRACE_GET_SYSCALL_INFO, child, sizeof(info), &info) > 0); + ksft_assert(ptrace(PTRACE_GET_SYSCALL_INFO, child, sizeof(info), &info) > 0); if (info.op == PTRACE_SYSCALL_INFO_ENTRY && info.entry.nr == __NR_getrandom && info.entry.args[0] == (uintptr_t)weird_size && info.entry.args[1] == sizeof(weird_size)) - exit(KSFT_FAIL); + ksft_exit_fail_msg("vgetrandom passed buffer to syscall getrandom unexpectedly\n"); } - assert(ptrace(PTRACE_SYSCALL, child, 0, 0) == 0); + ksft_assert(ptrace(PTRACE_SYSCALL, child, 0, 0) == 0); } ksft_test_result_pass("getrandom timens: PASS\n"); - exit(KSFT_PASS); + ksft_exit_pass(); } static void usage(const char *argv0) diff --git a/tools/testing/selftests/vDSO/vgetrandom-chacha.S b/tools/testing/selftests/vDSO/vgetrandom-chacha.S new file mode 100644 index 000000000000..d6e09af7c0a9 --- /dev/null +++ b/tools/testing/selftests/vDSO/vgetrandom-chacha.S @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#define __ASSEMBLY__ + +#if defined(__aarch64__) +#include "../../../../arch/arm64/kernel/vdso/vgetrandom-chacha.S" +#elif defined(__loongarch__) +#include "../../../../arch/loongarch/vdso/vgetrandom-chacha.S" +#elif defined(__powerpc__) || defined(__powerpc64__) +#include "../../../../arch/powerpc/kernel/vdso/vgetrandom-chacha.S" +#elif defined(__s390x__) +#include "../../../../arch/s390/kernel/vdso64/vgetrandom-chacha.S" +#elif defined(__x86_64__) +#include "../../../../arch/x86/entry/vdso/vgetrandom-chacha.S" +#endif diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c index bc71cbca0dde..a1f506ba5578 100644 --- a/tools/testing/selftests/watchdog/watchdog-test.c +++ b/tools/testing/selftests/watchdog/watchdog-test.c @@ -334,7 +334,13 @@ int main(int argc, char *argv[]) printf("Watchdog Ticking Away!\n"); + /* + * Register the signals + */ signal(SIGINT, term); + signal(SIGTERM, term); + signal(SIGKILL, term); + signal(SIGQUIT, term); while (1) { keep_alive(); diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh index 405ff262ca93..55500f901fbc 100755 --- a/tools/testing/selftests/wireguard/netns.sh +++ b/tools/testing/selftests/wireguard/netns.sh @@ -332,6 +332,7 @@ waitiface $netns1 vethc waitiface $netns2 veths n0 bash -c 'printf 1 > /proc/sys/net/ipv4/ip_forward' +[[ -e /proc/sys/net/netfilter/nf_conntrack_udp_timeout ]] || modprobe nf_conntrack n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout' n0 bash -c 'printf 2 > /proc/sys/net/netfilter/nf_conntrack_udp_timeout_stream' n0 iptables -t nat -A POSTROUTING -s 192.168.1.0/24 -d 10.0.0.0/24 -j SNAT --to 10.0.0.1 diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index 9d172210e2c6..139fd9aa8b12 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -31,7 +31,6 @@ CONFIG_SCHED_DEBUG=y CONFIG_SCHED_INFO=y CONFIG_SCHEDSTATS=y CONFIG_SCHED_STACK_END_CHECK=y -CONFIG_DEBUG_TIMEKEEPING=y CONFIG_DEBUG_PREEMPT=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_SPINLOCK=y diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c index 17263696b5d8..66dbb362385f 100644 --- a/tools/testing/shared/linux.c +++ b/tools/testing/shared/linux.c @@ -96,10 +96,13 @@ void *kmem_cache_alloc_lru(struct kmem_cache *cachep, struct list_lru *lru, p = node; } else { pthread_mutex_unlock(&cachep->lock); - if (cachep->align) - posix_memalign(&p, cachep->align, cachep->size); - else + if (cachep->align) { + if (posix_memalign(&p, cachep->align, cachep->size) < 0) + return NULL; + } else { p = malloc(cachep->size); + } + if (cachep->ctor) cachep->ctor(p); else if (gfp & __GFP_ZERO) @@ -195,8 +198,9 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, } if (cachep->align) { - posix_memalign(&p[i], cachep->align, - cachep->size); + if (posix_memalign(&p[i], cachep->align, + cachep->size) < 0) + break; } else { p[i] = malloc(cachep->size); if (!p[i]) diff --git a/tools/testing/shared/shared.mk b/tools/testing/shared/shared.mk index a6bc51d0b0bf..923ee2492256 100644 --- a/tools/testing/shared/shared.mk +++ b/tools/testing/shared/shared.mk @@ -69,6 +69,7 @@ generated/bit-length.h: FORCE @if ! grep -qws CONFIG_$(LONG_BIT)BIT generated/bit-length.h; then \ echo "Generating $@"; \ echo "#define CONFIG_$(LONG_BIT)BIT 1" > $@; \ + echo "#define CONFIG_PHYS_ADDR_T_$(LONG_BIT)BIT 1" >> $@; \ fi FORCE: ; diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c index c53f220eb6cc..8fab5e13c7c3 100644 --- a/tools/testing/vma/vma.c +++ b/tools/testing/vma/vma.c @@ -4,6 +4,8 @@ #include <stdio.h> #include <stdlib.h> +#include "generated/bit-length.h" + #include "maple-shared.h" #include "vma_internal.h" @@ -1522,6 +1524,45 @@ static bool test_copy_vma(void) return true; } +static bool test_expand_only_mode(void) +{ + unsigned long flags = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE; + struct mm_struct mm = {}; + VMA_ITERATOR(vmi, &mm, 0); + struct vm_area_struct *vma_prev, *vma; + VMG_STATE(vmg, &mm, &vmi, 0x5000, 0x9000, flags, 5); + + /* + * Place a VMA prior to the one we're expanding so we assert that we do + * not erroneously try to traverse to the previous VMA even though we + * have, through the use of VMG_FLAG_JUST_EXPAND, indicated we do not + * need to do so. + */ + alloc_and_link_vma(&mm, 0, 0x2000, 0, flags); + + /* + * We will be positioned at the prev VMA, but looking to expand to + * 0x9000. + */ + vma_iter_set(&vmi, 0x3000); + vma_prev = alloc_and_link_vma(&mm, 0x3000, 0x5000, 3, flags); + vmg.prev = vma_prev; + vmg.merge_flags = VMG_FLAG_JUST_EXPAND; + + vma = vma_merge_new_range(&vmg); + ASSERT_NE(vma, NULL); + ASSERT_EQ(vma, vma_prev); + ASSERT_EQ(vmg.state, VMA_MERGE_SUCCESS); + ASSERT_EQ(vma->vm_start, 0x3000); + ASSERT_EQ(vma->vm_end, 0x9000); + ASSERT_EQ(vma->vm_pgoff, 3); + ASSERT_TRUE(vma_write_started(vma)); + ASSERT_EQ(vma_iter_addr(&vmi), 0x3000); + + cleanup_mm(&mm, &vmi); + return true; +} + int main(void) { int num_tests = 0, num_fail = 0; @@ -1553,6 +1594,7 @@ int main(void) TEST(vmi_prealloc_fail); TEST(merge_extend); TEST(copy_vma); + TEST(expand_only_mode); #undef TEST diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index c5b9da034511..e76ff579e1fd 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -44,7 +44,9 @@ #define VM_LOCKED 0x00002000 #define VM_IO 0x00004000 #define VM_DONTEXPAND 0x00040000 +#define VM_LOCKONFAULT 0x00080000 #define VM_ACCOUNT 0x00100000 +#define VM_NORESERVE 0x00200000 #define VM_MIXEDMAP 0x10000000 #define VM_STACK VM_GROWSDOWN #define VM_SHADOW_STACK VM_NONE @@ -53,6 +55,14 @@ #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC) #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) +/* This mask represents all the VMA flag bits used by mlock */ +#define VM_LOCKED_MASK (VM_LOCKED | VM_LOCKONFAULT) + +#ifdef CONFIG_64BIT +/* VM is sealed, in vm_flags */ +#define VM_SEALED _BITUL(63) +#endif + #define FIRST_USER_ADDRESS 0UL #define USER_PGTABLES_CEILING 0UL @@ -698,8 +708,9 @@ static inline void tlb_finish_mmu(struct mmu_gather *) { } -static inline void get_file(struct file *) +static inline struct file *get_file(struct file *f) { + return f; } static inline int vma_dup_policy(struct vm_area_struct *, struct vm_area_struct *) @@ -920,4 +931,106 @@ static inline bool signal_pending(void *) return false; } +static inline bool is_file_hugepages(struct file *) +{ + return false; +} + +static inline int security_vm_enough_memory_mm(struct mm_struct *, long) +{ + return true; +} + +static inline bool may_expand_vm(struct mm_struct *, vm_flags_t, unsigned long) +{ + return true; +} + +static inline void vm_flags_init(struct vm_area_struct *vma, + vm_flags_t flags) +{ + vma->__vm_flags = flags; +} + +static inline void vm_flags_set(struct vm_area_struct *vma, + vm_flags_t flags) +{ + vma_start_write(vma); + vma->__vm_flags |= flags; +} + +static inline void vm_flags_clear(struct vm_area_struct *vma, + vm_flags_t flags) +{ + vma_start_write(vma); + vma->__vm_flags &= ~flags; +} + +static inline int call_mmap(struct file *, struct vm_area_struct *) +{ + return 0; +} + +static inline int shmem_zero_setup(struct vm_area_struct *) +{ + return 0; +} + +static inline void vma_set_anonymous(struct vm_area_struct *vma) +{ + vma->vm_ops = NULL; +} + +static inline void ksm_add_vma(struct vm_area_struct *) +{ +} + +static inline void perf_event_mmap(struct vm_area_struct *) +{ +} + +static inline bool vma_is_dax(struct vm_area_struct *) +{ + return false; +} + +static inline struct vm_area_struct *get_gate_vma(struct mm_struct *) +{ + return NULL; +} + +bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot); + +/* Update vma->vm_page_prot to reflect vma->vm_flags. */ +static inline void vma_set_page_prot(struct vm_area_struct *vma) +{ + unsigned long vm_flags = vma->vm_flags; + pgprot_t vm_page_prot; + + /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */ + vm_page_prot = pgprot_modify(vma->vm_page_prot, vm_get_page_prot(vm_flags)); + + if (vma_wants_writenotify(vma, vm_page_prot)) { + vm_flags &= ~VM_SHARED; + /* testing: we inline vm_pgprot_modify() to avoid clash with vma.h. */ + vm_page_prot = pgprot_modify(vm_page_prot, vm_get_page_prot(vm_flags)); + } + /* remove_protection_ptes reads vma->vm_page_prot without mmap_lock */ + WRITE_ONCE(vma->vm_page_prot, vm_page_prot); +} + +static inline bool arch_validate_flags(unsigned long) +{ + return true; +} + +static inline void vma_close(struct vm_area_struct *) +{ +} + +static inline int mmap_file(struct file *, struct vm_area_struct *) +{ + return 0; +} + #endif /* __MM_VMA_INTERNAL_H */ diff --git a/tools/thermal/lib/Makefile b/tools/thermal/lib/Makefile index 82db451935c5..f2552f73a64c 100644 --- a/tools/thermal/lib/Makefile +++ b/tools/thermal/lib/Makefile @@ -3,7 +3,7 @@ LIBTHERMAL_TOOLS_VERSION = 0 LIBTHERMAL_TOOLS_PATCHLEVEL = 0 -LIBTHERMAL_TOOLS_EXTRAVERSION = 1 +LIBTHERMAL_TOOLS_EXTRAVERSION = 2 MAKEFLAGS += --no-print-directory diff --git a/tools/thermal/thermal-engine/thermal-engine.c b/tools/thermal/thermal-engine/thermal-engine.c index 9b1476a2680f..0764dc754771 100644 --- a/tools/thermal/thermal-engine/thermal-engine.c +++ b/tools/thermal/thermal-engine/thermal-engine.c @@ -38,6 +38,14 @@ struct thermal_data { struct thermal_handler *th; }; +static int show_threshold(struct thermal_threshold *th, __maybe_unused void *arg) +{ + INFO("threshold temp=%d, direction=%d\n", + th->temperature, th->direction); + + return 0; +} + static int show_trip(struct thermal_trip *tt, __maybe_unused void *arg) { INFO("trip id=%d, type=%d, temp=%d, hyst=%d\n", @@ -70,6 +78,8 @@ static int show_tz(struct thermal_zone *tz, __maybe_unused void *arg) for_each_thermal_trip(tz->trip, show_trip, NULL); + for_each_thermal_threshold(tz->thresholds, show_threshold, NULL); + show_temp(tz, arg); show_governor(tz, arg); @@ -77,6 +87,30 @@ static int show_tz(struct thermal_zone *tz, __maybe_unused void *arg) return 0; } +static int set_threshold(struct thermal_zone *tz, __maybe_unused void *arg) +{ + struct thermal_handler *th = arg; + int thresholds[] = { 43000, 65000, 49000, 55000, 57000 }; + size_t i; + + INFO("Setting threshold for thermal zone '%s', id=%d\n", tz->name, tz->id); + + if (thermal_cmd_threshold_flush(th, tz)) { + ERROR("Failed to flush all previous thresholds\n"); + return -1; + } + + for (i = 0; i < sizeof(thresholds) / sizeof(thresholds[0]); i++) + if (thermal_cmd_threshold_add(th, tz, thresholds[i], + THERMAL_THRESHOLD_WAY_UP | + THERMAL_THRESHOLD_WAY_DOWN)) { + ERROR("Failed to set threshold\n"); + return -1; + } + + return 0; +} + static int tz_create(const char *name, int tz_id, __maybe_unused void *arg) { INFO("Thermal zone '%s'/%d created\n", name, tz_id); @@ -197,20 +231,62 @@ static int gov_change(int tz_id, const char *name, __maybe_unused void *arg) return 0; } +static int threshold_add(int tz_id, int temp, int direction, __maybe_unused void *arg) +{ + INFO("Threshold added tz_id=%d: temp=%d, direction=%d\n", tz_id, temp, direction); + + return 0; +} + +static int threshold_delete(int tz_id, int temp, int direction, __maybe_unused void *arg) +{ + INFO("Threshold deleted tz_id=%d: temp=%d, direction=%d\n", tz_id, temp, direction); + + return 0; +} + +static int threshold_flush(int tz_id, __maybe_unused void *arg) +{ + INFO("Thresholds flushed tz_id=%d\n", tz_id); + + return 0; +} + +static int threshold_up(int tz_id, int temp, int prev_temp, __maybe_unused void *arg) +{ + INFO("Threshold crossed way up tz_id=%d: temp=%d, prev_temp=%d\n", + tz_id, temp, prev_temp); + + return 0; +} + +static int threshold_down(int tz_id, int temp, int prev_temp, __maybe_unused void *arg) +{ + INFO("Threshold crossed way down tz_id=%d: temp=%d, prev_temp=%d\n", + tz_id, temp, prev_temp); + + return 0; +} + static struct thermal_ops ops = { - .events.tz_create = tz_create, - .events.tz_delete = tz_delete, - .events.tz_disable = tz_disable, - .events.tz_enable = tz_enable, - .events.trip_high = trip_high, - .events.trip_low = trip_low, - .events.trip_add = trip_add, - .events.trip_delete = trip_delete, - .events.trip_change = trip_change, - .events.cdev_add = cdev_add, - .events.cdev_delete = cdev_delete, - .events.cdev_update = cdev_update, - .events.gov_change = gov_change + .events.tz_create = tz_create, + .events.tz_delete = tz_delete, + .events.tz_disable = tz_disable, + .events.tz_enable = tz_enable, + .events.trip_high = trip_high, + .events.trip_low = trip_low, + .events.trip_add = trip_add, + .events.trip_delete = trip_delete, + .events.trip_change = trip_change, + .events.cdev_add = cdev_add, + .events.cdev_delete = cdev_delete, + .events.cdev_update = cdev_update, + .events.gov_change = gov_change, + .events.threshold_add = threshold_add, + .events.threshold_delete = threshold_delete, + .events.threshold_flush = threshold_flush, + .events.threshold_up = threshold_up, + .events.threshold_down = threshold_down, }; static int thermal_event(__maybe_unused int fd, __maybe_unused void *arg) @@ -280,6 +356,7 @@ enum { THERMAL_ENGINE_DAEMON_ERROR, THERMAL_ENGINE_LOG_ERROR, THERMAL_ENGINE_THERMAL_ERROR, + THERMAL_ENGINE_THRESHOLD_ERROR, THERMAL_ENGINE_MAINLOOP_ERROR, }; @@ -318,6 +395,8 @@ int main(int argc, char *argv[]) return THERMAL_ENGINE_THERMAL_ERROR; } + for_each_thermal_zone(td.tz, set_threshold, td.th); + for_each_thermal_zone(td.tz, show_tz, td.th); if (mainloop_init()) { diff --git a/tools/thermal/thermometer/thermometer.c b/tools/thermal/thermometer/thermometer.c index 1a87a0a77f9f..022865da8e3c 100644 --- a/tools/thermal/thermometer/thermometer.c +++ b/tools/thermal/thermometer/thermometer.c @@ -259,6 +259,7 @@ static int thermometer_add_tz(const char *path, const char *name, int polling, { int fd; char tz_path[PATH_MAX]; + struct tz *tz; sprintf(tz_path, CLASS_THERMAL"/%s/temp", path); @@ -268,13 +269,13 @@ static int thermometer_add_tz(const char *path, const char *name, int polling, return -1; } - thermometer->tz = realloc(thermometer->tz, - sizeof(*thermometer->tz) * (thermometer->nr_tz + 1)); - if (!thermometer->tz) { + tz = realloc(thermometer->tz, sizeof(*thermometer->tz) * (thermometer->nr_tz + 1)); + if (!tz) { ERROR("Failed to allocate thermometer->tz\n"); return -1; } + thermometer->tz = tz; thermometer->tz[thermometer->nr_tz].fd_temp = fd; thermometer->tz[thermometer->nr_tz].name = strdup(name); thermometer->tz[thermometer->nr_tz].polling = polling; diff --git a/tools/tracing/rtla/Makefile b/tools/tracing/rtla/Makefile index b5878be36125..a6a7dee16622 100644 --- a/tools/tracing/rtla/Makefile +++ b/tools/tracing/rtla/Makefile @@ -32,8 +32,10 @@ DOCSRC := ../../../Documentation/tools/rtla/ FEATURE_TESTS := libtraceevent FEATURE_TESTS += libtracefs +FEATURE_TESTS += libcpupower FEATURE_DISPLAY := libtraceevent FEATURE_DISPLAY += libtracefs +FEATURE_DISPLAY += libcpupower ifeq ($(V),1) Q = diff --git a/tools/tracing/rtla/Makefile.config b/tools/tracing/rtla/Makefile.config index 5f8c286712d4..92a6e12e42d3 100644 --- a/tools/tracing/rtla/Makefile.config +++ b/tools/tracing/rtla/Makefile.config @@ -43,6 +43,16 @@ else $(info libtracefs is missing. Please install libtracefs-dev/libtracefs-devel) endif +$(call feature_check,libcpupower) +ifeq ($(feature-libcpupower), 1) + $(call detected,CONFIG_LIBCPUPOWER) + CFLAGS += -DHAVE_LIBCPUPOWER_SUPPORT + EXTLIBS += -lcpupower +else + $(info libcpupower is missing, building without --deepest-idle-state support.) + $(info Please install libcpupower-dev/kernel-tools-libs-devel) +endif + ifeq ($(STOP_ERROR),1) $(error Please, check the errors above.) endif diff --git a/tools/tracing/rtla/Makefile.rtla b/tools/tracing/rtla/Makefile.rtla index 3ff0b8970896..cc1d6b615475 100644 --- a/tools/tracing/rtla/Makefile.rtla +++ b/tools/tracing/rtla/Makefile.rtla @@ -38,7 +38,7 @@ BINDIR := /usr/bin .PHONY: install install: doc_install @$(MKDIR) -p $(DESTDIR)$(BINDIR) - $(call QUIET_INSTALL,rtla)$(INSTALL) rtla -m 755 $(DESTDIR)$(BINDIR) + $(call QUIET_INSTALL,rtla)$(INSTALL) $(RTLA) -m 755 $(DESTDIR)$(BINDIR) @$(STRIP) $(DESTDIR)$(BINDIR)/rtla @test ! -f $(DESTDIR)$(BINDIR)/osnoise || $(RM) $(DESTDIR)$(BINDIR)/osnoise @$(LN) rtla $(DESTDIR)$(BINDIR)/osnoise diff --git a/tools/tracing/rtla/README.txt b/tools/tracing/rtla/README.txt index 4af3fd40f171..dd5621038c55 100644 --- a/tools/tracing/rtla/README.txt +++ b/tools/tracing/rtla/README.txt @@ -11,6 +11,7 @@ RTLA depends on the following libraries and tools: - libtracefs - libtraceevent + - libcpupower (optional, for --deepest-idle-state) It also depends on python3-docutils to compile man pages. @@ -26,6 +27,9 @@ For development, we suggest the following steps for compiling rtla: $ make $ sudo make install $ cd .. + $ cd $libcpupower_src + $ make + $ sudo make install $ cd $rtla_src $ make $ sudo make install diff --git a/tools/tracing/rtla/sample/timerlat_load.py b/tools/tracing/rtla/sample/timerlat_load.py index 8cc5eb2d2e69..a819c3588073 100644 --- a/tools/tracing/rtla/sample/timerlat_load.py +++ b/tools/tracing/rtla/sample/timerlat_load.py @@ -25,50 +25,54 @@ import sys import os parser = argparse.ArgumentParser(description='user-space timerlat thread in Python') -parser.add_argument("cpu", help='CPU to run timerlat thread') -parser.add_argument("-p", "--prio", help='FIFO priority') - +parser.add_argument("cpu", type=int, help='CPU to run timerlat thread') +parser.add_argument("-p", "--prio", type=int, help='FIFO priority') args = parser.parse_args() try: - affinity_mask = { int(args.cpu) } -except: - print("Invalid cpu: " + args.cpu) - exit(1) - -try: - os.sched_setaffinity(0, affinity_mask); -except: - print("Error setting affinity") - exit(1) + affinity_mask = {args.cpu} + os.sched_setaffinity(0, affinity_mask) +except Exception as e: + print(f"Error setting affinity: {e}") + sys.exit(1) -if (args.prio): +if args.prio: try: - param = os.sched_param(int(args.prio)) + param = os.sched_param(args.prio) os.sched_setscheduler(0, os.SCHED_FIFO, param) - except: - print("Error setting priority") - exit(1) + except Exception as e: + print(f"Error setting priority: {e}") + sys.exit(1) try: - timerlat_path = "/sys/kernel/tracing/osnoise/per_cpu/cpu" + args.cpu + "/timerlat_fd" + timerlat_path = f"/sys/kernel/tracing/osnoise/per_cpu/cpu{args.cpu}/timerlat_fd" timerlat_fd = open(timerlat_path, 'r') -except: +except PermissionError: + print("Permission denied. Please check your access rights.") + sys.exit(1) +except OSError: print("Error opening timerlat fd, did you run timerlat -U?") - exit(1) + sys.exit(1) try: - data_fd = open("/dev/full", 'r'); -except: - print("Error opening data fd") + data_fd = open("/dev/full", 'r') +except Exception as e: + print(f"Error opening data fd: {e}") + sys.exit(1) while True: try: timerlat_fd.read(1) - data_fd.read(20*1024*1024) - except: + data_fd.read(20 * 1024 * 1024) + except KeyboardInterrupt: print("Leaving") break + except IOError as e: + print(f"I/O error occurred: {e}") + break + except Exception as e: + print(f"Unexpected error: {e}") + break timerlat_fd.close() data_fd.close() diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 2f756628613d..45647495ce3b 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -442,7 +442,7 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) case 'd': params->duration = parse_seconds_duration(optarg); if (!params->duration) - osnoise_top_usage(params, "Invalid -D duration\n"); + osnoise_top_usage(params, "Invalid -d duration\n"); break; case 'e': tevent = trace_event_alloc(optarg); @@ -627,7 +627,7 @@ osnoise_top_apply_config(struct osnoise_tool *tool, struct osnoise_top_params *p auto_house_keeping(¶ms->monitored_cpus); } - if (isatty(1) && !params->quiet) + if (isatty(STDOUT_FILENO) && !params->quiet) params->pretty_output = 1; return 0; diff --git a/tools/tracing/rtla/src/timerlat_hist.c b/tools/tracing/rtla/src/timerlat_hist.c index a3907c390d67..8b66387e5f35 100644 --- a/tools/tracing/rtla/src/timerlat_hist.c +++ b/tools/tracing/rtla/src/timerlat_hist.c @@ -55,6 +55,7 @@ struct timerlat_hist_params { int entries; int warmup; int buffer_size; + int deepest_idle_state; }; struct timerlat_hist_cpu { @@ -62,9 +63,9 @@ struct timerlat_hist_cpu { int *thread; int *user; - int irq_count; - int thread_count; - int user_count; + unsigned long long irq_count; + unsigned long long thread_count; + unsigned long long user_count; unsigned long long min_irq; unsigned long long sum_irq; @@ -304,15 +305,15 @@ timerlat_print_summary(struct timerlat_hist_params *params, continue; if (!params->no_irq) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].irq_count); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].thread_count); if (params->user_hist) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", data->hist[cpu].user_count); } trace_seq_printf(trace->seq, "\n"); @@ -488,15 +489,15 @@ timerlat_print_stats_all(struct timerlat_hist_params *params, trace_seq_printf(trace->seq, "count:"); if (!params->no_irq) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", sum.irq_count); if (!params->no_thread) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", sum.thread_count); if (params->user_hist) - trace_seq_printf(trace->seq, "%9d ", + trace_seq_printf(trace->seq, "%9llu ", sum.user_count); trace_seq_printf(trace->seq, "\n"); @@ -655,7 +656,7 @@ static void timerlat_hist_usage(char *usage) " [-t[file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", " [-P priority] [-E N] [-b N] [--no-irq] [--no-thread] [--no-header] [--no-summary] \\", " [--no-index] [--with-zeros] [--dma-latency us] [-C[=cgroup_name]] [--no-aa] [--dump-task] [-u|-k]", - " [--warm-up s]", + " [--warm-up s] [--deepest-idle-state n]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", @@ -695,6 +696,7 @@ static void timerlat_hist_usage(char *usage) " -U/--user-load: enable timerlat for user-defined user-space workload", " --warm-up s: let the workload run for s seconds before collecting data", " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", + " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", NULL, }; @@ -732,6 +734,9 @@ static struct timerlat_hist_params /* disabled by default */ params->dma_latency = -1; + /* disabled by default */ + params->deepest_idle_state = -2; + /* display data in microseconds */ params->output_divisor = 1000; params->bucket_size = 1; @@ -772,13 +777,14 @@ static struct timerlat_hist_params {"dump-task", no_argument, 0, '\1'}, {"warm-up", required_argument, 0, '\2'}, {"trace-buffer-size", required_argument, 0, '\3'}, + {"deepest-idle-state", required_argument, 0, '\4'}, {0, 0, 0, 0} }; /* getopt_long stores the option index here. */ int option_index = 0; - c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:knp:P:s:t::T:uU0123456:7:8:9\1\2:\3", + c = getopt_long(argc, argv, "a:c:C::b:d:e:E:DhH:i:knp:P:s:t::T:uU0123456:7:8:9\1\2:\3:", long_options, &option_index); /* detect the end of the options. */ @@ -960,6 +966,9 @@ static struct timerlat_hist_params case '\3': params->buffer_size = get_llong_from_str(optarg); break; + case '\4': + params->deepest_idle_state = get_llong_from_str(optarg); + break; default: timerlat_hist_usage("Invalid option"); } @@ -1064,7 +1073,7 @@ timerlat_hist_apply_config(struct osnoise_tool *tool, struct timerlat_hist_param * If the user did not specify a type of thread, try user-threads first. * Fall back to kernel threads otherwise. */ - if (!params->kernel_workload && !params->user_workload) { + if (!params->kernel_workload && !params->user_hist) { retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd"); if (retval) { debug_msg("User-space interface detected, setting user-threads\n"); @@ -1152,6 +1161,7 @@ int timerlat_hist_main(int argc, char *argv[]) int return_value = 1; pthread_t timerlat_u; int retval; + int nr_cpus, i; params = timerlat_hist_parse_args(argc, argv); if (!params) @@ -1201,6 +1211,28 @@ int timerlat_hist_main(int argc, char *argv[]) } } + if (params->deepest_idle_state >= -1) { + if (!have_libcpupower_support()) { + err_msg("rtla built without libcpupower, --deepest-idle-state is not supported\n"); + goto out_free; + } + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) + continue; + if (save_cpu_idle_disable_state(i) < 0) { + err_msg("Could not save cpu idle state.\n"); + goto out_free; + } + if (set_deepest_cpu_idle_state(i, params->deepest_idle_state) < 0) { + err_msg("Could not set deepest cpu idle state.\n"); + goto out_free; + } + } + } + if (params->trace_output) { record = osnoise_init_trace_tool("timerlat"); if (!record) { @@ -1332,6 +1364,13 @@ out_hist: timerlat_aa_destroy(); if (dma_latency_fd >= 0) close(dma_latency_fd); + if (params->deepest_idle_state >= -1) { + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) + continue; + restore_cpu_idle_disable_state(i); + } + } trace_events_destroy(&record->trace, params->events); params->events = NULL; out_free: @@ -1340,6 +1379,7 @@ out_free: osnoise_destroy_tool(record); osnoise_destroy_tool(tool); free(params); + free_cpu_idle_disable_states(); out_exit: exit(return_value); } diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 8c16419fe22a..059b468981e4 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -48,15 +48,16 @@ struct timerlat_top_params { int pretty_output; int warmup; int buffer_size; + int deepest_idle_state; cpu_set_t hk_cpu_set; struct sched_attr sched_param; struct trace_events *events; }; struct timerlat_top_cpu { - int irq_count; - int thread_count; - int user_count; + unsigned long long irq_count; + unsigned long long thread_count; + unsigned long long user_count; unsigned long long cur_irq; unsigned long long min_irq; @@ -280,7 +281,7 @@ static void timerlat_top_print(struct osnoise_tool *top, int cpu) /* * Unless trace is being lost, IRQ counter is always the max. */ - trace_seq_printf(s, "%3d #%-9d |", cpu, cpu_data->irq_count); + trace_seq_printf(s, "%3d #%-9llu |", cpu, cpu_data->irq_count); if (!cpu_data->irq_count) { trace_seq_printf(s, "%s %s %s %s |", no_value, no_value, no_value, no_value); @@ -447,7 +448,7 @@ static void timerlat_top_usage(char *usage) "", " usage: rtla timerlat [top] [-h] [-q] [-a us] [-d s] [-D] [-n] [-p us] [-i us] [-T us] [-s us] \\", " [[-t[file]] [-e sys[:event]] [--filter <filter>] [--trigger <trigger>] [-c cpu-list] [-H cpu-list]\\", - " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u|-k] [--warm-up s]", + " [-P priority] [--dma-latency us] [--aa-only us] [-C[=cgroup_name]] [-u|-k] [--warm-up s] [--deepest-idle-state n]", "", " -h/--help: print this menu", " -a/--auto: set automatic trace mode, stopping the session if argument in us latency is hit", @@ -459,7 +460,7 @@ static void timerlat_top_usage(char *usage) " -c/--cpus cpus: run the tracer only on the given cpus", " -H/--house-keeping cpus: run rtla control threads only on the given cpus", " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", - " -d/--duration time[m|h|d]: duration of the session in seconds", + " -d/--duration time[s|m|h|d]: duration of the session", " -D/--debug: print debug info", " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)", " -t/--trace[file]: save the stopped trace to [file|timerlat_trace.txt]", @@ -481,6 +482,7 @@ static void timerlat_top_usage(char *usage) " -U/--user-load: enable timerlat for user-defined user-space workload", " --warm-up s: let the workload run for s seconds before collecting data", " --trace-buffer-size kB: set the per-cpu trace buffer size in kB", + " --deepest-idle-state n: only go down to idle state n on cpus used by timerlat to reduce exit from idle latency", NULL, }; @@ -518,6 +520,9 @@ static struct timerlat_top_params /* disabled by default */ params->dma_latency = -1; + /* disabled by default */ + params->deepest_idle_state = -2; + /* display data in microseconds */ params->output_divisor = 1000; @@ -550,6 +555,7 @@ static struct timerlat_top_params {"aa-only", required_argument, 0, '5'}, {"warm-up", required_argument, 0, '6'}, {"trace-buffer-size", required_argument, 0, '7'}, + {"deepest-idle-state", required_argument, 0, '8'}, {0, 0, 0, 0} }; @@ -613,7 +619,7 @@ static struct timerlat_top_params case 'd': params->duration = parse_seconds_duration(optarg); if (!params->duration) - timerlat_top_usage("Invalid -D duration\n"); + timerlat_top_usage("Invalid -d duration\n"); break; case 'e': tevent = trace_event_alloc(optarg); @@ -726,6 +732,9 @@ static struct timerlat_top_params case '7': params->buffer_size = get_llong_from_str(optarg); break; + case '8': + params->deepest_idle_state = get_llong_from_str(optarg); + break; default: timerlat_top_usage("Invalid option"); } @@ -830,7 +839,7 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params * * If the user did not specify a type of thread, try user-threads first. * Fall back to kernel threads otherwise. */ - if (!params->kernel_workload && !params->user_workload) { + if (!params->kernel_workload && !params->user_top) { retval = tracefs_file_exists(NULL, "osnoise/per_cpu/cpu0/timerlat_fd"); if (retval) { debug_msg("User-space interface detected, setting user-threads\n"); @@ -850,7 +859,7 @@ timerlat_top_apply_config(struct osnoise_tool *top, struct timerlat_top_params * } } - if (isatty(1) && !params->quiet) + if (isatty(STDOUT_FILENO) && !params->quiet) params->pretty_output = 1; return 0; @@ -922,6 +931,7 @@ int timerlat_top_main(int argc, char *argv[]) int return_value = 1; char *max_lat; int retval; + int nr_cpus, i; params = timerlat_top_parse_args(argc, argv); if (!params) @@ -971,6 +981,28 @@ int timerlat_top_main(int argc, char *argv[]) } } + if (params->deepest_idle_state >= -1) { + if (!have_libcpupower_support()) { + err_msg("rtla built without libcpupower, --deepest-idle-state is not supported\n"); + goto out_free; + } + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) + continue; + if (save_cpu_idle_disable_state(i) < 0) { + err_msg("Could not save cpu idle state.\n"); + goto out_free; + } + if (set_deepest_cpu_idle_state(i, params->deepest_idle_state) < 0) { + err_msg("Could not set deepest cpu idle state.\n"); + goto out_free; + } + } + } + if (params->trace_output) { record = osnoise_init_trace_tool("timerlat"); if (!record) { @@ -1125,6 +1157,13 @@ out_top: timerlat_aa_destroy(); if (dma_latency_fd >= 0) close(dma_latency_fd); + if (params->deepest_idle_state >= -1) { + for (i = 0; i < nr_cpus; i++) { + if (params->cpus && !CPU_ISSET(i, ¶ms->monitored_cpus)) + continue; + restore_cpu_idle_disable_state(i); + } + } trace_events_destroy(&record->trace, params->events); params->events = NULL; out_free: @@ -1134,6 +1173,7 @@ out_free: osnoise_destroy_tool(record); osnoise_destroy_tool(top); free(params); + free_cpu_idle_disable_states(); out_exit: exit(return_value); } diff --git a/tools/tracing/rtla/src/utils.c b/tools/tracing/rtla/src/utils.c index 9ac71a66840c..4995d35cf3ec 100644 --- a/tools/tracing/rtla/src/utils.c +++ b/tools/tracing/rtla/src/utils.c @@ -4,6 +4,9 @@ */ #define _GNU_SOURCE +#ifdef HAVE_LIBCPUPOWER_SUPPORT +#include <cpuidle.h> +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ #include <dirent.h> #include <stdarg.h> #include <stdlib.h> @@ -211,29 +214,25 @@ long parse_ns_duration(char *val) /* * This is a set of helper functions to use SCHED_DEADLINE. */ -#ifdef __x86_64__ -# define __NR_sched_setattr 314 -# define __NR_sched_getattr 315 -#elif __i386__ -# define __NR_sched_setattr 351 -# define __NR_sched_getattr 352 -#elif __arm__ -# define __NR_sched_setattr 380 -# define __NR_sched_getattr 381 -#elif __aarch64__ || __riscv -# define __NR_sched_setattr 274 -# define __NR_sched_getattr 275 -#elif __powerpc__ -# define __NR_sched_setattr 355 -# define __NR_sched_getattr 356 -#elif __s390x__ -# define __NR_sched_setattr 345 -# define __NR_sched_getattr 346 +#ifndef __NR_sched_setattr +# ifdef __x86_64__ +# define __NR_sched_setattr 314 +# elif __i386__ +# define __NR_sched_setattr 351 +# elif __arm__ +# define __NR_sched_setattr 380 +# elif __aarch64__ || __riscv +# define __NR_sched_setattr 274 +# elif __powerpc__ +# define __NR_sched_setattr 355 +# elif __s390x__ +# define __NR_sched_setattr 345 +# endif #endif #define SCHED_DEADLINE 6 -static inline int sched_setattr(pid_t pid, const struct sched_attr *attr, +static inline int syscall_sched_setattr(pid_t pid, const struct sched_attr *attr, unsigned int flags) { return syscall(__NR_sched_setattr, pid, attr, flags); } @@ -243,7 +242,7 @@ int __set_sched_attr(int pid, struct sched_attr *attr) int flags = 0; int retval; - retval = sched_setattr(pid, attr, flags); + retval = syscall_sched_setattr(pid, attr, flags); if (retval < 0) { err_msg("Failed to set sched attributes to the pid %d: %s\n", pid, strerror(errno)); @@ -519,6 +518,153 @@ int set_cpu_dma_latency(int32_t latency) return fd; } +#ifdef HAVE_LIBCPUPOWER_SUPPORT +static unsigned int **saved_cpu_idle_disable_state; +static size_t saved_cpu_idle_disable_state_alloc_ctr; + +/* + * save_cpu_idle_state_disable - save disable for all idle states of a cpu + * + * Saves the current disable of all idle states of a cpu, to be subsequently + * restored via restore_cpu_idle_disable_state. + * + * Return: idle state count on success, negative on error + */ +int save_cpu_idle_disable_state(unsigned int cpu) +{ + unsigned int nr_states; + unsigned int state; + int disabled; + int nr_cpus; + + nr_states = cpuidle_state_count(cpu); + + if (nr_states == 0) + return 0; + + if (saved_cpu_idle_disable_state == NULL) { + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + saved_cpu_idle_disable_state = calloc(nr_cpus, sizeof(unsigned int *)); + if (!saved_cpu_idle_disable_state) + return -1; + } + + saved_cpu_idle_disable_state[cpu] = calloc(nr_states, sizeof(unsigned int)); + if (!saved_cpu_idle_disable_state[cpu]) + return -1; + saved_cpu_idle_disable_state_alloc_ctr++; + + for (state = 0; state < nr_states; state++) { + disabled = cpuidle_is_state_disabled(cpu, state); + if (disabled < 0) + return disabled; + saved_cpu_idle_disable_state[cpu][state] = disabled; + } + + return nr_states; +} + +/* + * restore_cpu_idle_disable_state - restore disable for all idle states of a cpu + * + * Restores the current disable state of all idle states of a cpu that was + * previously saved by save_cpu_idle_disable_state. + * + * Return: idle state count on success, negative on error + */ +int restore_cpu_idle_disable_state(unsigned int cpu) +{ + unsigned int nr_states; + unsigned int state; + int disabled; + int result; + + nr_states = cpuidle_state_count(cpu); + + if (nr_states == 0) + return 0; + + if (!saved_cpu_idle_disable_state) + return -1; + + for (state = 0; state < nr_states; state++) { + if (!saved_cpu_idle_disable_state[cpu]) + return -1; + disabled = saved_cpu_idle_disable_state[cpu][state]; + result = cpuidle_state_disable(cpu, state, disabled); + if (result < 0) + return result; + } + + free(saved_cpu_idle_disable_state[cpu]); + saved_cpu_idle_disable_state[cpu] = NULL; + saved_cpu_idle_disable_state_alloc_ctr--; + if (saved_cpu_idle_disable_state_alloc_ctr == 0) { + free(saved_cpu_idle_disable_state); + saved_cpu_idle_disable_state = NULL; + } + + return nr_states; +} + +/* + * free_cpu_idle_disable_states - free saved idle state disable for all cpus + * + * Frees the memory used for storing cpu idle state disable for all cpus + * and states. + * + * Normally, the memory is freed automatically in + * restore_cpu_idle_disable_state; this is mostly for cleaning up after an + * error. + */ +void free_cpu_idle_disable_states(void) +{ + int cpu; + int nr_cpus; + + if (!saved_cpu_idle_disable_state) + return; + + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + + for (cpu = 0; cpu < nr_cpus; cpu++) { + free(saved_cpu_idle_disable_state[cpu]); + saved_cpu_idle_disable_state[cpu] = NULL; + } + + free(saved_cpu_idle_disable_state); + saved_cpu_idle_disable_state = NULL; +} + +/* + * set_deepest_cpu_idle_state - limit idle state of cpu + * + * Disables all idle states deeper than the one given in + * deepest_state (assuming states with higher number are deeper). + * + * This is used to reduce the exit from idle latency. Unlike + * set_cpu_dma_latency, it can disable idle states per cpu. + * + * Return: idle state count on success, negative on error + */ +int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int deepest_state) +{ + unsigned int nr_states; + unsigned int state; + int result; + + nr_states = cpuidle_state_count(cpu); + + for (state = deepest_state + 1; state < nr_states; state++) { + result = cpuidle_state_disable(cpu, state, 1); + if (result < 0) + return result; + } + + return nr_states; +} +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ + #define _STR(x) #x #define STR(x) _STR(x) diff --git a/tools/tracing/rtla/src/utils.h b/tools/tracing/rtla/src/utils.h index d44513e6c66a..101d4799a009 100644 --- a/tools/tracing/rtla/src/utils.h +++ b/tools/tracing/rtla/src/utils.h @@ -46,6 +46,7 @@ update_sum(unsigned long long *a, unsigned long long *b) *a += *b; } +#ifndef SCHED_ATTR_SIZE_VER0 struct sched_attr { uint32_t size; uint32_t sched_policy; @@ -56,6 +57,7 @@ struct sched_attr { uint64_t sched_deadline; uint64_t sched_period; }; +#endif /* SCHED_ATTR_SIZE_VER0 */ int parse_prio(char *arg, struct sched_attr *sched_param); int parse_cpu_set(char *cpu_list, cpu_set_t *set); @@ -64,6 +66,19 @@ int set_comm_sched_attr(const char *comm_prefix, struct sched_attr *attr); int set_comm_cgroup(const char *comm_prefix, const char *cgroup); int set_pid_cgroup(pid_t pid, const char *cgroup); int set_cpu_dma_latency(int32_t latency); +#ifdef HAVE_LIBCPUPOWER_SUPPORT +int save_cpu_idle_disable_state(unsigned int cpu); +int restore_cpu_idle_disable_state(unsigned int cpu); +void free_cpu_idle_disable_states(void); +int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int state); +static inline int have_libcpupower_support(void) { return 1; } +#else +static inline int save_cpu_idle_disable_state(unsigned int cpu) { return -1; } +static inline int restore_cpu_idle_disable_state(unsigned int cpu) { return -1; } +static inline void free_cpu_idle_disable_states(void) { } +static inline int set_deepest_cpu_idle_state(unsigned int cpu, unsigned int state) { return -1; } +static inline int have_libcpupower_support(void) { return 0; } +#endif /* HAVE_LIBCPUPOWER_SUPPORT */ int auto_house_keeping(cpu_set_t *monitored_cpus); #define ns_to_usf(x) (((double)x/1000)) diff --git a/tools/usb/usbip/src/usbip_detach.c b/tools/usb/usbip/src/usbip_detach.c index b29101986b5a..6b78d4a81e95 100644 --- a/tools/usb/usbip/src/usbip_detach.c +++ b/tools/usb/usbip/src/usbip_detach.c @@ -68,6 +68,7 @@ static int detach_port(char *port) } if (!found) { + ret = -1; err("Invalid port %s > maxports %d", port, vhci_driver->nports); goto call_driver_close; diff --git a/tools/verification/dot2/automata.py b/tools/verification/dot2/automata.py index baffeb960ff0..bdeb98baa8b0 100644 --- a/tools/verification/dot2/automata.py +++ b/tools/verification/dot2/automata.py @@ -29,11 +29,11 @@ class Automata: def __get_model_name(self): basename = ntpath.basename(self.__dot_path) - if basename.endswith(".dot") == False: + if not basename.endswith(".dot") and not basename.endswith(".gv"): print("not a dot file") raise Exception("not a dot file: %s" % self.__dot_path) - model_name = basename[0:-4] + model_name = ntpath.splitext(basename)[0] if model_name.__len__() == 0: raise Exception("not a dot file: %s" % self.__dot_path) @@ -68,9 +68,9 @@ class Automata: def __get_cursor_begin_events(self): cursor = 0 while self.__dot_lines[cursor].split()[0] != "{node": - cursor += 1 + cursor += 1 while self.__dot_lines[cursor].split()[0] == "{node": - cursor += 1 + cursor += 1 # skip initial state transition cursor += 1 return cursor @@ -94,11 +94,11 @@ class Automata: initial_state = state[7:] else: states.append(state) - if self.__dot_lines[cursor].__contains__("doublecircle") == True: + if "doublecircle" in self.__dot_lines[cursor]: final_states.append(state) has_final_states = True - if self.__dot_lines[cursor].__contains__("ellipse") == True: + if "ellipse" in self.__dot_lines[cursor]: final_states.append(state) has_final_states = True @@ -110,7 +110,7 @@ class Automata: # Insert the initial state at the bein og the states states.insert(0, initial_state) - if has_final_states == False: + if not has_final_states: final_states.append(initial_state) return states, initial_state, final_states @@ -120,7 +120,7 @@ class Automata: cursor = self.__get_cursor_begin_events() events = [] - while self.__dot_lines[cursor][1] == '"': + while self.__dot_lines[cursor].lstrip()[0] == '"': # transitions have the format: # "all_fired" -> "both_fired" [ label = "disable_irq" ]; # ------------ event is here ------------^^^^^ @@ -161,7 +161,7 @@ class Automata: # and we are back! Let's fill the matrix cursor = self.__get_cursor_begin_events() - while self.__dot_lines[cursor][1] == '"': + while self.__dot_lines[cursor].lstrip()[0] == '"': if self.__dot_lines[cursor].split()[1] == "->": line = self.__dot_lines[cursor].split() origin_state = line[0].replace('"','').replace(',','_') diff --git a/tools/verification/rv/src/in_kernel.c b/tools/verification/rv/src/in_kernel.c index f04479ecc96c..f2bbc75a76f4 100644 --- a/tools/verification/rv/src/in_kernel.c +++ b/tools/verification/rv/src/in_kernel.c @@ -332,7 +332,7 @@ static void ikm_print_header(struct trace_seq *s) * ikm_event_handler - callback to handle event events * * Called any time a rv:"monitor"_event events is generated. - * It parses and print event. + * It parses and prints event. */ static int ikm_event_handler(struct trace_seq *s, struct tep_record *record, @@ -384,7 +384,7 @@ ikm_event_handler(struct trace_seq *s, struct tep_record *record, * ikm_error_handler - callback to handle error events * * Called any time a rv:"monitor"_errors events is generated. - * It parses and print event. + * It parses and prints event. */ static int ikm_error_handler(struct trace_seq *s, struct tep_record *record, diff --git a/tools/verification/rv/src/trace.c b/tools/verification/rv/src/trace.c index 2c7deed47f8d..1b9f9bfa1893 100644 --- a/tools/verification/rv/src/trace.c +++ b/tools/verification/rv/src/trace.c @@ -81,7 +81,7 @@ void trace_instance_destroy(struct trace_instance *trace) } /** - * trace_instance_init - create an trace instance + * trace_instance_init - create a trace instance * * It is more than the tracefs instance, as it contains other * things required for the tracing, such as the local events and diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c index 43d3a6aa1dcf..b9591223437a 100644 --- a/tools/virtio/vringh_test.c +++ b/tools/virtio/vringh_test.c @@ -519,7 +519,7 @@ int main(int argc, char *argv[]) errx(1, "virtqueue_add_sgs: %i", err); __kmalloc_fake = NULL; - /* Host retreives it. */ + /* Host retrieves it. */ vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov)); vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov)); |