Diffstat (limited to 'tools')
146 files changed, 3867 insertions, 888 deletions
diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd-ibs.h new file mode 100644 index 000000000000..174e7d83fcbd --- /dev/null +++ b/tools/arch/x86/include/asm/amd-ibs.h @@ -0,0 +1,132 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * From PPR Vol 1 for AMD Family 19h Model 01h B1 + * 55898 Rev 0.35 - Feb 5, 2021 + */ + +#include "msr-index.h" + +/* + * IBS Hardware MSRs + */ + +/* MSR 0xc0011030: IBS Fetch Control */ +union ibs_fetch_ctl { + __u64 val; + struct { + __u64 fetch_maxcnt:16,/* 0-15: instruction fetch max. count */ + fetch_cnt:16, /* 16-31: instruction fetch count */ + fetch_lat:16, /* 32-47: instruction fetch latency */ + fetch_en:1, /* 48: instruction fetch enable */ + fetch_val:1, /* 49: instruction fetch valid */ + fetch_comp:1, /* 50: instruction fetch complete */ + ic_miss:1, /* 51: i-cache miss */ + phy_addr_valid:1,/* 52: physical address valid */ + l1tlb_pgsz:2, /* 53-54: i-cache L1TLB page size + * (needs IbsPhyAddrValid) */ + l1tlb_miss:1, /* 55: i-cache fetch missed in L1TLB */ + l2tlb_miss:1, /* 56: i-cache fetch missed in L2TLB */ + rand_en:1, /* 57: random tagging enable */ + fetch_l2_miss:1,/* 58: L2 miss for sampled fetch + * (needs IbsFetchComp) */ + reserved:5; /* 59-63: reserved */ + }; +}; + +/* MSR 0xc0011033: IBS Execution Control */ +union ibs_op_ctl { + __u64 val; + struct { + __u64 opmaxcnt:16, /* 0-15: periodic op max. count */ + reserved0:1, /* 16: reserved */ + op_en:1, /* 17: op sampling enable */ + op_val:1, /* 18: op sample valid */ + cnt_ctl:1, /* 19: periodic op counter control */ + opmaxcnt_ext:7, /* 20-26: upper 7 bits of periodic op maximum count */ + reserved1:5, /* 27-31: reserved */ + opcurcnt:27, /* 32-58: periodic op counter current count */ + reserved2:5; /* 59-63: reserved */ + }; +}; + +/* MSR 0xc0011035: IBS Op Data 2 */ +union ibs_op_data { + __u64 val; + struct { + __u64 comp_to_ret_ctr:16, /* 0-15: op completion to retire count */ + tag_to_ret_ctr:16, /* 15-31: op tag to retire count */ + reserved1:2, /* 32-33: reserved */ + op_return:1, /* 34: return op */ + op_brn_taken:1, /* 35: taken branch op */ + op_brn_misp:1, /* 36: mispredicted branch op */ + op_brn_ret:1, /* 37: branch op retired */ + op_rip_invalid:1, /* 38: RIP is invalid */ + op_brn_fuse:1, /* 39: fused branch op */ + op_microcode:1, /* 40: microcode op */ + reserved2:23; /* 41-63: reserved */ + }; +}; + +/* MSR 0xc0011036: IBS Op Data 2 */ +union ibs_op_data2 { + __u64 val; + struct { + __u64 data_src:3, /* 0-2: data source */ + reserved0:1, /* 3: reserved */ + rmt_node:1, /* 4: destination node */ + cache_hit_st:1, /* 5: cache hit state */ + reserved1:57; /* 5-63: reserved */ + }; +}; + +/* MSR 0xc0011037: IBS Op Data 3 */ +union ibs_op_data3 { + __u64 val; + struct { + __u64 ld_op:1, /* 0: load op */ + st_op:1, /* 1: store op */ + dc_l1tlb_miss:1, /* 2: data cache L1TLB miss */ + dc_l2tlb_miss:1, /* 3: data cache L2TLB hit in 2M page */ + dc_l1tlb_hit_2m:1, /* 4: data cache L1TLB hit in 2M page */ + dc_l1tlb_hit_1g:1, /* 5: data cache L1TLB hit in 1G page */ + dc_l2tlb_hit_2m:1, /* 6: data cache L2TLB hit in 2M page */ + dc_miss:1, /* 7: data cache miss */ + dc_mis_acc:1, /* 8: misaligned access */ + reserved:4, /* 9-12: reserved */ + dc_wc_mem_acc:1, /* 13: write combining memory access */ + dc_uc_mem_acc:1, /* 14: uncacheable memory access */ + dc_locked_op:1, /* 15: locked operation */ + dc_miss_no_mab_alloc:1, /* 16: DC miss with no MAB allocated */ + dc_lin_addr_valid:1, /* 17: data cache linear address valid */ + 
dc_phy_addr_valid:1, /* 18: data cache physical address valid */ + dc_l2_tlb_hit_1g:1, /* 19: data cache L2 hit in 1GB page */ + l2_miss:1, /* 20: L2 cache miss */ + sw_pf:1, /* 21: software prefetch */ + op_mem_width:4, /* 22-25: load/store size in bytes */ + op_dc_miss_open_mem_reqs:6, /* 26-31: outstanding mem reqs on DC fill */ + dc_miss_lat:16, /* 32-47: data cache miss latency */ + tlb_refill_lat:16; /* 48-63: L1 TLB refill latency */ + }; +}; + +/* MSR 0xc001103c: IBS Fetch Control Extended */ +union ic_ibs_extd_ctl { + __u64 val; + struct { + __u64 itlb_refill_lat:16, /* 0-15: ITLB Refill latency for sampled fetch */ + reserved:48; /* 16-63: reserved */ + }; +}; + +/* + * IBS driver related + */ + +struct perf_ibs_data { + u32 size; + union { + u32 data[0]; /* data buffer starts here */ + u32 caps; + }; + u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; +}; diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index a6c327f8ad9e..2ef1f6513c68 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -295,6 +295,7 @@ struct kvm_debug_exit_arch { #define KVM_GUESTDBG_USE_HW_BP 0x00020000 #define KVM_GUESTDBG_INJECT_DB 0x00040000 #define KVM_GUESTDBG_INJECT_BP 0x00080000 +#define KVM_GUESTDBG_BLOCKIRQ 0x00100000 /* for KVM_SET_GUEST_DEBUG */ struct kvm_guest_debug_arch { diff --git a/tools/arch/x86/include/asm/unistd_32.h b/tools/arch/x86/include/uapi/asm/unistd_32.h index 60a89dba01b6..60a89dba01b6 100644 --- a/tools/arch/x86/include/asm/unistd_32.h +++ b/tools/arch/x86/include/uapi/asm/unistd_32.h diff --git a/tools/arch/x86/include/asm/unistd_64.h b/tools/arch/x86/include/uapi/asm/unistd_64.h index 4205ed4158bf..cb52a3a8b8fc 100644 --- a/tools/arch/x86/include/asm/unistd_64.h +++ b/tools/arch/x86/include/uapi/asm/unistd_64.h @@ -1,7 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __NR_userfaultfd -#define __NR_userfaultfd 282 -#endif #ifndef __NR_perf_event_open # define __NR_perf_event_open 298 #endif diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index c41f95815480..797699462cd8 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -37,10 +37,10 @@ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr) #define __get_next(t, insn) \ - ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) + ({ t r; memcpy(&r, insn->next_byte, sizeof(t)); insn->next_byte += sizeof(t); leXX_to_cpu(t, r); }) #define __peek_nbyte_next(t, insn, n) \ - ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); }) + ({ t r; memcpy(&r, (insn)->next_byte + n, sizeof(t)); leXX_to_cpu(t, r); }) #define get_next(t, insn) \ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); }) diff --git a/tools/bootconfig/include/linux/memblock.h b/tools/bootconfig/include/linux/memblock.h index 7862f217d85d..f2e506f7d57f 100644 --- a/tools/bootconfig/include/linux/memblock.h +++ b/tools/bootconfig/include/linux/memblock.h @@ -4,9 +4,8 @@ #include <stdlib.h> -#define __pa(addr) (addr) #define SMP_CACHE_BYTES 0 #define memblock_alloc(size, align) malloc(size) -#define memblock_free(paddr, size) free(paddr) +#define memblock_free_ptr(paddr, size) free(paddr) #endif diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h index 95c072b70d0e..8816f06fc6c7 100644 --- a/tools/include/linux/compiler-gcc.h +++ b/tools/include/linux/compiler-gcc.h @@ -16,9 +16,9 @@ # define __fallthrough __attribute__ ((fallthrough)) #endif -#if 
GCC_VERSION >= 40300 +#if __has_attribute(__error__) # define __compiletime_error(message) __attribute__((error(message))) -#endif /* GCC_VERSION >= 40300 */ +#endif /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) @@ -38,7 +38,3 @@ #endif #define __printf(a, b) __attribute__((format(printf, a, b))) #define __scanf(a, b) __attribute__((format(scanf, a, b))) - -#if GCC_VERSION >= 50100 -#define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 -#endif diff --git a/tools/include/linux/overflow.h b/tools/include/linux/overflow.h index 8712ff70995f..dcb0c1bf6866 100644 --- a/tools/include/linux/overflow.h +++ b/tools/include/linux/overflow.h @@ -5,12 +5,9 @@ #include <linux/compiler.h> /* - * In the fallback code below, we need to compute the minimum and - * maximum values representable in a given type. These macros may also - * be useful elsewhere, so we provide them outside the - * COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW block. - * - * It would seem more obvious to do something like + * We need to compute the minimum and maximum values representable in a given + * type. These macros may also be useful elsewhere. It would seem more obvious + * to do something like: * * #define type_min(T) (T)(is_signed_type(T) ? (T)1 << (8*sizeof(T)-1) : 0) * #define type_max(T) (T)(is_signed_type(T) ? ((T)1 << (8*sizeof(T)-1)) - 1 : ~(T)0) @@ -36,8 +33,6 @@ #define type_max(T) ((T)((__type_half_max(T) - 1) + __type_half_max(T))) #define type_min(T) ((T)((T)-type_max(T)-(T)1)) - -#ifdef COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW /* * For simplicity and code hygiene, the fallback code below insists on * a, b and *d having the same type (similar to the min() and max() @@ -73,135 +68,6 @@ __builtin_mul_overflow(__a, __b, __d); \ }) -#else - - -/* Checking for unsigned overflow is relatively easy without causing UB. */ -#define __unsigned_add_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a + __b; \ - *__d < __a; \ -}) -#define __unsigned_sub_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a - __b; \ - __a < __b; \ -}) -/* - * If one of a or b is a compile-time constant, this avoids a division. - */ -#define __unsigned_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = __a * __b; \ - __builtin_constant_p(__b) ? \ - __b > 0 && __a > type_max(typeof(__a)) / __b : \ - __a > 0 && __b > type_max(typeof(__b)) / __a; \ -}) - -/* - * For signed types, detecting overflow is much harder, especially if - * we want to avoid UB. But the interface of these macros is such that - * we must provide a result in *d, and in fact we must produce the - * result promised by gcc's builtins, which is simply the possibly - * wrapped-around value. Fortunately, we can just formally do the - * operations in the widest relevant unsigned type (u64) and then - * truncate the result - gcc is smart enough to generate the same code - * with and without the (u64) casts. - */ - -/* - * Adding two signed integers can overflow only if they have the same - * sign, and overflow has happened iff the result has the opposite - * sign. 
- */ -#define __signed_add_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a + (u64)__b; \ - (((~(__a ^ __b)) & (*__d ^ __a)) \ - & type_min(typeof(__a))) != 0; \ -}) - -/* - * Subtraction is similar, except that overflow can now happen only - * when the signs are opposite. In this case, overflow has happened if - * the result has the opposite sign of a. - */ -#define __signed_sub_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a - (u64)__b; \ - ((((__a ^ __b)) & (*__d ^ __a)) \ - & type_min(typeof(__a))) != 0; \ -}) - -/* - * Signed multiplication is rather hard. gcc always follows C99, so - * division is truncated towards 0. This means that we can write the - * overflow check like this: - * - * (a > 0 && (b > MAX/a || b < MIN/a)) || - * (a < -1 && (b > MIN/a || b < MAX/a) || - * (a == -1 && b == MIN) - * - * The redundant casts of -1 are to silence an annoying -Wtype-limits - * (included in -Wextra) warning: When the type is u8 or u16, the - * __b_c_e in check_mul_overflow obviously selects - * __unsigned_mul_overflow, but unfortunately gcc still parses this - * code and warns about the limited range of __b. - */ - -#define __signed_mul_overflow(a, b, d) ({ \ - typeof(a) __a = (a); \ - typeof(b) __b = (b); \ - typeof(d) __d = (d); \ - typeof(a) __tmax = type_max(typeof(a)); \ - typeof(a) __tmin = type_min(typeof(a)); \ - (void) (&__a == &__b); \ - (void) (&__a == __d); \ - *__d = (u64)__a * (u64)__b; \ - (__b > 0 && (__a > __tmax/__b || __a < __tmin/__b)) || \ - (__b < (typeof(__b))-1 && (__a > __tmin/__b || __a < __tmax/__b)) || \ - (__b == (typeof(__b))-1 && __a == __tmin); \ -}) - - -#define check_add_overflow(a, b, d) \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_add_overflow(a, b, d), \ - __unsigned_add_overflow(a, b, d)) - -#define check_sub_overflow(a, b, d) \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_sub_overflow(a, b, d), \ - __unsigned_sub_overflow(a, b, d)) - -#define check_mul_overflow(a, b, d) \ - __builtin_choose_expr(is_signed_type(typeof(a)), \ - __signed_mul_overflow(a, b, d), \ - __unsigned_mul_overflow(a, b, d)) - - -#endif /* COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW */ - /** * array_size() - Calculate size of 2-dimensional array. 
* diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index a9d6fcd95f42..1c5fb86d455a 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -673,15 +673,15 @@ __SYSCALL(__NR_madvise, sys_madvise) #define __NR_remap_file_pages 234 __SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) #define __NR_mbind 235 -__SC_COMP(__NR_mbind, sys_mbind, compat_sys_mbind) +__SYSCALL(__NR_mbind, sys_mbind) #define __NR_get_mempolicy 236 -__SC_COMP(__NR_get_mempolicy, sys_get_mempolicy, compat_sys_get_mempolicy) +__SYSCALL(__NR_get_mempolicy, sys_get_mempolicy) #define __NR_set_mempolicy 237 -__SC_COMP(__NR_set_mempolicy, sys_set_mempolicy, compat_sys_set_mempolicy) +__SYSCALL(__NR_set_mempolicy, sys_set_mempolicy) #define __NR_migrate_pages 238 -__SC_COMP(__NR_migrate_pages, sys_migrate_pages, compat_sys_migrate_pages) +__SYSCALL(__NR_migrate_pages, sys_migrate_pages) #define __NR_move_pages 239 -__SC_COMP(__NR_move_pages, sys_move_pages, compat_sys_move_pages) +__SYSCALL(__NR_move_pages, sys_move_pages) #endif #define __NR_rt_tgsigqueueinfo 240 @@ -877,9 +877,11 @@ __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self) #define __NR_memfd_secret 447 __SYSCALL(__NR_memfd_secret, sys_memfd_secret) #endif +#define __NR_process_mrelease 448 +__SYSCALL(__NR_process_mrelease, sys_process_mrelease) #undef __NR_syscalls -#define __NR_syscalls 448 +#define __NR_syscalls 449 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index d043752a74cf..3b810b53ba8b 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -635,8 +635,8 @@ struct drm_gem_open { /** * DRM_CAP_VBLANK_HIGH_CRTC * - * If set to 1, the kernel supports specifying a CRTC index in the high bits of - * &drm_wait_vblank_request.type. + * If set to 1, the kernel supports specifying a :ref:`CRTC index<crtc_index>` + * in the high bits of &drm_wait_vblank_request.type. * * Starting kernel version 2.6.39, this capability is always set to 1. */ @@ -1050,6 +1050,16 @@ extern "C" { #define DRM_IOCTL_MODE_GETPROPBLOB DRM_IOWR(0xAC, struct drm_mode_get_blob) #define DRM_IOCTL_MODE_GETFB DRM_IOWR(0xAD, struct drm_mode_fb_cmd) #define DRM_IOCTL_MODE_ADDFB DRM_IOWR(0xAE, struct drm_mode_fb_cmd) +/** + * DRM_IOCTL_MODE_RMFB - Remove a framebuffer. + * + * This removes a framebuffer previously added via ADDFB/ADDFB2. The IOCTL + * argument is a framebuffer object ID. + * + * Warning: removing a framebuffer currently in-use on an enabled plane will + * disable that plane. The CRTC the plane is linked to may also be disabled + * (depending on driver capabilities). 
+ */ #define DRM_IOCTL_MODE_RMFB DRM_IOWR(0xAF, unsigned int) #define DRM_IOCTL_MODE_PAGE_FLIP DRM_IOWR(0xB0, struct drm_mode_crtc_page_flip) #define DRM_IOCTL_MODE_DIRTYFB DRM_IOWR(0xB1, struct drm_mode_fb_dirty_cmd) diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index c2c7759b7d2e..bde5860b3686 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -572,6 +572,15 @@ typedef struct drm_i915_irq_wait { #define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) #define I915_SCHEDULER_CAP_SEMAPHORES (1ul << 3) #define I915_SCHEDULER_CAP_ENGINE_BUSY_STATS (1ul << 4) +/* + * Indicates the 2k user priority levels are statically mapped into 3 buckets as + * follows: + * + * -1k to -1 Low priority + * 0 Normal priority + * 1 to 1k Highest priority + */ +#define I915_SCHEDULER_CAP_STATIC_PRIORITY_MAP (1ul << 5) #define I915_PARAM_HUC_STATUS 42 @@ -674,6 +683,9 @@ typedef struct drm_i915_irq_wait { */ #define I915_PARAM_HAS_EXEC_TIMELINE_FENCES 55 +/* Query if the kernel supports the I915_USERPTR_PROBE flag. */ +#define I915_PARAM_HAS_USERPTR_PROBE 56 + /* Must be kept compact -- no holes and well documented */ typedef struct drm_i915_getparam { @@ -849,45 +861,113 @@ struct drm_i915_gem_mmap_gtt { __u64 offset; }; +/** + * struct drm_i915_gem_mmap_offset - Retrieve an offset so we can mmap this buffer object. + * + * This struct is passed as argument to the `DRM_IOCTL_I915_GEM_MMAP_OFFSET` ioctl, + * and is used to retrieve the fake offset to mmap an object specified by &handle. + * + * The legacy way of using `DRM_IOCTL_I915_GEM_MMAP` is removed on gen12+. + * `DRM_IOCTL_I915_GEM_MMAP_GTT` is an older supported alias to this struct, but will behave + * as setting the &extensions to 0, and &flags to `I915_MMAP_OFFSET_GTT`. + */ struct drm_i915_gem_mmap_offset { - /** Handle for the object being mapped. */ + /** @handle: Handle for the object being mapped. */ __u32 handle; + /** @pad: Must be zero */ __u32 pad; /** - * Fake offset to use for subsequent mmap call + * @offset: The fake offset to use for subsequent mmap call * * This is a fixed-size type for 32/64 compatibility. */ __u64 offset; /** - * Flags for extended behaviour. + * @flags: Flags for extended behaviour. + * + * It is mandatory that one of the `MMAP_OFFSET` types + * should be included: * - * It is mandatory that one of the MMAP_OFFSET types - * (GTT, WC, WB, UC, etc) should be included. + * - `I915_MMAP_OFFSET_GTT`: Use mmap with the object bound to GTT. (Write-Combined) + * - `I915_MMAP_OFFSET_WC`: Use Write-Combined caching. + * - `I915_MMAP_OFFSET_WB`: Use Write-Back caching. + * - `I915_MMAP_OFFSET_FIXED`: Use object placement to determine caching. + * + * On devices with local memory `I915_MMAP_OFFSET_FIXED` is the only valid + * type. On devices without local memory, this caching mode is invalid. + * + * As caching mode when specifying `I915_MMAP_OFFSET_FIXED`, WC or WB will + * be used, depending on the object placement on creation. WB will be used + * when the object can only exist in system memory, WC otherwise. */ __u64 flags; -#define I915_MMAP_OFFSET_GTT 0 -#define I915_MMAP_OFFSET_WC 1 -#define I915_MMAP_OFFSET_WB 2 -#define I915_MMAP_OFFSET_UC 3 - /* - * Zero-terminated chain of extensions. +#define I915_MMAP_OFFSET_GTT 0 +#define I915_MMAP_OFFSET_WC 1 +#define I915_MMAP_OFFSET_WB 2 +#define I915_MMAP_OFFSET_UC 3 +#define I915_MMAP_OFFSET_FIXED 4 + + /** + * @extensions: Zero-terminated chain of extensions. * * No current extensions defined; mbz. 
*/ __u64 extensions; }; +/** + * struct drm_i915_gem_set_domain - Adjust the objects write or read domain, in + * preparation for accessing the pages via some CPU domain. + * + * Specifying a new write or read domain will flush the object out of the + * previous domain(if required), before then updating the objects domain + * tracking with the new domain. + * + * Note this might involve waiting for the object first if it is still active on + * the GPU. + * + * Supported values for @read_domains and @write_domain: + * + * - I915_GEM_DOMAIN_WC: Uncached write-combined domain + * - I915_GEM_DOMAIN_CPU: CPU cache domain + * - I915_GEM_DOMAIN_GTT: Mappable aperture domain + * + * All other domains are rejected. + * + * Note that for discrete, starting from DG1, this is no longer supported, and + * is instead rejected. On such platforms the CPU domain is effectively static, + * where we also only support a single &drm_i915_gem_mmap_offset cache mode, + * which can't be set explicitly and instead depends on the object placements, + * as per the below. + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. + */ struct drm_i915_gem_set_domain { - /** Handle for the object */ + /** @handle: Handle for the object. */ __u32 handle; - /** New read domains */ + /** @read_domains: New read domains. */ __u32 read_domains; - /** New write domain */ + /** + * @write_domain: New write domain. + * + * Note that having something in the write domain implies it's in the + * read domain, and only that read domain. + */ __u32 write_domain; }; @@ -1348,12 +1428,11 @@ struct drm_i915_gem_busy { * reading from the object simultaneously. * * The value of each engine class is the same as specified in the - * I915_CONTEXT_SET_ENGINES parameter and via perf, i.e. + * I915_CONTEXT_PARAM_ENGINES context parameter and via perf, i.e. * I915_ENGINE_CLASS_RENDER, I915_ENGINE_CLASS_COPY, etc. - * reported as active itself. Some hardware may have parallel - * execution engines, e.g. multiple media engines, which are - * mapped to the same class identifier and so are not separately - * reported for busyness. + * Some hardware may have parallel execution engines, e.g. multiple + * media engines, which are mapped to the same class identifier and so + * are not separately reported for busyness. * * Caveat emptor: * Only the boolean result of this query is reliable; that is whether @@ -1364,43 +1443,79 @@ struct drm_i915_gem_busy { }; /** - * I915_CACHING_NONE - * - * GPU access is not coherent with cpu caches. Default for machines without an - * LLC. - */ -#define I915_CACHING_NONE 0 -/** - * I915_CACHING_CACHED - * - * GPU access is coherent with cpu caches and furthermore the data is cached in - * last-level caches shared between cpu cores and the gpu GT. Default on - * machines with HAS_LLC. + * struct drm_i915_gem_caching - Set or get the caching for given object + * handle. 
+ * + * Allow userspace to control the GTT caching bits for a given object when the + * object is later mapped through the ppGTT(or GGTT on older platforms lacking + * ppGTT support, or if the object is used for scanout). Note that this might + * require unbinding the object from the GTT first, if its current caching value + * doesn't match. + * + * Note that this all changes on discrete platforms, starting from DG1, the + * set/get caching is no longer supported, and is now rejected. Instead the CPU + * caching attributes(WB vs WC) will become an immutable creation time property + * for the object, along with the GTT caching level. For now we don't expose any + * new uAPI for this, instead on DG1 this is all implicit, although this largely + * shouldn't matter since DG1 is coherent by default(without any way of + * controlling it). + * + * Implicit caching rules, starting from DG1: + * + * - If any of the object placements (see &drm_i915_gem_create_ext_memory_regions) + * contain I915_MEMORY_CLASS_DEVICE then the object will be allocated and + * mapped as write-combined only. + * + * - Everything else is always allocated and mapped as write-back, with the + * guarantee that everything is also coherent with the GPU. + * + * Note that this is likely to change in the future again, where we might need + * more flexibility on future devices, so making this all explicit as part of a + * new &drm_i915_gem_create_ext extension is probable. + * + * Side note: Part of the reason for this is that changing the at-allocation-time CPU + * caching attributes for the pages might be required(and is expensive) if we + * need to then CPU map the pages later with different caching attributes. This + * inconsistent caching behaviour, while supported on x86, is not universally + * supported on other architectures. So for simplicity we opt for setting + * everything at creation time, whilst also making it immutable, on discrete + * platforms. */ -#define I915_CACHING_CACHED 1 -/** - * I915_CACHING_DISPLAY - * - * Special GPU caching mode which is coherent with the scanout engines. - * Transparently falls back to I915_CACHING_NONE on platforms where no special - * cache mode (like write-through or gfdt flushing) is available. The kernel - * automatically sets this mode when using a buffer as a scanout target. - * Userspace can manually set this mode to avoid a costly stall and clflush in - * the hotpath of drawing the first frame. - */ -#define I915_CACHING_DISPLAY 2 - struct drm_i915_gem_caching { /** - * Handle of the buffer to set/get the caching level of. */ + * @handle: Handle of the buffer to set/get the caching level. + */ __u32 handle; /** - * Cacheing level to apply or return value + * @caching: The GTT caching level to apply or possible return value. + * + * The supported @caching values: * - * bits0-15 are for generic caching control (i.e. the above defined - * values). bits16-31 are reserved for platform-specific variations - * (e.g. l3$ caching on gen7). */ + * I915_CACHING_NONE: + * + * GPU access is not coherent with CPU caches. Default for machines + * without an LLC. This means manual flushing might be needed, if we + * want GPU access to be coherent. + * + * I915_CACHING_CACHED: + * + * GPU access is coherent with CPU caches and furthermore the data is + * cached in last-level caches shared between CPU cores and the GPU GT. + * + * I915_CACHING_DISPLAY: + * + * Special GPU caching mode which is coherent with the scanout engines. 
+ * Transparently falls back to I915_CACHING_NONE on platforms where no + * special cache mode (like write-through or gfdt flushing) is + * available. The kernel automatically sets this mode when using a + * buffer as a scanout target. Userspace can manually set this mode to + * avoid a costly stall and clflush in the hotpath of drawing the first + * frame. + */ +#define I915_CACHING_NONE 0 +#define I915_CACHING_CACHED 1 +#define I915_CACHING_DISPLAY 2 __u32 caching; }; @@ -1639,6 +1754,10 @@ struct drm_i915_gem_context_param { __u32 size; __u64 param; #define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 +/* I915_CONTEXT_PARAM_NO_ZEROMAP has been removed. On the off chance + * someone somewhere has attempted to use it, never re-use this context + * param number. + */ #define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 #define I915_CONTEXT_PARAM_NO_ERROR_CAPTURE 0x4 @@ -1723,24 +1842,8 @@ struct drm_i915_gem_context_param { */ #define I915_CONTEXT_PARAM_PERSISTENCE 0xb -/* - * I915_CONTEXT_PARAM_RINGSIZE: - * - * Sets the size of the CS ringbuffer to use for logical ring contexts. This - * applies a limit of how many batches can be queued to HW before the caller - * is blocked due to lack of space for more commands. - * - * Only reliably possible to be set prior to first use, i.e. during - * construction. At any later point, the current execution must be flushed as - * the ring can only be changed while the context is idle. Note, the ringsize - * can be specified as a constructor property, see - * I915_CONTEXT_CREATE_EXT_SETPARAM, but can also be set later if required. - * - * Only applies to the current set of engine and lost when those engines - * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES). - * - * Must be between 4 - 512 KiB, in intervals of page size [4 KiB]. - * Default is 16 KiB. +/* This API has been removed. On the off chance someone somewhere has + * attempted to use it, never re-use this context param number. */ #define I915_CONTEXT_PARAM_RINGSIZE 0xc /* Must be kept compact -- no holes and well documented */ @@ -1807,6 +1910,69 @@ struct drm_i915_gem_context_param_sseu { __u32 rsvd; }; +/** + * DOC: Virtual Engine uAPI + * + * Virtual engine is a concept where userspace is able to configure a set of + * physical engines, submit a batch buffer, and let the driver execute it on any + * engine from the set as it sees fit. + * + * This is primarily useful on parts which have multiple instances of a same + * class engine, like for example GT3+ Skylake parts with their two VCS engines. + * + * For instance userspace can enumerate all engines of a certain class using the + * previously described `Engine Discovery uAPI`_. After that userspace can + * create a GEM context with a placeholder slot for the virtual engine (using + * `I915_ENGINE_CLASS_INVALID` and `I915_ENGINE_CLASS_INVALID_NONE` for class + * and instance respectively) and finally using the + * `I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE` extension place a virtual engine in + * the same reserved slot. + * + * Example of creating a virtual engine and submitting a batch buffer to it: + * + * .. 
code-block:: C + * + * I915_DEFINE_CONTEXT_ENGINES_LOAD_BALANCE(virtual, 2) = { + * .base.name = I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE, + * .engine_index = 0, // Place this virtual engine into engine map slot 0 + * .num_siblings = 2, + * .engines = { { I915_ENGINE_CLASS_VIDEO, 0 }, + * { I915_ENGINE_CLASS_VIDEO, 1 }, }, + * }; + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 1) = { + * .engines = { { I915_ENGINE_CLASS_INVALID, + * I915_ENGINE_CLASS_INVALID_NONE } }, + * .extensions = to_user_pointer(&virtual), // Chains after load_balance extension + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // Now we have created a GEM context with its engine map containing a + * // single virtual engine. Submissions to this slot can go either to + * // vcs0 or vcs1, depending on the load balancing algorithm used inside + * // the driver. The load balancing is dynamic from one batch buffer to + * // another and transparent to userspace. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0 which is the virtual engine + * gem_execbuf(drm_fd, &execbuf); + */ + /* * i915_context_engines_load_balance: * @@ -1883,6 +2049,61 @@ struct i915_context_engines_bond { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +/** + * DOC: Context Engine Map uAPI + * + * Context engine map is a new way of addressing engines when submitting batch- + * buffers, replacing the existing way of using identifiers like `I915_EXEC_BLT` + * inside the flags field of `struct drm_i915_gem_execbuffer2`. + * + * To use it created GEM contexts need to be configured with a list of engines + * the user is intending to submit to. This is accomplished using the + * `I915_CONTEXT_PARAM_ENGINES` parameter and `struct + * i915_context_param_engines`. + * + * For such contexts the `I915_EXEC_RING_MASK` field becomes an index into the + * configured map. + * + * Example of creating such context and submitting against it: + * + * .. code-block:: C + * + * I915_DEFINE_CONTEXT_PARAM_ENGINES(engines, 2) = { + * .engines = { { I915_ENGINE_CLASS_RENDER, 0 }, + * { I915_ENGINE_CLASS_COPY, 0 } } + * }; + * struct drm_i915_gem_context_create_ext_setparam p_engines = { + * .base = { + * .name = I915_CONTEXT_CREATE_EXT_SETPARAM, + * }, + * .param = { + * .param = I915_CONTEXT_PARAM_ENGINES, + * .value = to_user_pointer(&engines), + * .size = sizeof(engines), + * }, + * }; + * struct drm_i915_gem_context_create_ext create = { + * .flags = I915_CONTEXT_CREATE_FLAGS_USE_EXTENSIONS, + * .extensions = to_user_pointer(&p_engines); + * }; + * + * ctx_id = gem_context_create_ext(drm_fd, &create); + * + * // We have now created a GEM context with two engines in the map: + * // Index 0 points to rcs0 while index 1 points to bcs0. Other engines + * // will not be accessible from this context. + * + * ... + * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 0; // Submits to index 0, which is rcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + * + * ... 
+ * execbuf.rsvd1 = ctx_id; + * execbuf.flags = 1; // Submits to index 0, which is bcs0 for this context + * gem_execbuf(drm_fd, &execbuf); + */ + struct i915_context_param_engines { __u64 extensions; /* linked chain of extension blocks, 0 terminates */ #define I915_CONTEXT_ENGINES_EXT_LOAD_BALANCE 0 /* see i915_context_engines_load_balance */ @@ -1901,20 +2122,10 @@ struct drm_i915_gem_context_create_ext_setparam { struct drm_i915_gem_context_param param; }; -struct drm_i915_gem_context_create_ext_clone { +/* This API has been removed. On the off chance someone somewhere has + * attempted to use it, never re-use this extension number. + */ #define I915_CONTEXT_CREATE_EXT_CLONE 1 - struct i915_user_extension base; - __u32 clone_id; - __u32 flags; -#define I915_CONTEXT_CLONE_ENGINES (1u << 0) -#define I915_CONTEXT_CLONE_FLAGS (1u << 1) -#define I915_CONTEXT_CLONE_SCHEDATTR (1u << 2) -#define I915_CONTEXT_CLONE_SSEU (1u << 3) -#define I915_CONTEXT_CLONE_TIMELINE (1u << 4) -#define I915_CONTEXT_CLONE_VM (1u << 5) -#define I915_CONTEXT_CLONE_UNKNOWN -(I915_CONTEXT_CLONE_VM << 1) - __u64 rsvd; -}; struct drm_i915_gem_context_destroy { __u32 ctx_id; @@ -1986,14 +2197,69 @@ struct drm_i915_reset_stats { __u32 pad; }; +/** + * struct drm_i915_gem_userptr - Create GEM object from user allocated memory. + * + * Userptr objects have several restrictions on what ioctls can be used with the + * object handle. + */ struct drm_i915_gem_userptr { + /** + * @user_ptr: The pointer to the allocated memory. + * + * Needs to be aligned to PAGE_SIZE. + */ __u64 user_ptr; + + /** + * @user_size: + * + * The size in bytes for the allocated memory. This will also become the + * object size. + * + * Needs to be aligned to PAGE_SIZE, and should be at least PAGE_SIZE, + * or larger. + */ __u64 user_size; + + /** + * @flags: + * + * Supported flags: + * + * I915_USERPTR_READ_ONLY: + * + * Mark the object as readonly, this also means GPU access can only be + * readonly. This is only supported on HW which supports readonly access + * through the GTT. If the HW can't support readonly access, an error is + * returned. + * + * I915_USERPTR_PROBE: + * + * Probe the provided @user_ptr range and validate that the @user_ptr is + * indeed pointing to normal memory and that the range is also valid. + * For example if some garbage address is given to the kernel, then this + * should complain. + * + * Returns -EFAULT if the probe failed. + * + * Note that this doesn't populate the backing pages, and also doesn't + * guarantee that the object will remain valid when the object is + * eventually used. + * + * The kernel supports this feature if I915_PARAM_HAS_USERPTR_PROBE + * returns a non-zero value. + * + * I915_USERPTR_UNSYNCHRONIZED: + * + * NOT USED. Setting this flag will result in an error. + */ __u32 flags; #define I915_USERPTR_READ_ONLY 0x1 +#define I915_USERPTR_PROBE 0x2 #define I915_USERPTR_UNSYNCHRONIZED 0x80000000 /** - * Returned handle for the object. + * @handle: Returned handle for the object. * * Object handles are nonzero. */ @@ -2377,6 +2643,76 @@ struct drm_i915_query_topology_info { }; /** + * DOC: Engine Discovery uAPI + * + * Engine discovery uAPI is a way of enumerating physical engines present in a + * GPU associated with an open i915 DRM file descriptor. This supersedes the old + * way of using `DRM_IOCTL_I915_GETPARAM` and engine identifiers like + * `I915_PARAM_HAS_BLT`. 
+ * + * The need for this interface came starting with Icelake and newer GPUs, which + * started to establish a pattern of having multiple engines of a same class, + * where not all instances were always completely functionally equivalent. + * + * Entry point for this uapi is `DRM_IOCTL_I915_QUERY` with the + * `DRM_I915_QUERY_ENGINE_INFO` as the queried item id. + * + * Example for getting the list of engines: + * + * .. code-block:: C + * + * struct drm_i915_query_engine_info *info; + * struct drm_i915_query_item item = { + * .query_id = DRM_I915_QUERY_ENGINE_INFO; + * }; + * struct drm_i915_query query = { + * .num_items = 1, + * .items_ptr = (uintptr_t)&item, + * }; + * int err, i; + * + * // First query the size of the blob we need, this needs to be large + * // enough to hold our array of engines. The kernel will fill out the + * // item.length for us, which is the number of bytes we need. + * // + * // Alternatively a large buffer can be allocated straight away enabling + * // querying in one pass, in which case item.length should contain the + * // length of the provided buffer. + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * info = calloc(1, item.length); + * // Now that we allocated the required number of bytes, we call the ioctl + * // again, this time with the data_ptr pointing to our newly allocated + * // blob, which the kernel can then populate with info on all engines. + * item.data_ptr = (uintptr_t)&info, + * + * err = ioctl(fd, DRM_IOCTL_I915_QUERY, &query); + * if (err) ... + * + * // We can now access each engine in the array + * for (i = 0; i < info->num_engines; i++) { + * struct drm_i915_engine_info einfo = info->engines[i]; + * u16 class = einfo.engine.class; + * u16 instance = einfo.engine.instance; + * .... + * } + * + * free(info); + * + * Each of the enumerated engines, apart from being defined by its class and + * instance (see `struct i915_engine_class_instance`), also can have flags and + * capabilities defined as documented in i915_drm.h. + * + * For instance video engines which support HEVC encoding will have the + * `I915_VIDEO_CLASS_CAPABILITY_HEVC` capability bit set. + * + * Engine discovery only fully comes to its own when combined with the new way + * of addressing engines when submitting batch buffers using contexts with + * engine maps configured. + */ + +/** * struct drm_i915_engine_info * * Describes one engine and it's capabilities as known to the driver. 
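The kernel-doc added above for I915_USERPTR_PROBE and I915_PARAM_HAS_USERPTR_PROBE describes the behaviour but the patch carries no usage example. The following is a minimal userspace sketch, not part of the patch: it assumes an already-open i915 DRM file descriptor, libdrm-style <drm/i915_drm.h> headers, and page-aligned memory; the helper name is purely illustrative.

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    /* Illustrative helper: create a userptr GEM object, asking the kernel to
     * probe the range first when I915_PARAM_HAS_USERPTR_PROBE reports support.
     * Returns the GEM handle, or 0 on failure. */
    static uint32_t userptr_probe_create(int fd, void *ptr, uint64_t size)
    {
            int has_probe = 0;
            struct drm_i915_getparam gp = {
                    .param = I915_PARAM_HAS_USERPTR_PROBE,
                    .value = &has_probe,
            };
            struct drm_i915_gem_userptr up = {
                    .user_ptr  = (uintptr_t)ptr,   /* must be PAGE_SIZE aligned */
                    .user_size = size,             /* must be PAGE_SIZE aligned */
            };

            /* Only request up-front validation when the kernel advertises it. */
            if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0 && has_probe)
                    up.flags |= I915_USERPTR_PROBE;

            if (ioctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &up))
                    return 0;   /* with PROBE set, EFAULT means the range failed validation */

            return up.handle;
    }

As the documentation notes, a successful probe does not pin the pages or guarantee the range stays valid, so callers still have to handle faults at actual use time.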
diff --git a/tools/include/uapi/linux/fs.h b/tools/include/uapi/linux/fs.h index 4c32e97dcdf0..bdf7b404b3e7 100644 --- a/tools/include/uapi/linux/fs.h +++ b/tools/include/uapi/linux/fs.h @@ -184,6 +184,7 @@ struct fsxattr { #define BLKSECDISCARD _IO(0x12,125) #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) +#define BLKGETDISKSEQ _IOR(0x12,128,__u64) /* * A jump here: 130-136 are reserved for zoned block devices * (see uapi/linux/blkzoned.h) diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index d1b327036ae4..14168225cecd 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -188,11 +188,22 @@ struct ip_mreq_source { }; struct ip_msfilter { - __be32 imsf_multiaddr; - __be32 imsf_interface; - __u32 imsf_fmode; - __u32 imsf_numsrc; - __be32 imsf_slist[1]; + union { + struct { + __be32 imsf_multiaddr_aux; + __be32 imsf_interface_aux; + __u32 imsf_fmode_aux; + __u32 imsf_numsrc_aux; + __be32 imsf_slist[1]; + }; + struct { + __be32 imsf_multiaddr; + __be32 imsf_interface; + __u32 imsf_fmode; + __u32 imsf_numsrc; + __be32 imsf_slist_flex[]; + }; + }; }; #define IP_MSFILTER_SIZE(numsrc) \ @@ -211,11 +222,22 @@ struct group_source_req { }; struct group_filter { - __u32 gf_interface; /* interface index */ - struct __kernel_sockaddr_storage gf_group; /* multicast address */ - __u32 gf_fmode; /* filter mode */ - __u32 gf_numsrc; /* number of sources */ - struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + union { + struct { + __u32 gf_interface_aux; /* interface index */ + struct __kernel_sockaddr_storage gf_group_aux; /* multicast address */ + __u32 gf_fmode_aux; /* filter mode */ + __u32 gf_numsrc_aux; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist[1]; /* interface index */ + }; + struct { + __u32 gf_interface; /* interface index */ + struct __kernel_sockaddr_storage gf_group; /* multicast address */ + __u32 gf_fmode; /* filter mode */ + __u32 gf_numsrc; /* number of sources */ + struct __kernel_sockaddr_storage gf_slist_flex[]; /* interface index */ + }; + }; }; #define GROUP_FILTER_SIZE(numsrc) \ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index d9e4aabcb31a..a067410ebea5 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1965,7 +1965,9 @@ struct kvm_stats_header { #define KVM_STATS_TYPE_CUMULATIVE (0x0 << KVM_STATS_TYPE_SHIFT) #define KVM_STATS_TYPE_INSTANT (0x1 << KVM_STATS_TYPE_SHIFT) #define KVM_STATS_TYPE_PEAK (0x2 << KVM_STATS_TYPE_SHIFT) -#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_PEAK +#define KVM_STATS_TYPE_LINEAR_HIST (0x3 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_LOG_HIST (0x4 << KVM_STATS_TYPE_SHIFT) +#define KVM_STATS_TYPE_MAX KVM_STATS_TYPE_LOG_HIST #define KVM_STATS_UNIT_SHIFT 4 #define KVM_STATS_UNIT_MASK (0xF << KVM_STATS_UNIT_SHIFT) @@ -1988,8 +1990,9 @@ struct kvm_stats_header { * @size: The number of data items for this stats. * Every data item is of type __u64. * @offset: The offset of the stats to the start of stat structure in - * struture kvm or kvm_vcpu. - * @unused: Unused field for future usage. Always 0 for now. + * structure kvm or kvm_vcpu. + * @bucket_size: A parameter value used for histogram stats. It is only used + * for linear histogram stats, specifying the size of the bucket; * @name: The name string for the stats. Its size is indicated by the * &kvm_stats_header->name_size. 
*/ @@ -1998,7 +2001,7 @@ struct kvm_stats_desc { __s16 exponent; __u16 size; __u32 offset; - __u32 unused; + __u32 bucket_size; char name[]; }; diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h index dd7a166fdf9c..4d93967f8aea 100644 --- a/tools/include/uapi/linux/mount.h +++ b/tools/include/uapi/linux/mount.h @@ -73,7 +73,8 @@ #define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ #define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ #define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ -#define MOVE_MOUNT__MASK 0x00000077 +#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */ +#define MOVE_MOUNT__MASK 0x00000177 /* * fsopen() flags. diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 967d9c55323d..43bd7f713c39 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -213,6 +213,7 @@ struct prctl_mm_map { /* Speculation control variants */ # define PR_SPEC_STORE_BYPASS 0 # define PR_SPEC_INDIRECT_BRANCH 1 +# define PR_SPEC_L1D_FLUSH 2 /* Return and control values for PR_SET/GET_SPECULATION_CTRL */ # define PR_SPEC_NOT_AFFECTED 0 # define PR_SPEC_PRCTL (1UL << 0) @@ -234,14 +235,15 @@ struct prctl_mm_map { #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) /* MTE tag check fault modes */ -# define PR_MTE_TCF_SHIFT 1 -# define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) -# define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT) +# define PR_MTE_TCF_NONE 0 +# define PR_MTE_TCF_SYNC (1UL << 1) +# define PR_MTE_TCF_ASYNC (1UL << 2) +# define PR_MTE_TCF_MASK (PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC) /* MTE tag inclusion mask */ # define PR_MTE_TAG_SHIFT 3 # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) +/* Unused; kept only for source compatibility */ +# define PR_MTE_TCF_SHIFT 1 /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 diff --git a/tools/include/uapi/sound/asound.h b/tools/include/uapi/sound/asound.h index d17c061950df..5859ca0a1439 100644 --- a/tools/include/uapi/sound/asound.h +++ b/tools/include/uapi/sound/asound.h @@ -299,6 +299,7 @@ typedef int __bitwise snd_pcm_subformat_t; #define SNDRV_PCM_INFO_HAS_LINK_ABSOLUTE_ATIME 0x02000000 /* report absolute hardware link audio time, not reset on startup */ #define SNDRV_PCM_INFO_HAS_LINK_ESTIMATED_ATIME 0x04000000 /* report estimated link audio time */ #define SNDRV_PCM_INFO_HAS_LINK_SYNCHRONIZED_ATIME 0x08000000 /* report synchronized audio/system time */ +#define SNDRV_PCM_INFO_EXPLICIT_SYNC 0x10000000 /* needs explicit sync of pointers and data */ #define SNDRV_PCM_INFO_DRAIN_TRIGGER 0x40000000 /* internal kernel flag - trigger in drain */ #define SNDRV_PCM_INFO_FIFO_IN_FRAMES 0x80000000 /* internal kernel flag - FIFO size is in frames */ @@ -783,6 +784,7 @@ struct snd_rawmidi_status { #define SNDRV_RAWMIDI_IOCTL_PVERSION _IOR('W', 0x00, int) #define SNDRV_RAWMIDI_IOCTL_INFO _IOR('W', 0x01, struct snd_rawmidi_info) +#define SNDRV_RAWMIDI_IOCTL_USER_PVERSION _IOW('W', 0x02, int) #define SNDRV_RAWMIDI_IOCTL_PARAMS _IOWR('W', 0x10, struct snd_rawmidi_params) #define SNDRV_RAWMIDI_IOCTL_STATUS _IOWR('W', 0x20, struct snd_rawmidi_status) #define SNDRV_RAWMIDI_IOCTL_DROP _IOW('W', 0x30, int) diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat index b0bf56c5f120..5a5bd74f55bd 100755 --- 
a/tools/kvm/kvm_stat/kvm_stat +++ b/tools/kvm/kvm_stat/kvm_stat @@ -742,7 +742,7 @@ class DebugfsProvider(Provider): The fields are all available KVM debugfs files """ - exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns'] + exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns', 'halt_wait_ns'] fields = [field for field in self.walkdir(PATH_DEBUGFS_KVM)[2] if field not in exempt_list] diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 88d8825fc6f6..e4f83c304ec9 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -6894,7 +6894,8 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) if (obj->gen_loader) { /* reset FDs */ - btf__set_fd(obj->btf, -1); + if (obj->btf) + btf__set_fd(obj->btf, -1); for (i = 0; i < obj->nr_maps; i++) obj->maps[i].fd = -1; if (!err) diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 10911a8cad0f..2df880cefdae 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -1649,11 +1649,17 @@ static bool btf_is_non_static(const struct btf_type *t) static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sym_name, int *out_btf_sec_id, int *out_btf_id) { - int i, j, n = btf__get_nr_types(obj->btf), m, btf_id = 0; + int i, j, n, m, btf_id = 0; const struct btf_type *t; const struct btf_var_secinfo *vi; const char *name; + if (!obj->btf) { + pr_warn("failed to find BTF info for object '%s'\n", obj->filename); + return -EINVAL; + } + + n = btf__get_nr_types(obj->btf); for (i = 1; i <= n; i++) { t = btf__type_by_id(obj->btf, i); diff --git a/tools/lib/bpf/strset.c b/tools/lib/bpf/strset.c index 1fb8b49de1d6..ea655318153f 100644 --- a/tools/lib/bpf/strset.c +++ b/tools/lib/bpf/strset.c @@ -88,6 +88,7 @@ void strset__free(struct strset *set) hashmap__free(set->strs_hash); free(set->strs_data); + free(set); } size_t strset__data_size(const struct strset *set) diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index d8886720e83d..8441e3e1aaac 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -43,7 +43,7 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } -#define FD(e, x, y) (*(int *) xyarray__entry(e->fd, x, y)) +#define FD(e, x, y) ((int *) xyarray__entry(e->fd, x, y)) #define MMAP(e, x, y) (e->mmap ? 
((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL) int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -54,7 +54,10 @@ int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) int cpu, thread; for (cpu = 0; cpu < ncpus; cpu++) { for (thread = 0; thread < nthreads; thread++) { - FD(evsel, cpu, thread) = -1; + int *fd = FD(evsel, cpu, thread); + + if (fd) + *fd = -1; } } } @@ -80,7 +83,7 @@ sys_perf_event_open(struct perf_event_attr *attr, static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd) { struct perf_evsel *leader = evsel->leader; - int fd; + int *fd; if (evsel == leader) { *group_fd = -1; @@ -95,10 +98,10 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou return -ENOTCONN; fd = FD(leader, cpu, thread); - if (fd == -1) + if (fd == NULL || *fd == -1) return -EBADF; - *group_fd = fd; + *group_fd = *fd; return 0; } @@ -138,7 +141,11 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, for (cpu = 0; cpu < cpus->nr; cpu++) { for (thread = 0; thread < threads->nr; thread++) { - int fd, group_fd; + int fd, group_fd, *evsel_fd; + + evsel_fd = FD(evsel, cpu, thread); + if (evsel_fd == NULL) + return -EINVAL; err = get_group_fd(evsel, cpu, thread, &group_fd); if (err < 0) @@ -151,7 +158,7 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus, if (fd < 0) return -errno; - FD(evsel, cpu, thread) = fd; + *evsel_fd = fd; } } @@ -163,9 +170,12 @@ static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { - if (FD(evsel, cpu, thread) >= 0) - close(FD(evsel, cpu, thread)); - FD(evsel, cpu, thread) = -1; + int *fd = FD(evsel, cpu, thread); + + if (fd && *fd >= 0) { + close(*fd); + *fd = -1; + } } } @@ -209,13 +219,12 @@ void perf_evsel__munmap(struct perf_evsel *evsel) for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int fd = FD(evsel, cpu, thread); - struct perf_mmap *map = MMAP(evsel, cpu, thread); + int *fd = FD(evsel, cpu, thread); - if (fd < 0) + if (fd == NULL || *fd < 0) continue; - perf_mmap__munmap(map); + perf_mmap__munmap(MMAP(evsel, cpu, thread)); } } @@ -239,15 +248,16 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int fd = FD(evsel, cpu, thread); - struct perf_mmap *map = MMAP(evsel, cpu, thread); + int *fd = FD(evsel, cpu, thread); + struct perf_mmap *map; - if (fd < 0) + if (fd == NULL || *fd < 0) continue; + map = MMAP(evsel, cpu, thread); perf_mmap__init(map, NULL, false, NULL); - ret = perf_mmap__mmap(map, &mp, fd, cpu); + ret = perf_mmap__mmap(map, &mp, *fd, cpu); if (ret) { perf_evsel__munmap(evsel); return ret; @@ -260,7 +270,9 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages) void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread) { - if (FD(evsel, cpu, thread) < 0 || MMAP(evsel, cpu, thread) == NULL) + int *fd = FD(evsel, cpu, thread); + + if (fd == NULL || *fd < 0 || MMAP(evsel, cpu, thread) == NULL) return NULL; return MMAP(evsel, cpu, thread)->base; @@ -295,17 +307,18 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count) { size_t size = perf_evsel__read_size(evsel); + int *fd = FD(evsel, cpu, thread); memset(count, 0, sizeof(*count)); - 
if (FD(evsel, cpu, thread) < 0) + if (fd == NULL || *fd < 0) return -EINVAL; if (MMAP(evsel, cpu, thread) && !perf_mmap__read_self(MMAP(evsel, cpu, thread), count)) return 0; - if (readn(FD(evsel, cpu, thread), count->values, size) <= 0) + if (readn(*fd, count->values, size) <= 0) return -errno; return 0; @@ -318,8 +331,13 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel, int thread; for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { - int fd = FD(evsel, cpu, thread), - err = ioctl(fd, ioc, arg); + int err; + int *fd = FD(evsel, cpu, thread); + + if (fd == NULL || *fd < 0) + return -1; + + err = ioctl(*fd, ioc, arg); if (err) return err; diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index c67c83399170..ce91a582f0e4 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -40,7 +40,7 @@ static int test_stat_cpu(void) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK, }; - int err, cpu, tmp; + int err, idx; cpus = perf_cpu_map__new(NULL); __T("failed to create cpus", cpus); @@ -70,10 +70,10 @@ static int test_stat_cpu(void) perf_evlist__for_each_evsel(evlist, evsel) { cpus = perf_evsel__cpus(evsel); - perf_cpu_map__for_each_cpu(cpu, tmp, cpus) { + for (idx = 0; idx < perf_cpu_map__nr(cpus); idx++) { struct perf_counts_values counts = { .val = 0 }; - perf_evsel__read(evsel, cpu, 0, &counts); + perf_evsel__read(evsel, idx, 0, &counts); __T("failed to read value for evsel", counts.val != 0); } } diff --git a/tools/lib/perf/tests/test-evsel.c b/tools/lib/perf/tests/test-evsel.c index a184e4861627..33ae9334861a 100644 --- a/tools/lib/perf/tests/test-evsel.c +++ b/tools/lib/perf/tests/test-evsel.c @@ -22,7 +22,7 @@ static int test_stat_cpu(void) .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_CPU_CLOCK, }; - int err, cpu, tmp; + int err, idx; cpus = perf_cpu_map__new(NULL); __T("failed to create cpus", cpus); @@ -33,10 +33,10 @@ static int test_stat_cpu(void) err = perf_evsel__open(evsel, cpus, NULL); __T("failed to open evsel", err == 0); - perf_cpu_map__for_each_cpu(cpu, tmp, cpus) { + for (idx = 0; idx < perf_cpu_map__nr(cpus); idx++) { struct perf_counts_values counts = { .val = 0 }; - perf_evsel__read(evsel, cpu, 0, &counts); + perf_evsel__read(evsel, idx, 0, &counts); __T("failed to read value for evsel", counts.val != 0); } @@ -148,6 +148,7 @@ static int test_stat_user_read(int event) __T("failed to mmap evsel", err == 0); pc = perf_evsel__mmap_base(evsel, 0, 0); + __T("failed to get mmapped address", pc); #if defined(__i386__) || defined(__x86_64__) __T("userspace counter access not supported", pc->cap_user_rdpmc); diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index bc821056aba9..0893436cc09f 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -684,7 +684,7 @@ static int elf_add_alternative(struct elf *elf, sec = find_section_by_name(elf, ".altinstructions"); if (!sec) { sec = elf_create_section(elf, ".altinstructions", - SHF_ALLOC, size, 0); + SHF_ALLOC, 0, 0); if (!sec) { WARN_ELF("elf_create_section"); diff --git a/tools/objtool/check.c b/tools/objtool/check.c index e5947fbb9e7a..06b5c164ae93 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -292,7 +292,7 @@ static int decode_instructions(struct objtool_file *file) !strcmp(sec->name, ".entry.text")) sec->noinstr = true; - for (offset = 0; offset < sec->len; offset += insn->len) { + for (offset = 0; offset < sec->sh.sh_size; offset += 
insn->len) { insn = malloc(sizeof(*insn)); if (!insn) { WARN("malloc failed"); @@ -307,7 +307,7 @@ static int decode_instructions(struct objtool_file *file) insn->offset = offset; ret = arch_decode_instruction(file->elf, sec, offset, - sec->len - offset, + sec->sh.sh_size - offset, &insn->len, &insn->type, &insn->immediate, &insn->stack_ops); @@ -349,9 +349,9 @@ static struct instruction *find_last_insn(struct objtool_file *file, { struct instruction *insn = NULL; unsigned int offset; - unsigned int end = (sec->len > 10) ? sec->len - 10 : 0; + unsigned int end = (sec->sh.sh_size > 10) ? sec->sh.sh_size - 10 : 0; - for (offset = sec->len - 1; offset >= end && !insn; offset--) + for (offset = sec->sh.sh_size - 1; offset >= end && !insn; offset--) insn = find_insn(file, sec, offset); return insn; @@ -389,7 +389,7 @@ static int add_dead_ends(struct objtool_file *file) insn = find_insn(file, reloc->sym->sec, reloc->addend); if (insn) insn = list_prev_entry(insn, list); - else if (reloc->addend == reloc->sym->sec->len) { + else if (reloc->addend == reloc->sym->sec->sh.sh_size) { insn = find_last_insn(file, reloc->sym->sec); if (!insn) { WARN("can't find unreachable insn at %s+0x%x", @@ -424,7 +424,7 @@ reachable: insn = find_insn(file, reloc->sym->sec, reloc->addend); if (insn) insn = list_prev_entry(insn, list); - else if (reloc->addend == reloc->sym->sec->len) { + else if (reloc->addend == reloc->sym->sec->sh.sh_size) { insn = find_last_insn(file, reloc->sym->sec); if (!insn) { WARN("can't find reachable insn at %s+0x%x", @@ -1561,14 +1561,14 @@ static int read_unwind_hints(struct objtool_file *file) return -1; } - if (sec->len % sizeof(struct unwind_hint)) { + if (sec->sh.sh_size % sizeof(struct unwind_hint)) { WARN("struct unwind_hint size mismatch"); return -1; } file->hints = true; - for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) { + for (i = 0; i < sec->sh.sh_size / sizeof(struct unwind_hint); i++) { hint = (struct unwind_hint *)sec->data->d_buf + i; reloc = find_reloc_by_dest(file->elf, sec, i * sizeof(*hint)); diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 8676c7598728..fee03b744a6e 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -286,10 +286,9 @@ static int read_sections(struct elf *elf) return -1; } } - sec->len = sec->sh.sh_size; if (sec->sh.sh_flags & SHF_EXECINSTR) - elf->text_size += sec->len; + elf->text_size += sec->sh.sh_size; list_add_tail(&sec->list, &elf->sections); elf_hash_add(section, &sec->hash, sec->idx); @@ -509,6 +508,7 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset, list_add_tail(&reloc->list, &sec->reloc->reloc_list); elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc)); + sec->reloc->sh.sh_size += sec->reloc->sh.sh_entsize; sec->reloc->changed = true; return 0; @@ -734,8 +734,8 @@ static int elf_add_string(struct elf *elf, struct section *strtab, char *str) data->d_size = strlen(str) + 1; data->d_align = 1; - len = strtab->len; - strtab->len += data->d_size; + len = strtab->sh.sh_size; + strtab->sh.sh_size += data->d_size; strtab->changed = true; return len; @@ -790,9 +790,9 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name) data->d_align = 1; data->d_type = ELF_T_SYM; - sym->idx = symtab->len / sizeof(sym->sym); + sym->idx = symtab->sh.sh_size / sizeof(sym->sym); - symtab->len += data->d_size; + symtab->sh.sh_size += data->d_size; symtab->changed = true; symtab_shndx = find_section_by_name(elf, ".symtab_shndx"); @@ -814,7 +814,7 @@ struct symbol 
*elf_create_undef_symbol(struct elf *elf, const char *name) data->d_align = 4; data->d_type = ELF_T_WORD; - symtab_shndx->len += 4; + symtab_shndx->sh.sh_size += 4; symtab_shndx->changed = true; } @@ -855,7 +855,6 @@ struct section *elf_create_section(struct elf *elf, const char *name, } sec->idx = elf_ndxscn(s); - sec->len = size; sec->changed = true; sec->data = elf_newdata(s); @@ -979,63 +978,63 @@ static struct section *elf_create_reloc_section(struct elf *elf, } } -static int elf_rebuild_rel_reloc_section(struct section *sec, int nr) +static int elf_rebuild_rel_reloc_section(struct section *sec) { struct reloc *reloc; - int idx = 0, size; + int idx = 0; void *buf; /* Allocate a buffer for relocations */ - size = nr * sizeof(GElf_Rel); - buf = malloc(size); + buf = malloc(sec->sh.sh_size); if (!buf) { perror("malloc"); return -1; } sec->data->d_buf = buf; - sec->data->d_size = size; + sec->data->d_size = sec->sh.sh_size; sec->data->d_type = ELF_T_REL; - sec->sh.sh_size = size; - idx = 0; list_for_each_entry(reloc, &sec->reloc_list, list) { reloc->rel.r_offset = reloc->offset; reloc->rel.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type); - gelf_update_rel(sec->data, idx, &reloc->rel); + if (!gelf_update_rel(sec->data, idx, &reloc->rel)) { + WARN_ELF("gelf_update_rel"); + return -1; + } idx++; } return 0; } -static int elf_rebuild_rela_reloc_section(struct section *sec, int nr) +static int elf_rebuild_rela_reloc_section(struct section *sec) { struct reloc *reloc; - int idx = 0, size; + int idx = 0; void *buf; /* Allocate a buffer for relocations with addends */ - size = nr * sizeof(GElf_Rela); - buf = malloc(size); + buf = malloc(sec->sh.sh_size); if (!buf) { perror("malloc"); return -1; } sec->data->d_buf = buf; - sec->data->d_size = size; + sec->data->d_size = sec->sh.sh_size; sec->data->d_type = ELF_T_RELA; - sec->sh.sh_size = size; - idx = 0; list_for_each_entry(reloc, &sec->reloc_list, list) { reloc->rela.r_offset = reloc->offset; reloc->rela.r_addend = reloc->addend; reloc->rela.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type); - gelf_update_rela(sec->data, idx, &reloc->rela); + if (!gelf_update_rela(sec->data, idx, &reloc->rela)) { + WARN_ELF("gelf_update_rela"); + return -1; + } idx++; } @@ -1044,16 +1043,9 @@ static int elf_rebuild_rela_reloc_section(struct section *sec, int nr) static int elf_rebuild_reloc_section(struct elf *elf, struct section *sec) { - struct reloc *reloc; - int nr; - - nr = 0; - list_for_each_entry(reloc, &sec->reloc_list, list) - nr++; - switch (sec->sh.sh_type) { - case SHT_REL: return elf_rebuild_rel_reloc_section(sec, nr); - case SHT_RELA: return elf_rebuild_rela_reloc_section(sec, nr); + case SHT_REL: return elf_rebuild_rel_reloc_section(sec); + case SHT_RELA: return elf_rebuild_rela_reloc_section(sec); default: return -1; } } @@ -1113,12 +1105,6 @@ int elf_write(struct elf *elf) /* Update changed relocation sections and section headers: */ list_for_each_entry(sec, &elf->sections, list) { if (sec->changed) { - if (sec->base && - elf_rebuild_reloc_section(elf, sec)) { - WARN("elf_rebuild_reloc_section"); - return -1; - } - s = elf_getscn(elf->elf, sec->idx); if (!s) { WARN_ELF("elf_getscn"); @@ -1129,6 +1115,12 @@ int elf_write(struct elf *elf) return -1; } + if (sec->base && + elf_rebuild_reloc_section(elf, sec)) { + WARN("elf_rebuild_reloc_section"); + return -1; + } + sec->changed = false; elf->changed = true; } diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index e34395047530..075d8291b854 100644 --- 
a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -38,7 +38,6 @@ struct section { Elf_Data *data; char *name; int idx; - unsigned int len; bool changed, text, rodata, noinstr; }; diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c index dc9b7dd314b0..b5865e2450cb 100644 --- a/tools/objtool/orc_gen.c +++ b/tools/objtool/orc_gen.c @@ -204,7 +204,7 @@ int orc_create(struct objtool_file *file) /* Add a section terminator */ if (!empty) { - orc_list_add(&orc_list, &null, sec, sec->len); + orc_list_add(&orc_list, &null, sec, sec->sh.sh_size); nr++; } } diff --git a/tools/objtool/special.c b/tools/objtool/special.c index bc925cf19e2d..06c3eacab3d5 100644 --- a/tools/objtool/special.c +++ b/tools/objtool/special.c @@ -58,6 +58,13 @@ void __weak arch_handle_alternative(unsigned short feature, struct special_alt * { } +static void reloc_to_sec_off(struct reloc *reloc, struct section **sec, + unsigned long *off) +{ + *sec = reloc->sym->sec; + *off = reloc->sym->offset + reloc->addend; +} + static int get_alt_entry(struct elf *elf, struct special_entry *entry, struct section *sec, int idx, struct special_alt *alt) @@ -91,14 +98,8 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, WARN_FUNC("can't find orig reloc", sec, offset + entry->orig); return -1; } - if (orig_reloc->sym->type != STT_SECTION) { - WARN_FUNC("don't know how to handle non-section reloc symbol %s", - sec, offset + entry->orig, orig_reloc->sym->name); - return -1; - } - alt->orig_sec = orig_reloc->sym->sec; - alt->orig_off = orig_reloc->addend; + reloc_to_sec_off(orig_reloc, &alt->orig_sec, &alt->orig_off); if (!entry->group || alt->new_len) { new_reloc = find_reloc_by_dest(elf, sec, offset + entry->new); @@ -116,8 +117,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry, if (arch_is_retpoline(new_reloc->sym)) return 1; - alt->new_sec = new_reloc->sym->sec; - alt->new_off = (unsigned int)new_reloc->addend; + reloc_to_sec_off(new_reloc, &alt->new_sec, &alt->new_off); /* _ASM_EXTABLE_EX hack */ if (alt->new_off >= 0x7ffffff0) @@ -159,13 +159,13 @@ int special_get_alts(struct elf *elf, struct list_head *alts) if (!sec) continue; - if (sec->len % entry->size != 0) { + if (sec->sh.sh_size % entry->size != 0) { WARN("%s size not a multiple of %d", sec->name, entry->size); return -1; } - nr_entries = sec->len / entry->size; + nr_entries = sec->sh.sh_size / entry->size; for (idx = 0; idx < nr_entries; idx++) { alt = malloc(sizeof(*alt)); diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index e555e9729758..8e0163b7ef01 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -39,3 +39,4 @@ pmu-events/jevents feature/ fixdep libtraceevent-dynamic-list +Documentation/doc.dep diff --git a/tools/perf/Documentation/jitdump-specification.txt b/tools/perf/Documentation/jitdump-specification.txt index 52152d156ad9..79936355d819 100644 --- a/tools/perf/Documentation/jitdump-specification.txt +++ b/tools/perf/Documentation/jitdump-specification.txt @@ -164,7 +164,7 @@ const char unwinding_data[n]: an array of unwinding data, consisting of the EH F The EH Frame header follows the Linux Standard Base (LSB) specification as described in the document at https://refspecs.linuxfoundation.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html -The EH Frame follows the LSB specicfication as described in the document at https://refspecs.linuxbase.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html +The EH Frame follows the LSB specification as described in the document at 
https://refspecs.linuxbase.org/LSB_3.0.0/LSB-PDA/LSB-PDA/ehframechpt.html NOTE: The mapped_size is generally either the same as unwind_data_size (if the unwinding data was mapped in memory by the running process) or zero (if the unwinding data is not mapped by the process). If the unwinding data was not mapped, then only the EH Frame Header will be read, which can be used to specify FP based unwinding for a function which does not have unwinding information. diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index de6beedb7283..3b6a2c84ea02 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -261,7 +261,7 @@ COALESCE User can specify how to sort offsets for cacheline. Following fields are available and governs the final -output fields set for caheline offsets output: +output fields set for cacheline offsets output: tid - coalesced by process TIDs pid - coalesced by process PIDs diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 184ba62420f0..db465fa7ee91 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -883,7 +883,7 @@ and "r" can be combined to get calls and returns. "Transactions" events correspond to the start or end of transactions. The 'flags' field can be used in perf script to determine whether the event is a -tranasaction start, commit or abort. +transaction start, commit or abort. Note that "instructions", "branches" and "transactions" events depend on code flow packets which can be disabled by using the config term "branch=0". Refer diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index 74d774592196..1b4d452923d7 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -44,7 +44,7 @@ COMMON OPTIONS -f:: --force:: - Don't complan, do it. + Don't complain, do it. REPORT OPTIONS -------------- diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt index 5a1f68122f50..fa4f39d305a7 100644 --- a/tools/perf/Documentation/perf-script-perl.txt +++ b/tools/perf/Documentation/perf-script-perl.txt @@ -54,7 +54,7 @@ all sched_wakeup events in the system: Traces meant to be processed using a script should be recorded with the above option: -a to enable system-wide collection. -The format file for the sched_wakep event defines the following fields +The format file for the sched_wakeup event defines the following fields (see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format): ---- diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt index 0250dc61cf98..cf4b7f4b625a 100644 --- a/tools/perf/Documentation/perf-script-python.txt +++ b/tools/perf/Documentation/perf-script-python.txt @@ -448,7 +448,7 @@ all sched_wakeup events in the system: Traces meant to be processed using a script should be recorded with the above option: -a to enable system-wide collection. 
-The format file for the sched_wakep event defines the following fields +The format file for the sched_wakeup event defines the following fields (see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format): ---- diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 4c9310be6acc..7e6fb7cbc0f4 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -385,7 +385,7 @@ Aggregate counts per physical processor for system-wide mode measurements. Print metrics or metricgroups specified in a comma separated list. For a group all metrics from the group are added. The events from the metrics are automatically measured. -See perf list output for the possble metrics and metricgroups. +See perf list output for the possible metrics and metricgroups. -A:: --no-aggr:: diff --git a/tools/perf/Documentation/topdown.txt b/tools/perf/Documentation/topdown.txt index c6302df4cf29..a15b93fdcf50 100644 --- a/tools/perf/Documentation/topdown.txt +++ b/tools/perf/Documentation/topdown.txt @@ -2,7 +2,7 @@ Using TopDown metrics in user space ----------------------------------- Intel CPUs (since Sandy Bridge and Silvermont) support a TopDown -methology to break down CPU pipeline execution into 4 bottlenecks: +methodology to break down CPU pipeline execution into 4 bottlenecks: frontend bound, backend bound, bad speculation, retiring. For more details on Topdown see [1][5] diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index b66cf128cbc7..14e3e8d702a0 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -143,7 +143,7 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto ifdef CSINCLUDES LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) endif -OPENCSDLIBS := -lopencsd_c_api -lopencsd +OPENCSDLIBS := -lopencsd_c_api -lopencsd -lstdc++ ifdef CSLIBS LIBOPENCSD_LDFLAGS := -L$(CSLIBS) endif @@ -827,33 +827,36 @@ else endif endif -ifeq ($(feature-libbfd), 1) - EXTLIBS += -lbfd -lopcodes -else - # we are on a system that requires -liberty and (maybe) -lz - # to link against -lbfd; test each case individually here - - # call all detections now so we get correct - # status in VF output - $(call feature_check,libbfd-liberty) - $(call feature_check,libbfd-liberty-z) - ifeq ($(feature-libbfd-liberty), 1) - EXTLIBS += -lbfd -lopcodes -liberty - FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl +ifndef NO_LIBBFD + ifeq ($(feature-libbfd), 1) + EXTLIBS += -lbfd -lopcodes else - ifeq ($(feature-libbfd-liberty-z), 1) - EXTLIBS += -lbfd -lopcodes -liberty -lz - FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl + # we are on a system that requires -liberty and (maybe) -lz + # to link against -lbfd; test each case individually here + + # call all detections now so we get correct + # status in VF output + $(call feature_check,libbfd-liberty) + $(call feature_check,libbfd-liberty-z) + + ifeq ($(feature-libbfd-liberty), 1) + EXTLIBS += -lbfd -lopcodes -liberty + FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -ldl + else + ifeq ($(feature-libbfd-liberty-z), 1) + EXTLIBS += -lbfd -lopcodes -liberty -lz + FEATURE_CHECK_LDFLAGS-disassembler-four-args += -liberty -lz -ldl + endif endif + $(call feature_check,disassembler-four-args) endif - $(call feature_check,disassembler-four-args) -endif -ifeq ($(feature-libbfd-buildid), 1) - CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT -else - msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available); + ifeq 
($(feature-libbfd-buildid), 1) + CFLAGS += -DHAVE_LIBBFD_BUILDID_SUPPORT + else + msg := $(warning Old version of libbfd/binutils things like PE executable profiling will not be available); + endif endif ifdef NO_DEMANGLE diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index e04313c4d840..b856afa6eb52 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -787,6 +787,8 @@ $(OUTPUT)dlfilters/%.o: dlfilters/%.c include/perf/perf_dlfilter.h $(Q)$(MKDIR) -p $(OUTPUT)dlfilters $(QUIET_CC)$(CC) -c -Iinclude $(EXTRA_CFLAGS) -o $@ -fpic $< +.SECONDARY: $(DLFILTERS:.so=.o) + $(OUTPUT)dlfilters/%.so: $(OUTPUT)dlfilters/%.o $(QUIET_LINK)$(CC) $(EXTRA_CFLAGS) -shared -o $@ $< @@ -802,7 +804,7 @@ endif $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) -LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(LDFLAGS)' +LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(filter-out -static,$(LDFLAGS))' $(LIBTRACEEVENT): FORCE $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c index c7c7ec0812d5..5fc6a2a3dbc5 100644 --- a/tools/perf/arch/arm/util/auxtrace.c +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -8,10 +8,10 @@ #include <linux/coresight-pmu.h> #include <linux/zalloc.h> -#include "../../util/auxtrace.h" -#include "../../util/debug.h" -#include "../../util/evlist.h" -#include "../../util/pmu.h" +#include "../../../util/auxtrace.h" +#include "../../../util/debug.h" +#include "../../../util/evlist.h" +#include "../../../util/pmu.h" #include "cs-etm.h" #include "arm-spe.h" diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c index 515aae470e23..293a23bf8be3 100644 --- a/tools/perf/arch/arm/util/cs-etm.c +++ b/tools/perf/arch/arm/util/cs-etm.c @@ -16,19 +16,19 @@ #include <linux/zalloc.h> #include "cs-etm.h" -#include "../../util/debug.h" -#include "../../util/record.h" -#include "../../util/auxtrace.h" -#include "../../util/cpumap.h" -#include "../../util/event.h" -#include "../../util/evlist.h" -#include "../../util/evsel.h" -#include "../../util/perf_api_probe.h" -#include "../../util/evsel_config.h" -#include "../../util/pmu.h" -#include "../../util/cs-etm.h" +#include "../../../util/debug.h" +#include "../../../util/record.h" +#include "../../../util/auxtrace.h" +#include "../../../util/cpumap.h" +#include "../../../util/event.h" +#include "../../../util/evlist.h" +#include "../../../util/evsel.h" +#include "../../../util/perf_api_probe.h" +#include "../../../util/evsel_config.h" +#include "../../../util/pmu.h" +#include "../../../util/cs-etm.h" #include <internal/lib.h> // page_size -#include "../../util/session.h" +#include "../../../util/session.h" #include <errno.h> #include <stdlib.h> diff --git a/tools/perf/arch/arm/util/perf_regs.c b/tools/perf/arch/arm/util/perf_regs.c index 2864e2e3776d..2833e101a7c6 100644 --- a/tools/perf/arch/arm/util/perf_regs.c +++ b/tools/perf/arch/arm/util/perf_regs.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -#include "../../util/perf_regs.h" +#include "../../../util/perf_regs.h" const struct sample_reg sample_reg_masks[] = { SMPL_REG_END diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c index bbc297a7e2e3..b8b23b9dc598 100644 --- a/tools/perf/arch/arm/util/pmu.c +++ b/tools/perf/arch/arm/util/pmu.c @@ -10,7 +10,7 @@ #include <linux/string.h> #include "arm-spe.h" 
-#include "../../util/pmu.h" +#include "../../../util/pmu.h" struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) diff --git a/tools/perf/arch/arm/util/unwind-libdw.c b/tools/perf/arch/arm/util/unwind-libdw.c index 36ba4c69c3c5..b7692cb0c733 100644 --- a/tools/perf/arch/arm/util/unwind-libdw.c +++ b/tools/perf/arch/arm/util/unwind-libdw.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include <elfutils/libdwfl.h> -#include "../../util/unwind-libdw.h" -#include "../../util/perf_regs.h" -#include "../../util/event.h" +#include "../../../util/unwind-libdw.h" +#include "../../../util/perf_regs.h" +#include "../../../util/event.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/arch/arm/util/unwind-libunwind.c b/tools/perf/arch/arm/util/unwind-libunwind.c index 3a550225dfaf..438906bf0014 100644 --- a/tools/perf/arch/arm/util/unwind-libunwind.c +++ b/tools/perf/arch/arm/util/unwind-libunwind.c @@ -3,8 +3,8 @@ #include <errno.h> #include <libunwind.h> #include "perf_regs.h" -#include "../../util/unwind.h" -#include "../../util/debug.h" +#include "../../../util/unwind.h" +#include "../../../util/debug.h" int libunwind__arch_reg_id(int regnum) { diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index ac653d08b1ea..1ca7bc337932 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -361,3 +361,5 @@ 444 n64 landlock_create_ruleset sys_landlock_create_ruleset 445 n64 landlock_add_rule sys_landlock_add_rule 446 n64 landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 n64 process_mrelease sys_process_mrelease diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 6f3953f2a0d5..7bef917cc84e 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -330,10 +330,10 @@ 256 64 sys_debug_setcontext sys_ni_syscall 256 spu sys_debug_setcontext sys_ni_syscall # 257 reserved for vserver -258 nospu migrate_pages sys_migrate_pages compat_sys_migrate_pages -259 nospu mbind sys_mbind compat_sys_mbind -260 nospu get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy -261 nospu set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +258 nospu migrate_pages sys_migrate_pages +259 nospu mbind sys_mbind +260 nospu get_mempolicy sys_get_mempolicy +261 nospu set_mempolicy sys_set_mempolicy 262 nospu mq_open sys_mq_open compat_sys_mq_open 263 nospu mq_unlink sys_mq_unlink 264 32 mq_timedsend sys_mq_timedsend_time32 @@ -381,7 +381,7 @@ 298 common faccessat sys_faccessat 299 common get_robust_list sys_get_robust_list compat_sys_get_robust_list 300 common set_robust_list sys_set_robust_list compat_sys_set_robust_list -301 common move_pages sys_move_pages compat_sys_move_pages +301 common move_pages sys_move_pages 302 common getcpu sys_getcpu 303 nospu epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 304 32 utimensat sys_utimensat_time32 @@ -526,3 +526,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease diff --git a/tools/perf/arch/powerpc/util/skip-callchain-idx.c b/tools/perf/arch/powerpc/util/skip-callchain-idx.c index 
3018a054526a..20cd6244863b 100644 --- a/tools/perf/arch/powerpc/util/skip-callchain-idx.c +++ b/tools/perf/arch/powerpc/util/skip-callchain-idx.c @@ -45,7 +45,7 @@ static const Dwfl_Callbacks offline_callbacks = { */ static int check_return_reg(int ra_regno, Dwarf_Frame *frame) { - Dwarf_Op ops_mem[2]; + Dwarf_Op ops_mem[3]; Dwarf_Op dummy; Dwarf_Op *ops = &dummy; size_t nops; diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index 8d619ec86dcc..df5261e5cfe1 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -122,7 +122,7 @@ 131 common quotactl sys_quotactl sys_quotactl 132 common getpgid sys_getpgid sys_getpgid 133 common fchdir sys_fchdir sys_fchdir -134 common bdflush - - +134 common bdflush sys_ni_syscall sys_ni_syscall 135 common sysfs sys_sysfs sys_sysfs 136 common personality sys_s390_personality sys_s390_personality 137 common afs_syscall - - @@ -274,9 +274,9 @@ 265 common statfs64 sys_statfs64 compat_sys_statfs64 266 common fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 267 common remap_file_pages sys_remap_file_pages sys_remap_file_pages -268 common mbind sys_mbind compat_sys_mbind -269 common get_mempolicy sys_get_mempolicy compat_sys_get_mempolicy -270 common set_mempolicy sys_set_mempolicy compat_sys_set_mempolicy +268 common mbind sys_mbind sys_mbind +269 common get_mempolicy sys_get_mempolicy sys_get_mempolicy +270 common set_mempolicy sys_set_mempolicy sys_set_mempolicy 271 common mq_open sys_mq_open compat_sys_mq_open 272 common mq_unlink sys_mq_unlink sys_mq_unlink 273 common mq_timedsend sys_mq_timedsend sys_mq_timedsend_time32 @@ -293,7 +293,7 @@ 284 common inotify_init sys_inotify_init sys_inotify_init 285 common inotify_add_watch sys_inotify_add_watch sys_inotify_add_watch 286 common inotify_rm_watch sys_inotify_rm_watch sys_inotify_rm_watch -287 common migrate_pages sys_migrate_pages compat_sys_migrate_pages +287 common migrate_pages sys_migrate_pages sys_migrate_pages 288 common openat sys_openat compat_sys_openat 289 common mkdirat sys_mkdirat sys_mkdirat 290 common mknodat sys_mknodat sys_mknodat @@ -317,7 +317,7 @@ 307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range 308 common tee sys_tee sys_tee 309 common vmsplice sys_vmsplice sys_vmsplice -310 common move_pages sys_move_pages compat_sys_move_pages +310 common move_pages sys_move_pages sys_move_pages 311 common getcpu sys_getcpu sys_getcpu 312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait 313 common utimes sys_utimes sys_utimes_time32 @@ -449,3 +449,5 @@ 444 common landlock_create_ruleset sys_landlock_create_ruleset sys_landlock_create_ruleset 445 common landlock_add_rule sys_landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease sys_process_mrelease diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index f6b57799c1ea..18b5500ea8bf 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -369,6 +369,7 @@ 445 common landlock_add_rule sys_landlock_add_rule 446 common landlock_restrict_self sys_landlock_restrict_self 447 common memfd_secret sys_memfd_secret +448 common process_mrelease sys_process_mrelease # # Due to a historical design error, certain syscalls are 
numbered differently @@ -397,7 +398,7 @@ 530 x32 set_robust_list compat_sys_set_robust_list 531 x32 get_robust_list compat_sys_get_robust_list 532 x32 vmsplice sys_vmsplice -533 x32 move_pages compat_sys_move_pages +533 x32 move_pages sys_move_pages 534 x32 preadv compat_sys_preadv64 535 x32 pwritev compat_sys_pwritev64 536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c index eeafe97b8105..792cd75ade33 100644 --- a/tools/perf/arch/x86/util/iostat.c +++ b/tools/perf/arch/x86/util/iostat.c @@ -432,7 +432,7 @@ void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel, u8 die = ((struct iio_root_port *)evsel->priv)->die; struct perf_counts_values *count = perf_counts(evsel->counts, die, 0); - if (count->run && count->ena) { + if (count && count->run && count->ena) { if (evsel->prev_raw_counts && !out->force_header) { struct perf_counts_values *prev_count = perf_counts(evsel->prev_raw_counts, die, 0); diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 0e824f7d8b19..c32c2eb16d7d 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -368,16 +368,6 @@ static inline int output_type(unsigned int type) return OUTPUT_TYPE_OTHER; } -static inline unsigned int attr_type(unsigned int type) -{ - switch (type) { - case OUTPUT_TYPE_SYNTH: - return PERF_TYPE_SYNTH; - default: - return type; - } -} - static bool output_set_by_user(void) { int j; @@ -469,7 +459,7 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session) return -EINVAL; if (PRINT_FIELD(WEIGHT) && - evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT", PERF_OUTPUT_WEIGHT)) + evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_TYPE, "WEIGHT", PERF_OUTPUT_WEIGHT)) return -EINVAL; if (PRINT_FIELD(SYM) && @@ -556,6 +546,18 @@ static void set_print_ip_opts(struct perf_event_attr *attr) output[type].print_ip_opts |= EVSEL__PRINT_SRCLINE; } +static struct evsel *find_first_output_type(struct evlist *evlist, + unsigned int type) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (output_type(evsel->core.attr.type) == (int)type) + return evsel; + } + return NULL; +} + /* * verify all user requested events exist and the samples * have the expected data @@ -567,7 +569,7 @@ static int perf_session__check_output_opt(struct perf_session *session) struct evsel *evsel; for (j = 0; j < OUTPUT_TYPE_MAX; ++j) { - evsel = perf_session__find_first_evtype(session, attr_type(j)); + evsel = find_first_output_type(session->evlist, j); /* * even if fields is set to 0 (ie., show nothing) event must @@ -4037,11 +4039,15 @@ script_found: goto out_delete; uname(&uts); - if (data.is_pipe || /* assume pipe_mode indicates native_arch */ - !strcmp(uts.machine, session->header.env.arch) || - (!strcmp(uts.machine, "x86_64") && - !strcmp(session->header.env.arch, "i386"))) + if (data.is_pipe) { /* Assume pipe_mode indicates native_arch */ native_arch = true; + } else if (session->header.env.arch) { + if (!strcmp(uts.machine, session->header.env.arch)) + native_arch = true; + else if (!strcmp(uts.machine, "x86_64") && + !strcmp(session->header.env.arch, "i386")) + native_arch = true; + } script.session = session; script__setup_sample_type(&script); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f6e87b7be5fa..f0ecfda34ece 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -2408,6 +2408,8 @@ int cmd_stat(int argc, const char **argv) goto 
out; } else if (verbose) iostat_list(evsel_list, &stat_config); + if (iostat_mode == IOSTAT_RUN && !target__has_cpu(&target)) + target.system_wide = true; } if (add_default_attributes()) diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index c783558332b8..f1e46277e822 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -144,6 +144,7 @@ done # diff with extra ignore lines check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' +check arch/x86/include/asm/amd-ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' diff --git a/tools/perf/pmu-events/arch/powerpc/power8/other.json b/tools/perf/pmu-events/arch/powerpc/power8/other.json index 84a0cedf1fd9..f1f2965f6775 100644 --- a/tools/perf/pmu-events/arch/powerpc/power8/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power8/other.json @@ -1046,7 +1046,7 @@ { "EventCode": "0x4e010", "EventName": "PM_GCT_NOSLOT_IC_L3MISS", - "BriefDescription": "Gct empty for this thread due to icach l3 miss", + "BriefDescription": "Gct empty for this thread due to icache l3 miss", "PublicDescription": "" }, { diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 6731b3cf0c2f..7c887d37b893 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -1285,6 +1285,7 @@ int main(int argc, char *argv[]) } free_arch_std_events(); + free_sys_event_tables(); free(mapfile); return 0; @@ -1306,6 +1307,7 @@ err_close_eventsfp: create_empty_mapping(output_file); err_out: free_arch_std_events(); + free_sys_event_tables(); free(mapfile); return ret; } diff --git a/tools/perf/scripts/python/bin/stackcollapse-report b/tools/perf/scripts/python/bin/stackcollapse-report index 356b9656393d..21a356bd27f6 100755 --- a/tools/perf/scripts/python/bin/stackcollapse-report +++ b/tools/perf/scripts/python/bin/stackcollapse-report @@ -1,3 +1,3 @@ #!/bin/sh # description: produce callgraphs in short form for scripting use -perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py -- "$@" +perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py "$@" diff --git a/tools/perf/tests/attr/test-stat-default b/tools/perf/tests/attr/test-stat-default index d9e99b3f77e6..d8ea6a88163f 100644 --- a/tools/perf/tests/attr/test-stat-default +++ b/tools/perf/tests/attr/test-stat-default @@ -68,3 +68,100 @@ fd=10 type=0 config=5 optional=1 + +# PERF_TYPE_RAW / slots (0x400) +[event11:base-stat] +fd=11 +group_fd=-1 +type=4 +config=1024 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-retiring (0x8000) +[event12:base-stat] +fd=12 +group_fd=11 +type=4 +config=32768 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +[event13:base-stat] +fd=13 +group_fd=11 +type=4 +config=33024 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +[event14:base-stat] +fd=14 +group_fd=11 +type=4 +config=33280 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-be-bound (0x8300) 
+[event15:base-stat] +fd=15 +group_fd=11 +type=4 +config=33536 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) +[event16:base-stat] +fd=16 +group_fd=11 +type=4 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) +[event17:base-stat] +fd=17 +group_fd=11 +type=4 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) +[event18:base-stat] +fd=18 +group_fd=11 +type=4 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) +[event19:base-stat] +fd=19 +group_fd=11 +type=4 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-1 b/tools/perf/tests/attr/test-stat-detailed-1 index 8b04a055d154..b656ab93c5bf 100644 --- a/tools/perf/tests/attr/test-stat-detailed-1 +++ b/tools/perf/tests/attr/test-stat-detailed-1 @@ -70,12 +70,109 @@ type=0 config=5 optional=1 +# PERF_TYPE_RAW / slots (0x400) +[event11:base-stat] +fd=11 +group_fd=-1 +type=4 +config=1024 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-retiring (0x8000) +[event12:base-stat] +fd=12 +group_fd=11 +type=4 +config=32768 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +[event13:base-stat] +fd=13 +group_fd=11 +type=4 +config=33024 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +[event14:base-stat] +fd=14 +group_fd=11 +type=4 +config=33280 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-be-bound (0x8300) +[event15:base-stat] +fd=15 +group_fd=11 +type=4 +config=33536 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) +[event16:base-stat] +fd=16 +group_fd=11 +type=4 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) +[event17:base-stat] +fd=17 +group_fd=11 +type=4 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) +[event18:base-stat] +fd=18 +group_fd=11 +type=4 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) +[event19:base-stat] +fd=19 +group_fd=11 +type=4 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event11:base-stat] -fd=11 +[event20:base-stat] +fd=20 type=3 config=0 optional=1 @@ -84,8 +181,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event12:base-stat] -fd=12 +[event21:base-stat] +fd=21 type=3 config=65536 optional=1 @@ -94,8 +191,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event13:base-stat] -fd=13 +[event22:base-stat] +fd=22 type=3 config=2 optional=1 @@ -104,8 +201,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event14:base-stat] -fd=14 +[event23:base-stat] +fd=23 type=3 config=65538 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-2 
b/tools/perf/tests/attr/test-stat-detailed-2 index 4fca9f1bfbf8..97625090a1c4 100644 --- a/tools/perf/tests/attr/test-stat-detailed-2 +++ b/tools/perf/tests/attr/test-stat-detailed-2 @@ -70,12 +70,109 @@ type=0 config=5 optional=1 +# PERF_TYPE_RAW / slots (0x400) +[event11:base-stat] +fd=11 +group_fd=-1 +type=4 +config=1024 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-retiring (0x8000) +[event12:base-stat] +fd=12 +group_fd=11 +type=4 +config=32768 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +[event13:base-stat] +fd=13 +group_fd=11 +type=4 +config=33024 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +[event14:base-stat] +fd=14 +group_fd=11 +type=4 +config=33280 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-be-bound (0x8300) +[event15:base-stat] +fd=15 +group_fd=11 +type=4 +config=33536 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) +[event16:base-stat] +fd=16 +group_fd=11 +type=4 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) +[event17:base-stat] +fd=17 +group_fd=11 +type=4 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) +[event18:base-stat] +fd=18 +group_fd=11 +type=4 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) +[event19:base-stat] +fd=19 +group_fd=11 +type=4 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event11:base-stat] -fd=11 +[event20:base-stat] +fd=20 type=3 config=0 optional=1 @@ -84,8 +181,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event12:base-stat] -fd=12 +[event21:base-stat] +fd=21 type=3 config=65536 optional=1 @@ -94,8 +191,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event13:base-stat] -fd=13 +[event22:base-stat] +fd=22 type=3 config=2 optional=1 @@ -104,8 +201,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event14:base-stat] -fd=14 +[event23:base-stat] +fd=23 type=3 config=65538 optional=1 @@ -114,8 +211,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event15:base-stat] -fd=15 +[event24:base-stat] +fd=24 type=3 config=1 optional=1 @@ -124,8 +221,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event16:base-stat] -fd=16 +[event25:base-stat] +fd=25 type=3 config=65537 optional=1 @@ -134,8 +231,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event17:base-stat] -fd=17 +[event26:base-stat] +fd=26 type=3 config=3 optional=1 @@ -144,8 +241,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event18:base-stat] -fd=18 +[event27:base-stat] +fd=27 type=3 config=65539 optional=1 @@ 
-154,8 +251,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event19:base-stat] -fd=19 +[event28:base-stat] +fd=28 type=3 config=4 optional=1 @@ -164,8 +261,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event20:base-stat] -fd=20 +[event29:base-stat] +fd=29 type=3 config=65540 optional=1 diff --git a/tools/perf/tests/attr/test-stat-detailed-3 b/tools/perf/tests/attr/test-stat-detailed-3 index 4bb58e1c82a6..d555042e3fbf 100644 --- a/tools/perf/tests/attr/test-stat-detailed-3 +++ b/tools/perf/tests/attr/test-stat-detailed-3 @@ -70,12 +70,109 @@ type=0 config=5 optional=1 +# PERF_TYPE_RAW / slots (0x400) +[event11:base-stat] +fd=11 +group_fd=-1 +type=4 +config=1024 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-retiring (0x8000) +[event12:base-stat] +fd=12 +group_fd=11 +type=4 +config=32768 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-bad-spec (0x8100) +[event13:base-stat] +fd=13 +group_fd=11 +type=4 +config=33024 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fe-bound (0x8200) +[event14:base-stat] +fd=14 +group_fd=11 +type=4 +config=33280 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-be-bound (0x8300) +[event15:base-stat] +fd=15 +group_fd=11 +type=4 +config=33536 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-heavy-ops (0x8400) +[event16:base-stat] +fd=16 +group_fd=11 +type=4 +config=33792 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-br-mispredict (0x8500) +[event17:base-stat] +fd=17 +group_fd=11 +type=4 +config=34048 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-fetch-lat (0x8600) +[event18:base-stat] +fd=18 +group_fd=11 +type=4 +config=34304 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + +# PERF_TYPE_RAW / topdown-mem-bound (0x8700) +[event19:base-stat] +fd=19 +group_fd=11 +type=4 +config=34560 +disabled=0 +enable_on_exec=0 +read_format=15 +optional=1 + # PERF_TYPE_HW_CACHE / # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event11:base-stat] -fd=11 +[event20:base-stat] +fd=20 type=3 config=0 optional=1 @@ -84,8 +181,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event12:base-stat] -fd=12 +[event21:base-stat] +fd=21 type=3 config=65536 optional=1 @@ -94,8 +191,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event13:base-stat] -fd=13 +[event22:base-stat] +fd=22 type=3 config=2 optional=1 @@ -104,8 +201,8 @@ optional=1 # PERF_COUNT_HW_CACHE_LL << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event14:base-stat] -fd=14 +[event23:base-stat] +fd=23 type=3 config=65538 optional=1 @@ -114,8 +211,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event15:base-stat] -fd=15 +[event24:base-stat] +fd=24 type=3 config=1 optional=1 @@ -124,8 +221,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1I << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event16:base-stat] -fd=16 
+[event25:base-stat] +fd=25 type=3 config=65537 optional=1 @@ -134,8 +231,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event17:base-stat] -fd=17 +[event26:base-stat] +fd=26 type=3 config=3 optional=1 @@ -144,8 +241,8 @@ optional=1 # PERF_COUNT_HW_CACHE_DTLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event18:base-stat] -fd=18 +[event27:base-stat] +fd=27 type=3 config=65539 optional=1 @@ -154,8 +251,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event19:base-stat] -fd=19 +[event28:base-stat] +fd=28 type=3 config=4 optional=1 @@ -164,8 +261,8 @@ optional=1 # PERF_COUNT_HW_CACHE_ITLB << 0 | # (PERF_COUNT_HW_CACHE_OP_READ << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event20:base-stat] -fd=20 +[event29:base-stat] +fd=29 type=3 config=65540 optional=1 @@ -174,8 +271,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | # (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16) -[event21:base-stat] -fd=21 +[event30:base-stat] +fd=30 type=3 config=512 optional=1 @@ -184,8 +281,8 @@ optional=1 # PERF_COUNT_HW_CACHE_L1D << 0 | # (PERF_COUNT_HW_CACHE_OP_PREFETCH << 8) | # (PERF_COUNT_HW_CACHE_RESULT_MISS << 16) -[event22:base-stat] -fd=22 +[event31:base-stat] +fd=31 type=3 config=66048 optional=1 diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index dbf5f5215abe..fa03ff0dc083 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -192,7 +192,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), } if (count != expect * evlist->core.nr_entries) { - pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count); + pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect * evlist->core.nr_entries, count); goto out_delete_evlist; } diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index 9866cddebf23..9b4a765e4b73 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -229,8 +229,8 @@ static int read_object_code(u64 addr, size_t len, u8 cpumode, struct thread *thread, struct state *state) { struct addr_location al; - unsigned char buf1[BUFSZ]; - unsigned char buf2[BUFSZ]; + unsigned char buf1[BUFSZ] = {0}; + unsigned char buf2[BUFSZ] = {0}; size_t ret_len; u64 objdump_addr; const char *objdump_name; diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index a288035eb362..c756284b3b13 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -20,6 +20,23 @@ /* For bsearch. We try to unwind functions in shared object. */ #include <stdlib.h> +/* + * The test will assert frames are on the stack but tail call optimizations lose + * the frame of the caller. Clang can disable this optimization on a called + * function but GCC currently (11/2020) lacks this attribute. The barrier is + * used to inhibit tail calls in these cases. 
+ */ +#ifdef __has_attribute +#if __has_attribute(disable_tail_calls) +#define NO_TAIL_CALL_ATTRIBUTE __attribute__((disable_tail_calls)) +#define NO_TAIL_CALL_BARRIER +#endif +#endif +#ifndef NO_TAIL_CALL_ATTRIBUTE +#define NO_TAIL_CALL_ATTRIBUTE +#define NO_TAIL_CALL_BARRIER __asm__ __volatile__("" : : : "memory"); +#endif + static int mmap_handler(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -91,7 +108,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg) return strcmp((const char *) symbol, funcs[idx]); } -noinline int test_dwarf_unwind__thread(struct thread *thread) +NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__thread(struct thread *thread) { struct perf_sample sample; unsigned long cnt = 0; @@ -122,7 +139,7 @@ noinline int test_dwarf_unwind__thread(struct thread *thread) static int global_unwind_retval = -INT_MAX; -noinline int test_dwarf_unwind__compare(void *p1, void *p2) +NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__compare(void *p1, void *p2) { /* Any possible value should be 'thread' */ struct thread *thread = *(struct thread **)p1; @@ -141,7 +158,7 @@ noinline int test_dwarf_unwind__compare(void *p1, void *p2) return p1 - p2; } -noinline int test_dwarf_unwind__krava_3(struct thread *thread) +NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__krava_3(struct thread *thread) { struct thread *array[2] = {thread, thread}; void *fp = &bsearch; @@ -160,14 +177,22 @@ noinline int test_dwarf_unwind__krava_3(struct thread *thread) return global_unwind_retval; } -noinline int test_dwarf_unwind__krava_2(struct thread *thread) +NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__krava_2(struct thread *thread) { - return test_dwarf_unwind__krava_3(thread); + int ret; + + ret = test_dwarf_unwind__krava_3(thread); + NO_TAIL_CALL_BARRIER; + return ret; } -noinline int test_dwarf_unwind__krava_1(struct thread *thread) +NO_TAIL_CALL_ATTRIBUTE noinline int test_dwarf_unwind__krava_1(struct thread *thread) { - return test_dwarf_unwind__krava_2(thread); + int ret; + + ret = test_dwarf_unwind__krava_2(thread); + NO_TAIL_CALL_BARRIER; + return ret; } int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unused) diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 0d8e3dcb7f88..041d6032a348 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -223,8 +223,11 @@ struct ucred { * reuses AF_INET address family */ #define AF_XDP 44 /* XDP sockets */ +#define AF_MCTP 45 /* Management component + * transport protocol + */ -#define AF_MAX 45 /* For now.. */ +#define AF_MAX 46 /* For now.. */ /* Protocol families, same as address families. */ #define PF_UNSPEC AF_UNSPEC @@ -274,6 +277,7 @@ struct ucred { #define PF_QIPCRTR AF_QIPCRTR #define PF_SMC AF_SMC #define PF_XDP AF_XDP +#define PF_MCTP AF_MCTP #define PF_MAX AF_MAX /* Maximum queue length specifiable by listen. 
*/ @@ -421,6 +425,9 @@ extern int __sys_accept4_file(struct file *file, unsigned file_flags, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags, unsigned long nofile); +extern struct file *do_accept(struct file *file, unsigned file_flags, + struct sockaddr __user *upeer_sockaddr, + int __user *upeer_addrlen, int flags); extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen, int flags); extern int __sys_socket(int family, int type, int protocol); diff --git a/tools/perf/trace/beauty/move_mount_flags.sh b/tools/perf/trace/beauty/move_mount_flags.sh index 55e59241daa4..4b1d9acc0bd0 100755 --- a/tools/perf/trace/beauty/move_mount_flags.sh +++ b/tools/perf/trace/beauty/move_mount_flags.sh @@ -10,7 +10,7 @@ fi linux_mount=${linux_header_dir}/mount.h printf "static const char *move_mount_flags[] = {\n" -regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([FT]_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' +regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([^_]+_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' egrep $regex ${linux_mount} | \ sed -r "s/$regex/\2 \1/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n" diff --git a/tools/perf/ui/browser.c b/tools/perf/ui/browser.c index 781afe42e90e..fa5bd5c20e96 100644 --- a/tools/perf/ui/browser.c +++ b/tools/perf/ui/browser.c @@ -757,25 +757,40 @@ void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, } void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column, - unsigned int row, bool arrow_down) + unsigned int row, int diff, bool arrow_down) { - unsigned int end_row; + int end_row; - if (row >= browser->top_idx) - end_row = row - browser->top_idx; - else + if (diff <= 0) return; SLsmg_set_char_set(1); if (arrow_down) { + if (row + diff <= browser->top_idx) + return; + + end_row = row + diff - browser->top_idx; ui_browser__gotorc(browser, end_row, column - 1); - SLsmg_write_char(SLSMG_ULCORN_CHAR); - ui_browser__gotorc(browser, end_row, column); - SLsmg_draw_hline(2); - ui_browser__gotorc(browser, end_row + 1, column - 1); SLsmg_write_char(SLSMG_LTEE_CHAR); + + while (--end_row >= 0 && end_row > (int)(row - browser->top_idx)) { + ui_browser__gotorc(browser, end_row, column - 1); + SLsmg_draw_vline(1); + } + + end_row = (int)(row - browser->top_idx); + if (end_row >= 0) { + ui_browser__gotorc(browser, end_row, column - 1); + SLsmg_write_char(SLSMG_ULCORN_CHAR); + ui_browser__gotorc(browser, end_row, column); + SLsmg_draw_hline(2); + } } else { + if (row < browser->top_idx) + return; + + end_row = row - browser->top_idx; ui_browser__gotorc(browser, end_row, column - 1); SLsmg_write_char(SLSMG_LTEE_CHAR); ui_browser__gotorc(browser, end_row, column); diff --git a/tools/perf/ui/browser.h b/tools/perf/ui/browser.h index 3678eb88f119..510ce4554050 100644 --- a/tools/perf/ui/browser.h +++ b/tools/perf/ui/browser.h @@ -51,7 +51,7 @@ void ui_browser__write_graph(struct ui_browser *browser, int graph); void __ui_browser__line_arrow(struct ui_browser *browser, unsigned int column, u64 start, u64 end); void ui_browser__mark_fused(struct ui_browser *browser, unsigned int column, - unsigned int row, bool arrow_down); + unsigned int row, int diff, bool arrow_down); void __ui_browser__show_title(struct ui_browser *browser, const char *title); void ui_browser__show_title(struct ui_browser *browser, const char *title); int ui_browser__show(struct ui_browser *browser, const char *title, diff --git 
a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index ef4da4295bf7..e81c2493efdf 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -125,13 +125,20 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ab->selection = al; } -static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor) +static int is_fused(struct annotate_browser *ab, struct disasm_line *cursor) { struct disasm_line *pos = list_prev_entry(cursor, al.node); const char *name; + int diff = 1; + + while (pos && pos->al.offset == -1) { + pos = list_prev_entry(pos, al.node); + if (!ab->opts->hide_src_code) + diff++; + } if (!pos) - return false; + return 0; if (ins__is_lock(&pos->ins)) name = pos->ops.locked.ins.name; @@ -139,9 +146,11 @@ static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor) name = pos->ins.name; if (!name || !cursor->ins.name) - return false; + return 0; - return ins__is_fused(ab->arch, name, cursor->ins.name); + if (ins__is_fused(ab->arch, name, cursor->ins.name)) + return diff; + return 0; } static void annotate_browser__draw_current_jump(struct ui_browser *browser) @@ -155,6 +164,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) struct annotation *notes = symbol__annotation(sym); u8 pcnt_width = annotation__pcnt_width(notes); int width; + int diff = 0; /* PLT symbols contain external offsets */ if (strstr(sym->name, "@plt")) @@ -205,11 +215,11 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) pcnt_width + 2 + notes->widths.addr + width, from, to); - if (is_fused(ab, cursor)) { + diff = is_fused(ab, cursor); + if (diff > 0) { ui_browser__mark_fused(browser, pcnt_width + 3 + notes->widths.addr + width, - from - 1, - to > from); + from - diff, diff, to > from); } } diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 2d4fa1304178..f2914d5bed6e 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -59,6 +59,7 @@ perf-y += pstack.o perf-y += session.o perf-y += sample-raw.o perf-y += s390-sample-raw.o +perf-y += amd-sample-raw.o perf-$(CONFIG_TRACE) += syscalltbl.o perf-y += ordered-events.o perf-y += namespaces.o diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c new file mode 100644 index 000000000000..d19d765195c5 --- /dev/null +++ b/tools/perf/util/amd-sample-raw.c @@ -0,0 +1,289 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AMD specific. Provide textual annotation for IBS raw sample data. 
+ */ + +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> + +#include <linux/string.h> +#include "../../arch/x86/include/asm/amd-ibs.h" + +#include "debug.h" +#include "session.h" +#include "evlist.h" +#include "sample-raw.h" +#include "pmu-events/pmu-events.h" + +static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; + +static void pr_ibs_fetch_ctl(union ibs_fetch_ctl reg) +{ + const char * const ic_miss_strs[] = { + " IcMiss 0", + " IcMiss 1", + }; + const char * const l1tlb_pgsz_strs[] = { + " L1TlbPgSz 4KB", + " L1TlbPgSz 2MB", + " L1TlbPgSz 1GB", + " L1TlbPgSz RESERVED" + }; + const char * const l1tlb_pgsz_strs_erratum1347[] = { + " L1TlbPgSz 4KB", + " L1TlbPgSz 16KB", + " L1TlbPgSz 2MB", + " L1TlbPgSz 1GB" + }; + const char *ic_miss_str = NULL; + const char *l1tlb_pgsz_str = NULL; + + if (cpu_family == 0x19 && cpu_model < 0x10) { + /* + * Erratum #1238 workaround is to ignore MSRC001_1030[IbsIcMiss] + * Erratum #1347 workaround is to use table provided in erratum + */ + if (reg.phy_addr_valid) + l1tlb_pgsz_str = l1tlb_pgsz_strs_erratum1347[reg.l1tlb_pgsz]; + } else { + if (reg.phy_addr_valid) + l1tlb_pgsz_str = l1tlb_pgsz_strs[reg.l1tlb_pgsz]; + ic_miss_str = ic_miss_strs[reg.ic_miss]; + } + + printf("ibs_fetch_ctl:\t%016llx MaxCnt %7d Cnt %7d Lat %5d En %d Val %d Comp %d%s " + "PhyAddrValid %d%s L1TlbMiss %d L2TlbMiss %d RandEn %d%s\n", + reg.val, reg.fetch_maxcnt << 4, reg.fetch_cnt << 4, reg.fetch_lat, + reg.fetch_en, reg.fetch_val, reg.fetch_comp, ic_miss_str ? : "", + reg.phy_addr_valid, l1tlb_pgsz_str ? : "", reg.l1tlb_miss, reg.l2tlb_miss, + reg.rand_en, reg.fetch_comp ? (reg.fetch_l2_miss ? " L2Miss 1" : " L2Miss 0") : ""); +} + +static void pr_ic_ibs_extd_ctl(union ic_ibs_extd_ctl reg) +{ + printf("ic_ibs_ext_ctl:\t%016llx IbsItlbRefillLat %3d\n", reg.val, reg.itlb_refill_lat); +} + +static void pr_ibs_op_ctl(union ibs_op_ctl reg) +{ + printf("ibs_op_ctl:\t%016llx MaxCnt %9d En %d Val %d CntCtl %d=%s CurCnt %9d\n", + reg.val, ((reg.opmaxcnt_ext << 16) | reg.opmaxcnt) << 4, reg.op_en, reg.op_val, + reg.cnt_ctl, reg.cnt_ctl ? "uOps" : "cycles", reg.opcurcnt); +} + +static void pr_ibs_op_data(union ibs_op_data reg) +{ + printf("ibs_op_data:\t%016llx CompToRetCtr %5d TagToRetCtr %5d%s%s%s BrnRet %d " + " RipInvalid %d BrnFuse %d Microcode %d\n", + reg.val, reg.comp_to_ret_ctr, reg.tag_to_ret_ctr, + reg.op_brn_ret ? (reg.op_return ? " OpReturn 1" : " OpReturn 0") : "", + reg.op_brn_ret ? (reg.op_brn_taken ? " OpBrnTaken 1" : " OpBrnTaken 0") : "", + reg.op_brn_ret ? (reg.op_brn_misp ? " OpBrnMisp 1" : " OpBrnMisp 0") : "", + reg.op_brn_ret, reg.op_rip_invalid, reg.op_brn_fuse, reg.op_microcode); +} + +static void pr_ibs_op_data2(union ibs_op_data2 reg) +{ + static const char * const data_src_str[] = { + "", + " DataSrc 1=(reserved)", + " DataSrc 2=Local node cache", + " DataSrc 3=DRAM", + " DataSrc 4=Remote node cache", + " DataSrc 5=(reserved)", + " DataSrc 6=(reserved)", + " DataSrc 7=Other" + }; + + printf("ibs_op_data2:\t%016llx %sRmtNode %d%s\n", reg.val, + reg.data_src == 2 ? (reg.cache_hit_st ? 
"CacheHitSt 1=O-State " + : "CacheHitSt 0=M-state ") : "", + reg.rmt_node, data_src_str[reg.data_src]); +} + +static void pr_ibs_op_data3(union ibs_op_data3 reg) +{ + char l2_miss_str[sizeof(" L2Miss _")] = ""; + char op_mem_width_str[sizeof(" OpMemWidth _____ bytes")] = ""; + char op_dc_miss_open_mem_reqs_str[sizeof(" OpDcMissOpenMemReqs __")] = ""; + + /* + * Erratum #1293 + * Ignore L2Miss and OpDcMissOpenMemReqs (and opdata2) if DcMissNoMabAlloc or SwPf set + */ + if (!(cpu_family == 0x19 && cpu_model < 0x10 && (reg.dc_miss_no_mab_alloc || reg.sw_pf))) { + snprintf(l2_miss_str, sizeof(l2_miss_str), " L2Miss %d", reg.l2_miss); + snprintf(op_dc_miss_open_mem_reqs_str, sizeof(op_dc_miss_open_mem_reqs_str), + " OpDcMissOpenMemReqs %2d", reg.op_dc_miss_open_mem_reqs); + } + + if (reg.op_mem_width) + snprintf(op_mem_width_str, sizeof(op_mem_width_str), + " OpMemWidth %2d bytes", 1 << (reg.op_mem_width - 1)); + + printf("ibs_op_data3:\t%016llx LdOp %d StOp %d DcL1TlbMiss %d DcL2TlbMiss %d " + "DcL1TlbHit2M %d DcL1TlbHit1G %d DcL2TlbHit2M %d DcMiss %d DcMisAcc %d " + "DcWcMemAcc %d DcUcMemAcc %d DcLockedOp %d DcMissNoMabAlloc %d DcLinAddrValid %d " + "DcPhyAddrValid %d DcL2TlbHit1G %d%s SwPf %d%s%s DcMissLat %5d TlbRefillLat %5d\n", + reg.val, reg.ld_op, reg.st_op, reg.dc_l1tlb_miss, reg.dc_l2tlb_miss, + reg.dc_l1tlb_hit_2m, reg.dc_l1tlb_hit_1g, reg.dc_l2tlb_hit_2m, reg.dc_miss, + reg.dc_mis_acc, reg.dc_wc_mem_acc, reg.dc_uc_mem_acc, reg.dc_locked_op, + reg.dc_miss_no_mab_alloc, reg.dc_lin_addr_valid, reg.dc_phy_addr_valid, + reg.dc_l2_tlb_hit_1g, l2_miss_str, reg.sw_pf, op_mem_width_str, + op_dc_miss_open_mem_reqs_str, reg.dc_miss_lat, reg.tlb_refill_lat); +} + +/* + * IBS Op/Execution MSRs always saved, in order, are: + * IBS_OP_CTL, IBS_OP_RIP, IBS_OP_DATA, IBS_OP_DATA2, + * IBS_OP_DATA3, IBS_DC_LINADDR, IBS_DC_PHYSADDR, BP_IBSTGT_RIP + */ +static void amd_dump_ibs_op(struct perf_sample *sample) +{ + struct perf_ibs_data *data = sample->raw_data; + union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data; + __u64 *rip = (__u64 *)op_ctl + 1; + union ibs_op_data *op_data = (union ibs_op_data *)(rip + 1); + union ibs_op_data3 *op_data3 = (union ibs_op_data3 *)(rip + 3); + + pr_ibs_op_ctl(*op_ctl); + if (!op_data->op_rip_invalid) + printf("IbsOpRip:\t%016llx\n", *rip); + pr_ibs_op_data(*op_data); + /* + * Erratum #1293: ignore op_data2 if DcMissNoMabAlloc or SwPf are set + */ + if (!(cpu_family == 0x19 && cpu_model < 0x10 && + (op_data3->dc_miss_no_mab_alloc || op_data3->sw_pf))) + pr_ibs_op_data2(*(union ibs_op_data2 *)(rip + 2)); + pr_ibs_op_data3(*op_data3); + if (op_data3->dc_lin_addr_valid) + printf("IbsDCLinAd:\t%016llx\n", *(rip + 4)); + if (op_data3->dc_phy_addr_valid) + printf("IbsDCPhysAd:\t%016llx\n", *(rip + 5)); + if (op_data->op_brn_ret && *(rip + 6)) + printf("IbsBrTarget:\t%016llx\n", *(rip + 6)); +} + +/* + * IBS Fetch MSRs always saved, in order, are: + * IBS_FETCH_CTL, IBS_FETCH_LINADDR, IBS_FETCH_PHYSADDR, IC_IBS_EXTD_CTL + */ +static void amd_dump_ibs_fetch(struct perf_sample *sample) +{ + struct perf_ibs_data *data = sample->raw_data; + union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data; + __u64 *addr = (__u64 *)fetch_ctl + 1; + union ic_ibs_extd_ctl *extd_ctl = (union ic_ibs_extd_ctl *)addr + 2; + + pr_ibs_fetch_ctl(*fetch_ctl); + printf("IbsFetchLinAd:\t%016llx\n", *addr++); + if (fetch_ctl->phy_addr_valid) + printf("IbsFetchPhysAd:\t%016llx\n", *addr); + pr_ic_ibs_extd_ctl(*extd_ctl); +} + +/* + * Test for enable and valid bits in captured control MSRs. 
+ */ +static bool is_valid_ibs_fetch_sample(struct perf_sample *sample) +{ + struct perf_ibs_data *data = sample->raw_data; + union ibs_fetch_ctl *fetch_ctl = (union ibs_fetch_ctl *)data->data; + + if (fetch_ctl->fetch_en && fetch_ctl->fetch_val) + return true; + + return false; +} + +static bool is_valid_ibs_op_sample(struct perf_sample *sample) +{ + struct perf_ibs_data *data = sample->raw_data; + union ibs_op_ctl *op_ctl = (union ibs_op_ctl *)data->data; + + if (op_ctl->op_en && op_ctl->op_val) + return true; + + return false; +} + +/* AMD vendor specific raw sample function. Check for PERF_RECORD_SAMPLE events + * and if the event was triggered by IBS, display its raw data with decoded text. + * The function is only invoked when the dump flag -D is set. + */ +void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event, + struct perf_sample *sample) +{ + struct evsel *evsel; + + if (event->header.type != PERF_RECORD_SAMPLE || !sample->raw_size) + return; + + evsel = evlist__event2evsel(evlist, event); + if (!evsel) + return; + + if (evsel->core.attr.type == ibs_fetch_type) { + if (!is_valid_ibs_fetch_sample(sample)) { + pr_debug("Invalid raw IBS Fetch MSR data encountered\n"); + return; + } + amd_dump_ibs_fetch(sample); + } else if (evsel->core.attr.type == ibs_op_type) { + if (!is_valid_ibs_op_sample(sample)) { + pr_debug("Invalid raw IBS Op MSR data encountered\n"); + return; + } + amd_dump_ibs_op(sample); + } +} + +static void parse_cpuid(struct perf_env *env) +{ + const char *cpuid; + int ret; + + cpuid = perf_env__cpuid(env); + /* + * cpuid = "AuthenticAMD,family,model,stepping" + */ + ret = sscanf(cpuid, "%*[^,],%u,%u", &cpu_family, &cpu_model); + if (ret != 2) + pr_debug("problem parsing cpuid\n"); +} + +/* + * Find and assign the type number used for ibs_op or ibs_fetch samples. + * Device names can be large - we are only interested in the first 9 characters, + * to match "ibs_fetch". + */ +bool evlist__has_amd_ibs(struct evlist *evlist) +{ + struct perf_env *env = evlist->env; + int ret, nr_pmu_mappings = perf_env__nr_pmu_mappings(env); + const char *pmu_mapping = perf_env__pmu_mappings(env); + char name[sizeof("ibs_fetch")]; + u32 type; + + while (nr_pmu_mappings--) { + ret = sscanf(pmu_mapping, "%u:%9s", &type, name); + if (ret == 2) { + if (strstarts(name, "ibs_op")) + ibs_op_type = type; + else if (strstarts(name, "ibs_fetch")) + ibs_fetch_type = type; + } + pmu_mapping += strlen(pmu_mapping) + 1 /* '\0' */; + } + + if (ibs_fetch_type || ibs_op_type) { + if (!cpu_family) + parse_cpuid(env); + return true; + } + + return false; +} diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c index 996d025b8ed8..1a7112a87736 100644 --- a/tools/perf/util/bpf-event.c +++ b/tools/perf/util/bpf-event.c @@ -21,6 +21,17 @@ #include "record.h" #include "util/synthetic-events.h" +struct btf * __weak btf__load_from_kernel_by_id(__u32 id) +{ + struct btf *btf; +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + int err = btf__get_from_id(id, &btf); +#pragma GCC diagnostic pop + + return err ? 
ERR_PTR(err) : btf; +} + #define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr)) static int snprintf_hex(char *buf, size_t size, unsigned char *data, size_t len) diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 4fb5e90d7a57..60ce5908c664 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -801,7 +801,7 @@ int perf_config_set(struct perf_config_set *set, section->name, item->name); ret = fn(key, value, data); if (ret < 0) { - pr_err("Error: wrong config key-value pair %s=%s\n", + pr_err("Error in the given config file: wrong config key-value pair %s=%s\n", key, value); /* * Can't be just a 'break', as perf_config_set__for_each_entry() diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index ee15db2be2f4..9ed9a5676d35 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -1349,6 +1349,16 @@ void dso__set_build_id(struct dso *dso, struct build_id *bid) bool dso__build_id_equal(const struct dso *dso, struct build_id *bid) { + if (dso->bid.size > bid->size && dso->bid.size == BUILD_ID_SIZE) { + /* + * For the backward compatibility, it allows a build-id has + * trailing zeros. + */ + return !memcmp(dso->bid.data, bid->data, bid->size) && + !memchr_inv(&dso->bid.data[bid->size], 0, + dso->bid.size - bid->size); + } + return dso->bid.size == bid->size && memcmp(dso->bid.data, bid->data, dso->bid.size) == 0; } diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 8f7ff0035c41..cf773f0dec38 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -10,6 +10,7 @@ #include <sys/utsname.h> #include <stdlib.h> #include <string.h> +#include "strbuf.h" struct perf_env perf_env; @@ -306,6 +307,45 @@ int perf_env__read_cpu_topology_map(struct perf_env *env) return 0; } +int perf_env__read_pmu_mappings(struct perf_env *env) +{ + struct perf_pmu *pmu = NULL; + u32 pmu_num = 0; + struct strbuf sb; + + while ((pmu = perf_pmu__scan(pmu))) { + if (!pmu->name) + continue; + pmu_num++; + } + if (!pmu_num) { + pr_debug("pmu mappings not available\n"); + return -ENOENT; + } + env->nr_pmu_mappings = pmu_num; + + if (strbuf_init(&sb, 128 * pmu_num) < 0) + return -ENOMEM; + + while ((pmu = perf_pmu__scan(pmu))) { + if (!pmu->name) + continue; + if (strbuf_addf(&sb, "%u:%s", pmu->type, pmu->name) < 0) + goto error; + /* include a NULL character at the end */ + if (strbuf_add(&sb, "", 1) < 0) + goto error; + } + + env->pmu_mappings = strbuf_detach(&sb, NULL); + + return 0; + +error: + strbuf_release(&sb); + return -1; +} + int perf_env__read_cpuid(struct perf_env *env) { char cpuid[128]; @@ -404,6 +444,44 @@ const char *perf_env__arch(struct perf_env *env) return normalize_arch(arch_name); } +const char *perf_env__cpuid(struct perf_env *env) +{ + int status; + + if (!env || !env->cpuid) { /* Assume local operation */ + status = perf_env__read_cpuid(env); + if (status) + return NULL; + } + + return env->cpuid; +} + +int perf_env__nr_pmu_mappings(struct perf_env *env) +{ + int status; + + if (!env || !env->nr_pmu_mappings) { /* Assume local operation */ + status = perf_env__read_pmu_mappings(env); + if (status) + return 0; + } + + return env->nr_pmu_mappings; +} + +const char *perf_env__pmu_mappings(struct perf_env *env) +{ + int status; + + if (!env || !env->pmu_mappings) { /* Assume local operation */ + status = perf_env__read_pmu_mappings(env); + if (status) + return NULL; + } + + return env->pmu_mappings; +} int perf_env__numa_node(struct perf_env *env, int cpu) { diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 
1f5175820a05..1383876f72b3 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -149,11 +149,16 @@ int perf_env__kernel_is_64_bit(struct perf_env *env); int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpuid(struct perf_env *env); +int perf_env__read_pmu_mappings(struct perf_env *env); +int perf_env__nr_pmu_mappings(struct perf_env *env); +const char *perf_env__pmu_mappings(struct perf_env *env); + int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); const char *perf_env__arch(struct perf_env *env); +const char *perf_env__cpuid(struct perf_env *env); const char *perf_env__raw_arch(struct perf_env *env); int perf_env__nr_cpus_avail(struct perf_env *env); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 54d251327b5b..dbfeceb2546c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -333,11 +333,11 @@ error_free: goto out; } -static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src) +int copy_config_terms(struct list_head *dst, struct list_head *src) { struct evsel_config_term *pos, *tmp; - list_for_each_entry(pos, &src->config_terms, list) { + list_for_each_entry(pos, src, list) { tmp = malloc(sizeof(*tmp)); if (tmp == NULL) return -ENOMEM; @@ -350,11 +350,16 @@ static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src) return -ENOMEM; } } - list_add_tail(&tmp->list, &dst->config_terms); + list_add_tail(&tmp->list, dst); } return 0; } +static int evsel__copy_config_terms(struct evsel *dst, struct evsel *src) +{ + return copy_config_terms(&dst->config_terms, &src->config_terms); +} + /** * evsel__clone - create a new evsel copied from @orig * @orig: original evsel @@ -1385,11 +1390,11 @@ int evsel__disable(struct evsel *evsel) return err; } -static void evsel__free_config_terms(struct evsel *evsel) +void free_config_terms(struct list_head *config_terms) { struct evsel_config_term *term, *h; - list_for_each_entry_safe(term, h, &evsel->config_terms, list) { + list_for_each_entry_safe(term, h, config_terms, list) { list_del_init(&term->list); if (term->free_str) zfree(&term->val.str); @@ -1397,6 +1402,11 @@ static void evsel__free_config_terms(struct evsel *evsel) } } +static void evsel__free_config_terms(struct evsel *evsel) +{ + free_config_terms(&evsel->config_terms); +} + void evsel__exit(struct evsel *evsel) { assert(list_empty(&evsel->core.node)); diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 1b3eeab5f188..1f7edfa8568a 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -213,6 +213,9 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr) struct evsel *evsel__clone(struct evsel *orig); struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx); +int copy_config_terms(struct list_head *dst, struct list_head *src); +void free_config_terms(struct list_head *config_terms); + /* * Returns pointer with encoded error via <linux/err.h> interface. 
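perf_env__read_pmu_mappings() above stores the mappings as "type:name" records separated by NUL bytes, which is the same layout evlist__has_amd_ibs() walks with sscanf() earlier in this patch. A stand-alone sketch of that walk; the buffer contents and type numbers are invented:

/*
 * Walk a pmu_mappings-style buffer: "%u:%s" records, each terminated by a
 * '\0' that also serves as the separator. Data below is made up.
 */
#include <stdio.h>
#include <string.h>

int main(void)
{
        const char buf[] = "4:cpu\0" "8:ibs_fetch\0" "9:ibs_op\0";
        const char *p = buf;
        int nr = 3;                     /* env->nr_pmu_mappings in perf */
        unsigned int type;
        char name[32];

        while (nr--) {
                if (sscanf(p, "%u:%31s", &type, name) == 2)
                        printf("pmu %-10s -> type %u\n", name, type);
                p += strlen(p) + 1;     /* skip the terminating '\0' */
        }
        return 0;
}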
*/ diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index da19be7da284..44e40bad0e33 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2149,6 +2149,7 @@ static int add_callchain_ip(struct thread *thread, al.filtered = 0; al.sym = NULL; + al.srcline = NULL; if (!cpumode) { thread__find_cpumode_addr_location(thread, ip, &al); } else { diff --git a/tools/perf/util/parse-events-hybrid.c b/tools/perf/util/parse-events-hybrid.c index 10160ab126f9..b234d95fb10a 100644 --- a/tools/perf/util/parse-events-hybrid.c +++ b/tools/perf/util/parse-events-hybrid.c @@ -76,12 +76,16 @@ static int add_hw_hybrid(struct parse_events_state *parse_state, int ret; perf_pmu__for_each_hybrid_pmu(pmu) { + LIST_HEAD(terms); + if (pmu_cmp(parse_state, pmu)) continue; + copy_config_terms(&terms, config_terms); ret = create_event_hybrid(PERF_TYPE_HARDWARE, &parse_state->idx, list, attr, name, - config_terms, pmu); + &terms, pmu); + free_config_terms(&terms); if (ret) return ret; } @@ -115,11 +119,15 @@ static int add_raw_hybrid(struct parse_events_state *parse_state, int ret; perf_pmu__for_each_hybrid_pmu(pmu) { + LIST_HEAD(terms); + if (pmu_cmp(parse_state, pmu)) continue; + copy_config_terms(&terms, config_terms); ret = create_raw_event_hybrid(&parse_state->idx, list, attr, - name, config_terms, pmu); + name, &terms, pmu); + free_config_terms(&terms); if (ret) return ret; } @@ -165,11 +173,15 @@ int parse_events__add_cache_hybrid(struct list_head *list, int *idx, *hybrid = true; perf_pmu__for_each_hybrid_pmu(pmu) { + LIST_HEAD(terms); + if (pmu_cmp(parse_state, pmu)) continue; + copy_config_terms(&terms, config_terms); ret = create_event_hybrid(PERF_TYPE_HW_CACHE, idx, list, - attr, name, config_terms, pmu); + attr, name, &terms, pmu); + free_config_terms(&terms); if (ret) return ret; } diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index e5eae23cfceb..51a2219df601 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -387,7 +387,7 @@ __add_event(struct list_head *list, int *idx, evsel->name = strdup(name); if (config_terms) - list_splice(config_terms, &evsel->config_terms); + list_splice_init(config_terms, &evsel->config_terms); if (list) list_add_tail(&evsel->core.node, list); @@ -535,9 +535,12 @@ int parse_events_add_cache(struct list_head *list, int *idx, config_name ? : name, &config_terms, &hybrid, parse_state); if (hybrid) - return ret; + goto out_free_terms; - return add_event(list, idx, &attr, config_name ? : name, &config_terms); + ret = add_event(list, idx, &attr, config_name ? 
: name, &config_terms); +out_free_terms: + free_config_terms(&config_terms); + return ret; } static void tracepoint_error(struct parse_events_error *e, int err, @@ -1457,10 +1460,13 @@ int parse_events_add_numeric(struct parse_events_state *parse_state, get_config_name(head_config), &config_terms, &hybrid); if (hybrid) - return ret; + goto out_free_terms; - return add_event(list, &parse_state->idx, &attr, - get_config_name(head_config), &config_terms); + ret = add_event(list, &parse_state->idx, &attr, + get_config_name(head_config), &config_terms); +out_free_terms: + free_config_terms(&config_terms); + return ret; } int parse_events_add_tool(struct parse_events_state *parse_state, @@ -1608,14 +1614,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state, } if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) { - struct evsel_config_term *pos, *tmp; - - list_for_each_entry_safe(pos, tmp, &config_terms, list) { - list_del_init(&pos->list); - if (pos->free_str) - zfree(&pos->val.str); - free(pos); - } + free_config_terms(&config_terms); return -EINVAL; } diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c index 30481825515b..47b7531f51da 100644 --- a/tools/perf/util/perf_event_attr_fprintf.c +++ b/tools/perf/util/perf_event_attr_fprintf.c @@ -137,6 +137,9 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(cgroup, p_unsigned); PRINT_ATTRf(text_poke, p_unsigned); PRINT_ATTRf(build_id, p_unsigned); + PRINT_ATTRf(inherit_thread, p_unsigned); + PRINT_ATTRf(remove_on_exec, p_unsigned); + PRINT_ATTRf(sigtrap, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); @@ -150,7 +153,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(aux_watermark, p_unsigned); PRINT_ATTRf(sample_max_stack, p_unsigned); PRINT_ATTRf(aux_sample_size, p_unsigned); - PRINT_ATTRf(text_poke, p_unsigned); + PRINT_ATTRf(sig_data, p_unsigned); return ret; } diff --git a/tools/perf/util/sample-raw.c b/tools/perf/util/sample-raw.c index cde5cd3ce49b..f3f6bd9d290e 100644 --- a/tools/perf/util/sample-raw.c +++ b/tools/perf/util/sample-raw.c @@ -1,8 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include <string.h> +#include <linux/string.h> #include "evlist.h" #include "env.h" +#include "header.h" #include "sample-raw.h" /* @@ -12,7 +14,13 @@ void evlist__init_trace_event_sample_raw(struct evlist *evlist) { const char *arch_pf = perf_env__arch(evlist->env); + const char *cpuid = perf_env__cpuid(evlist->env); if (arch_pf && !strcmp("s390", arch_pf)) evlist->trace_event_sample_raw = evlist__s390_sample_raw; + else if (arch_pf && !strcmp("x86", arch_pf) && + cpuid && strstarts(cpuid, "AuthenticAMD") && + evlist__has_amd_ibs(evlist)) { + evlist->trace_event_sample_raw = evlist__amd_sample_raw; + } } diff --git a/tools/perf/util/sample-raw.h b/tools/perf/util/sample-raw.h index 4be84a5510cf..ea01c5811503 100644 --- a/tools/perf/util/sample-raw.h +++ b/tools/perf/util/sample-raw.h @@ -6,6 +6,10 @@ struct evlist; union perf_event; struct perf_sample; -void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, struct perf_sample *sample); +void evlist__s390_sample_raw(struct evlist *evlist, union perf_event *event, + struct perf_sample *sample); +bool evlist__has_amd_ibs(struct evlist *evlist); +void evlist__amd_sample_raw(struct evlist *evlist, union perf_event *event, + struct 
perf_sample *sample); void evlist__init_trace_event_sample_raw(struct evlist *evlist); #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 069c2cfdd3be..352f16076e01 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -2116,7 +2116,7 @@ fetch_decomp_event(u64 head, size_t mmap_size, char *buf, bool needs_swap) static int __perf_session__process_decomp_events(struct perf_session *session) { s64 skip; - u64 size, file_pos = 0; + u64 size; struct decomp *decomp = session->decomp_last; if (!decomp) @@ -2132,7 +2132,7 @@ static int __perf_session__process_decomp_events(struct perf_session *session) size = event->header.size; if (size < sizeof(struct perf_event_header) || - (skip = perf_session__process_event(session, event, file_pos)) < 0) { + (skip = perf_session__process_event(session, event, decomp->file_pos)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", decomp->file_pos + decomp->head, event->header.size, event->header.type); return -EINVAL; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 77fc46ca07c0..0fc9a5410739 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1581,10 +1581,6 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) if (bfd_get_flavour(abfd) == bfd_target_elf_flavour) goto out_close; - section = bfd_get_section_by_name(abfd, ".text"); - if (section) - dso->text_offset = section->vma - section->filepos; - symbols_size = bfd_get_symtab_upper_bound(abfd); if (symbols_size == 0) { bfd_close(abfd); @@ -1602,6 +1598,22 @@ int dso__load_bfd_symbols(struct dso *dso, const char *debugfile) if (symbols_count < 0) goto out_free; + section = bfd_get_section_by_name(abfd, ".text"); + if (section) { + for (i = 0; i < symbols_count; ++i) { + if (!strcmp(bfd_asymbol_name(symbols[i]), "__ImageBase") || + !strcmp(bfd_asymbol_name(symbols[i]), "__image_base__")) + break; + } + if (i < symbols_count) { + /* PE symbols can only have 4 bytes, so use .text high bits */ + dso->text_offset = section->vma - (u32)section->vma; + dso->text_offset += (u32)bfd_asymbol_value(symbols[i]); + } else { + dso->text_offset = section->vma - section->filepos; + } + } + qsort(symbols, symbols_count, sizeof(asymbol *), bfd_symbols__cmpvalue); #ifdef bfd_get_section diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 5a931456e718..ac35c61f65f5 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -16,7 +16,7 @@ assert sys.version_info >= (3, 7), "Python version is too old" from collections import namedtuple from enum import Enum, auto -from typing import Iterable +from typing import Iterable, Sequence import kunit_config import kunit_json @@ -186,6 +186,26 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, exec_result.elapsed_time)) return parse_result +# Problem: +# $ kunit.py run --json +# works as one would expect and prints the parsed test results as JSON. +# $ kunit.py run --json suite_name +# would *not* pass suite_name as the filter_glob and print as json. +# argparse will consider it to be another way of writing +# $ kunit.py run --json=suite_name +# i.e. it would run all tests, and dump the json to a `suite_name` file. 
+# So we hackily automatically rewrite --json => --json=stdout +pseudo_bool_flag_defaults = { + '--json': 'stdout', + '--raw_output': 'kunit', +} +def massage_argv(argv: Sequence[str]) -> Sequence[str]: + def massage_arg(arg: str) -> str: + if arg not in pseudo_bool_flag_defaults: + return arg + return f'{arg}={pseudo_bool_flag_defaults[arg]}' + return list(map(massage_arg, argv)) + def add_common_opts(parser) -> None: parser.add_argument('--build_dir', help='As in the make command, it specifies the build ' @@ -303,7 +323,7 @@ def main(argv, linux=None): help='Specifies the file to read results from.', type=str, nargs='?', metavar='input_file') - cli_args = parser.parse_args(argv) + cli_args = parser.parse_args(massage_argv(argv)) if get_kernel_root_path(): os.chdir(get_kernel_root_path()) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 619c4554cbff..1edcc8373b4e 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -408,6 +408,14 @@ class KUnitMainTest(unittest.TestCase): self.assertNotEqual(call, mock.call(StrContains('Testing complete.'))) self.assertNotEqual(call, mock.call(StrContains(' 0 tests run'))) + def test_run_raw_output_does_not_take_positional_args(self): + # --raw_output is a string flag, but we don't want it to consume + # any positional arguments, only ones after an '=' + self.linux_source_mock.run_kernel = mock.Mock(return_value=[]) + kunit.main(['run', '--raw_output', 'filter_glob'], self.linux_source_mock) + self.linux_source_mock.run_kernel.assert_called_once_with( + args=None, build_dir='.kunit', filter_glob='filter_glob', timeout=300) + def test_exec_timeout(self): timeout = 3453 kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock) diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 6836510a522f..22722abc9dfa 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -266,16 +266,19 @@ int test_init(struct tdescr *td) td->feats_supported |= FEAT_SSBS; if (getauxval(AT_HWCAP) & HWCAP_SVE) td->feats_supported |= FEAT_SVE; - if (feats_ok(td)) + if (feats_ok(td)) { fprintf(stderr, "Required Features: [%s] supported\n", feats_to_string(td->feats_required & td->feats_supported)); - else + } else { fprintf(stderr, "Required Features: [%s] NOT supported\n", feats_to_string(td->feats_required & ~td->feats_supported)); + td->result = KSFT_SKIP; + return 0; + } } /* Perform test specific additional initialization */ diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 866531c08e4f..799b88152e9e 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -375,7 +375,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_BPF_PROGS_DIR)/%.c \ $(TRUNNER_BPF_PROGS_DIR)/*.h \ $$(INCLUDE_DIR)/vmlinux.h \ - $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT) + $(wildcard $(BPFDIR)/bpf_*.h) \ + | $(TRUNNER_OUTPUT) $$(BPFOBJ) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS)) diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 033051717ba5..f3daa44a8266 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -12,27 +12,36 @@ #include <unistd.h> #include <ftw.h> - #include "cgroup_helpers.h" /* * To 
avoid relying on the system setup, when setup_cgroup_env is called - * we create a new mount namespace, and cgroup namespace. The cgroup2 - * root is mounted at CGROUP_MOUNT_PATH - * - * Unfortunately, most people don't have cgroupv2 enabled at this point in time. - * It's easier to create our own mount namespace and manage it ourselves. + * we create a new mount namespace, and cgroup namespace. The cgroupv2 + * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't + * have cgroupv2 enabled at this point in time. It's easier to create our + * own mount namespace and manage it ourselves. We assume /mnt exists. * - * We assume /mnt exists. + * Related cgroupv1 helpers are named *classid*(), since we only use the + * net_cls controller for tagging net_cls.classid. We assume the default + * mount under /sys/fs/cgroup/net_cls, which should be the case for the + * vast majority of users. */ #define WALK_FD_LIMIT 16 + #define CGROUP_MOUNT_PATH "/mnt" +#define CGROUP_MOUNT_DFLT "/sys/fs/cgroup" +#define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls" #define CGROUP_WORK_DIR "/cgroup-test-work-dir" + #define format_cgroup_path(buf, path) \ snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \ CGROUP_WORK_DIR, path) +#define format_classid_path(buf) \ + snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \ + CGROUP_WORK_DIR) + /** * enable_all_controllers() - Enable all available cgroup v2 controllers * @@ -139,8 +148,7 @@ static int nftwfunc(const char *filename, const struct stat *statptr, return 0; } - -static int join_cgroup_from_top(char *cgroup_path) +static int join_cgroup_from_top(const char *cgroup_path) { char cgroup_procs_path[PATH_MAX + 1]; pid_t pid = getpid(); @@ -313,3 +321,114 @@ int cgroup_setup_and_join(const char *path) { } return cg_fd; } + +/** + * setup_classid_environment() - Setup the cgroupv1 net_cls environment + * + * After calling this function, cleanup_classid_environment should be called + * once testing is complete. + * + * This function will print an error to stderr and return 1 if it is unable + * to setup the cgroup environment. If setup is successful, 0 is returned. + */ +int setup_classid_environment(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + + if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) && + errno != EBUSY) { + log_err("mount cgroup base"); + return 1; + } + + if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) { + log_err("mkdir cgroup net_cls"); + return 1; + } + + if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") && + errno != EBUSY) { + log_err("mount cgroup net_cls"); + return 1; + } + + cleanup_classid_environment(); + + if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) { + log_err("mkdir cgroup work dir"); + return 1; + } + + return 0; +} + +/** + * set_classid() - Set a cgroupv1 net_cls classid + * @id: the numeric classid + * + * Writes the passed classid into the cgroup work dir's net_cls.classid + * file in order to later on trigger socket tagging. + * + * On success, it returns 0, otherwise on failure it returns 1. If there + * is a failure, it prints the error to stderr. 
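Taken together, the classid helpers added above and declared in the header change below are intended to be called as a short sequence by tests that need cgroupv1 net_cls membership. A minimal usage sketch; the prototypes are copied from the patch, the test harness is omitted, and it assumes root privileges since mounts are involved:

/* Usage sketch only: link against cgroup_helpers.c from this patch. */
#include <stdio.h>

int setup_classid_environment(void);
int set_classid(unsigned int id);
int join_classid(void);
void cleanup_classid_environment(void);

int main(void)
{
        if (setup_classid_environment())        /* mount net_cls, make workdir */
                return 1;

        if (set_classid(42) || join_classid()) {        /* tag and join */
                cleanup_classid_environment();
                return 1;
        }

        /* ... sockets created from here on carry net_cls classid 42 ... */

        cleanup_classid_environment();
        return 0;
}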
+ */ +int set_classid(unsigned int id) +{ + char cgroup_workdir[PATH_MAX - 42]; + char cgroup_classid_path[PATH_MAX + 1]; + int fd, rc = 0; + + format_classid_path(cgroup_workdir); + snprintf(cgroup_classid_path, sizeof(cgroup_classid_path), + "%s/net_cls.classid", cgroup_workdir); + + fd = open(cgroup_classid_path, O_WRONLY); + if (fd < 0) { + log_err("Opening cgroup classid: %s", cgroup_classid_path); + return 1; + } + + if (dprintf(fd, "%u\n", id) < 0) { + log_err("Setting cgroup classid"); + rc = 1; + } + + close(fd); + return rc; +} + +/** + * join_classid() - Join a cgroupv1 net_cls classid + * + * This function expects the cgroup work dir to be already created, as we + * join it here. This causes the process sockets to be tagged with the given + * net_cls classid. + * + * On success, it returns 0, otherwise on failure it returns 1. + */ +int join_classid(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + return join_cgroup_from_top(cgroup_workdir); +} + +/** + * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment + * + * At call time, it moves the calling process to the root cgroup, and then + * runs the deletion process. + * + * On failure, it will print an error to stderr, and try to continue. + */ +void cleanup_classid_environment(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + join_cgroup_from_top(NETCLS_MOUNT_PATH); + nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT); +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 5fe3d88e4f0d..629da3854b3e 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __CGROUP_HELPERS_H #define __CGROUP_HELPERS_H + #include <errno.h> #include <string.h> @@ -8,12 +9,21 @@ #define log_err(MSG, ...) 
fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) - +/* cgroupv2 related */ int cgroup_setup_and_join(const char *path); int create_and_get_cgroup(const char *path); +unsigned long long get_cgroup_id(const char *path); + int join_cgroup(const char *path); + int setup_cgroup_environment(void); void cleanup_cgroup_environment(void); -unsigned long long get_cgroup_id(const char *path); -#endif +/* cgroupv1 related */ +int set_classid(unsigned int id); +int join_classid(void); + +int setup_classid_environment(void); +void cleanup_classid_environment(void); + +#endif /* __CGROUP_HELPERS_H */ diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index 7e9f6375757a..6db1af8fdee7 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -208,11 +208,26 @@ error_close: static int connect_fd_to_addr(int fd, const struct sockaddr_storage *addr, - socklen_t addrlen) + socklen_t addrlen, const bool must_fail) { - if (connect(fd, (const struct sockaddr *)addr, addrlen)) { - log_err("Failed to connect to server"); - return -1; + int ret; + + errno = 0; + ret = connect(fd, (const struct sockaddr *)addr, addrlen); + if (must_fail) { + if (!ret) { + log_err("Unexpected success to connect to server"); + return -1; + } + if (errno != EPERM) { + log_err("Unexpected error from connect to server"); + return -1; + } + } else { + if (ret) { + log_err("Failed to connect to server"); + return -1; + } } return 0; @@ -257,7 +272,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts) strlen(opts->cc) + 1)) goto error_close; - if (connect_fd_to_addr(fd, &addr, addrlen)) + if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail)) goto error_close; return fd; @@ -289,7 +304,7 @@ int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms) return -1; } - if (connect_fd_to_addr(client_fd, &addr, len)) + if (connect_fd_to_addr(client_fd, &addr, len, false)) return -1; return 0; diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index da7e132657d5..d198181a5648 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -20,6 +20,7 @@ typedef __u16 __sum16; struct network_helper_opts { const char *cc; int timeout_ms; + bool must_fail; }; /* ipv4 test vector */ diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c new file mode 100644 index 000000000000..ab3b9bc5e6d1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> + +#include "connect4_dropper.skel.h" + +#include "cgroup_helpers.h" +#include "network_helpers.h" + +static int run_test(int cgroup_fd, int server_fd, bool classid) +{ + struct network_helper_opts opts = { + .must_fail = true, + }; + struct connect4_dropper *skel; + int fd, err = 0; + + skel = connect4_dropper__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return -1; + + skel->links.connect_v4_dropper = + bpf_program__attach_cgroup(skel->progs.connect_v4_dropper, + cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.connect_v4_dropper, "prog_attach")) { + err = -1; + goto out; + } + + if (classid && !ASSERT_OK(join_classid(), "join_classid")) { + err = -1; + goto out; + } + + fd = connect_to_fd_opts(server_fd, &opts); + if (fd < 0) + err = 
-1; + else + close(fd); +out: + connect4_dropper__destroy(skel); + return err; +} + +void test_cgroup_v1v2(void) +{ + struct network_helper_opts opts = {}; + int server_fd, client_fd, cgroup_fd; + static const int port = 60123; + + /* Step 1: Check base connectivity works without any BPF. */ + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); + if (!ASSERT_GE(server_fd, 0, "server_fd")) + return; + client_fd = connect_to_fd_opts(server_fd, &opts); + if (!ASSERT_GE(client_fd, 0, "client_fd")) { + close(server_fd); + return; + } + close(client_fd); + close(server_fd); + + /* Step 2: Check BPF policy prog attached to cgroups drops connectivity. */ + cgroup_fd = test__join_cgroup("/connect_dropper"); + if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd")) + return; + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0); + if (!ASSERT_GE(server_fd, 0, "server_fd")) { + close(cgroup_fd); + return; + } + ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only"); + setup_classid_environment(); + set_classid(42); + ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2"); + cleanup_classid_environment(); + close(server_fd); + close(cgroup_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 5c5979046523..d88bb65b74cc 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -949,7 +949,6 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, int err, n; u32 key; char b; - int retries = 100; zero_verdict_count(verd_mapfd); @@ -1002,17 +1001,11 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd, goto close_peer1; if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close_peer1: xclose(p1); @@ -1571,7 +1564,6 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd, const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; unsigned int pass; - int retries = 100; int err, n; int sfd[2]; u32 key; @@ -1606,17 +1598,11 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close: xclose(c1); @@ -1748,7 +1734,6 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; unsigned int pass; - int retries = 100; int err, n; u32 key; char b; @@ -1781,17 +1766,11 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? 
p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close_cli1: xclose(c1); @@ -1841,7 +1820,6 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, const char *log_prefix = redir_mode_str(mode); int c0, c1, p0, p1; unsigned int pass; - int retries = 100; int err, n; int sfd[2]; u32 key; @@ -1876,17 +1854,11 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close_cli1: xclose(c1); @@ -1932,7 +1904,6 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, int sfd[2]; u32 key; char b; - int retries = 100; zero_verdict_count(verd_mapfd); @@ -1963,17 +1934,11 @@ static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd, if (pass != 1) FAIL("%s: want pass count 1, have %d", log_prefix, pass); -again: - n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1); - if (n < 0) { - if (errno == EAGAIN && retries--) { - usleep(1000); - goto again; - } - FAIL_ERRNO("%s: read", log_prefix); - } + n = recv_timeout(mode == REDIR_INGRESS ? 
p0 : c0, &b, 1, 0, IO_TIMEOUT_SEC); + if (n < 0) + FAIL_ERRNO("%s: recv_timeout", log_prefix); if (n == 0) - FAIL("%s: incomplete read", log_prefix); + FAIL("%s: incomplete recv", log_prefix); close: xclose(c1); diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c index 53f0e0fa1a53..37c20b5ffa70 100644 --- a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c +++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE #include <test_progs.h> -#include <linux/ptrace.h> #include "test_task_pt_regs.skel.h" void test_task_pt_regs(void) diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c new file mode 100644 index 000000000000..b565d997810a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <string.h> + +#include <linux/stddef.h> +#include <linux/bpf.h> + +#include <sys/socket.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#define VERDICT_REJECT 0 +#define VERDICT_PROCEED 1 + +SEC("cgroup/connect4") +int connect_v4_dropper(struct bpf_sock_addr *ctx) +{ + if (ctx->type != SOCK_STREAM) + return VERDICT_PROCEED; + if (ctx->user_port == bpf_htons(60123)) + return VERDICT_REJECT; + return VERDICT_PROCEED; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c index 6c059f1cfa1b..e6cb09259408 100644 --- a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c +++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c @@ -1,12 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/ptrace.h> -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -struct pt_regs current_regs = {}; -struct pt_regs ctx_regs = {}; +#define PT_REGS_SIZE sizeof(struct pt_regs) + +/* + * The kernel struct pt_regs isn't exported in its entirety to userspace. 
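Seen from user space, the dropper above behaves exactly the way the must_fail path added to network_helpers.c expects: with the program attached to the caller's cgroup, a TCP connect() to the filtered port fails with EPERM. A reduced sketch of that check; it assumes the program is already attached, that a listener exists on loopback, and it trims error handling:

/* Sketch of the user-space view of cgroup/connect4 rejection. */
#include <arpa/inet.h>
#include <errno.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
        struct sockaddr_in addr = {
                .sin_family = AF_INET,
                .sin_port = htons(60123),       /* port the prog rejects */
                .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
        };
        int fd = socket(AF_INET, SOCK_STREAM, 0);

        if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) && errno == EPERM)
                printf("connect blocked by cgroup/connect4 prog, as expected\n");
        else
                printf("connect was not blocked (errno: %s)\n", strerror(errno));

        close(fd);
        return 0;
}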
+ * Pass it as an array to task_pt_regs.c + */ +char current_regs[PT_REGS_SIZE] = {}; +char ctx_regs[PT_REGS_SIZE] = {}; int uprobe_res = 0; SEC("uprobe/trigger_func") @@ -17,8 +22,10 @@ int handle_uprobe(struct pt_regs *ctx) current = bpf_get_current_task_btf(); regs = (struct pt_regs *) bpf_task_pt_regs(current); - __builtin_memcpy(¤t_regs, regs, sizeof(*regs)); - __builtin_memcpy(&ctx_regs, ctx, sizeof(*ctx)); + if (bpf_probe_read_kernel(current_regs, PT_REGS_SIZE, regs)) + return 0; + if (bpf_probe_read_kernel(ctx_regs, PT_REGS_SIZE, ctx)) + return 0; /* Prove that uprobe was run */ uprobe_res = 1; diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh index 59ea56945e6c..b497bb85b667 100755 --- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh @@ -112,6 +112,14 @@ setup() ip netns add "${NS2}" ip netns add "${NS3}" + # rp_filter gets confused by what these tests are doing, so disable it + ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0 + ip link add veth1 type veth peer name veth2 ip link add veth3 type veth peer name veth4 ip link add veth5 type veth peer name veth6 @@ -236,11 +244,6 @@ setup() ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF} ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF} - # rp_filter gets confused by what these tests are doing, so disable it - ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0 - TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX) sleep 1 # reduce flakiness diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c index 4de902ea14d8..de1c4e6de0b2 100644 --- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c +++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c @@ -1,10 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __EXPORTED_HEADERS__ + #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <string.h> #include <errno.h> -#include <linux/fcntl.h> +#include <fcntl.h> #include <malloc.h> #include <sys/ioctl.h> diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh index beee0d5646a6..f7d84549cc3e 100755 --- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh +++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -1,6 +1,6 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# Copyright 2020 NXP Semiconductors +# Copyright 2020 NXP WAIT_TIME=1 NUM_NETIFS=4 diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc index 5f5b2ba3e557..60c02b482be8 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc @@ -11,8 +11,8 @@ SYSTEM="syscalls" EVENT="sys_enter_openat" FIELD="filename" EPROBE="eprobe_open" - -echo 
"e:$EPROBE $SYSTEM/$EVENT file=+0(\$filename):ustring" >> dynamic_events +OPTIONS="file=+0(\$filename):ustring" +echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events grep -q "$EPROBE" dynamic_events test -d events/eprobes/$EPROBE @@ -37,4 +37,54 @@ echo "-:$EPROBE" >> dynamic_events ! grep -q "$EPROBE" dynamic_events ! test -d events/eprobes/$EPROBE +# test various ways to remove the probe (already tested with just event name) + +# With group name +echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +grep -q "$EPROBE" dynamic_events +test -d events/eprobes/$EPROBE +echo "-:eprobes/$EPROBE" >> dynamic_events +! grep -q "$EPROBE" dynamic_events +! test -d events/eprobes/$EPROBE + +# With group name and system/event +echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +grep -q "$EPROBE" dynamic_events +test -d events/eprobes/$EPROBE +echo "-:eprobes/$EPROBE $SYSTEM/$EVENT" >> dynamic_events +! grep -q "$EPROBE" dynamic_events +! test -d events/eprobes/$EPROBE + +# With just event name and system/event +echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +grep -q "$EPROBE" dynamic_events +test -d events/eprobes/$EPROBE +echo "-:$EPROBE $SYSTEM/$EVENT" >> dynamic_events +! grep -q "$EPROBE" dynamic_events +! test -d events/eprobes/$EPROBE + +# With just event name and system/event and options +echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +grep -q "$EPROBE" dynamic_events +test -d events/eprobes/$EPROBE +echo "-:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +! grep -q "$EPROBE" dynamic_events +! test -d events/eprobes/$EPROBE + +# With group name and system/event and options +echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +grep -q "$EPROBE" dynamic_events +test -d events/eprobes/$EPROBE +echo "-:eprobes/$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +! grep -q "$EPROBE" dynamic_events +! test -d events/eprobes/$EPROBE + +# Finally make sure what is in the dynamic_events file clears it too +echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +LINE=`sed -e '/$EPROBE/s/^e/-/' < dynamic_events` +test -d events/eprobes/$EPROBE +echo "-:eprobes/$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events +! grep -q "$EPROBE" dynamic_events +! 
test -d events/eprobes/$EPROBE + clear_trace diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 98053d3afbda..b8dbabe24ac2 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -24,6 +24,7 @@ /x86_64/smm_test /x86_64/state_test /x86_64/svm_vmcall_test +/x86_64/svm_int_ctl_test /x86_64/sync_regs_test /x86_64/tsc_msrs_test /x86_64/userspace_msr_exit_test @@ -48,6 +49,7 @@ /kvm_page_table_test /memslot_modification_stress_test /memslot_perf_test +/rseq_test /set_memory_region_test /steal_time /kvm_binary_stats_test diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 5d05801ab816..d1774f461393 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -56,6 +56,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test TEST_GEN_PROGS_x86_64 += x86_64/state_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test +TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test @@ -80,6 +81,7 @@ TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus TEST_GEN_PROGS_x86_64 += kvm_page_table_test TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test TEST_GEN_PROGS_x86_64 += memslot_perf_test +TEST_GEN_PROGS_x86_64 += rseq_test TEST_GEN_PROGS_x86_64 += set_memory_region_test TEST_GEN_PROGS_x86_64 += steal_time TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test @@ -93,6 +95,7 @@ TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_perf_test TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus TEST_GEN_PROGS_aarch64 += kvm_page_table_test +TEST_GEN_PROGS_aarch64 += rseq_test TEST_GEN_PROGS_aarch64 += set_memory_region_test TEST_GEN_PROGS_aarch64 += steal_time TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test @@ -104,6 +107,7 @@ TEST_GEN_PROGS_s390x += demand_paging_test TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += kvm_create_max_vcpus TEST_GEN_PROGS_s390x += kvm_page_table_test +TEST_GEN_PROGS_s390x += rseq_test TEST_GEN_PROGS_s390x += set_memory_region_test TEST_GEN_PROGS_s390x += kvm_binary_stats_test diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index 71e277c7c3f3..5d95113c7b7c 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -371,9 +371,7 @@ static void help(char *name) printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n"); - printf(" -s: specify the type of memory that should be used to\n" - " back the guest data region.\n\n"); - backing_src_help(); + backing_src_help("-s"); puts(""); exit(0); } @@ -381,7 +379,7 @@ static void help(char *name) int main(int argc, char *argv[]) { struct test_params params = { - .backing_src = VM_MEM_SRC_ANONYMOUS, + .backing_src = DEFAULT_VM_MEM_SRC, .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE, .vcpus = 1, }; diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c index e79c1b64977f..1510b21e6306 100644 --- a/tools/testing/selftests/kvm/demand_paging_test.c +++ b/tools/testing/selftests/kvm/demand_paging_test.c @@ -179,7 +179,7 @@ static void 
*uffd_handler_thread_fn(void *arg) return NULL; } - if (!pollfd[0].revents & POLLIN) + if (!(pollfd[0].revents & POLLIN)) continue; r = read(uffd, &msg, sizeof(msg)); @@ -416,7 +416,7 @@ static void help(char *name) { puts(""); printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n" - " [-b memory] [-t type] [-v vcpus] [-o]\n", name); + " [-b memory] [-s type] [-v vcpus] [-o]\n", name); guest_modes_help(); printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n" " UFFD registration mode: 'MISSING' or 'MINOR'.\n"); @@ -426,8 +426,7 @@ static void help(char *name) printf(" -b: specify the size of the memory region which should be\n" " demand paged by each vCPU. e.g. 10M or 3G.\n" " Default: 1G\n"); - printf(" -t: The type of backing memory to use. Default: anonymous\n"); - backing_src_help(); + backing_src_help("-s"); printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n"); @@ -439,14 +438,14 @@ int main(int argc, char *argv[]) { int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); struct test_params p = { - .src_type = VM_MEM_SRC_ANONYMOUS, + .src_type = DEFAULT_VM_MEM_SRC, .partition_vcpu_memory_access = true, }; int opt; guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hm:u:d:b:t:v:o")) != -1) { + while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); @@ -465,7 +464,7 @@ int main(int argc, char *argv[]) case 'b': guest_percpu_mem_size = parse_size(optarg); break; - case 't': + case 's': p.src_type = parse_backing_src_type(optarg); break; case 'v': @@ -485,7 +484,7 @@ int main(int argc, char *argv[]) if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR && !backing_src_is_shared(p.src_type)) { - TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -t"); + TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s"); } for_each_guest_mode(run_test, &p); diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c index 479868570d59..7ffab5bd5ce5 100644 --- a/tools/testing/selftests/kvm/dirty_log_perf_test.c +++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c @@ -118,42 +118,64 @@ static inline void disable_dirty_logging(struct kvm_vm *vm, int slots) toggle_dirty_logging(vm, slots, false); } -static void get_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap, - uint64_t nr_pages) +static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots) { - uint64_t slot_pages = nr_pages / slots; int i; for (i = 0; i < slots; i++) { int slot = PERF_TEST_MEM_SLOT_INDEX + i; - unsigned long *slot_bitmap = bitmap + i * slot_pages; - kvm_vm_get_dirty_log(vm, slot, slot_bitmap); + kvm_vm_get_dirty_log(vm, slot, bitmaps[i]); } } -static void clear_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap, - uint64_t nr_pages) +static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], + int slots, uint64_t pages_per_slot) { - uint64_t slot_pages = nr_pages / slots; int i; for (i = 0; i < slots; i++) { int slot = PERF_TEST_MEM_SLOT_INDEX + i; - unsigned long *slot_bitmap = bitmap + i * slot_pages; - kvm_vm_clear_dirty_log(vm, slot, slot_bitmap, 0, slot_pages); + kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot); } } +static unsigned long **alloc_bitmaps(int slots, uint64_t pages_per_slot) +{ + unsigned long **bitmaps; + int i; + + 
bitmaps = malloc(slots * sizeof(bitmaps[0])); + TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array."); + + for (i = 0; i < slots; i++) { + bitmaps[i] = bitmap_zalloc(pages_per_slot); + TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap."); + } + + return bitmaps; +} + +static void free_bitmaps(unsigned long *bitmaps[], int slots) +{ + int i; + + for (i = 0; i < slots; i++) + free(bitmaps[i]); + + free(bitmaps); +} + static void run_test(enum vm_guest_mode mode, void *arg) { struct test_params *p = arg; pthread_t *vcpu_threads; struct kvm_vm *vm; - unsigned long *bmap; + unsigned long **bitmaps; uint64_t guest_num_pages; uint64_t host_num_pages; + uint64_t pages_per_slot; int vcpu_id; struct timespec start; struct timespec ts_diff; @@ -171,7 +193,9 @@ static void run_test(enum vm_guest_mode mode, void *arg) guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm); guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages); host_num_pages = vm_num_host_pages(mode, guest_num_pages); - bmap = bitmap_zalloc(host_num_pages); + pages_per_slot = host_num_pages / p->slots; + + bitmaps = alloc_bitmaps(p->slots, pages_per_slot); if (dirty_log_manual_caps) { cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2; @@ -239,7 +263,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) iteration, ts_diff.tv_sec, ts_diff.tv_nsec); clock_gettime(CLOCK_MONOTONIC, &start); - get_dirty_log(vm, p->slots, bmap, host_num_pages); + get_dirty_log(vm, bitmaps, p->slots); ts_diff = timespec_elapsed(start); get_dirty_log_total = timespec_add(get_dirty_log_total, ts_diff); @@ -248,7 +272,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) if (dirty_log_manual_caps) { clock_gettime(CLOCK_MONOTONIC, &start); - clear_dirty_log(vm, p->slots, bmap, host_num_pages); + clear_dirty_log(vm, bitmaps, p->slots, pages_per_slot); ts_diff = timespec_elapsed(start); clear_dirty_log_total = timespec_add(clear_dirty_log_total, ts_diff); @@ -281,7 +305,7 @@ static void run_test(enum vm_guest_mode mode, void *arg) clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec); } - free(bmap); + free_bitmaps(bitmaps, p->slots); free(vcpu_threads); perf_test_destroy_vm(vm); } @@ -308,11 +332,9 @@ static void help(char *name) printf(" -v: specify the number of vCPUs to run.\n"); printf(" -o: Overlap guest memory accesses instead of partitioning\n" " them into a separate region of memory for each vCPU.\n"); - printf(" -s: specify the type of memory that should be used to\n" - " back the guest data region.\n\n"); + backing_src_help("-s"); printf(" -x: Split the memory region into this number of memslots.\n" - " (default: 1)"); - backing_src_help(); + " (default: 1)\n"); puts(""); exit(0); } @@ -324,7 +346,7 @@ int main(int argc, char *argv[]) .iterations = TEST_HOST_LOOP_N, .wr_fract = 1, .partition_vcpu_memory_access = true, - .backing_src = VM_MEM_SRC_ANONYMOUS, + .backing_src = DEFAULT_VM_MEM_SRC, .slots = 1, }; int opt; diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h index d79be15dd3d2..f8fddc84c0d3 100644 --- a/tools/testing/selftests/kvm/include/test_util.h +++ b/tools/testing/selftests/kvm/include/test_util.h @@ -90,18 +90,23 @@ enum vm_mem_backing_src_type { NUM_SRC_TYPES, }; +#define DEFAULT_VM_MEM_SRC VM_MEM_SRC_ANONYMOUS + struct vm_mem_backing_src_alias { const char *name; uint32_t flag; }; +#define MIN_RUN_DELAY_NS 200000UL + bool thp_configured(void); size_t get_trans_hugepagesz(void); size_t get_def_hugetlb_pagesz(void); const 
struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i); size_t get_backing_src_pagesz(uint32_t i); -void backing_src_help(void); +void backing_src_help(const char *flag); enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name); +long get_run_delay(void); /* * Whether or not the given source type is shared memory (as opposed to diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 242ae8e09a65..05e65ca1c30c 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -312,37 +312,37 @@ static inline void set_xmm(int n, unsigned long val) } } -typedef unsigned long v1di __attribute__ ((vector_size (8))); +#define GET_XMM(__xmm) \ +({ \ + unsigned long __val; \ + asm volatile("movq %%"#__xmm", %0" : "=r"(__val)); \ + __val; \ +}) + static inline unsigned long get_xmm(int n) { assert(n >= 0 && n <= 7); - register v1di xmm0 __asm__("%xmm0"); - register v1di xmm1 __asm__("%xmm1"); - register v1di xmm2 __asm__("%xmm2"); - register v1di xmm3 __asm__("%xmm3"); - register v1di xmm4 __asm__("%xmm4"); - register v1di xmm5 __asm__("%xmm5"); - register v1di xmm6 __asm__("%xmm6"); - register v1di xmm7 __asm__("%xmm7"); switch (n) { case 0: - return (unsigned long)xmm0; + return GET_XMM(xmm0); case 1: - return (unsigned long)xmm1; + return GET_XMM(xmm1); case 2: - return (unsigned long)xmm2; + return GET_XMM(xmm2); case 3: - return (unsigned long)xmm3; + return GET_XMM(xmm3); case 4: - return (unsigned long)xmm4; + return GET_XMM(xmm4); case 5: - return (unsigned long)xmm5; + return GET_XMM(xmm5); case 6: - return (unsigned long)xmm6; + return GET_XMM(xmm6); case 7: - return (unsigned long)xmm7; + return GET_XMM(xmm7); } + + /* never reached */ return 0; } diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c index 0d04a7db7f24..36407cb0ec85 100644 --- a/tools/testing/selftests/kvm/kvm_page_table_test.c +++ b/tools/testing/selftests/kvm/kvm_page_table_test.c @@ -456,10 +456,7 @@ static void help(char *name) " (default: 1G)\n"); printf(" -v: specify the number of vCPUs to run\n" " (default: 1)\n"); - printf(" -s: specify the type of memory that should be used to\n" - " back the guest data region.\n" - " (default: anonymous)\n\n"); - backing_src_help(); + backing_src_help("-s"); puts(""); } @@ -468,7 +465,7 @@ int main(int argc, char *argv[]) { int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS); struct test_params p = { .test_mem_size = DEFAULT_TEST_MEM_SIZE, - .src_type = VM_MEM_SRC_ANONYMOUS, + .src_type = DEFAULT_VM_MEM_SRC, }; int opt; diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c index af1031fed97f..b72429108993 100644 --- a/tools/testing/selftests/kvm/lib/test_util.c +++ b/tools/testing/selftests/kvm/lib/test_util.c @@ -11,6 +11,7 @@ #include <stdlib.h> #include <time.h> #include <sys/stat.h> +#include <sys/syscall.h> #include <linux/mman.h> #include "linux/kernel.h" @@ -129,13 +130,15 @@ size_t get_trans_hugepagesz(void) { size_t size; FILE *f; + int ret; TEST_ASSERT(thp_configured(), "THP is not configured in host kernel"); f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r"); TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size"); - fscanf(f, "%ld", &size); + ret = fscanf(f, "%ld", &size); + TEST_ASSERT(ret == 1, "Error reading 
transparent_hugepage/hpage_pmd_size"); fclose(f); return size; @@ -279,13 +283,22 @@ size_t get_backing_src_pagesz(uint32_t i) } } -void backing_src_help(void) +static void print_available_backing_src_types(const char *prefix) { int i; - printf("Available backing src types:\n"); + printf("%sAvailable backing src types:\n", prefix); + for (i = 0; i < NUM_SRC_TYPES; i++) - printf("\t%s\n", vm_mem_backing_src_alias(i)->name); + printf("%s %s\n", prefix, vm_mem_backing_src_alias(i)->name); +} + +void backing_src_help(const char *flag) +{ + printf(" %s: specify the type of memory that should be used to\n" + " back the guest data region. (default: %s)\n", + flag, vm_mem_backing_src_alias(DEFAULT_VM_MEM_SRC)->name); + print_available_backing_src_types(" "); } enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name) @@ -296,7 +309,23 @@ enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name) if (!strcmp(type_name, vm_mem_backing_src_alias(i)->name)) return i; - backing_src_help(); + print_available_backing_src_types(""); TEST_FAIL("Unknown backing src type: %s", type_name); return -1; } + +long get_run_delay(void) +{ + char path[64]; + long val[2]; + FILE *fp; + + sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid)); + fp = fopen(path, "r"); + /* Return MIN_RUN_DELAY_NS upon failure just to be safe */ + if (fscanf(fp, "%ld %ld ", &val[0], &val[1]) < 2) + val[1] = MIN_RUN_DELAY_NS; + fclose(fp); + + return val[1]; +} diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c new file mode 100644 index 000000000000..4158da0da2bb --- /dev/null +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0-only +#define _GNU_SOURCE /* for program_invocation_short_name */ +#include <errno.h> +#include <fcntl.h> +#include <pthread.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <syscall.h> +#include <sys/ioctl.h> +#include <sys/sysinfo.h> +#include <asm/barrier.h> +#include <linux/atomic.h> +#include <linux/rseq.h> +#include <linux/unistd.h> + +#include "kvm_util.h" +#include "processor.h" +#include "test_util.h" + +#define VCPU_ID 0 + +static __thread volatile struct rseq __rseq = { + .cpu_id = RSEQ_CPU_ID_UNINITIALIZED, +}; + +/* + * Use an arbitrary, bogus signature for configuring rseq, this test does not + * actually enter an rseq critical section. + */ +#define RSEQ_SIG 0xdeadbeef + +/* + * Any bug related to task migration is likely to be timing-dependent; perform + * a large number of migrations to reduce the odds of a false negative. + */ +#define NR_TASK_MIGRATIONS 100000 + +static pthread_t migration_thread; +static cpu_set_t possible_mask; +static int min_cpu, max_cpu; +static bool done; + +static atomic_t seq_cnt; + +static void guest_code(void) +{ + for (;;) + GUEST_SYNC(0); +} + +static void sys_rseq(int flags) +{ + int r; + + r = syscall(__NR_rseq, &__rseq, sizeof(__rseq), flags, RSEQ_SIG); + TEST_ASSERT(!r, "rseq failed, errno = %d (%s)", errno, strerror(errno)); +} + +static int next_cpu(int cpu) +{ + /* + * Advance to the next CPU, skipping those that weren't in the original + * affinity set. Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's + * data storage is considered as opaque. Note, if this task is pinned + * to a small set of discontigous CPUs, e.g. 2 and 1023, this loop will + * burn a lot cycles and the test will take longer than normal to + * complete. 
+ */ + do { + cpu++; + if (cpu > max_cpu) { + cpu = min_cpu; + TEST_ASSERT(CPU_ISSET(cpu, &possible_mask), + "Min CPU = %d must always be usable", cpu); + break; + } + } while (!CPU_ISSET(cpu, &possible_mask)); + + return cpu; +} + +static void *migration_worker(void *ign) +{ + cpu_set_t allowed_mask; + int r, i, cpu; + + CPU_ZERO(&allowed_mask); + + for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) { + CPU_SET(cpu, &allowed_mask); + + /* + * Bump the sequence count twice to allow the reader to detect + * that a migration may have occurred in between rseq and sched + * CPU ID reads. An odd sequence count indicates a migration + * is in-progress, while a completely different count indicates + * a migration occurred since the count was last read. + */ + atomic_inc(&seq_cnt); + + /* + * Ensure the odd count is visible while sched_getcpu() isn't + * stable, i.e. while changing affinity is in-progress. + */ + smp_wmb(); + r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask); + TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)", + errno, strerror(errno)); + smp_wmb(); + atomic_inc(&seq_cnt); + + CPU_CLR(cpu, &allowed_mask); + + /* + * Wait 1-10us before proceeding to the next iteration and more + * specifically, before bumping seq_cnt again. A delay is + * needed on three fronts: + * + * 1. To allow sched_setaffinity() to prompt migration before + * ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME + * (or TIF_NEED_RESCHED, which indirectly leads to handling + * NOTIFY_RESUME) is handled in KVM context. + * + * If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters + * the guest, the guest will trigger a IO/MMIO exit all the + * way to userspace and the TIF flags will be handled by + * the generic "exit to userspace" logic, not by KVM. The + * exit to userspace is necessary to give the test a chance + * to check the rseq CPU ID (see #2). + * + * Alternatively, guest_code() could include an instruction + * to trigger an exit that is handled by KVM, but any such + * exit requires architecture specific code. + * + * 2. To let ioctl(KVM_RUN) make its way back to the test + * before the next round of migration. The test's check on + * the rseq CPU ID must wait for migration to complete in + * order to avoid false positive, thus any kernel rseq bug + * will be missed if the next migration starts before the + * check completes. + * + * 3. To ensure the read-side makes efficient forward progress, + * e.g. if sched_getcpu() involves a syscall. Stalling the + * read-side means the test will spend more time waiting for + * sched_getcpu() to stabilize and less time trying to hit + * the timing-dependent bug. + * + * Because any bug in this area is likely to be timing-dependent, + * run with a range of delays at 1us intervals from 1us to 10us + * as a best effort to avoid tuning the test to the point where + * it can hit _only_ the original bug and not detect future + * regressions. + * + * The original bug can reproduce with a delay up to ~500us on + * x86-64, but starts to require more iterations to reproduce + * as the delay creeps above ~10us, and the average runtime of + * each iteration obviously increases as well. Cap the delay + * at 10us to keep test runtime reasonable while minimizing + * potential coverage loss. + * + * The lower bound for reproducing the bug is likely below 1us, + * e.g. failures occur on x86-64 with nanosleep(0), but at that + * point the overhead of the syscall likely dominates the delay. 
+ * Use usleep() for simplicity and to avoid unnecessary kernel + * dependencies. + */ + usleep((i % 10) + 1); + } + done = true; + return NULL; +} + +static int calc_min_max_cpu(void) +{ + int i, cnt, nproc; + + if (CPU_COUNT(&possible_mask) < 2) + return -EINVAL; + + /* + * CPU_SET doesn't provide a FOR_EACH helper, get the min/max CPU that + * this task is affined to in order to reduce the time spent querying + * unusable CPUs, e.g. if this task is pinned to a small percentage of + * total CPUs. + */ + nproc = get_nprocs_conf(); + min_cpu = -1; + max_cpu = -1; + cnt = 0; + + for (i = 0; i < nproc; i++) { + if (!CPU_ISSET(i, &possible_mask)) + continue; + if (min_cpu == -1) + min_cpu = i; + max_cpu = i; + cnt++; + } + + return (cnt < 2) ? -EINVAL : 0; +} + +int main(int argc, char *argv[]) +{ + int r, i, snapshot; + struct kvm_vm *vm; + u32 cpu, rseq_cpu; + + /* Tell stdout not to buffer its content */ + setbuf(stdout, NULL); + + r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask); + TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno, + strerror(errno)); + + if (calc_min_max_cpu()) { + print_skip("Only one usable CPU, task migration not possible"); + exit(KSFT_SKIP); + } + + sys_rseq(0); + + /* + * Create and run a dummy VM that immediately exits to userspace via + * GUEST_SYNC, while concurrently migrating the process by setting its + * CPU affinity. + */ + vm = vm_create_default(VCPU_ID, 0, guest_code); + ucall_init(vm, NULL); + + pthread_create(&migration_thread, NULL, migration_worker, 0); + + for (i = 0; !done; i++) { + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC, + "Guest failed?"); + + /* + * Verify rseq's CPU matches sched's CPU. Ensure migration + * doesn't occur between sched_getcpu() and reading the rseq + * cpu_id by rereading both if the sequence count changes, or + * if the count is odd (migration in-progress). + */ + do { + /* + * Drop bit 0 to force a mismatch if the count is odd, + * i.e. if a migration is in-progress. + */ + snapshot = atomic_read(&seq_cnt) & ~1; + + /* + * Ensure reading sched_getcpu() and rseq.cpu_id + * complete in a single "no migration" window, i.e. are + * not reordered across the seq_cnt reads. + */ + smp_rmb(); + cpu = sched_getcpu(); + rseq_cpu = READ_ONCE(__rseq.cpu_id); + smp_rmb(); + } while (snapshot != atomic_read(&seq_cnt)); + + TEST_ASSERT(rseq_cpu == cpu, + "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu); + } + + /* + * Sanity check that the test was able to enter the guest a reasonable + * number of times, e.g. didn't get stalled too often/long waiting for + * sched_getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a + * fairly conservative ratio on x86-64, which can do _more_ KVM_RUNs + * than migrations given the 1us+ delay in the migration task. 
+ */ + TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2), + "Only performed %d KVM_RUNs, task stalled too much?\n", i); + + pthread_join(migration_thread, NULL); + + kvm_vm_free(vm); + + sys_rseq(RSEQ_FLAG_UNREGISTER); + + return 0; +} diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c index ecec30865a74..62f2eb9ee3d5 100644 --- a/tools/testing/selftests/kvm/steal_time.c +++ b/tools/testing/selftests/kvm/steal_time.c @@ -10,7 +10,6 @@ #include <sched.h> #include <pthread.h> #include <linux/kernel.h> -#include <sys/syscall.h> #include <asm/kvm.h> #include <asm/kvm_para.h> @@ -20,7 +19,6 @@ #define NR_VCPUS 4 #define ST_GPA_BASE (1 << 30) -#define MIN_RUN_DELAY_NS 200000UL static void *st_gva[NR_VCPUS]; static uint64_t guest_stolen_time[NR_VCPUS]; @@ -118,12 +116,12 @@ struct st_time { uint64_t st_time; }; -static int64_t smccc(uint32_t func, uint32_t arg) +static int64_t smccc(uint32_t func, uint64_t arg) { unsigned long ret; asm volatile( - "mov x0, %1\n" + "mov w0, %w1\n" "mov x1, %2\n" "hvc #0\n" "mov %0, x0\n" @@ -217,20 +215,6 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid) #endif -static long get_run_delay(void) -{ - char path[64]; - long val[2]; - FILE *fp; - - sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid)); - fp = fopen(path, "r"); - fscanf(fp, "%ld %ld ", &val[0], &val[1]); - fclose(fp); - - return val[1]; -} - static void *do_steal_time(void *arg) { struct timespec ts, stop; diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c index e6480fd5c4bd..8039e1eff938 100644 --- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c +++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c @@ -82,7 +82,8 @@ int get_warnings_count(void) FILE *f; f = popen("dmesg | grep \"WARNING:\" | wc -l", "r"); - fscanf(f, "%d", &warnings); + if (fscanf(f, "%d", &warnings) < 1) + warnings = 0; fclose(f); return warnings; diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c new file mode 100644 index 000000000000..df04f56ce859 --- /dev/null +++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * svm_int_ctl_test + * + * Copyright (C) 2021, Red Hat, Inc. + * + * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0. + */ + +#include "test_util.h" +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "apic.h" + +#define VCPU_ID 0 + +static struct kvm_vm *vm; + +bool vintr_irq_called; +bool intr_irq_called; + +#define VINTR_IRQ_NUMBER 0x20 +#define INTR_IRQ_NUMBER 0x30 + +static void vintr_irq_handler(struct ex_regs *regs) +{ + vintr_irq_called = true; +} + +static void intr_irq_handler(struct ex_regs *regs) +{ + x2apic_write_reg(APIC_EOI, 0x00); + intr_irq_called = true; +} + +static void l2_guest_code(struct svm_test_data *svm) +{ + /* This code raises interrupt INTR_IRQ_NUMBER in the L1's LAPIC, + * and since L1 didn't enable virtual interrupt masking, + * L2 should receive it and not L1. + * + * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ + * so it should also receive it after the following 'sti'. 
+ */ + x2apic_write_reg(APIC_ICR, + APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER); + + __asm__ __volatile__( + "sti\n" + "nop\n" + ); + + GUEST_ASSERT(vintr_irq_called); + GUEST_ASSERT(intr_irq_called); + + __asm__ __volatile__( + "vmcall\n" + ); +} + +static void l1_guest_code(struct svm_test_data *svm) +{ + #define L2_GUEST_STACK_SIZE 64 + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + x2apic_enable(); + + /* Prepare for L2 execution. */ + generic_svm_setup(svm, l2_guest_code, + &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* No virtual interrupt masking */ + vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; + + /* No intercepts for real and virtual interrupts */ + vmcb->control.intercept &= ~((1ULL << INTERCEPT_INTR) | (1ULL << INTERCEPT_VINTR)); + + /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */ + vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT); + vmcb->control.int_vector = VINTR_IRQ_NUMBER; + + run_guest(vmcb, svm->vmcb_gpa); + GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + vm_vaddr_t svm_gva; + + nested_svm_check_supported(); + + vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vm, VCPU_ID); + + vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler); + vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler); + + vcpu_alloc_svm(vm, &svm_gva); + vcpu_args_set(vm, VCPU_ID, 1, svm_gva); + + struct kvm_run *run = vcpu_state(vm, VCPU_ID); + struct ucall uc; + + vcpu_run(vm, VCPU_ID); + TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, + "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n", + run->exit_reason, + exit_reason_str(run->exit_reason)); + + switch (get_ucall(vm, VCPU_ID, &uc)) { + case UCALL_ABORT: + TEST_FAIL("%s", (const char *)uc.args[0]); + break; + /* NOT REACHED */ + case UCALL_DONE: + goto done; + default: + TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd); + } +done: + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c index 117bf49a3d79..eda0d2a51224 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c @@ -14,7 +14,6 @@ #include <stdint.h> #include <time.h> #include <sched.h> -#include <sys/syscall.h> #define VCPU_ID 5 @@ -98,20 +97,6 @@ static void guest_code(void) GUEST_DONE(); } -static long get_run_delay(void) -{ - char path[64]; - long val[2]; - FILE *fp; - - sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid)); - fp = fopen(path, "r"); - fscanf(fp, "%ld %ld ", &val[0], &val[1]); - fclose(fp); - - return val[1]; -} - static int cmp_timespec(struct timespec *a, struct timespec *b) { if (a->tv_sec > b->tv_sec) diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index fa2ac0e56b43..fe7ee2b0f29c 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -48,6 +48,7 @@ ARCH ?= $(SUBARCH) # When local build is done, headers are installed in the default # INSTALL_HDR_PATH usr/include. 
.PHONY: khdr +.NOTPARALLEL: khdr: ifndef KSFT_KHDR_INSTALL_DONE ifeq (1,$(DEFAULT_INSTALL_HDR_PATH)) diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c index e1bf55dabdf6..162c41e9bcae 100644 --- a/tools/testing/selftests/nci/nci_dev.c +++ b/tools/testing/selftests/nci/nci_dev.c @@ -746,7 +746,7 @@ int read_write_nci_cmd(int nfc_sock, int virtual_fd, const __u8 *cmd, __u32 cmd_ const __u8 *rsp, __u32 rsp_len) { char buf[256]; - unsigned int len; + int len; send(nfc_sock, &cmd[3], cmd_len - 3, 0); len = read(virtual_fd, buf, cmd_len); diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index cfc7f4f97fd1..df341648f818 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,5 +1,2 @@ -##TEST_GEN_FILES := test_unix_oob -TEST_PROGS := test_unix_oob +TEST_GEN_PROGS := test_unix_oob include ../../lib.mk - -all: $(TEST_PROGS) diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c index 0f3e3763f4f8..3dece8b29253 100644 --- a/tools/testing/selftests/net/af_unix/test_unix_oob.c +++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c @@ -271,8 +271,9 @@ main(int argc, char **argv) read_oob(pfd, &oob); if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) { - fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ", - "atmark %d\n", signal_recvd, len, oob, atmark); + fprintf(stderr, + "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n", + signal_recvd, len, oob, atmark); die(1); } diff --git a/tools/testing/selftests/net/altnames.sh b/tools/testing/selftests/net/altnames.sh index 4254ddc3f70b..1ef9e4159bba 100755 --- a/tools/testing/selftests/net/altnames.sh +++ b/tools/testing/selftests/net/altnames.sh @@ -45,7 +45,7 @@ altnames_test() check_err $? "Got unexpected long alternative name from link show JSON" ip link property del $DUMMY_DEV altname $SHORT_NAME - check_err $? "Failed to add short alternative name" + check_err $? "Failed to delete short alternative name" ip -j -p link show $SHORT_NAME &>/dev/null check_fail $? "Unexpected success while trying to do link show with deleted short alternative name" diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 21b646d10b88..86ab429fe7f3 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -43,3 +43,4 @@ CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_MIRRED=m CONFIG_BAREUDP=m CONFIG_IPV6_IOAM6_LWTUNNEL=y +CONFIG_CRYPTO_SM4=y diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 13350cd5c8ac..3313566ce906 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -289,6 +289,12 @@ set_sysctl() run_cmd sysctl -q -w $* } +# get sysctl values in NS-A +get_sysctl() +{ + ${NSA_CMD} sysctl -n $* +} + ################################################################################ # Setup for tests @@ -439,10 +445,13 @@ cleanup() ip -netns ${NSA} link set dev ${NSA_DEV} down ip -netns ${NSA} link del dev ${NSA_DEV} + ip netns pids ${NSA} | xargs kill 2>/dev/null ip netns del ${NSA} fi + ip netns pids ${NSB} | xargs kill 2>/dev/null ip netns del ${NSB} + ip netns pids ${NSC} | xargs kill 2>/dev/null ip netns del ${NSC} >/dev/null 2>&1 } @@ -1003,6 +1012,60 @@ ipv4_tcp_md5() run_cmd nettest -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET} log_test $? 
1 "MD5: VRF: Device must be a VRF - prefix" + test_ipv4_md5_vrf__vrf_server__no_bind_ifindex + test_ipv4_md5_vrf__global_server__bind_ifindex0 +} + +test_ipv4_md5_vrf__vrf_server__no_bind_ifindex() +{ + log_start + show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX" + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection" + + log_start + show_hint "Binding both the socket and the key is not required but it works" + run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection" +} + +test_ipv4_md5_vrf__global_server__bind_ifindex0() +{ + # This particular test needs tcp_l3mdev_accept=1 for Global server to accept VRF connections + local old_tcp_l3mdev_accept + old_tcp_l3mdev_accept=$(get_sysctl net.ipv4.tcp_l3mdev_accept) + set_sysctl net.ipv4.tcp_l3mdev_accept=1 + + log_start + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection" + + log_start + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & + sleep 1 + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection" + log_start + + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & + sleep 1 + run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection" + + log_start + run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & + sleep 1 + run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} + log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection" + + # restore value + set_sysctl net.ipv4.tcp_l3mdev_accept="$old_tcp_l3mdev_accept" } ipv4_tcp_novrf() diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index d97bd6889446..72ee644d47bf 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -9,6 +9,7 @@ TEST_PROGS = bridge_igmp.sh \ gre_inner_v4_multipath.sh \ gre_inner_v6_multipath.sh \ gre_multipath.sh \ + ip6_forward_instats_vrf.sh \ ip6gre_inner_v4_multipath.sh \ ip6gre_inner_v6_multipath.sh \ ipip_flat_gre_key.sh \ diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index b802c14d2950..e5e2fbeca22e 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -39,3 +39,5 @@ NETIF_CREATE=yes # Timeout (in seconds) before ping exits regardless of how many packets have # been sent or received PING_TIMEOUT=5 +# IPv6 traceroute utility name. 
+TROUTE6=traceroute6 diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh new file mode 100755 index 000000000000..9f5b3e2e5e95 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh @@ -0,0 +1,172 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Test ipv6 stats on the incoming if when forwarding with VRF + +ALL_TESTS=" + ipv6_ping + ipv6_in_too_big_err + ipv6_in_hdr_err + ipv6_in_addr_err + ipv6_in_discard +" + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init $h1 2001:1:1::2/64 + ip -6 route add vrf v$h1 2001:1:2::/64 via 2001:1:1::1 +} + +h1_destroy() +{ + ip -6 route del vrf v$h1 2001:1:2::/64 via 2001:1:1::1 + simple_if_fini $h1 2001:1:1::2/64 +} + +router_create() +{ + vrf_create router + __simple_if_init $rtr1 router 2001:1:1::1/64 + __simple_if_init $rtr2 router 2001:1:2::1/64 + mtu_set $rtr2 1280 +} + +router_destroy() +{ + mtu_restore $rtr2 + __simple_if_fini $rtr2 2001:1:2::1/64 + __simple_if_fini $rtr1 2001:1:1::1/64 + vrf_destroy router +} + +h2_create() +{ + simple_if_init $h2 2001:1:2::2/64 + ip -6 route add vrf v$h2 2001:1:1::/64 via 2001:1:2::1 + mtu_set $h2 1280 +} + +h2_destroy() +{ + mtu_restore $h2 + ip -6 route del vrf v$h2 2001:1:1::/64 via 2001:1:2::1 + simple_if_fini $h2 2001:1:2::2/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + rtr1=${NETIFS[p2]} + + rtr2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + h1_create + router_create + h2_create + + forwarding_enable +} + +cleanup() +{ + pre_cleanup + + forwarding_restore + + h2_destroy + router_destroy + h1_destroy + vrf_cleanup +} + +ipv6_in_too_big_err() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors) + local vrf_name=$(master_name_get $h1) + + # Send too big packets + ip vrf exec $vrf_name \ + $PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + + local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InTooBigErrors" +} + +ipv6_in_hdr_err() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InHdrErrors) + local vrf_name=$(master_name_get $h1) + + # Send packets with hop limit 1, easiest with traceroute6 as some ping6 + # doesn't allow hop limit to be specified + ip vrf exec $vrf_name \ + $TROUTE6 2001:1:2::2 &> /dev/null + + local t1=$(ipv6_stats_get $rtr1 Ip6InHdrErrors) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InHdrErrors" +} + +ipv6_in_addr_err() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InAddrErrors) + local vrf_name=$(master_name_get $h1) + + # Disable forwarding temporary while sending the packet + sysctl -qw net.ipv6.conf.all.forwarding=0 + ip vrf exec $vrf_name \ + $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + sysctl -qw net.ipv6.conf.all.forwarding=1 + + local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors) + test "$((t1 - t0))" -ne 0 + check_err $? + log_test "Ip6InAddrErrors" +} + +ipv6_in_discard() +{ + RET=0 + + local t0=$(ipv6_stats_get $rtr1 Ip6InDiscards) + local vrf_name=$(master_name_get $h1) + + # Add a policy to discard + ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block + ip vrf exec $vrf_name \ + $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + ip xfrm policy del dst 2001:1:2::2/128 dir fwd + + local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards) + test "$((t1 - t0))" -ne 0 + check_err $? 
+ log_test "Ip6InDiscards" +} +ipv6_ping() +{ + RET=0 + + ping6_test $h1 2001:1:2::2 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index e7fc5c35b569..92087d423bcf 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -751,6 +751,14 @@ qdisc_parent_stats_get() | jq '.[] | select(.parent == "'"$parent"'") | '"$selector" } +ipv6_stats_get() +{ + local dev=$1; shift + local stat=$1; shift + + cat /proc/net/dev_snmp6/$dev | grep "^$stat" | cut -f2 +} + humanize() { local speed=$1; shift diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index 3caf72bb9c6a..a2489ec398fe 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -468,10 +468,26 @@ out_bits() for i in {0..22} do ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \ - prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0 - - run_test "out_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \ - db01::2 db01::1 veth0 ${bit2type[$i]} 123 + prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ + dev veth0 &>/dev/null + + local cmd_res=$? + local descr="${desc/<n>/$i}" + + if [[ $i -ge 12 && $i -le 21 ]] + then + if [ $cmd_res != 0 ] + then + npassed=$((npassed+1)) + log_test_passed "$descr" + else + nfailed=$((nfailed+1)) + log_test_failed "$descr" + fi + else + run_test "out_bit$i" "$descr" ioam-node-alpha ioam-node-beta \ + db01::2 db01::1 veth0 ${bit2type[$i]} 123 + fi done bit2size[22]=$tmp @@ -544,7 +560,7 @@ in_bits() local tmp=${bit2size[22]} bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) - for i in {0..22} + for i in {0..11} {22..22} do ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \ prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0 diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index d376cb2c383c..8f6997d35816 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -94,16 +94,6 @@ enum { TEST_OUT_BIT9, TEST_OUT_BIT10, TEST_OUT_BIT11, - TEST_OUT_BIT12, - TEST_OUT_BIT13, - TEST_OUT_BIT14, - TEST_OUT_BIT15, - TEST_OUT_BIT16, - TEST_OUT_BIT17, - TEST_OUT_BIT18, - TEST_OUT_BIT19, - TEST_OUT_BIT20, - TEST_OUT_BIT21, TEST_OUT_BIT22, TEST_OUT_FULL_SUPP_TRACE, @@ -125,16 +115,6 @@ enum { TEST_IN_BIT9, TEST_IN_BIT10, TEST_IN_BIT11, - TEST_IN_BIT12, - TEST_IN_BIT13, - TEST_IN_BIT14, - TEST_IN_BIT15, - TEST_IN_BIT16, - TEST_IN_BIT17, - TEST_IN_BIT18, - TEST_IN_BIT19, - TEST_IN_BIT20, - TEST_IN_BIT21, TEST_IN_BIT22, TEST_IN_FULL_SUPP_TRACE, @@ -199,30 +179,6 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, ioam6h->nodelen != 2 || ioam6h->remlen; - case TEST_OUT_BIT12: - case TEST_IN_BIT12: - case TEST_OUT_BIT13: - case TEST_IN_BIT13: - case TEST_OUT_BIT14: - case TEST_IN_BIT14: - case TEST_OUT_BIT15: - case TEST_IN_BIT15: - case TEST_OUT_BIT16: - case TEST_IN_BIT16: - case TEST_OUT_BIT17: - case TEST_IN_BIT17: - case TEST_OUT_BIT18: - case TEST_IN_BIT18: - case TEST_OUT_BIT19: - case TEST_IN_BIT19: - case TEST_OUT_BIT20: - case TEST_IN_BIT20: - case TEST_OUT_BIT21: - case TEST_IN_BIT21: - return ioam6h->overflow || - ioam6h->nodelen || - ioam6h->remlen != 1; - case TEST_OUT_BIT22: case TEST_IN_BIT22: return ioam6h->overflow || @@ -326,6 
+282,66 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, *p += sizeof(__u32); } + if (ioam6h->type.bit12) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit13) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit14) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit15) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit16) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit17) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit18) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit19) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit20) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + + if (ioam6h->type.bit21) { + if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + return 1; + *p += sizeof(__u32); + } + if (ioam6h->type.bit22) { len = cnf.sc_data ? strlen(cnf.sc_data) : 0; aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0; @@ -455,26 +471,6 @@ static int str2id(const char *tname) return TEST_OUT_BIT10; if (!strcmp("out_bit11", tname)) return TEST_OUT_BIT11; - if (!strcmp("out_bit12", tname)) - return TEST_OUT_BIT12; - if (!strcmp("out_bit13", tname)) - return TEST_OUT_BIT13; - if (!strcmp("out_bit14", tname)) - return TEST_OUT_BIT14; - if (!strcmp("out_bit15", tname)) - return TEST_OUT_BIT15; - if (!strcmp("out_bit16", tname)) - return TEST_OUT_BIT16; - if (!strcmp("out_bit17", tname)) - return TEST_OUT_BIT17; - if (!strcmp("out_bit18", tname)) - return TEST_OUT_BIT18; - if (!strcmp("out_bit19", tname)) - return TEST_OUT_BIT19; - if (!strcmp("out_bit20", tname)) - return TEST_OUT_BIT20; - if (!strcmp("out_bit21", tname)) - return TEST_OUT_BIT21; if (!strcmp("out_bit22", tname)) return TEST_OUT_BIT22; if (!strcmp("out_full_supp_trace", tname)) @@ -509,26 +505,6 @@ static int str2id(const char *tname) return TEST_IN_BIT10; if (!strcmp("in_bit11", tname)) return TEST_IN_BIT11; - if (!strcmp("in_bit12", tname)) - return TEST_IN_BIT12; - if (!strcmp("in_bit13", tname)) - return TEST_IN_BIT13; - if (!strcmp("in_bit14", tname)) - return TEST_IN_BIT14; - if (!strcmp("in_bit15", tname)) - return TEST_IN_BIT15; - if (!strcmp("in_bit16", tname)) - return TEST_IN_BIT16; - if (!strcmp("in_bit17", tname)) - return TEST_IN_BIT17; - if (!strcmp("in_bit18", tname)) - return TEST_IN_BIT18; - if (!strcmp("in_bit19", tname)) - return TEST_IN_BIT19; - if (!strcmp("in_bit20", tname)) - return TEST_IN_BIT20; - if (!strcmp("in_bit21", tname)) - return TEST_IN_BIT21; if (!strcmp("in_bit22", tname)) return TEST_IN_BIT22; if (!strcmp("in_full_supp_trace", tname)) @@ -606,16 +582,6 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { [TEST_OUT_BIT9] = check_ioam_header_and_data, [TEST_OUT_BIT10] = check_ioam_header_and_data, [TEST_OUT_BIT11] = check_ioam_header_and_data, - [TEST_OUT_BIT12] = check_ioam_header, - [TEST_OUT_BIT13] = check_ioam_header, - [TEST_OUT_BIT14] = check_ioam_header, - [TEST_OUT_BIT15] = check_ioam_header, - [TEST_OUT_BIT16] = check_ioam_header, - [TEST_OUT_BIT17] = check_ioam_header, 
- [TEST_OUT_BIT18] = check_ioam_header, - [TEST_OUT_BIT19] = check_ioam_header, - [TEST_OUT_BIT20] = check_ioam_header, - [TEST_OUT_BIT21] = check_ioam_header, [TEST_OUT_BIT22] = check_ioam_header_and_data, [TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data, [TEST_IN_UNDEF_NS] = check_ioam_header, @@ -633,16 +599,6 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { [TEST_IN_BIT9] = check_ioam_header_and_data, [TEST_IN_BIT10] = check_ioam_header_and_data, [TEST_IN_BIT11] = check_ioam_header_and_data, - [TEST_IN_BIT12] = check_ioam_header, - [TEST_IN_BIT13] = check_ioam_header, - [TEST_IN_BIT14] = check_ioam_header, - [TEST_IN_BIT15] = check_ioam_header, - [TEST_IN_BIT16] = check_ioam_header, - [TEST_IN_BIT17] = check_ioam_header, - [TEST_IN_BIT18] = check_ioam_header, - [TEST_IN_BIT19] = check_ioam_header, - [TEST_IN_BIT20] = check_ioam_header, - [TEST_IN_BIT21] = check_ioam_header, [TEST_IN_BIT22] = check_ioam_header_and_data, [TEST_IN_FULL_SUPP_TRACE] = check_ioam_header_and_data, [TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data, diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index bd6288302094..b599003eb5ba 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -28,6 +28,7 @@ #include <unistd.h> #include <time.h> #include <errno.h> +#include <getopt.h> #include <linux/xfrm.h> #include <linux/ipsec.h> @@ -101,6 +102,8 @@ struct sock_args { struct sockaddr_in6 v6; } md5_prefix; unsigned int prefix_len; + /* 0: default, -1: force off, +1: force on */ + int bind_key_ifindex; /* expected addresses and device index for connection */ const char *expected_dev; @@ -271,11 +274,14 @@ static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args } memcpy(&md5sig.tcpm_addr, addr, alen); - if (args->ifindex) { + if ((args->ifindex && args->bind_key_ifindex >= 0) || args->bind_key_ifindex >= 1) { opt = TCP_MD5SIG_EXT; md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX; md5sig.tcpm_ifindex = args->ifindex; + log_msg("TCP_MD5SIG_FLAG_IFINDEX set tcpm_ifindex=%d\n", md5sig.tcpm_ifindex); + } else { + log_msg("TCP_MD5SIG_FLAG_IFINDEX off\n", md5sig.tcpm_ifindex); } rc = setsockopt(sd, IPPROTO_TCP, opt, &md5sig, sizeof(md5sig)); @@ -1822,6 +1828,14 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) } #define GETOPT_STR "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbq" +#define OPT_FORCE_BIND_KEY_IFINDEX 1001 +#define OPT_NO_BIND_KEY_IFINDEX 1002 + +static struct option long_opts[] = { + {"force-bind-key-ifindex", 0, 0, OPT_FORCE_BIND_KEY_IFINDEX}, + {"no-bind-key-ifindex", 0, 0, OPT_NO_BIND_KEY_IFINDEX}, + {0, 0, 0, 0} +}; static void print_usage(char *prog) { @@ -1858,6 +1872,10 @@ static void print_usage(char *prog) " -M password use MD5 sum protection\n" " -X password MD5 password for client mode\n" " -m prefix/len prefix and length to use for MD5 key\n" + " --no-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX off\n" + " --force-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX on\n" + " (default: only if -I is passed)\n" + "\n" " -g grp multicast group (e.g., 239.1.1.1)\n" " -i interactive mode (default is echo and terminate)\n" "\n" @@ -1893,7 +1911,7 @@ int main(int argc, char *argv[]) * process input args */ - while ((rc = getopt(argc, argv, GETOPT_STR)) != -1) { + while ((rc = getopt_long(argc, argv, GETOPT_STR, long_opts, NULL)) != -1) { switch (rc) { case 'B': both_mode = 1; @@ -1966,6 +1984,12 @@ int main(int argc, char *argv[]) case 
'M': args.password = optarg; break; + case OPT_FORCE_BIND_KEY_IFINDEX: + args.bind_key_ifindex = 1; + break; + case OPT_NO_BIND_KEY_IFINDEX: + args.bind_key_ifindex = -1; + break; case 'X': args.client_pw = optarg; break; diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index 427d94816f2d..d4ffebb989f8 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -199,7 +199,6 @@ fi # test basic connectivity if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then echo "ERROR: ns1 cannot reach ns2" 1>&2 - bash exit 1 fi diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh index d7e07f4c3d7f..da1c1e4b6c86 100755 --- a/tools/testing/selftests/netfilter/nft_nat.sh +++ b/tools/testing/selftests/netfilter/nft_nat.sh @@ -741,6 +741,149 @@ EOF return $lret } +# test port shadowing. +# create two listening services, one on router (ns0), one +# on client (ns2), which is masqueraded from ns1 point of view. +# ns2 sends udp packet coming from service port to ns1, on a highport. +# Later, if n1 uses same highport to connect to ns0:service, packet +# might be port-forwarded to ns2 instead. + +# second argument tells if we expect the 'fake-entry' to take effect +# (CLIENT) or not (ROUTER). +test_port_shadow() +{ + local test=$1 + local expect=$2 + local daddrc="10.0.1.99" + local daddrs="10.0.1.1" + local result="" + local logmsg="" + + echo ROUTER | ip netns exec "$ns0" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 & + nc_r=$! + + echo CLIENT | ip netns exec "$ns2" nc -w 5 -u -l -p 1405 >/dev/null 2>&1 & + nc_c=$! + + # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405. + echo "fake-entry" | ip netns exec "$ns2" nc -w 1 -p 1405 -u "$daddrc" 41404 > /dev/null + + # ns1 tries to connect to ns0:1405. With default settings this should connect + # to client, it matches the conntrack entry created above. + + result=$(echo "" | ip netns exec "$ns1" nc -w 1 -p 41404 -u "$daddrs" 1405) + + if [ "$result" = "$expect" ] ;then + echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}" + else + echo "ERROR: portshadow test $test: got reply from \"$result\", not $expect as intended" + ret=1 + fi + + kill $nc_r $nc_c 2>/dev/null + + # flush udp entries for next test round, if any + ip netns exec "$ns0" conntrack -F >/dev/null 2>&1 +} + +# This prevents port shadow of router service via packet filter, +# packets claiming to originate from service port from internal +# network are dropped. +test_port_shadow_filter() +{ + local family=$1 + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family filter { + chain forward { + type filter hook forward priority 0; policy accept; + meta iif veth1 udp sport 1405 drop + } +} +EOF + test_port_shadow "port-filter" "ROUTER" + + ip netns exec "$ns0" nft delete table $family filter +} + +# This prevents port shadow of router service via notrack. 
+test_port_shadow_notrack() +{ + local family=$1 + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family raw { + chain prerouting { + type filter hook prerouting priority -300; policy accept; + meta iif veth0 udp dport 1405 notrack + udp dport 1405 notrack + } + chain output { + type filter hook output priority -300; policy accept; + udp sport 1405 notrack + } +} +EOF + test_port_shadow "port-notrack" "ROUTER" + + ip netns exec "$ns0" nft delete table $family raw +} + +# This prevents port shadow of router service via sport remap. +test_port_shadow_pat() +{ + local family=$1 + +ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family pat { + chain postrouting { + type nat hook postrouting priority -1; policy accept; + meta iif veth1 udp sport <= 1405 masquerade to : 1406-65535 random + } +} +EOF + test_port_shadow "pat" "ROUTER" + + ip netns exec "$ns0" nft delete table $family pat +} + +test_port_shadowing() +{ + local family="ip" + + ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null + ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null + + ip netns exec "$ns0" nft -f /dev/stdin <<EOF +table $family nat { + chain postrouting { + type nat hook postrouting priority 0; policy accept; + meta oif veth0 masquerade + } +} +EOF + if [ $? -ne 0 ]; then + echo "SKIP: Could not add add $family masquerade hook" + return $ksft_skip + fi + + # test default behaviour. Packet from ns1 to ns0 is redirected to ns2. + test_port_shadow "default" "CLIENT" + + # test packet filter based mitigation: prevent forwarding of + # packets claiming to come from the service port. + test_port_shadow_filter "$family" + + # test conntrack based mitigation: connections going or coming + # from router:service bypass connection tracking. + test_port_shadow_notrack "$family" + + # test nat based mitigation: fowarded packets coming from service port + # are masqueraded with random highport. + test_port_shadow_pat "$family" + + ip netns exec "$ns0" nft delete table $family nat +} # ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 for i in 0 1 2; do @@ -861,6 +1004,8 @@ reset_counters $test_inet_nat && test_redirect inet $test_inet_nat && test_redirect6 inet +test_port_shadowing + if [ $ret -ne 0 ];then echo -n "FAIL: " nft --version diff --git a/tools/testing/selftests/netfilter/nft_nat_zones.sh b/tools/testing/selftests/netfilter/nft_nat_zones.sh new file mode 100755 index 000000000000..b9ab37380f33 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_nat_zones.sh @@ -0,0 +1,309 @@ +#!/bin/bash +# +# Test connection tracking zone and NAT source port reallocation support. +# + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# Don't increase too much, 2000 clients should work +# just fine but script can then take several minutes with +# KASAN/debug builds. +maxclients=100 + +have_iperf=1 +ret=0 + +# client1---. +# veth1-. +# | +# NAT Gateway --veth0--> Server +# | | +# veth2-' | +# client2---' | +# .... | +# clientX----vethX---' + +# All clients share identical IP address. +# NAT Gateway uses policy routing and conntrack zones to isolate client +# namespaces. Each client connects to Server, each with colliding tuples: +# clientsaddr:10000 -> serveraddr:dport +# NAT Gateway is supposed to do port reallocation for each of the +# connections. 
+ +sfx=$(mktemp -u "XXXXXXXX") +gw="ns-gw-$sfx" +cl1="ns-cl1-$sfx" +cl2="ns-cl2-$sfx" +srv="ns-srv-$sfx" + +v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null) +v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null) +v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null) +v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null) +v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null) +v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null) + +cleanup() +{ + ip netns del $gw + ip netns del $srv + for i in $(seq 1 $maxclients); do + ip netns del ns-cl$i-$sfx 2>/dev/null + done + + sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null + sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null + sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null + sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null + sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null + sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null +} + +nft --version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without nft tool" + exit $ksft_skip +fi + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +conntrack -V > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without conntrack tool" + exit $ksft_skip +fi + +iperf3 -v >/dev/null 2>&1 +if [ $? -ne 0 ];then + have_iperf=0 +fi + +ip netns add "$gw" +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace $gw" + exit $ksft_skip +fi +ip -net "$gw" link set lo up + +trap cleanup EXIT + +ip netns add "$srv" +if [ $? -ne 0 ];then + echo "SKIP: Could not create server netns $srv" + exit $ksft_skip +fi + +ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv" +ip -net "$gw" link set veth0 up +ip -net "$srv" link set lo up +ip -net "$srv" link set eth0 up + +sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null +sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null +sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null +sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null +sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null +sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null + +for i in $(seq 1 $maxclients);do + cl="ns-cl$i-$sfx" + + ip netns add "$cl" + if [ $? -ne 0 ];then + echo "SKIP: Could not create client netns $cl" + exit $ksft_skip + fi + ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1 + if [ $? -ne 0 ];then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip + fi +done + +for i in $(seq 1 $maxclients);do + cl="ns-cl$i-$sfx" + echo netns exec "$cl" ip link set lo up + echo netns exec "$cl" ip link set eth0 up + echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2 + echo netns exec "$gw" ip link set veth$i up + echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2 + echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0 + + # clients have same IP addresses. + echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0 + echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0 + echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0 + echo netns exec "$cl" ip route add default via dead:1::2 dev eth0 + + # NB: same addresses on client-facing interfaces. 
+ echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i + echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i + + # gw: policy routing + echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i)) + echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i)) + echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i)) + echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i)) + echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i)) +done | ip -batch /dev/stdin + +ip -net "$gw" addr add 10.3.0.1/24 dev veth0 +ip -net "$gw" addr add dead:3::1/64 dev veth0 + +ip -net "$srv" addr add 10.3.0.99/24 dev eth0 +ip -net "$srv" addr add dead:3::99/64 dev eth0 + +ip netns exec $gw nft -f /dev/stdin<<EOF +table inet raw { + map iiftomark { + type ifname : mark + } + + map iiftozone { + typeof iifname : ct zone + } + + set inicmp { + flags dynamic + type ipv4_addr . ifname . ipv4_addr + } + set inflows { + flags dynamic + type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service + } + + set inflows6 { + flags dynamic + type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service + } + + chain prerouting { + type filter hook prerouting priority -64000; policy accept; + ct original zone set meta iifname map @iiftozone + meta mark set meta iifname map @iiftomark + + tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter } + add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter } + ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter } + } + + chain nat_postrouting { + type nat hook postrouting priority 0; policy accept; + ct mark set meta mark meta oifname veth0 masquerade + } + + chain mangle_prerouting { + type filter hook prerouting priority -100; policy accept; + ct direction reply meta mark set ct mark + } +} +EOF + +( echo add element inet raw iiftomark \{ + for i in $(seq 1 $((maxclients-1))); do + echo \"veth$i\" : $i, + done + echo \"veth$maxclients\" : $maxclients \} + echo add element inet raw iiftozone \{ + for i in $(seq 1 $((maxclients-1))); do + echo \"veth$i\" : $i, + done + echo \"veth$maxclients\" : $maxclients \} +) | ip netns exec $gw nft -f /dev/stdin + +ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null +ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null + +# useful for debugging: allows to use 'ping' from clients to gateway. +ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null +ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null + +for i in $(seq 1 $maxclients); do + cl="ns-cl$i-$sfx" + ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 & + if [ $? -ne 0 ]; then + echo FAIL: Ping failure from $cl 1>&2 + ret=1 + break + fi +done + +wait + +for i in $(seq 1 $maxclients); do + ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }" + if [ $? -ne 0 ];then + ret=1 + echo "FAIL: counter icmp mismatch for veth$i" 1>&2 + ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2 + break + fi +done + +ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 
10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }" +if [ $? -ne 0 ];then + ret=1 + echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }" + ip netns exec $gw nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2 +fi + +if [ $ret -eq 0 ]; then + echo "PASS: ping test from all $maxclients namespaces" +fi + +if [ $have_iperf -eq 0 ];then + echo "SKIP: iperf3 not installed" + if [ $ret -ne 0 ];then + exit $ret + fi + exit $ksft_skip +fi + +ip netns exec $srv iperf3 -s > /dev/null 2>&1 & +iperfpid=$! +sleep 1 + +for i in $(seq 1 $maxclients); do + if [ $ret -ne 0 ]; then + break + fi + cl="ns-cl$i-$sfx" + ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null + if [ $? -ne 0 ]; then + echo FAIL: Failure to connect for $cl 1>&2 + ip netns exec $gw conntrack -S 1>&2 + ret=1 + fi +done +if [ $ret -eq 0 ];then + echo "PASS: iperf3 connections for all $maxclients net namespaces" +fi + +kill $iperfpid +wait + +for i in $(seq 1 $maxclients); do + ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null + if [ $? -ne 0 ];then + ret=1 + echo "FAIL: can't find expected tcp entry for veth$i" 1>&2 + break + fi +done +if [ $ret -eq 0 ];then + echo "PASS: Found client connection for all $maxclients net namespaces" +fi + +ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null +if [ $? -ne 0 ];then + ret=1 + echo "FAIL: cannot find return entry on veth0" 1>&2 +fi + +exit $ret diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh new file mode 100755 index 000000000000..ac646376eb01 --- /dev/null +++ b/tools/testing/selftests/netfilter/nft_zones_many.sh @@ -0,0 +1,156 @@ +#!/bin/bash + +# Test insertion speed for packets with identical addresses/ports +# that are all placed in distinct conntrack zones. + +sfx=$(mktemp -u "XXXXXXXX") +ns="ns-$sfx" + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +zones=20000 +have_ct_tool=0 +ret=0 + +cleanup() +{ + ip netns del $ns +} + +ip netns add $ns +if [ $? -ne 0 ];then + echo "SKIP: Could not create net namespace $gw" + exit $ksft_skip +fi + +trap cleanup EXIT + +conntrack -V > /dev/null 2>&1 +if [ $? -eq 0 ];then + have_ct_tool=1 +fi + +ip -net "$ns" link set lo up + +test_zones() { + local max_zones=$1 + +ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600 +ip netns exec $ns nft -f /dev/stdin<<EOF +flush ruleset +table inet raw { + map rndzone { + typeof numgen inc mod $max_zones : ct zone + } + + chain output { + type filter hook output priority -64000; policy accept; + udp dport 12345 ct zone set numgen inc mod 65536 map @rndzone + } +} +EOF + ( + echo "add element inet raw rndzone {" + for i in $(seq 1 $max_zones);do + echo -n "$i : $i" + if [ $i -lt $max_zones ]; then + echo "," + else + echo "}" + fi + done + ) | ip netns exec $ns nft -f /dev/stdin + + local i=0 + local j=0 + local outerstart=$(date +%s%3N) + local stop=$outerstart + + while [ $i -lt $max_zones ]; do + local start=$(date +%s%3N) + i=$((i + 10000)) + j=$((j + 1)) + dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null + if [ $? 
-ne 0 ] ;then + ret=1 + break + fi + + stop=$(date +%s%3N) + local duration=$((stop-start)) + echo "PASS: added 10000 entries in $duration ms (now $i total, loop $j)" + done + + if [ $have_ct_tool -eq 1 ]; then + local count=$(ip netns exec "$ns" conntrack -C) + local duration=$((stop-outerstart)) + + if [ $count -eq $max_zones ]; then + echo "PASS: inserted $count entries from packet path in $duration ms total" + else + ip netns exec $ns conntrack -S 1>&2 + echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries" + ret=1 + fi + fi + + if [ $ret -ne 0 ];then + echo "FAIL: insert $max_zones entries from packet path" 1>&2 + fi +} + +test_conntrack_tool() { + local max_zones=$1 + + ip netns exec $ns conntrack -F >/dev/null 2>/dev/null + + local outerstart=$(date +%s%3N) + local start=$(date +%s%3N) + local stop=$start + local i=0 + while [ $i -lt $max_zones ]; do + i=$((i + 1)) + ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \ + --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1 + if [ $? -ne 0 ];then + ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \ + --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null + echo "FAIL: conntrack -I returned an error" + ret=1 + break + fi + + if [ $((i%10000)) -eq 0 ];then + stop=$(date +%s%3N) + + local duration=$((stop-start)) + echo "PASS: added 10000 entries in $duration ms (now $i total)" + start=$stop + fi + done + + local count=$(ip netns exec "$ns" conntrack -C) + local duration=$((stop-outerstart)) + + if [ $count -eq $max_zones ]; then + echo "PASS: inserted $count entries via ctnetlink in $duration ms" + else + ip netns exec $ns conntrack -S 1>&2 + echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)" + ret=1 + fi +} + +test_zones $zones + +if [ $have_ct_tool -eq 1 ];then + test_conntrack_tool $zones +else + echo "SKIP: Could not run ctnetlink insertion test without conntrack tool" + if [ $ret -eq 0 ];then + exit $ksft_skip + fi +fi + +exit $ret diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S index bd1ca25febe4..aed632d29fff 100644 --- a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S +++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S @@ -1,5 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#include <ppc-asm.h> +#include <basic_asm.h> #include <asm/unistd.h> .text @@ -26,3 +26,38 @@ FUNC_START(getppid_tm_suspended) 1: li r3, -1 blr + + +.macro scv level + .long (0x44000001 | (\level) << 5) +.endm + +FUNC_START(getppid_scv_tm_active) + PUSH_BASIC_STACK(0) + tbegin. + beq 1f + li r0, __NR_getppid + scv 0 + tend. + POP_BASIC_STACK(0) + blr +1: + li r3, -1 + POP_BASIC_STACK(0) + blr + +FUNC_START(getppid_scv_tm_suspended) + PUSH_BASIC_STACK(0) + tbegin. + beq 1f + li r0, __NR_getppid + tsuspend. + scv 0 + tresume. + tend. 
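/*
 * Editorial aside, not part of the patch: the "scv level" macro above
 * hand-encodes System Call Vectored, presumably so the test still builds
 * with assemblers that do not know the mnemonic (an assumption, not stated
 * here). 0x44000001 is primary opcode 17 with the low bit that
 * distinguishes scv from sc, and (\level) << 5 places the level in the
 * LEV field, so "scv 0" enters the kernel through the LEV=0 vector. In
 * this function the syscall sits between tsuspend. and tresume., so it is
 * expected to succeed; getppid_scv_tm_active above issues scv inside an
 * active transaction and is expected to fail persistently instead.
 */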
+ POP_BASIC_STACK(0) + blr +1: + li r3, -1 + POP_BASIC_STACK(0) + blr diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c index 467a6b3134b2..b763354c2eb4 100644 --- a/tools/testing/selftests/powerpc/tm/tm-syscall.c +++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c @@ -19,23 +19,36 @@ #include "utils.h" #include "tm.h" +#ifndef PPC_FEATURE2_SCV +#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */ +#endif + extern int getppid_tm_active(void); extern int getppid_tm_suspended(void); +extern int getppid_scv_tm_active(void); +extern int getppid_scv_tm_suspended(void); unsigned retries = 0; #define TEST_DURATION 10 /* seconds */ -pid_t getppid_tm(bool suspend) +pid_t getppid_tm(bool scv, bool suspend) { int i; pid_t pid; for (i = 0; i < TM_RETRIES; i++) { - if (suspend) - pid = getppid_tm_suspended(); - else - pid = getppid_tm_active(); + if (suspend) { + if (scv) + pid = getppid_scv_tm_suspended(); + else + pid = getppid_tm_suspended(); + } else { + if (scv) + pid = getppid_scv_tm_active(); + else + pid = getppid_tm_active(); + } if (pid >= 0) return pid; @@ -82,15 +95,24 @@ int tm_syscall(void) * Test a syscall within a suspended transaction and verify * that it succeeds. */ - FAIL_IF(getppid_tm(true) == -1); /* Should succeed. */ + FAIL_IF(getppid_tm(false, true) == -1); /* Should succeed. */ /* * Test a syscall within an active transaction and verify that * it fails with the correct failure code. */ - FAIL_IF(getppid_tm(false) != -1); /* Should fail... */ + FAIL_IF(getppid_tm(false, false) != -1); /* Should fail... */ FAIL_IF(!failure_is_persistent()); /* ...persistently... */ FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */ + + /* Now do it all again with scv if it is available. */ + if (have_hwcap2(PPC_FEATURE2_SCV)) { + FAIL_IF(getppid_tm(true, true) == -1); /* Should succeed. */ + FAIL_IF(getppid_tm(true, false) != -1); /* Should fail... */ + FAIL_IF(!failure_is_persistent()); /* ...persistently... */ + FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */ + } + gettimeofday(&now, 0); } diff --git a/tools/testing/selftests/vm/split_huge_page_test.c b/tools/testing/selftests/vm/split_huge_page_test.c index 1af16d2c2a0a..52497b7b9f1d 100644 --- a/tools/testing/selftests/vm/split_huge_page_test.c +++ b/tools/testing/selftests/vm/split_huge_page_test.c @@ -341,7 +341,7 @@ void split_file_backed_thp(void) } /* write something to the file, so a file-backed THP can be allocated */ - num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc)); + num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1); close(fd); if (num_written < 1) { diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 10ab56c2484a..60aa1a4fc69b 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -414,9 +414,6 @@ static void uffd_test_ctx_init_ext(uint64_t *features) uffd_test_ops->allocate_area((void **)&area_src); uffd_test_ops->allocate_area((void **)&area_dst); - uffd_test_ops->release_pages(area_src); - uffd_test_ops->release_pages(area_dst); - userfaultfd_open(features); count_verify = malloc(nr_pages * sizeof(unsigned long long)); @@ -437,6 +434,26 @@ static void uffd_test_ctx_init_ext(uint64_t *features) *(area_count(area_src, nr) + 1) = 1; } + /* + * After initialization of area_src, we must explicitly release pages + * for area_dst to make sure it's fully empty. 
Otherwise we could have + some area_dst pages be erroneously initialized with zero pages, + hence we could hit memory corruption later in the test. + * + * One example is when THP is globally enabled, the above allocate_area() + * calls could have the two areas merged into a single VMA (as they + * will have the same VMA flags so they're mergeable). When we + * initialize the area_src above, it's possible that some part of + * area_dst could have been faulted in via one huge THP that will be + * shared between area_src and area_dst. It could cause some of the + * area_dst pages not to be trapped by missing userfaults. + * + * This release_pages() will guarantee that even if that happened, we'll + * proactively split the THP and drop any accidentally initialized + * pages within area_dst. + */ + uffd_test_ops->release_pages(area_dst); + pipefd = malloc(sizeof(int) * nr_cpus * 2); if (!pipefd) err("pipefd");
diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c index cec6f5a738e1..fa927ad16f8a 100644 --- a/tools/testing/vsock/vsock_diag_test.c +++ b/tools/testing/vsock/vsock_diag_test.c @@ -332,8 +332,6 @@ static void test_no_sockets(const struct test_opts *opts) read_vsock_stat(&sockets); check_no_sockets(&sockets); - - free_sock_stat(&sockets); } static void test_listen_socket_server(const struct test_opts *opts)
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 67766bfe176f..2a3638c0a008 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -282,6 +282,7 @@ static void test_stream_msg_peek_server(const struct test_opts *opts) } #define MESSAGES_CNT 7 +#define MSG_EOR_IDX (MESSAGES_CNT / 2) static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) { int fd; @@ -294,7 +295,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) /* Send several messages, one with MSG_EOR flag */ for (int i = 0; i < MESSAGES_CNT; i++) - send_byte(fd, 1, 0); + send_byte(fd, 1, (i == MSG_EOR_IDX) ? MSG_EOR : 0); control_writeln("SENDDONE"); close(fd); @@ -324,6 +325,11 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) perror("message bound violated"); exit(EXIT_FAILURE); } + + if ((i == MSG_EOR_IDX) ^ !!(msg.msg_flags & MSG_EOR)) { + perror("MSG_EOR"); + exit(EXIT_FAILURE); + } } close(fd);
diff --git a/tools/thermal/tmon/Makefile b/tools/thermal/tmon/Makefile index 9db867df7679..f9c52b7fab7b 100644 --- a/tools/thermal/tmon/Makefile +++ b/tools/thermal/tmon/Makefile @@ -10,10 +10,9 @@ override CFLAGS+= $(call cc-option,-O3,-O1) ${WARNFLAGS} # Add "-fstack-protector" only if toolchain supports it. 
override CFLAGS+= $(call cc-option,-fstack-protector-strong) CC?= $(CROSS_COMPILE)gcc -PKG_CONFIG?= pkg-config +PKG_CONFIG?= $(CROSS_COMPILE)pkg-config override CFLAGS+=-D VERSION=\"$(VERSION)\" -LDFLAGS+= TARGET=tmon INSTALL_PROGRAM=install -m 755 -p @@ -33,7 +32,6 @@ override CFLAGS += $(shell $(PKG_CONFIG) --cflags $(STATIC) panelw ncursesw 2> / $(PKG_CONFIG) --cflags $(STATIC) panel ncurses 2> /dev/null) OBJS = tmon.o tui.o sysfs.o pid.o -OBJS += tmon: $(OBJS) Makefile tmon.h $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) -o $(TARGET) $(TMON_LIBS) @@ -42,15 +40,13 @@ valgrind: tmon sudo valgrind -v --track-origins=yes --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 --track-fds=yes ./$(TARGET) 1> /dev/null install: - - mkdir -p $(INSTALL_ROOT)/$(BINDIR) - - $(INSTALL_PROGRAM) "$(TARGET)" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)" + - $(INSTALL_PROGRAM) -D "$(TARGET)" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)" uninstall: $(DEL_FILE) "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)" clean: - find . -name "*.o" | xargs $(DEL_FILE) - rm -f $(TARGET) + rm -f $(TARGET) $(OBJS) dist: git tag v$(VERSION) diff --git a/tools/usb/testusb.c b/tools/usb/testusb.c index ee8208b2f946..69c3ead25313 100644 --- a/tools/usb/testusb.c +++ b/tools/usb/testusb.c @@ -265,12 +265,6 @@ nomem: } entry->ifnum = ifnum; - - /* FIXME update USBDEVFS_CONNECTINFO so it tells about high speed etc */ - - fprintf(stderr, "%s speed\t%s\t%u\n", - speed(entry->speed), entry->name, entry->ifnum); - entry->next = testdevs; testdevs = entry; return 0; @@ -299,6 +293,14 @@ static void *handle_testdev (void *arg) return 0; } + status = ioctl(fd, USBDEVFS_GET_SPEED, NULL); + if (status < 0) + fprintf(stderr, "USBDEVFS_GET_SPEED failed %d\n", status); + else + dev->speed = status; + fprintf(stderr, "%s speed\t%s\t%u\n", + speed(dev->speed), dev->name, dev->ifnum); + restart: for (i = 0; i < TEST_CASES; i++) { if (dev->test != -1 && dev->test != i) diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c index 0517c744b04e..f62f10c988db 100644 --- a/tools/vm/page-types.c +++ b/tools/vm/page-types.c @@ -1331,7 +1331,7 @@ int main(int argc, char *argv[]) if (opt_list && opt_list_mapcnt) kpagecount_fd = checked_open(PROC_KPAGECOUNT, O_RDONLY); - if (opt_mark_idle && opt_file) + if (opt_mark_idle) page_idle_fd = checked_open(SYS_KERNEL_MM_PAGE_IDLE, O_RDWR); if (opt_list && opt_pid) |
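As a small editorial illustration of the testusb.c hunk above: USBDEVFS_GET_SPEED asks usbfs for the enumerated speed of the device behind an open file descriptor, which is why the tool can now print the speed at test time instead of relying on a value cached when the device list was scanned. The sketch below is not part of the patch; the device node path is a placeholder, error handling is minimal, and the numeric mapping referenced in the comment comes from enum usb_device_speed in <linux/usb/ch9.h>.

/* Hedged sketch (not from the patch): query a usbfs node's speed. */
#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/usbdevice_fs.h>

int main(int argc, char **argv)
{
	/* Placeholder path; pass any node under /dev/bus/usb/ instead. */
	const char *node = argc > 1 ? argv[1] : "/dev/bus/usb/001/002";
	int fd = open(node, O_RDWR);
	int speed;

	if (fd < 0) {
		perror(node);
		return 1;
	}

	/* Returns a usb_device_speed value, e.g. 3 = high, 5 = super. */
	speed = ioctl(fd, USBDEVFS_GET_SPEED, NULL);
	if (speed < 0)
		perror("USBDEVFS_GET_SPEED");
	else
		printf("%s: usb_device_speed %d\n", node, speed);

	close(fd);
	return speed < 0 ? 1 : 0;
}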